parselly 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -9
- data/Rakefile +10 -0
- data/lib/parselly/lexer.rb +278 -68
- data/lib/parselly/node.rb +434 -205
- data/lib/parselly/parser.rb +799 -325
- data/lib/parselly/version.rb +1 -1
- data/lib/parselly.rb +57 -10
- data/parser.y +454 -101
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5bc554d1e8b9c0bba096e513d21aa054f35589a7e52f1c0e2108c0639ec1027b
|
|
4
|
+
data.tar.gz: 55f3bec1107b38b70bdd4995375fe8cab816036fe86d6cf27e96566689cce03f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5dc070854c73e51ac4e21dde9910cd687c788f2bdfe99c9c22870d2e73222dcfc9679cf26e54e9c81032284a201b840e0af395958347f3e0bc10967294f990d0
|
|
7
|
+
data.tar.gz: 37a0b28ade0b3d74a062fee148d18d7448f200ac04bbb107bdf036a92808b7dc5b803a179dbb4363100fb290351d14e504651a07ac74b9dd68829e5c36af3a28
|
data/README.md
CHANGED
|
@@ -1,30 +1,77 @@
|
|
|
1
1
|
# Parselly [Gem Version](https://badge.fury.io/rb/parselly) [Test](https://github.com/ydah/parselly/actions/workflows/test.yml)
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Pure Ruby CSS selector parser.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
|
-
Add this line to your application's Gemfile:
|
|
8
7
|
```ruby
|
|
9
8
|
gem 'parselly'
|
|
10
9
|
```
|
|
11
10
|
|
|
12
|
-
And then execute:
|
|
13
11
|
```bash
|
|
14
12
|
bundle install
|
|
15
13
|
```
|
|
16
14
|
|
|
17
|
-
Or install it
|
|
15
|
+
Or install it directly:
|
|
16
|
+
|
|
18
17
|
```bash
|
|
19
18
|
gem install parselly
|
|
20
19
|
```
|
|
21
20
|
|
|
22
|
-
|
|
21
|
+
Requires Ruby 2.7 or newer.
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
require 'parselly'
|
|
27
|
+
|
|
28
|
+
ast = Parselly.parse('article#main.content[data-state="open"] > a:hover')
|
|
29
|
+
|
|
30
|
+
ast.ids
|
|
31
|
+
#=> ["main"]
|
|
32
|
+
|
|
33
|
+
ast.attributes
|
|
34
|
+
#=> [{ name: "data-state", operator: "=", value: "open" }]
|
|
35
|
+
|
|
36
|
+
ast.pseudo_class_names
|
|
37
|
+
#=> ["hover"]
|
|
38
|
+
|
|
39
|
+
ast.specificity
|
|
40
|
+
#=> [1, 3, 2]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Strict parsing raises `Parselly::LexError` or `Parselly::SyntaxError` for invalid selectors:
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
Parselly.parse('div >')
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Use tolerant mode when you want a `Parselly::ParseResult` instead:
|
|
50
|
+
|
|
51
|
+
```ruby
|
|
52
|
+
result = Parselly.parse('div >', tolerant: true)
|
|
53
|
+
|
|
54
|
+
result.success?
|
|
55
|
+
#=> false
|
|
56
|
+
|
|
57
|
+
result.errors.first[:message]
|
|
58
|
+
#=> "Parse error: unexpected $end '' at 1:6"
|
|
59
|
+
```
|
|
23
60
|
|
|
24
|
-
|
|
61
|
+
Use `Parselly.sanitize` to escape text for a CSS identifier:
|
|
25
62
|
|
|
26
|
-
|
|
63
|
+
```ruby
|
|
64
|
+
Parselly.sanitize('1st item')
|
|
65
|
+
#=> "\\31 st\\ item"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Development
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
bin/setup
|
|
72
|
+
bundle exec rake
|
|
73
|
+
```
|
|
27
74
|
|
|
28
|
-
##
|
|
75
|
+
## License
|
|
29
76
|
|
|
30
|
-
|
|
77
|
+
MIT
|
data/Rakefile
CHANGED
|
@@ -7,6 +7,16 @@ namespace 'build' do
|
|
|
7
7
|
task :parser do
|
|
8
8
|
sh 'bundle exec racc parser.y --embedded --frozen -o lib/parselly/parser.rb -t --log-file=parser.output'
|
|
9
9
|
end
|
|
10
|
+
|
|
11
|
+
desc 'verify generated parser files are in sync'
|
|
12
|
+
task check_parser: :parser do
|
|
13
|
+
sh 'git diff --exit-code lib/parselly/parser.rb parser.output'
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
desc 'run parser benchmarks'
|
|
18
|
+
task :benchmark do
|
|
19
|
+
ruby 'benchmark/parser_benchmark.rb'
|
|
10
20
|
end
|
|
11
21
|
|
|
12
22
|
require 'rspec/core/rake_task'
|
data/lib/parselly/lexer.rb
CHANGED
|
@@ -5,12 +5,76 @@ require 'strscan'
|
|
|
5
5
|
module Parselly
|
|
6
6
|
class Lexer
|
|
7
7
|
# An identifier token payload: +value+ is the unescaped text, +raw+ the text
# as it appeared in the input.
Identifier = Struct.new(:value, :raw) do
  # Source position of the identifier; deliberately not a Struct member.
  attr_accessor :position

  # @return [Object] the unescaped identifier value
  def to_s
    value
  end

  # Compares by +value+ only, against either another value-carrying object
  # or a bare value such as a String.
  def ==(other)
    counterpart = other.respond_to?(:value) ? other.value : other
    value == counterpart
  end
end
|
|
18
|
+
|
|
19
|
+
# Payload for tokens that carry extra lexical detail (unescaped +value+,
# +raw+ source text, +position+ and the delimiting +quote+).
TokenValue = Struct.new(:value, :raw, :position, :quote, keyword_init: true) do
  # @return [String] the value rendered as a string
  def to_s
    value.to_s
  end

  # Compares by +value+ only, against either another value-carrying object
  # or a bare value such as a String.
  def ==(other)
    counterpart = other.respond_to?(:value) ? other.value : other
    value == counterpart
  end
end
|
|
28
|
+
|
|
29
|
+
# A lexer token that can also be used like the three-element array
# [type, value, position].
Token = Struct.new(:type, :value, :position, keyword_init: true) do
  # Array-style read access. Integer (and other Array-compatible) indexes go
  # through the array form, so out-of-range integers return nil. Symbol and
  # String keys fall back to Struct's member lookup so +token[:type]+ keeps
  # working.
  def [](index)
    return super if index.is_a?(Symbol) || index.is_a?(String)

    to_ary[index]
  end

  # Array-style write access for indexes 0..2, plus Struct's name-based
  # assignment for Symbol and String keys.
  #
  # @raise [IndexError] for any other index
  def []=(index, new_value)
    case index
    when 0
      self.type = new_value
    when 1
      self.value = new_value
    when 2
      self.position = new_value
    when Symbol, String
      super
    else
      raise IndexError, "index #{index} outside of token"
    end
  end

  # @return [Object] the token type (first array element)
  def first
    type
  end

  # @return [Object] the token position (last array element)
  def last
    position
  end

  # Enables implicit destructuring: +type, value, position = token+.
  def to_ary
    [type, value, position]
  end

  alias to_a to_ary

  # Compares against anything array-like. Type and value must be equal; when
  # both positions are Hashes, only the keys present in the other position
  # are compared, so a partial position matches.
  def ==(other)
    return super unless other.respond_to?(:to_ary)

    other_type, other_value, other_position = other.to_ary
    return false unless type == other_type
    return false unless value == other_value
    return position == other_position unless position.is_a?(Hash) && other_position.is_a?(Hash)

    other_position.all? { |key, expected| position[key] == expected }
  end
end
|
|
12
72
|
|
|
13
73
|
TOKENS = {
|
|
74
|
+
# Namespace and column combinators
|
|
75
|
+
'|' => :PIPE,
|
|
76
|
+
'||' => :COLUMN,
|
|
77
|
+
|
|
14
78
|
# Combinators
|
|
15
79
|
'>' => :CHILD,
|
|
16
80
|
'+' => :ADJACENT,
|
|
@@ -37,27 +101,42 @@ module Parselly
|
|
|
37
101
|
'*=' => :SUBSTRINGMATCH
|
|
38
102
|
}.freeze
|
|
39
103
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
SINGLE_CHAR_OPERATOR_REGEX = /[
|
|
50
|
-
WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
IDENTIFIER_REGEX = /
|
|
104
|
+
MULTI_CHAR_TOKENS = {
|
|
105
|
+
'~=' => :INCLUDES,
|
|
106
|
+
'|=' => :DASHMATCH,
|
|
107
|
+
'^=' => :PREFIXMATCH,
|
|
108
|
+
'$=' => :SUFFIXMATCH,
|
|
109
|
+
'*=' => :SUBSTRINGMATCH,
|
|
110
|
+
'||' => :COLUMN
|
|
111
|
+
}.freeze
|
|
112
|
+
|
|
113
|
+
SINGLE_CHAR_OPERATOR_REGEX = /[|>+~\[\]():,.#*=-]/.freeze
|
|
114
|
+
WHITESPACE_REGEX = /[ \t\n\r\f]+/.freeze
|
|
115
|
+
COMMENT_REGEX = %r{/\*[^*]*\*+(?:[^/*][^*]*\*+)*/}.freeze
|
|
116
|
+
ESCAPE_SEQUENCE = /\\(?:[0-9a-fA-F]{1,6}[ \t\n\r\f]?|[^\n\r\f])/.freeze
|
|
117
|
+
IDENTIFIER_REGEX = /
|
|
118
|
+
(?:
|
|
119
|
+
--
|
|
120
|
+
|
|
|
121
|
+
-?(?:[a-zA-Z_]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})
|
|
122
|
+
)
|
|
123
|
+
(?:[a-zA-Z0-9_-]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})*
|
|
124
|
+
/x.freeze
|
|
54
125
|
NUMBER_REGEX = /\d+(\.\d+)?/.freeze
|
|
55
|
-
|
|
126
|
+
HEX_ESCAPE_REGEX = /\\([0-9a-fA-F]{1,6})([ \t\n\r\f])?/.freeze
|
|
127
|
+
ESCAPED_NEWLINE_REGEX = /\\(?:\r\n|[\n\r\f])/.freeze
|
|
128
|
+
SIMPLE_ESCAPE_REGEX = /\\([^\n\r\f])/.freeze
|
|
129
|
+
REPLACEMENT_CHARACTER = "\uFFFD"
|
|
56
130
|
|
|
57
131
|
attr_reader :line, :column
|
|
58
132
|
|
|
59
133
|
def initialize(input)
|
|
60
|
-
|
|
134
|
+
unless input.valid_encoding?
|
|
135
|
+
raise_lexer_error('Invalid input encoding', { line: 1, column: 1, offset: 0 })
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
preprocessed_input, @offset_map = preprocess_input(input)
|
|
139
|
+
@scanner = StringScanner.new(preprocessed_input)
|
|
61
140
|
@line = 1
|
|
62
141
|
@column = 1
|
|
63
142
|
@tokens = []
|
|
@@ -65,54 +144,99 @@ module Parselly
|
|
|
65
144
|
|
|
66
145
|
# Converts the whole input into tokens.
#
# Each iteration skips ignorable input, then tries, in order, a string, a
# number, an operator and an identifier. Anything else is reported through
# raise_lexer_error. A final token with type +false+ marks end of input.
#
# @return [Array<Token>] the accumulated tokens, including the end marker
def tokenize
  until @scanner.eos?
    skip_ignored
    break if @scanner.eos?

    start_position = current_position
    scan_start = @scanner.pos

    if (token = scan_string(start_position))
      type, value = token
      @tokens << build_token(type, value, start_position)
    elsif (value = scan_number)
      @tokens << build_token(:NUMBER, value, start_position)
    elsif (type = scan_operator)
      # Multi-character operators are consumed by moving the scan pointer,
      # which leaves StringScanner#matched unset or stale. Slice the consumed
      # bytes instead so every operator token carries its own text.
      operator_text = @scanner.string.byteslice(scan_start, @scanner.pos - scan_start)
      @tokens << build_token(type, operator_text, start_position)
    elsif (value = scan_identifier(start_position))
      @tokens << build_token(:IDENT, value, start_position)
    else
      char = @scanner.getch
      raise_lexer_error("Unexpected character: #{char}", start_position)
    end
  end

  @tokens << Token.new(type: false, value: nil, position: eof_position)
  @tokens
end
|
|
90
170
|
|
|
91
171
|
private
|
|
92
172
|
|
|
93
|
-
def
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
173
|
+
# Normalizes the raw input before scanning and records how byte offsets in
# the normalized text map back to byte offsets in the original.
#
# CR, CRLF and form feed each become a single "\n"; NUL and characters that
# surrogate_codepoint? flags become REPLACEMENT_CHARACTER; everything else is
# copied unchanged.
#
# @param input [String] the original selector text
# @return [Array(String, Hash)] normalized text and the offset map
def preprocess_input(input)
  normalized = +''
  mapping = { 0 => 0 }
  characters = input.chars
  consumed_bytes = 0
  cursor = 0

  until cursor >= characters.length
    current = characters[cursor]
    span_start = consumed_bytes
    consumed_bytes += current.bytesize
    cursor += 1

    replacement =
      case current
      when "\r"
        # Fold a following "\n" into the same line break.
        if characters[cursor] == "\n"
          consumed_bytes += characters[cursor].bytesize
          cursor += 1
        end
        "\n"
      when "\f"
        "\n"
      when "\0"
        REPLACEMENT_CHARACTER
      else
        surrogate_codepoint?(current) ? REPLACEMENT_CHARACTER : current
      end

    append_preprocessed(normalized, mapping, replacement, span_start, consumed_bytes)
  end

  mapping[normalized.bytesize] = consumed_bytes
  [normalized, mapping]
end
|
|
105
205
|
|
|
106
|
-
def
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
206
|
+
# Appends +value+ to +output+ and records, in +offset_map+, the original
# offsets that correspond to the byte positions just before and just after
# the appended text.
def append_preprocessed(output, offset_map, value, original_start, original_end)
  offset_map.store(output.bytesize, original_start)
  output.concat(value)
  offset_map.store(output.bytesize, original_end)
end
|
|
211
|
+
|
|
212
|
+
# @param char [String] a single character
# @return [Boolean] true when the character's codepoint lies in U+D800..U+DFFF
def surrogate_codepoint?(char)
  codepoint = char.ord
  codepoint >= 0xD800 && codepoint <= 0xDFFF
end
|
|
215
|
+
|
|
216
|
+
# Advances the scanner past any run of whitespace and /* ... */ comments,
# keeping line/column tracking up to date.
#
# @raise via raise_lexer_error when a comment is opened but never closed;
#   the reported position is the comment's start
def skip_ignored
  loop do
    if @scanner.scan(WHITESPACE_REGEX)
      update_position(@scanner.matched)
    elsif @scanner.peek(2) == '/*'
      # Use current_position so the reported offset is mapped the same way as
      # every other position produced by the lexer, instead of exposing the
      # scanner's raw offset into the preprocessed text.
      comment_start = current_position
      raise_lexer_error('Unterminated comment', comment_start) unless @scanner.scan(COMMENT_REGEX)

      update_position(@scanner.matched)
    else
      break
    end
  end
end
|
|
231
|
+
|
|
232
|
+
def scan_operator
|
|
233
|
+
two_chars = @scanner.peek(2)
|
|
234
|
+
if (token = MULTI_CHAR_TOKENS[two_chars])
|
|
235
|
+
@scanner.pos += 2
|
|
236
|
+
update_position(two_chars)
|
|
237
|
+
return token
|
|
238
|
+
end
|
|
114
239
|
|
|
115
|
-
# Single character operators
|
|
116
240
|
return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)
|
|
117
241
|
|
|
118
242
|
char = @scanner.matched
|
|
@@ -120,25 +244,26 @@ module Parselly
|
|
|
120
244
|
TOKENS[char]
|
|
121
245
|
end
|
|
122
246
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
str[1..-2] # Remove quotes
|
|
247
|
+
# Scans a quoted string starting at the scanner's current position.
#
# The string ends at the matching quote (:STRING), at an unescaped newline
# (:BAD_STRING, newline left unconsumed) or at end of input (:STRING).
#
# @param position [Hash] start position stored on the resulting token value
# @return [Array, nil] the pair built by build_string_token, or nil when the
#   input does not start with a quote
def scan_string(position)
  quote = @scanner.peek(1)
  return unless quote == '"' || quote == "'"

  @scanner.getch
  update_position(quote)
  raw = +''

  until @scanner.eos?
    char = @scanner.peek(1)

    if char == quote
      # Closing delimiter: consume it without adding it to the content.
      @scanner.getch
      update_position(char)
      return build_string_token(:STRING, raw, position, quote)
    end

    return build_string_token(:BAD_STRING, raw, position, quote) if newline?(char)

    if char == '"' || char == "'"
      # The other quote character is ordinary content here ("it's").
      # consume_string_char discards every quote character, so append it
      # directly rather than silently dropping it from the value.
      @scanner.getch
      update_position(char)
      raw << char
    else
      consume_string_char(raw)
    end
  end

  build_string_token(:STRING, raw, position, quote)
end
|
|
140
265
|
|
|
141
|
-
def scan_identifier
|
|
266
|
+
def scan_identifier(position)
|
|
142
267
|
# Match identifiers with optional escape sequences
|
|
143
268
|
# CSS allows \<any-char> as escape in identifiers (e.g., .hover\:bg-blue-500)
|
|
144
269
|
#
|
|
@@ -150,9 +275,7 @@ module Parselly
|
|
|
150
275
|
|
|
151
276
|
ident = @scanner.matched
|
|
152
277
|
update_position(ident)
|
|
153
|
-
|
|
154
|
-
normalized = ident.gsub(ESCAPE_REGEX, '\1')
|
|
155
|
-
Identifier.new(normalized, ident)
|
|
278
|
+
Identifier.new(unescape_css(ident), ident).tap { |identifier| identifier.position = position }
|
|
156
279
|
end
|
|
157
280
|
|
|
158
281
|
def scan_number
|
|
@@ -163,15 +286,102 @@ module Parselly
|
|
|
163
286
|
num
|
|
164
287
|
end
|
|
165
288
|
|
|
289
|
+
# Consumes one unit of string content from the scanner.
#
# A quote character is consumed but not appended to +raw+. Any other
# character is appended; when it is a backslash and more input remains, the
# following character is consumed and appended too, so an escape pair always
# stays together.
#
# @param raw [String] buffer receiving the raw string content
# @return [true] always
def consume_string_char(raw)
  current = @scanner.getch
  update_position(current)
  return true if ['"', "'"].include?(current)

  raw << current
  if current == '\\' && !@scanner.eos?
    following = @scanner.getch
    update_position(following)
    raw << following
  end

  true
end
|
|
303
|
+
|
|
304
|
+
# Pairs a token type with a TokenValue holding both the unescaped and the
# raw string content, plus its start position and delimiting quote.
#
# @return [Array] two-element array: [type, TokenValue]
def build_string_token(type, raw, position, quote)
  payload = TokenValue.new(
    value: unescape_css(raw),
    raw: raw,
    position: position,
    quote: quote
  )
  [type, payload]
end
|
|
307
|
+
|
|
308
|
+
# @param char [String] a single character
# @return [Boolean] true for line feed, carriage return or form feed
def newline?(char)
  ["\n", "\r", "\f"].include?(char)
end
|
|
311
|
+
|
|
166
312
|
# Advances the tracked line/column past +text+.
#
# Text without a line break only moves the column. Otherwise the line
# counter grows by the number of breaks ("\r\n" counts once) and the column
# restarts after the last break.
#
# @param text [String] the text that was just consumed
def update_position(text)
  pieces = text.split(/\r\n|[\n\r\f]/, -1)

  # Zero or one piece means the text contained no line break at all.
  if pieces.length <= 1
    @column += text.length
    return
  end

  @line += pieces.length - 1
  @column = pieces.last.length + 1
end
|
|
322
|
+
|
|
323
|
+
# @return [Hash] the tracked line and column plus the scanner offset mapped
#   through original_offset
def current_position
  mapped = original_offset(@scanner.pos)
  { line: @line, column: @column, offset: mapped }
end
|
|
326
|
+
|
|
327
|
+
# Looks up +preprocessed_offset+ in the offset map, falling back to the
# offset itself when the map has no entry for it.
def original_offset(preprocessed_offset)
  @offset_map.fetch(preprocessed_offset) { preprocessed_offset }
end
|
|
330
|
+
|
|
331
|
+
# Builds a Token whose position covers the span from +start_position+ to
# the lexer's current line, column and mapped offset.
#
# The same position hash is also assigned to +value+ when it exposes a
# +position=+ writer.
#
# @return [Token]
def build_token(type, value, start_position)
  span = {
    start_line: start_position[:line],
    start_column: start_position[:column],
    start_offset: start_position[:offset],
    end_line: @line,
    end_column: @column,
    end_offset: original_offset(@scanner.pos)
  }
  position = start_position.merge(span)

  value.position = position if value.respond_to?(:position=)
  Token.new(type: type, value: value, position: position)
end
|
|
344
|
+
|
|
345
|
+
# @return [Hash] the current position extended with start/end keys that all
#   point at the same place, for the end-of-input token
def eof_position
  here = original_offset(@scanner.pos)
  marker = {
    start_line: @line,
    start_column: @column,
    start_offset: here,
    end_line: @line,
    end_column: @column,
    end_offset: here
  }
  current_position.merge(marker)
end
|
|
355
|
+
|
|
356
|
+
# Resolves CSS escapes in +value+ in a single left-to-right pass.
#
# * backslash + line break  -> removed
# * backslash + 1-6 hex digits (+ one optional whitespace) -> decoded via
#   decode_hex_escape
# * backslash + any other non-newline character -> that character
#
# A single pass matters: the output of one escape must never be re-read as
# the start of another. Sequential passes turn an escaped backslash followed
# by "41" into "A", and drop a backslash produced by a hex escape.
#
# @param value [String] raw text containing CSS escapes
# @return [String] the unescaped text
def unescape_css(value)
  # /o: the constants never change, so build the combined pattern once.
  # Capture 1 = hex digits, capture 2 = trailing whitespace, capture 3 = the
  # simply-escaped character; an escaped line break sets no capture.
  pattern = /#{ESCAPED_NEWLINE_REGEX}|#{HEX_ESCAPE_REGEX}|#{SIMPLE_ESCAPE_REGEX}/o

  value.gsub(pattern) do
    escape = Regexp.last_match
    if escape[1]
      decode_hex_escape(escape[1])
    else
      escape[3].to_s
    end
  end
end
|
|
362
|
+
|
|
363
|
+
# Turns the hex digits of a CSS escape into the character they name.
#
# Zero, values above U+10FFFF and any codepoint Ruby refuses to encode as
# UTF-8 yield REPLACEMENT_CHARACTER instead.
#
# @param hex [String] hexadecimal digits without the leading backslash
# @return [String] a single UTF-8 character
def decode_hex_escape(hex)
  scalar = hex.to_i(16)
  return REPLACEMENT_CHARACTER unless scalar.between?(1, 0x10FFFF)

  begin
    scalar.chr(Encoding::UTF_8)
  rescue RangeError
    REPLACEMENT_CHARACTER
  end
end
|
|
371
|
+
|
|
372
|
+
# Raises a lexer error describing +message+ at +position+.
#
# When Parselly::LexError is defined it is raised with a details hash
# (message, line, column, offset); otherwise a RuntimeError carrying the
# formatted message is raised.
#
# @param message [String] what went wrong
# @param position [Hash] must respond to [] for :line, :column and :offset
def raise_lexer_error(message, position)
  line = position[:line]
  column = position[:column]
  offset = position[:offset]

  details = {
    message: "#{message} at #{line}:#{column} (offset #{offset})",
    line: line,
    column: column,
    offset: offset
  }

  raise Parselly::LexError, details if defined?(Parselly::LexError)

  raise details[:message]
end
|
|
176
386
|
end
|
|
177
387
|
end
|