parselly 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -9
- data/Rakefile +10 -0
- data/lib/parselly/lexer.rb +284 -67
- data/lib/parselly/node.rb +491 -177
- data/lib/parselly/parser.rb +874 -295
- data/lib/parselly/version.rb +1 -1
- data/lib/parselly.rb +65 -3
- data/parser.y +528 -77
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5bc554d1e8b9c0bba096e513d21aa054f35589a7e52f1c0e2108c0639ec1027b
|
|
4
|
+
data.tar.gz: 55f3bec1107b38b70bdd4995375fe8cab816036fe86d6cf27e96566689cce03f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5dc070854c73e51ac4e21dde9910cd687c788f2bdfe99c9c22870d2e73222dcfc9679cf26e54e9c81032284a201b840e0af395958347f3e0bc10967294f990d0
|
|
7
|
+
data.tar.gz: 37a0b28ade0b3d74a062fee148d18d7448f200ac04bbb107bdf036a92808b7dc5b803a179dbb4363100fb290351d14e504651a07ac74b9dd68829e5c36af3a28
|
data/README.md
CHANGED
|
@@ -1,30 +1,77 @@
|
|
|
1
1
|
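# Parselly [![Gem Version](https://badge.fury.io/rb/parselly.svg)](https://badge.fury.io/rb/parselly) [![test](https://github.com/ydah/parselly/actions/workflows/test.yml/badge.svg)](https://github.com/ydah/parselly/actions/workflows/test.yml)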
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Pure Ruby CSS selector parser.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
|
-
Add this line to your application's Gemfile:
|
|
8
7
|
```ruby
|
|
9
8
|
gem 'parselly'
|
|
10
9
|
```
|
|
11
10
|
|
|
12
|
-
And then execute:
|
|
13
11
|
```bash
|
|
14
12
|
bundle install
|
|
15
13
|
```
|
|
16
14
|
|
|
17
|
-
Or install it
|
|
15
|
+
Or install it directly:
|
|
16
|
+
|
|
18
17
|
```bash
|
|
19
18
|
gem install parselly
|
|
20
19
|
```
|
|
21
20
|
|
|
22
|
-
|
|
21
|
+
Requires Ruby 2.7 or newer.
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
require 'parselly'
|
|
27
|
+
|
|
28
|
+
ast = Parselly.parse('article#main.content[data-state="open"] > a:hover')
|
|
29
|
+
|
|
30
|
+
ast.ids
|
|
31
|
+
#=> ["main"]
|
|
32
|
+
|
|
33
|
+
ast.attributes
|
|
34
|
+
#=> [{ name: "data-state", operator: "=", value: "open" }]
|
|
35
|
+
|
|
36
|
+
ast.pseudo_class_names
|
|
37
|
+
#=> ["hover"]
|
|
38
|
+
|
|
39
|
+
ast.specificity
|
|
40
|
+
#=> [1, 3, 2]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Strict parsing raises `Parselly::LexError` or `Parselly::SyntaxError` for invalid selectors:
|
|
44
|
+
|
|
45
|
+
```ruby
|
|
46
|
+
Parselly.parse('div >')
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Use tolerant mode when you want a `Parselly::ParseResult` instead:
|
|
50
|
+
|
|
51
|
+
```ruby
|
|
52
|
+
result = Parselly.parse('div >', tolerant: true)
|
|
53
|
+
|
|
54
|
+
result.success?
|
|
55
|
+
#=> false
|
|
56
|
+
|
|
57
|
+
result.errors.first[:message]
|
|
58
|
+
#=> "Parse error: unexpected $end '' at 1:6"
|
|
59
|
+
```
|
|
23
60
|
|
|
24
|
-
|
|
61
|
+
Use `Parselly.sanitize` to escape text for a CSS identifier:
|
|
25
62
|
|
|
26
|
-
|
|
63
|
+
```ruby
|
|
64
|
+
Parselly.sanitize('1st item')
|
|
65
|
+
#=> "\\31 st\\ item"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Development
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
bin/setup
|
|
72
|
+
bundle exec rake
|
|
73
|
+
```
|
|
27
74
|
|
|
28
|
-
##
|
|
75
|
+
## License
|
|
29
76
|
|
|
30
|
-
|
|
77
|
+
MIT
|
data/Rakefile
CHANGED
|
@@ -7,6 +7,16 @@ namespace 'build' do
|
|
|
7
7
|
task :parser do
|
|
8
8
|
sh 'bundle exec racc parser.y --embedded --frozen -o lib/parselly/parser.rb -t --log-file=parser.output'
|
|
9
9
|
end
|
|
10
|
+
|
|
11
|
+
desc 'verify generated parser files are in sync'
|
|
12
|
+
task check_parser: :parser do
|
|
13
|
+
sh 'git diff --exit-code lib/parselly/parser.rb parser.output'
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
desc 'run parser benchmarks'
|
|
18
|
+
task :benchmark do
|
|
19
|
+
ruby 'benchmark/parser_benchmark.rb'
|
|
10
20
|
end
|
|
11
21
|
|
|
12
22
|
require 'rspec/core/rake_task'
|
data/lib/parselly/lexer.rb
CHANGED
|
@@ -4,7 +4,77 @@ require 'strscan'
|
|
|
4
4
|
|
|
5
5
|
module Parselly
|
|
6
6
|
class Lexer
|
|
7
|
+
Identifier = Struct.new(:value, :raw) do
|
|
8
|
+
attr_accessor :position
|
|
9
|
+
|
|
10
|
+
def to_s
|
|
11
|
+
value
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def ==(other)
|
|
15
|
+
other.respond_to?(:value) ? value == other.value : value == other
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
TokenValue = Struct.new(:value, :raw, :position, :quote, keyword_init: true) do
|
|
20
|
+
def to_s
|
|
21
|
+
value.to_s
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def ==(other)
|
|
25
|
+
other.respond_to?(:value) ? value == other.value : value == other
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
Token = Struct.new(:type, :value, :position, keyword_init: true) do
|
|
30
|
+
def [](index)
|
|
31
|
+
to_ary[index]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def []=(index, new_value)
|
|
35
|
+
case index
|
|
36
|
+
when 0
|
|
37
|
+
self.type = new_value
|
|
38
|
+
when 1
|
|
39
|
+
self.value = new_value
|
|
40
|
+
when 2
|
|
41
|
+
self.position = new_value
|
|
42
|
+
else
|
|
43
|
+
raise IndexError, "index #{index} outside of token"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def first
|
|
48
|
+
type
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def last
|
|
52
|
+
position
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def to_ary
|
|
56
|
+
[type, value, position]
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
alias to_a to_ary
|
|
60
|
+
|
|
61
|
+
def ==(other)
|
|
62
|
+
return super unless other.respond_to?(:to_ary)
|
|
63
|
+
|
|
64
|
+
other_type, other_value, other_position = other.to_ary
|
|
65
|
+
return false unless type == other_type
|
|
66
|
+
return false unless value == other_value
|
|
67
|
+
return position == other_position unless position.is_a?(Hash) && other_position.is_a?(Hash)
|
|
68
|
+
|
|
69
|
+
other_position.all? { |key, expected| position[key] == expected }
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
7
73
|
TOKENS = {
|
|
74
|
+
# Namespace and column combinators
|
|
75
|
+
'|' => :PIPE,
|
|
76
|
+
'||' => :COLUMN,
|
|
77
|
+
|
|
8
78
|
# Combinators
|
|
9
79
|
'>' => :CHILD,
|
|
10
80
|
'+' => :ADJACENT,
|
|
@@ -31,27 +101,42 @@ module Parselly
|
|
|
31
101
|
'*=' => :SUBSTRINGMATCH
|
|
32
102
|
}.freeze
|
|
33
103
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
SINGLE_CHAR_OPERATOR_REGEX = /[
|
|
44
|
-
WHITESPACE_REGEX = /[ \t\n\r]+/.freeze
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
IDENTIFIER_REGEX = /
|
|
104
|
+
MULTI_CHAR_TOKENS = {
|
|
105
|
+
'~=' => :INCLUDES,
|
|
106
|
+
'|=' => :DASHMATCH,
|
|
107
|
+
'^=' => :PREFIXMATCH,
|
|
108
|
+
'$=' => :SUFFIXMATCH,
|
|
109
|
+
'*=' => :SUBSTRINGMATCH,
|
|
110
|
+
'||' => :COLUMN
|
|
111
|
+
}.freeze
|
|
112
|
+
|
|
113
|
+
SINGLE_CHAR_OPERATOR_REGEX = /[|>+~\[\]():,.#*=-]/.freeze
|
|
114
|
+
WHITESPACE_REGEX = /[ \t\n\r\f]+/.freeze
|
|
115
|
+
COMMENT_REGEX = %r{/\*[^*]*\*+(?:[^/*][^*]*\*+)*/}.freeze
|
|
116
|
+
ESCAPE_SEQUENCE = /\\(?:[0-9a-fA-F]{1,6}[ \t\n\r\f]?|[^\n\r\f])/.freeze
|
|
117
|
+
IDENTIFIER_REGEX = /
|
|
118
|
+
(?:
|
|
119
|
+
--
|
|
120
|
+
|
|
|
121
|
+
-?(?:[a-zA-Z_]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})
|
|
122
|
+
)
|
|
123
|
+
(?:[a-zA-Z0-9_-]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})*
|
|
124
|
+
/x.freeze
|
|
48
125
|
NUMBER_REGEX = /\d+(\.\d+)?/.freeze
|
|
49
|
-
|
|
126
|
+
HEX_ESCAPE_REGEX = /\\([0-9a-fA-F]{1,6})([ \t\n\r\f])?/.freeze
|
|
127
|
+
ESCAPED_NEWLINE_REGEX = /\\(?:\r\n|[\n\r\f])/.freeze
|
|
128
|
+
SIMPLE_ESCAPE_REGEX = /\\([^\n\r\f])/.freeze
|
|
129
|
+
REPLACEMENT_CHARACTER = "\uFFFD"
|
|
50
130
|
|
|
51
131
|
attr_reader :line, :column
|
|
52
132
|
|
|
53
133
|
def initialize(input)
|
|
54
|
-
|
|
134
|
+
unless input.valid_encoding?
|
|
135
|
+
raise_lexer_error('Invalid input encoding', { line: 1, column: 1, offset: 0 })
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
preprocessed_input, @offset_map = preprocess_input(input)
|
|
139
|
+
@scanner = StringScanner.new(preprocessed_input)
|
|
55
140
|
@line = 1
|
|
56
141
|
@column = 1
|
|
57
142
|
@tokens = []
|
|
@@ -59,54 +144,99 @@ module Parselly
|
|
|
59
144
|
|
|
60
145
|
def tokenize
|
|
61
146
|
until @scanner.eos?
|
|
62
|
-
|
|
147
|
+
skip_ignored
|
|
63
148
|
break if @scanner.eos?
|
|
64
149
|
|
|
65
|
-
|
|
150
|
+
start_position = current_position
|
|
66
151
|
|
|
67
|
-
if (token = scan_string)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
152
|
+
if (token = scan_string(start_position))
|
|
153
|
+
type, value = token
|
|
154
|
+
@tokens << build_token(type, value, start_position)
|
|
155
|
+
elsif (value = scan_number)
|
|
156
|
+
@tokens << build_token(:NUMBER, value, start_position)
|
|
157
|
+
elsif (type = scan_operator)
|
|
158
|
+
@tokens << build_token(type, @scanner.matched, start_position)
|
|
159
|
+
elsif (value = scan_identifier(start_position))
|
|
160
|
+
@tokens << build_token(:IDENT, value, start_position)
|
|
75
161
|
else
|
|
76
162
|
char = @scanner.getch
|
|
77
|
-
|
|
163
|
+
raise_lexer_error("Unexpected character: #{char}", start_position)
|
|
78
164
|
end
|
|
79
165
|
end
|
|
80
166
|
|
|
81
|
-
@tokens <<
|
|
167
|
+
@tokens << Token.new(type: false, value: nil, position: eof_position)
|
|
82
168
|
@tokens
|
|
83
169
|
end
|
|
84
170
|
|
|
85
171
|
private
|
|
86
172
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
173
|
+
def preprocess_input(input)
|
|
174
|
+
output = +''
|
|
175
|
+
offset_map = { 0 => 0 }
|
|
176
|
+
chars = input.each_char.to_a
|
|
177
|
+
original_offset = 0
|
|
178
|
+
index = 0
|
|
179
|
+
|
|
180
|
+
while index < chars.length
|
|
181
|
+
char = chars[index]
|
|
182
|
+
original_start = original_offset
|
|
183
|
+
original_offset += char.bytesize
|
|
184
|
+
|
|
185
|
+
if char == "\r"
|
|
186
|
+
if chars[index + 1] == "\n"
|
|
187
|
+
index += 1
|
|
188
|
+
original_offset += chars[index].bytesize
|
|
189
|
+
end
|
|
190
|
+
append_preprocessed(output, offset_map, "\n", original_start, original_offset)
|
|
191
|
+
elsif char == "\f"
|
|
192
|
+
append_preprocessed(output, offset_map, "\n", original_start, original_offset)
|
|
193
|
+
elsif char == "\0" || surrogate_codepoint?(char)
|
|
194
|
+
append_preprocessed(output, offset_map, REPLACEMENT_CHARACTER, original_start, original_offset)
|
|
94
195
|
else
|
|
95
|
-
|
|
196
|
+
append_preprocessed(output, offset_map, char, original_start, original_offset)
|
|
96
197
|
end
|
|
198
|
+
|
|
199
|
+
index += 1
|
|
97
200
|
end
|
|
201
|
+
|
|
202
|
+
offset_map[output.bytesize] = original_offset
|
|
203
|
+
[output, offset_map]
|
|
98
204
|
end
|
|
99
205
|
|
|
100
|
-
def
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
206
|
+
def append_preprocessed(output, offset_map, value, original_start, original_end)
|
|
207
|
+
offset_map[output.bytesize] = original_start
|
|
208
|
+
output << value
|
|
209
|
+
offset_map[output.bytesize] = original_end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def surrogate_codepoint?(char)
|
|
213
|
+
char.ord.between?(0xD800, 0xDFFF)
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def skip_ignored
|
|
217
|
+
loop do
|
|
218
|
+
if @scanner.scan(WHITESPACE_REGEX)
|
|
219
|
+
update_position(@scanner.matched)
|
|
220
|
+
elsif @scanner.peek(2) == '/*'
|
|
221
|
+
pos = { line: @line, column: @column, offset: @scanner.pos }
|
|
222
|
+
unless @scanner.scan(COMMENT_REGEX)
|
|
223
|
+
raise_lexer_error('Unterminated comment', pos)
|
|
224
|
+
end
|
|
104
225
|
update_position(@scanner.matched)
|
|
105
|
-
|
|
226
|
+
else
|
|
227
|
+
break
|
|
106
228
|
end
|
|
107
229
|
end
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
def scan_operator
|
|
233
|
+
two_chars = @scanner.peek(2)
|
|
234
|
+
if (token = MULTI_CHAR_TOKENS[two_chars])
|
|
235
|
+
@scanner.pos += 2
|
|
236
|
+
update_position(two_chars)
|
|
237
|
+
return token
|
|
238
|
+
end
|
|
108
239
|
|
|
109
|
-
# Single character operators
|
|
110
240
|
return unless @scanner.scan(SINGLE_CHAR_OPERATOR_REGEX)
|
|
111
241
|
|
|
112
242
|
char = @scanner.matched
|
|
@@ -114,25 +244,26 @@ module Parselly
|
|
|
114
244
|
TOKENS[char]
|
|
115
245
|
end
|
|
116
246
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
str[1..-2] # Remove quotes
|
|
247
|
+
def scan_string(position)
|
|
248
|
+
quote = @scanner.peek(1)
|
|
249
|
+
return unless quote == '"' || quote == "'"
|
|
250
|
+
|
|
251
|
+
@scanner.getch
|
|
252
|
+
update_position(quote)
|
|
253
|
+
raw = +''
|
|
254
|
+
|
|
255
|
+
until @scanner.eos?
|
|
256
|
+
char = @scanner.peek(1)
|
|
257
|
+
return build_string_token(:STRING, raw, position, quote) if char == quote && consume_string_char(raw)
|
|
258
|
+
return build_string_token(:BAD_STRING, raw, position, quote) if newline?(char)
|
|
259
|
+
|
|
260
|
+
consume_string_char(raw)
|
|
132
261
|
end
|
|
262
|
+
|
|
263
|
+
build_string_token(:STRING, raw, position, quote)
|
|
133
264
|
end
|
|
134
265
|
|
|
135
|
-
def scan_identifier
|
|
266
|
+
def scan_identifier(position)
|
|
136
267
|
# Match identifiers with optional escape sequences
|
|
137
268
|
# CSS allows \<any-char> as escape in identifiers (e.g., .hover\:bg-blue-500)
|
|
138
269
|
#
|
|
@@ -144,8 +275,7 @@ module Parselly
|
|
|
144
275
|
|
|
145
276
|
ident = @scanner.matched
|
|
146
277
|
update_position(ident)
|
|
147
|
-
|
|
148
|
-
ident.gsub(ESCAPE_REGEX, '\1')
|
|
278
|
+
Identifier.new(unescape_css(ident), ident).tap { |identifier| identifier.position = position }
|
|
149
279
|
end
|
|
150
280
|
|
|
151
281
|
def scan_number
|
|
@@ -156,15 +286,102 @@ module Parselly
|
|
|
156
286
|
num
|
|
157
287
|
end
|
|
158
288
|
|
|
289
|
+
def consume_string_char(raw)
|
|
290
|
+
char = @scanner.getch
|
|
291
|
+
update_position(char)
|
|
292
|
+
return true if char == '"' || char == "'"
|
|
293
|
+
|
|
294
|
+
raw << char
|
|
295
|
+
return true unless char == '\\'
|
|
296
|
+
return true if @scanner.eos?
|
|
297
|
+
|
|
298
|
+
escaped = @scanner.getch
|
|
299
|
+
update_position(escaped)
|
|
300
|
+
raw << escaped
|
|
301
|
+
true
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
def build_string_token(type, raw, position, quote)
|
|
305
|
+
[type, TokenValue.new(value: unescape_css(raw), raw: raw, position: position, quote: quote)]
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
def newline?(char)
|
|
309
|
+
char == "\n" || char == "\r" || char == "\f"
|
|
310
|
+
end
|
|
311
|
+
|
|
159
312
|
def update_position(text)
|
|
160
|
-
text.
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
313
|
+
unless text.match?(/[\n\r\f]/)
|
|
314
|
+
@column += text.each_char.count
|
|
315
|
+
return
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
lines = text.split(/\r\n|[\n\r\f]/, -1)
|
|
319
|
+
@line += lines.length - 1
|
|
320
|
+
@column = lines.last.each_char.count + 1
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
def current_position
|
|
324
|
+
{ line: @line, column: @column, offset: original_offset(@scanner.pos) }
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
def original_offset(preprocessed_offset)
|
|
328
|
+
@offset_map.fetch(preprocessed_offset, preprocessed_offset)
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
def build_token(type, value, start_position)
|
|
332
|
+
position = start_position.merge(
|
|
333
|
+
start_line: start_position[:line],
|
|
334
|
+
start_column: start_position[:column],
|
|
335
|
+
start_offset: start_position[:offset],
|
|
336
|
+
end_line: @line,
|
|
337
|
+
end_column: @column,
|
|
338
|
+
end_offset: original_offset(@scanner.pos)
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
value.position = position if value.respond_to?(:position=)
|
|
342
|
+
Token.new(type: type, value: value, position: position)
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def eof_position
|
|
346
|
+
current_position.merge(
|
|
347
|
+
start_line: @line,
|
|
348
|
+
start_column: @column,
|
|
349
|
+
start_offset: original_offset(@scanner.pos),
|
|
350
|
+
end_line: @line,
|
|
351
|
+
end_column: @column,
|
|
352
|
+
end_offset: original_offset(@scanner.pos)
|
|
353
|
+
)
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
def unescape_css(value)
|
|
357
|
+
value
|
|
358
|
+
.gsub(ESCAPED_NEWLINE_REGEX, '')
|
|
359
|
+
.gsub(HEX_ESCAPE_REGEX) { decode_hex_escape(Regexp.last_match(1)) }
|
|
360
|
+
.gsub(SIMPLE_ESCAPE_REGEX, '\1')
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
def decode_hex_escape(hex)
|
|
364
|
+
codepoint = hex.to_i(16)
|
|
365
|
+
return REPLACEMENT_CHARACTER if codepoint.zero? || codepoint > 0x10FFFF
|
|
366
|
+
|
|
367
|
+
codepoint.chr(Encoding::UTF_8)
|
|
368
|
+
rescue RangeError
|
|
369
|
+
REPLACEMENT_CHARACTER
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
def raise_lexer_error(message, position)
|
|
373
|
+
error = {
|
|
374
|
+
message: "#{message} at #{position[:line]}:#{position[:column]} (offset #{position[:offset]})",
|
|
375
|
+
line: position[:line],
|
|
376
|
+
column: position[:column],
|
|
377
|
+
offset: position[:offset]
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
if defined?(Parselly::LexError)
|
|
381
|
+
raise Parselly::LexError, error
|
|
167
382
|
end
|
|
383
|
+
|
|
384
|
+
raise error[:message]
|
|
168
385
|
end
|
|
169
386
|
end
|
|
170
387
|
end
|