gammo 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/Gemfile.lock +9 -1
- data/README.md +402 -2
- data/Rakefile +6 -0
- data/lib/gammo/attribute.rb +13 -4
- data/lib/gammo/attributes.rb +95 -0
- data/lib/gammo/node.rb +120 -26
- data/lib/gammo/parser.rb +3 -1
- data/lib/gammo/version.rb +1 -1
- data/lib/gammo/xpath.rb +74 -0
- data/lib/gammo/xpath/ast/axis.rb +231 -0
- data/lib/gammo/xpath/ast/expression.rb +250 -0
- data/lib/gammo/xpath/ast/function.rb +179 -0
- data/lib/gammo/xpath/ast/node_test.rb +86 -0
- data/lib/gammo/xpath/ast/path.rb +100 -0
- data/lib/gammo/xpath/ast/subclassify.rb +35 -0
- data/lib/gammo/xpath/ast/value.rb +150 -0
- data/lib/gammo/xpath/context.rb +23 -0
- data/lib/gammo/xpath/errors.rb +9 -0
- data/lib/gammo/xpath/node_set.rb +43 -0
- data/lib/gammo/xpath/parser.rb +1099 -0
- data/lib/gammo/xpath/parser.y +513 -0
- data/misc/table.erubi +1 -1
- metadata +16 -2
@@ -0,0 +1,513 @@
|
|
1
|
+
class Gammo::XPath::Parser
|
2
|
+
|
3
|
+
token T_SLASH
|
4
|
+
T_SLASHSLASH
|
5
|
+
T_PIPE
|
6
|
+
T_PLUS
|
7
|
+
T_MINUS
|
8
|
+
T_EQ
|
9
|
+
T_NEQ
|
10
|
+
T_LT
|
11
|
+
T_GT
|
12
|
+
T_LTE
|
13
|
+
T_GTE
|
14
|
+
T_AND
|
15
|
+
T_OR
|
16
|
+
T_DIV
|
17
|
+
T_MOD
|
18
|
+
T_MUL
|
19
|
+
T_LPAREN
|
20
|
+
T_RPAREN
|
21
|
+
T_LBRACK
|
22
|
+
T_RBRACK
|
23
|
+
T_DOT
|
24
|
+
T_DOTDOT
|
25
|
+
T_AT
|
26
|
+
T_COMMA
|
27
|
+
T_COLONCOLON
|
28
|
+
T_NC_NAME
|
29
|
+
T_Q_NAME
|
30
|
+
T_FUNCTION_NAME
|
31
|
+
T_NAME_TEST
|
32
|
+
T_NODE_TYPE
|
33
|
+
T_AXIS_NAME
|
34
|
+
T_VARIABLE_REFERENCE
|
35
|
+
T_LITERAL
|
36
|
+
T_NUMBER
|
37
|
+
|
38
|
+
start expr
|
39
|
+
|
40
|
+
rule
|
41
|
+
location_path:
|
42
|
+
relative_location_path {
|
43
|
+
result = val[0]
|
44
|
+
result.absolute = false
|
45
|
+
}
|
46
|
+
| absolute_location_path {
|
47
|
+
result = val[0]
|
48
|
+
result.absolute = true
|
49
|
+
}
|
50
|
+
|
51
|
+
absolute_location_path:
|
52
|
+
T_SLASH { result = AST::LocationPath.new }
|
53
|
+
| T_SLASH relative_location_path { result = val[1] }
|
54
|
+
| descendant_or_self relative_location_path {
|
55
|
+
result = val[1]
|
56
|
+
result.insert_first_step(val[0])
|
57
|
+
}
|
58
|
+
|
59
|
+
# A relative location path is one or more steps separated by "/" or "//".
# Every alternative yields an AST::LocationPath; the recursive alternatives
# extend the path built so far (val[0]) in place.
relative_location_path:
  step {
    result = AST::LocationPath.new
    result.append_step(val[0])
  }
  | relative_location_path T_SLASH step {
    result = val[0]
    result.append_step(val[2])
  }
  # "//" is routed through the descendant_or_self rule so that val[1] is the
  # AST::Axis::DescendantOrSelf step built there. Matching T_SLASHSLASH
  # directly would make val[1] the raw "//" token text, which is not a step.
  | relative_location_path descendant_or_self step {
    result = val[0]
    result.append_step(val[1])
    result.append_step(val[2])
  }
|
73
|
+
|
74
|
+
step:
|
75
|
+
node_test optional_predicates {
|
76
|
+
result = AST::Axis::Child.new(node_test: val[0], predicates: val[1])
|
77
|
+
}
|
78
|
+
| axis_specifier node_test optional_predicates {
|
79
|
+
axis_base_class = val[0]
|
80
|
+
axis_base_class = AST::Axis.fetch(axis_base_class.gsub(/-/, '_')) if axis_base_class.instance_of?(String)
|
81
|
+
result = axis_base_class.new(node_test: val[1], predicates: val[2])
|
82
|
+
}
|
83
|
+
| abbreviated_step
|
84
|
+
|
85
|
+
axis_specifier:
|
86
|
+
T_AXIS_NAME T_COLONCOLON | T_AT { result = AST::Axis::Attribute }
|
87
|
+
|
88
|
+
node_test:
|
89
|
+
T_NAME_TEST {
|
90
|
+
local, namespace = expand_qname(val[0])
|
91
|
+
result = AST::NodeTest::Name.new(local: local, namespace: namespace)
|
92
|
+
}
|
93
|
+
| T_NODE_TYPE T_LPAREN T_RPAREN {
|
94
|
+
result = AST::NodeTest.fetch(val[0]).new
|
95
|
+
}
|
96
|
+
|
97
|
+
descendant_or_self:
|
98
|
+
T_SLASHSLASH {
|
99
|
+
result = AST::Axis::DescendantOrSelf.new(node_test: AST::NodeTest::Any.new)
|
100
|
+
}
|
101
|
+
|
102
|
+
# Since there is no way to define repeated expressions directly,
|
103
|
+
# need to define an original rule for handling that case recursively.
|
104
|
+
# TODO(kunpei): need test
|
105
|
+
# One or more predicates, collected into an Array of AST::Predicate.
repeatable_predicates:
  predicate { result = [AST::Predicate.new(val[0])] }
  | repeatable_predicates predicate {
    # Wrap every predicate the same way as the first one so the list holds
    # only AST::Predicate instances instead of a mix of wrapped and raw
    # expressions.
    result = val[0]
    result << AST::Predicate.new(val[1])
  }
|
111
|
+
|
112
|
+
optional_predicates:
|
113
|
+
| repeatable_predicates { result = val[0] }
|
114
|
+
|
115
|
+
predicate: T_LBRACK predicate_expr T_RBRACK { result = val[1] }
|
116
|
+
predicate_expr: expr
|
117
|
+
|
118
|
+
abbreviated_step:
|
119
|
+
T_DOT { result = AST::Axis::Self.new(node_test: AST::NodeTest::Any.new) }
|
120
|
+
| T_DOTDOT { result = AST::Axis::Parent.new(node_test: AST::NodeTest::Any.new) }
|
121
|
+
|
122
|
+
expr: or_expr
|
123
|
+
|
124
|
+
primary_expr:
|
125
|
+
T_VARIABLE_REFERENCE { result = AST::Value::VariableReference.new(val[0]) }
|
126
|
+
| T_LPAREN expr T_RPAREN { result = val[1] }
|
127
|
+
| T_LITERAL { result = AST::Value::String.new(val[0].to_s) }
|
128
|
+
| T_NUMBER { result = AST::Value::Number.new(val[0].include?(?.) ? val[0].to_f : val[0].to_i) }
|
129
|
+
| function_call
|
130
|
+
|
131
|
+
function_call:
|
132
|
+
T_FUNCTION_NAME T_LPAREN arguments T_RPAREN {
|
133
|
+
result = AST::Function.fetch(val[0]).new(*val[2])
|
134
|
+
}
|
135
|
+
| T_FUNCTION_NAME T_LPAREN T_RPAREN {
|
136
|
+
result = AST::Function.fetch(val[0]).new
|
137
|
+
}
|
138
|
+
|
139
|
+
argument: expr
|
140
|
+
|
141
|
+
# Since there is no way to define repeated expressions directly,
|
142
|
+
# need to define an original rule for handling that case recursively.
|
143
|
+
# TODO(kunpei): need test
|
144
|
+
arguments:
|
145
|
+
argument {
|
146
|
+
result = []
|
147
|
+
result << val[0]
|
148
|
+
}
|
149
|
+
| arguments T_COMMA argument {
|
150
|
+
result = val[0]
|
151
|
+
result << val[2]
|
152
|
+
}
|
153
|
+
|
154
|
+
union_expr:
|
155
|
+
path_expr
|
156
|
+
| union_expr T_PIPE path_expr {
|
157
|
+
result = AST::UnionExpr.new(val[0], val[2])
|
158
|
+
}
|
159
|
+
|
160
|
+
path_expr:
|
161
|
+
location_path
|
162
|
+
| filter_expr
|
163
|
+
| filter_expr T_SLASH relative_location_path {
|
164
|
+
val[2].absolute = true
|
165
|
+
result = AST::Path.new(val[0], val[2])
|
166
|
+
}
|
167
|
+
| filter_expr descendant_or_self relative_location_path {
|
168
|
+
val[2].insert_first_step(val[1])
|
169
|
+
val[2].absolute = true
|
170
|
+
result = AST::Path.new(val[0], val[2])
|
171
|
+
}
|
172
|
+
|
173
|
+
filter_expr:
|
174
|
+
primary_expr
|
175
|
+
| primary_expr repeatable_predicates {
|
176
|
+
result = AST::Filter.new(val[0], predicates: val[1])
|
177
|
+
}
|
178
|
+
|
179
|
+
or_expr:
|
180
|
+
and_expr
|
181
|
+
| or_expr T_OR and_expr { result = AST::OrExpr.new(a: val[0], b: val[2]) }
|
182
|
+
|
183
|
+
and_expr:
|
184
|
+
equality_expr
|
185
|
+
| and_expr T_AND equality_expr { result = AST::AndExpr.new(a: val[0], b: val[2]) }
|
186
|
+
|
187
|
+
equality_expr:
|
188
|
+
relational_expr
|
189
|
+
| equality_expr T_EQ relational_expr { result = AST::EqExpr.new(val[0], val[2]) }
|
190
|
+
| equality_expr T_NEQ relational_expr { result = AST::NeqExpr.new(val[0], val[2]) }
|
191
|
+
|
192
|
+
relational_expr:
|
193
|
+
additive_expr
|
194
|
+
| relational_expr T_LT additive_expr { result = AST::LtExpr.new(val[0], val[2]) }
|
195
|
+
| relational_expr T_GT additive_expr { result = AST::GtExpr.new(val[0], val[2]) }
|
196
|
+
| relational_expr T_LTE additive_expr { result = AST::LteExpr.new(val[0], val[2]) }
|
197
|
+
| relational_expr T_GTE additive_expr { result = AST::GteExpr.new(val[0], val[2]) }
|
198
|
+
|
199
|
+
additive_expr:
|
200
|
+
multiplicative_expr
|
201
|
+
| additive_expr T_PLUS multiplicative_expr {
|
202
|
+
result = AST::PlusExpr.new(val[0], val[2])
|
203
|
+
}
|
204
|
+
| additive_expr T_MINUS multiplicative_expr {
|
205
|
+
result = AST::MinusExpr.new(val[0], val[2])
|
206
|
+
}
|
207
|
+
|
208
|
+
multiplicative_expr:
|
209
|
+
unary_expr
|
210
|
+
| multiplicative_expr T_MUL unary_expr {
|
211
|
+
result = AST::MultiplyExpr.new(val[0], val[2])
|
212
|
+
}
|
213
|
+
| multiplicative_expr T_DIV unary_expr {
|
214
|
+
result = AST::DividedExpr.new(val[0], val[2])
|
215
|
+
}
|
216
|
+
| multiplicative_expr T_MOD unary_expr {
|
217
|
+
result = AST::ModuloExpr.new(val[0], val[2])
|
218
|
+
}
|
219
|
+
|
220
|
+
unary_expr:
|
221
|
+
union_expr
|
222
|
+
| T_MINUS unary_expr {
|
223
|
+
result = AST::Negative.new(val[1])
|
224
|
+
}
|
225
|
+
end
|
226
|
+
|
227
|
+
---- inner
|
228
|
+
|
229
|
+
# 2.2 Characters (Extensible Markup Language (XML) 1.0 (Fifth Edition))
|
230
|
+
#
|
231
|
+
# This represents "Char" range defined in 2.2 Characters.
|
232
|
+
# [2] Char ::=
|
233
|
+
# #x9 | #xA | #xD | [#x20-#xD7FF] |
|
234
|
+
# [#xE000-#xFFFD] |
|
235
|
+
# [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
|
236
|
+
#
|
237
|
+
# @see https://www.w3.org/TR/xml11/#charsets
|
238
|
+
CHAR = /[\x9\xA\xD\u{20}-\u{d7ff}\u{e000}-\u{fffd}\u{10000}-\u{10ffff}]/
|
239
|
+
|
240
|
+
# 2.3 Common Syntactic Constructs (Extensible Markup Language (XML) 1.0 (Fifth Edition))
|
241
|
+
#
|
242
|
+
# [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
243
|
+
#
|
244
|
+
# @see https://www.w3.org/TR/xml11/#NT-S
|
245
|
+
S = /[\x20\x9\xD\xA]/
|
246
|
+
|
247
|
+
# [4] NameStartChar ::=
|
248
|
+
# ":" |
|
249
|
+
# [A-Z] |
|
250
|
+
# "_" |
|
251
|
+
# [a-z] |
|
252
|
+
# [#xC0-#xD6] |
|
253
|
+
# [#xD8-#xF6] |
|
254
|
+
# [#xF8-#x2FF] |
|
255
|
+
# [#x370-#x37D] |
|
256
|
+
# [#x37F-#x1FFF] |
|
257
|
+
# [#x200C-#x200D] |
|
258
|
+
# [#x2070-#x218F] |
|
259
|
+
# [#x2C00-#x2FEF] |
|
260
|
+
# [#x3001-#xD7FF] |
|
261
|
+
# [#xF900-#xFDCF] |
|
262
|
+
# [#xFDF0-#xFFFD] |
|
263
|
+
# [#x10000-#xEFFFF]
|
264
|
+
#
|
265
|
+
# @see https://www.w3.org/TR/xml11/#NT-NameStartChar
|
266
|
+
name_start_chars = %w[
|
267
|
+
:
|
268
|
+
a-zA-Z_
|
269
|
+
\\u00c0-\\u00d6
|
270
|
+
\\u00d8-\\u00f6
|
271
|
+
\\u00f8-\\u02ff
|
272
|
+
\\u0370-\\u037d
|
273
|
+
\\u037f-\\u1fff
|
274
|
+
\\u200c-\\u200d
|
275
|
+
\\u2070-\\u218f
|
276
|
+
\\u2c00-\\u2fef
|
277
|
+
\\u3001-\\ud7ff
|
278
|
+
\\uf900-\\ufdcf
|
279
|
+
\\ufdf0-\\ufffd
|
280
|
+
\\u{10000}-\\u{effff}
|
281
|
+
]
|
282
|
+
NAME_START_CHARS = /[#{name_start_chars.join}]/
|
283
|
+
|
284
|
+
# [4a] NameChar ::=
|
285
|
+
# NameStartChar |
|
286
|
+
# "-" |
|
287
|
+
# "." |
|
288
|
+
# [0-9] |
|
289
|
+
# #xB7 |
|
290
|
+
# [#x0300-#x036F] |
|
291
|
+
# [#x203F-#x2040]
|
292
|
+
#
|
293
|
+
# @see https://www.w3.org/TR/xml11/#NT-NameChar
|
294
|
+
name_chars = name_start_chars + %w[
|
295
|
+
\\-
|
296
|
+
\\.
|
297
|
+
0-9
|
298
|
+
\\u00b7
|
299
|
+
\\u0300-\\u036f
|
300
|
+
\\u203f-\\u2040
|
301
|
+
]
|
302
|
+
NAME_CHARS = /[#{name_chars.join}]/
|
303
|
+
|
304
|
+
# [5] Name ::= NameStartChar (NameChar)*
|
305
|
+
#
|
306
|
+
# @see https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Name
|
307
|
+
NAME = /#{NAME_START_CHARS}#{NAME_CHARS}*/
|
308
|
+
|
309
|
+
# 2.3. Axes
|
310
|
+
#
|
311
|
+
# [6] AxisName ::=
|
312
|
+
# 'ancestor'
|
313
|
+
# | 'ancestor-or-self'
|
314
|
+
# | 'attribute'
|
315
|
+
# | 'child'
|
316
|
+
# | 'descendant'
|
317
|
+
# | 'descendant-or-self'
|
318
|
+
# | 'following'
|
319
|
+
# | 'following-sibling'
|
320
|
+
# | 'namespace'
|
321
|
+
# | 'parent'
|
322
|
+
# | 'preceding'
|
323
|
+
# | 'preceding-sibling'
|
324
|
+
# | 'self'
|
325
|
+
#
|
326
|
+
# @see https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-AxisName
|
327
|
+
AXES = /
|
328
|
+
ancestor-or-self|
|
329
|
+
ancestor|
|
330
|
+
attribute|
|
331
|
+
child|
|
332
|
+
descendant-or-self|
|
333
|
+
descendant|
|
334
|
+
following-sibling|
|
335
|
+
following|
|
336
|
+
namespace|
|
337
|
+
parent|
|
338
|
+
preceding-sibling|
|
339
|
+
preceding|
|
340
|
+
self
|
341
|
+
/x
|
342
|
+
|
343
|
+
# 3 Declaring Namespaces
|
344
|
+
#
|
345
|
+
# The "NCName" is picked from the section.
|
346
|
+
#
|
347
|
+
# Note that we need to take care of exceptional handling.
|
348
|
+
#
|
349
|
+
# [4] NCName ::= NCNameStartChar NCNameChar* /* An XML Name, minus the ":" */
|
350
|
+
# [5] NCNameChar ::= NameChar - ':'
|
351
|
+
# [6] NCNameStartChar ::= NameStartChar - ':'
|
352
|
+
#
|
353
|
+
# @see https://www.w3.org/TR/xml-names11/#ns-decl
|
354
|
+
NC_NAME_CHARS = /[#{(name_chars - [':']).join}]/
|
355
|
+
NC_NAME_START_CHARS = /[#{(name_start_chars - [':']).join}]/
|
356
|
+
NC_NAME = /#{NC_NAME_START_CHARS}#{NC_NAME_CHARS}*/
|
357
|
+
|
358
|
+
# 4. Qualified Names
|
359
|
+
#
|
360
|
+
# The rules for "QName", "PrefixedName", "UnprefixedName", "Prefix" and
|
361
|
+
# "LocalPart" are picked from the section.
|
362
|
+
#
|
363
|
+
# [7] QName ::= PrefixedName | UnprefixedName
|
364
|
+
# [8] PrefixedName ::= Prefix ':' LocalPart
|
365
|
+
# [9] UnprefixedName ::= LocalPart
|
366
|
+
# [10] Prefix ::= NCName
|
367
|
+
# [11] LocalPart ::= NCName
|
368
|
+
#
|
369
|
+
# @see https://www.w3.org/TR/xml-names11/#ns-qualnames
|
370
|
+
PREFIX = NC_NAME
|
371
|
+
LOCAL_PART = NC_NAME
|
372
|
+
PREFIXED_NAME = /#{PREFIX}:#{LOCAL_PART}/
|
373
|
+
UNPREFIXED_NAME = LOCAL_PART
|
374
|
+
Q_NAME = /#{PREFIXED_NAME}|#{UNPREFIXED_NAME}/
|
375
|
+
|
376
|
+
# 3.7 Lexical Structure
|
377
|
+
#
|
378
|
+
# The rules for "NodeType" and "Digits" are picked from the section.
|
379
|
+
# @see https://www.w3.org/TR/1999/REC-xpath-19991116/#exprlex
|
380
|
+
DIGITS = /[0-9]+/
|
381
|
+
NODE_TYPE = /comment|text|processing-instruction|node/
|
382
|
+
|
383
|
+
# EXPR_TOKENS is defined for tokenizing primitive tokens for "ExprToken",
|
384
|
+
# except other rules.
|
385
|
+
# @see https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-ExprToken
|
386
|
+
EXPR_TOKENS = {
|
387
|
+
'(' => :T_LPAREN,
|
388
|
+
')' => :T_RPAREN,
|
389
|
+
'[' => :T_LBRACK,
|
390
|
+
']' => :T_RBRACK,
|
391
|
+
'.' => :T_DOT,
|
392
|
+
'..' => :T_DOTDOT,
|
393
|
+
'@' => :T_AT,
|
394
|
+
',' => :T_COMMA,
|
395
|
+
'::' => :T_COLONCOLON
|
396
|
+
}.freeze
|
397
|
+
# Declaring the regexp consisting of EXPR_TOKENS keys to keep the token order.
|
398
|
+
EXPRS = /\(|\)|\[|\]|@|,|::|\.\.|\./
|
399
|
+
|
400
|
+
# OPERATOR_TOKENS is defined for tokenizing primitive tokens for "Operator"
|
401
|
+
# and "OperatorName" except other rules.
|
402
|
+
# @see https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-Operator
|
403
|
+
OPERATOR_TOKENS = {
|
404
|
+
'and' => :T_AND,
|
405
|
+
'or' => :T_OR,
|
406
|
+
'mod' => :T_MOD,
|
407
|
+
'div' => :T_DIV,
|
408
|
+
'/' => :T_SLASH,
|
409
|
+
'//' => :T_SLASHSLASH,
|
410
|
+
"|" => :T_PIPE,
|
411
|
+
'+' => :T_PLUS,
|
412
|
+
'-' => :T_MINUS,
|
413
|
+
'=' => :T_EQ,
|
414
|
+
'!=' => :T_NEQ,
|
415
|
+
'<' => :T_LT,
|
416
|
+
'>' => :T_GT,
|
417
|
+
'<=' => :T_LTE,
|
418
|
+
'>=' => :T_GTE
|
419
|
+
}.freeze
|
420
|
+
# Declaring the regexp consisting of OPERATOR_TOKENS keys to keep the token order.
|
421
|
+
OPERATORS = /and|or|mod|div|\/\/|\/|\||\+|-|\=|!=|<=|>=|<|>/
|
422
|
+
|
423
|
+
require 'strscan'
|
424
|
+
require 'forwardable'
|
425
|
+
require 'gammo/xpath/errors'
|
426
|
+
require 'gammo/xpath/ast/axis'
|
427
|
+
require 'gammo/xpath/ast/expression'
|
428
|
+
require 'gammo/xpath/ast/function'
|
429
|
+
require 'gammo/xpath/ast/node_test'
|
430
|
+
require 'gammo/xpath/ast/path'
|
431
|
+
require 'gammo/xpath/ast/value'
|
432
|
+
|
433
|
+
extend Forwardable
|
434
|
+
def_delegators :@scanner, :scan, :eos?
|
435
|
+
|
436
|
+
# Sets up a parser over the given expression text.
#
# @param input [String] the expression to parse; it is kept in @input and
#   wrapped in a StringScanner that the tokenizer reads from
def initialize(input)
  super()
  @input = input
  @scanner = StringScanner.new(input)
  # NOTE(review): presumably Racc's debug switch; confirm whether it should
  # stay enabled in released code.
  @yydebug = true
end
|
442
|
+
|
443
|
+
# Tokenizes the whole input into @query, then runs the parser over it.
#
# @return whatever do_parse returns
def parse
  tokens = @query = []
  advance { |type, text| tokens << [type, text] }
  do_parse
end
|
448
|
+
|
449
|
+
# Hands out the tokens collected by #parse one at a time, in order.
#
# @return [Array, nil] the next [symbol, value] pair removed from the front
#   of @query, or nil once @query is empty
def next_token
  @query.shift
end
|
452
|
+
|
453
|
+
# Resolves a namespace prefix to its URI. Only the "xml" prefix is known.
#
# @param prefix [String] namespace prefix without the trailing colon
# @return [String, nil] the namespace URI, or nil for any other prefix
def lookup_namespace_uri(prefix)
  return 'http://www.w3.org/XML/1998/namespace' if prefix == 'xml'

  nil
end
|
456
|
+
|
457
|
+
# Splits a name test into its local part and namespace URI.
#
# @param qname [String] a name such as "div" or "xml:lang"
# @return [Array] [local_name, namespace_uri]; namespace_uri is nil when
#   qname carries no prefix
# @raise [ParseError] when the prefix is not resolved by lookup_namespace_uri
def expand_qname(qname)
  colon = qname.index(':')
  return [qname, nil] unless colon

  # Exclusive range: the prefix is everything before the colon. Including the
  # colon would make the lookup receive "xml:" and never match.
  namespace_uri = lookup_namespace_uri(qname[0...colon])
  fail ParseError, 'invalid qname: %s' % qname unless namespace_uri

  # The local part starts right after the colon.
  [qname[(colon + 1)..-1], namespace_uri]
end
|
463
|
+
|
464
|
+
# Emits one token to the given block and remembers its type in @prev_token,
# which the tokenizer consults to disambiguate "*" and "div".
#
# @param symbol [Symbol] token type
# @param val [String] matched text
# @return whatever the block returns
def token(symbol, val, &block)
  # The assignment is evaluated as the first argument, so @prev_token is
  # updated before the block runs.
  block.call(@prev_token = symbol, val)
end
|
468
|
+
|
469
|
+
# Looks up the token symbol registered for a piece of scanned text and yields
# it to the block.
#
# @param key [String] scanned text
# @param constraints [Hash] maps scanned text to token symbols
# @yieldparam symbol [Symbol] the symbol found for key
# @return whatever the block returns
# @raise [ParseError] when constraints has no truthy entry for key
def fetch(key, constraints)
  symbol = constraints[key]
  # Report the offending key: symbol is always nil/false on this path, so
  # interpolating it would leave the message blank.
  fail ParseError, "unexpected token: #{key}, want = #{constraints.keys}" unless symbol

  yield symbol
end
|
475
|
+
|
476
|
+
# Tokenizes everything left in @scanner, handing each token to the block as
# (symbol, text) through #token. The branches are tried in order at every
# position, so their order decides how ambiguous text is classified.
#
# @yieldparam symbol [Symbol] token type
# @yieldparam val [String] matched text
# @return [nil]
# @raise [ParseError] when nothing matches at the current position, or when
#   #fetch finds no symbol for a scanned token
def advance(&block)
  @prev_token = nil
  until eos?
    case
    # Skip whitespace everywhere.
    when scan(/#{S}+/) then next
    when expr = scan(EXPRS)
      fetch(expr, EXPR_TOKENS) do |symbol|
        token(symbol, expr, &block)
      end
    when operator = scan(OPERATORS)
      fetch(operator, OPERATOR_TOKENS) do |symbol|
        # "div" is available in both operator and name_test tokens; it is
        # only kept as an operator directly after a number.
        # NOTE(review): "and", "or" and "mod" get no such treatment, so names
        # starting with them look like they are split here -- confirm against
        # the XPath 1.0 lexical disambiguation rules.
        symbol = :T_NAME_TEST if symbol == :T_DIV && @prev_token != :T_NUMBER
        token(symbol, operator, &block)
      end
    when axis = scan(AXES) then token(:T_AXIS_NAME, axis, &block)
    when node_type = scan(NODE_TYPE)
      # NOTE: processing-instruction is not supported by Gammo.
      token(:T_NODE_TYPE, node_type, &block)
    # Q_NAME covers unprefixed names and tries the prefixed form first.
    # Listing NC_NAME ahead of it would stop the match at the prefix of
    # "prefix:local" and leave the scanner stuck on the colon.
    when name = scan(/\*|#{Q_NAME}/)
      if name == '*' && @prev_token == :T_NUMBER
        token(:T_MUL, name, &block)
      else
        # TODO: Stripping should be taken care by regexp.
        symbol = @scanner.peek(1) == '(' ? :T_FUNCTION_NAME : :T_NAME_TEST
        token(symbol, name.strip, &block)
      end
    when literal = scan(/"[^"]*"|'[^']*'/) then token(:T_LITERAL, literal, &block)
    when number = scan(/#{DIGITS}(\.(#{DIGITS})?)?/) then token(:T_NUMBER, number, &block)
    when ref = scan(/\$#{Q_NAME}/) then token(:T_VARIABLE_REFERENCE, ref, &block)
    else
      # StringScanner#pos is a byte offset, so indexing the string with it
      # shows the wrong tail after multibyte characters; #rest is exact.
      fail ParseError, "unexpected token: #{@scanner.rest}"
    end
  end
end
|