rley 0.5.11 → 0.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/examples/general/SRL/lib/ast_builder.rb +229 -38
- data/examples/general/SRL/lib/grammar.rb +39 -7
- data/examples/general/SRL/lib/regex/alternation.rb +3 -2
- data/examples/general/SRL/lib/regex/anchor.rb +48 -0
- data/examples/general/SRL/lib/regex/capturing_group.rb +50 -0
- data/examples/general/SRL/lib/regex/char_class.rb +3 -2
- data/examples/general/SRL/lib/regex/char_range.rb +4 -2
- data/examples/general/SRL/lib/regex/char_shorthand.rb +3 -2
- data/examples/general/SRL/lib/regex/character.rb +16 -14
- data/examples/general/SRL/lib/regex/concatenation.rb +8 -7
- data/examples/general/SRL/lib/regex/expression.rb +26 -5
- data/examples/general/SRL/lib/regex/lookaround.rb +47 -0
- data/examples/general/SRL/lib/regex/match_option.rb +36 -0
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +3 -2
- data/examples/general/SRL/lib/regex/repetition.rb +3 -2
- data/examples/general/SRL/lib/regex/wildcard.rb +3 -2
- data/examples/general/SRL/lib/regex_repr.rb +3 -0
- data/examples/general/SRL/lib/tokenizer.rb +26 -7
- data/examples/general/SRL/spec/integration_spec.rb +148 -5
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78c5a2a83d1691c6c470f2fb4bf347f7dca44cb6
|
4
|
+
data.tar.gz: 9c1f62cdf775e71e33ceecfc99db58298b4e3b82
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 662e52aa9aae198f6eb5e9ed1750a1bfa873969ad9bedea4ca7225babef3d9fa648b709c4bef4959432d62cf8e1733d556ce911efec9885a0d59e6d4972c0f50
|
7
|
+
data.tar.gz: ab114da248a85b5e78fdb2ba9affca357a52b0cbe45cbaa97b1ad41becf71b0fa89db6e1f4487c0a23c59b9b9b83db5101e511f2b711a0d4f9a324ac2c3f3a2d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.5.12 / 2018-02-03
|
2
|
+
* [CHANGE] Simple Regex Language is fully supported!...
|
3
|
+
* [CHANGE] File `examples/general/SRL/lib/grammar.rb`: added missing rule productions for Simple Regex Language.
|
4
|
+
* [CHANGE] File `examples/general/SRL/lib/ast_builder.rb`: added transformation rules for missing regular expression features.
|
5
|
+
* [CHANGE] File `examples/general/SRL/spec/integration_spec.rb`: added tests covering SRL.
|
6
|
+
* [NEW] File `examples/general/SRL/lib/regex/anchor.rb`: added support for anchors in regular expressions.
|
7
|
+
* [NEW] File `examples/general/SRL/lib/regex/capturing_group.rb`: added support for capturing groups in regular expressions.
|
8
|
+
* [NEW] File `examples/general/SRL/lib/regex/lookaround.rb`: added support for lookaround in regular expressions.
|
9
|
+
|
1
10
|
### 0.5.11 / 2018-01-25
|
2
11
|
* [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
|
3
12
|
* [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).
|
@@ -14,6 +14,8 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
14
14
|
|
15
15
|
Terminal2NodeClass = { }.freeze
|
16
16
|
|
17
|
+
attr_reader :options
|
18
|
+
|
17
19
|
protected
|
18
20
|
|
19
21
|
# Overriding method.
|
@@ -36,33 +38,109 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
36
38
|
# @param theChildren [Array] Children nodes (one per rhs symbol)
|
37
39
|
def new_parent_node(aProduction, aRange, theTokens, theChildren)
|
38
40
|
node = case aProduction.name
|
39
|
-
when 'srl_0' # rule 'srl' => '
|
41
|
+
when 'srl_0' # rule 'srl' => 'expression'
|
40
42
|
return_first_child(aRange, theTokens, theChildren)
|
41
|
-
|
42
|
-
when '
|
43
|
+
|
44
|
+
when 'expression_0' # rule 'expression' => %w[pattern separator flags]
|
45
|
+
reduce_expression_0(aProduction, aRange, theTokens, theChildren)
|
46
|
+
|
47
|
+
when 'expression_1' # rule 'expression' => 'pattern'
|
48
|
+
return_first_child(aRange, theTokens, theChildren)
|
49
|
+
|
50
|
+
when 'pattern_0' # rule 'pattern' => %w[pattern separator quantifiable]
|
43
51
|
reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
|
44
52
|
|
45
|
-
when 'pattern_1' # rule 'pattern' =>
|
46
|
-
reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
|
47
|
-
|
48
|
-
when 'pattern_2' # rule 'pattern' => 'quantifiable'
|
53
|
+
when 'pattern_1' # rule 'pattern' => 'quantifiable'
|
49
54
|
return_first_child(aRange, theTokens, theChildren)
|
50
55
|
|
51
|
-
when '
|
56
|
+
when 'separator_0' # rule 'separator' => 'COMMA'
|
52
57
|
return_first_child(aRange, theTokens, theChildren)
|
53
58
|
|
54
|
-
when '
|
59
|
+
when 'separator_1' # rule 'separator' => []
|
60
|
+
nil
|
61
|
+
|
62
|
+
when 'flags_0' # rule 'flags' => %[flags separator single_flag]
|
63
|
+
### NEW
|
64
|
+
reduce_flags_0(aProduction, aRange, theTokens, theChildren)
|
65
|
+
|
66
|
+
when 'single_flag_0' # rule 'single_flag' => %w[CASE INSENSITIVE]
|
67
|
+
### NEW
|
68
|
+
reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
|
69
|
+
|
70
|
+
when 'single_flag_1' # rule 'single_flag' => %w[MULTI LINE]
|
71
|
+
### NEW
|
72
|
+
reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
|
73
|
+
|
74
|
+
when 'single_flag_2' # rule 'single_flag' => %w[ALL LAZY]
|
75
|
+
### NEW
|
76
|
+
reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
|
77
|
+
|
78
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
79
|
+
when 'quantifiable_0'
|
80
|
+
reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
|
81
|
+
|
82
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable]
|
83
|
+
when 'quantifiable_1'
|
55
84
|
reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
|
56
85
|
|
86
|
+
# rule 'quantifiable' => %w[anchorable end_anchor]
|
87
|
+
when 'quantifiable_2'
|
88
|
+
reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
|
89
|
+
|
90
|
+
when 'quantifiable_3' # rule 'quantifiable' => 'anchorable'
|
91
|
+
return_first_child(aRange, theTokens, theChildren)
|
92
|
+
|
93
|
+
# rule 'begin_anchor' => %w[STARTS WITH]
|
94
|
+
# rule 'begin_anchor' => %w[BEGIN WITH]
|
95
|
+
when 'begin_anchor_0', 'begin_anchor_1'
|
96
|
+
reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
|
97
|
+
|
98
|
+
when 'end_anchor_0' # rule 'end_anchor' => %w[MUST END]
|
99
|
+
reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
|
100
|
+
|
101
|
+
when 'anchorable_0' # rule 'anchorable' => 'assertable'
|
102
|
+
return_first_child(aRange, theTokens, theChildren)
|
103
|
+
|
104
|
+
when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
|
105
|
+
reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
106
|
+
|
107
|
+
when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
|
108
|
+
reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
109
|
+
|
110
|
+
# rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
111
|
+
when 'assertion_0'
|
112
|
+
reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
|
113
|
+
|
114
|
+
# rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
115
|
+
when 'assertion_1'
|
116
|
+
reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
|
117
|
+
|
118
|
+
# rule 'assertion' => %w[IF ALREADY HAD assertable]
|
119
|
+
when 'assertion_2'
|
120
|
+
reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
|
121
|
+
|
122
|
+
# rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
123
|
+
when 'assertion_3'
|
124
|
+
reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
|
125
|
+
|
126
|
+
when 'assertable_0' # rule 'assertable' => 'term'
|
127
|
+
return_first_child(aRange, theTokens, theChildren)
|
128
|
+
|
129
|
+
when 'assertable_1' # rule 'assertable' => %w[term quantifier]
|
130
|
+
reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
|
131
|
+
|
57
132
|
when 'term_0' # rule 'term' => 'atom'
|
58
133
|
return_first_child(aRange, theTokens, theChildren)
|
59
134
|
|
60
135
|
when 'term_1' # rule 'term' => 'alternation'
|
61
136
|
return_first_child(aRange, theTokens, theChildren)
|
62
|
-
|
137
|
+
|
63
138
|
when 'term_2' # rule 'term' => 'grouping'
|
64
139
|
return_first_child(aRange, theTokens, theChildren)
|
65
140
|
|
141
|
+
when 'term_3' # rule 'term' => 'capturing_group'
|
142
|
+
return_first_child(aRange, theTokens, theChildren)
|
143
|
+
|
66
144
|
when 'atom_0' # rule 'atom' => 'letter_range'
|
67
145
|
return_first_child(aRange, theTokens, theChildren)
|
68
146
|
|
@@ -133,19 +211,34 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
133
211
|
when 'alternation_0'
|
134
212
|
reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
|
135
213
|
|
136
|
-
# rule 'alternatives' => %w[alternatives
|
214
|
+
# rule 'alternatives' => %w[alternatives separator quantifiable]
|
137
215
|
when 'alternatives_0'
|
138
216
|
reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
|
139
217
|
|
140
|
-
# rule 'alternatives' =>
|
141
|
-
when 'alternatives_1'
|
218
|
+
when 'alternatives_1' # rule 'alternatives' => 'quantifiable'
|
142
219
|
reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
143
220
|
|
144
|
-
when '
|
145
|
-
|
221
|
+
when 'grouping_0' # rule 'grouping' => %w[LPAREN pattern RPAREN]
|
222
|
+
reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
|
223
|
+
|
224
|
+
# rule 'capturing_group' => %w[CAPTURE assertable]
|
225
|
+
when 'capturing_group_0'
|
226
|
+
reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
|
227
|
+
|
228
|
+
# rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
229
|
+
when 'capturing_group_1'
|
230
|
+
reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
|
231
|
+
|
232
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
233
|
+
when 'capturing_group_2'
|
234
|
+
reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
|
235
|
+
|
236
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
237
|
+
when 'capturing_group_3'
|
238
|
+
reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
|
146
239
|
|
147
|
-
when '
|
148
|
-
|
240
|
+
when 'var_name_0' # rule 'var_name' => 'STRING_LIT'
|
241
|
+
return_first_child(aRange, theTokens, theChildren)
|
149
242
|
|
150
243
|
when 'quantifier_0' # rule 'quantifier' => 'ONCE'
|
151
244
|
multiplicity(1, 1)
|
@@ -205,11 +298,11 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
205
298
|
result = Regex::Concatenation.new(*chars)
|
206
299
|
else
|
207
300
|
if to_escape && Regex::Character::MetaChars.include?(aString)
|
208
|
-
result = Regex::Concatenation.new(Regex::Character.new("\\"),
|
301
|
+
result = Regex::Concatenation.new(Regex::Character.new("\\"),
|
209
302
|
Regex::Character.new(aString))
|
210
303
|
else
|
211
304
|
result = Regex::Character.new(aString)
|
212
|
-
end
|
305
|
+
end
|
213
306
|
end
|
214
307
|
|
215
308
|
return result
|
@@ -237,21 +330,100 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
237
330
|
def repetition(expressionToRepeat, aMultiplicity)
|
238
331
|
return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
|
239
332
|
end
|
240
|
-
|
241
|
-
# rule '
|
333
|
+
|
334
|
+
# rule 'expression' => %w[pattern separator flags]
|
335
|
+
def reduce_expression_0(aProduction, aRange, theTokens, theChildren)
|
336
|
+
@options = theChildren[2] if theChildren[2]
|
337
|
+
return_first_child(aRange, theTokens, theChildren)
|
338
|
+
end
|
339
|
+
|
340
|
+
# rule 'pattern' => %w[pattern separator quantifiable]
|
242
341
|
def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
|
243
342
|
return Regex::Concatenation.new(theChildren[0], theChildren[2])
|
244
343
|
end
|
245
344
|
|
246
|
-
# rule '
|
247
|
-
def
|
248
|
-
|
345
|
+
# rule 'flags' => %[flags separator single_flag]
|
346
|
+
def reduce_flags_0(aProduction, aRange, theTokens, theChildren)
|
347
|
+
theChildren[0] << theChildren[2]
|
249
348
|
end
|
250
349
|
|
251
|
-
# rule '
|
350
|
+
# rule 'single_flag' => %w[CASE INSENSITIVE]
|
351
|
+
def reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
|
352
|
+
return [ Regex::MatchOption.new(:IGNORECASE, true) ]
|
353
|
+
end
|
354
|
+
|
355
|
+
# rule 'single_flag' => %w[MULTI LINE]
|
356
|
+
def reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
|
357
|
+
return [ Regex::MatchOption.new(:MULTILINE, true) ]
|
358
|
+
end
|
359
|
+
|
360
|
+
# rule 'single_flag' => %w[ALL LAZY]
|
361
|
+
def reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
|
362
|
+
return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
|
363
|
+
end
|
364
|
+
|
365
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
366
|
+
def reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
|
367
|
+
theChildren[1].begin_anchor = theChildren[0]
|
368
|
+
theChildren[1].end_anchor = theChildren[2]
|
369
|
+
return theChildren[1]
|
370
|
+
end
|
371
|
+
|
372
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable]
|
252
373
|
def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
|
253
|
-
|
254
|
-
|
374
|
+
theChildren[1].begin_anchor = theChildren[0]
|
375
|
+
return theChildren[1]
|
376
|
+
end
|
377
|
+
|
378
|
+
# rule 'quantifiable' => %w[anchorable end_anchor]
|
379
|
+
def reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
|
380
|
+
theChildren[0].end_anchor = theChildren[1]
|
381
|
+
return theChildren[0]
|
382
|
+
end
|
383
|
+
|
384
|
+
# rule 'begin_anchor' => %w[STARTS WITH]
|
385
|
+
# rule 'begin_anchor' => %w[BEGIN WITH]
|
386
|
+
def reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
|
387
|
+
return Regex::Anchor.new('^')
|
388
|
+
end
|
389
|
+
|
390
|
+
# rule 'end_anchor' => %w[MUST END]
|
391
|
+
def reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
|
392
|
+
return Regex::Anchor.new('$')
|
393
|
+
end
|
394
|
+
|
395
|
+
|
396
|
+
# rule 'anchorable' => %w[assertable assertion]
|
397
|
+
def reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
398
|
+
assertion = theChildren.last
|
399
|
+
assertion.children.unshift(theChildren[0])
|
400
|
+
return assertion
|
401
|
+
end
|
402
|
+
|
403
|
+
# rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
404
|
+
def reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
|
405
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
|
406
|
+
end
|
407
|
+
|
408
|
+
# rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
409
|
+
def reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
|
410
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
|
411
|
+
end
|
412
|
+
|
413
|
+
# rule 'assertion' => %w[IF ALREADY HAD assertable]
|
414
|
+
def reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
|
415
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :positive)
|
416
|
+
end
|
417
|
+
|
418
|
+
# rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
419
|
+
def reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
|
420
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :negative)
|
421
|
+
end
|
422
|
+
|
423
|
+
# rule 'assertable' => %w[term quantifier]
|
424
|
+
def reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
|
425
|
+
quantifier = theChildren[1]
|
426
|
+
term = theChildren[0]
|
255
427
|
repetition(term, quantifier)
|
256
428
|
end
|
257
429
|
|
@@ -348,37 +520,56 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
348
520
|
raw_literal = theChildren[-1].token.lexeme.dup
|
349
521
|
return string_literal(raw_literal)
|
350
522
|
end
|
351
|
-
|
523
|
+
|
352
524
|
# rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
|
353
525
|
def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
|
354
526
|
return Regex::Alternation.new(*theChildren[3])
|
355
527
|
end
|
356
528
|
|
357
|
-
# rule 'alternatives' => %w[alternatives
|
529
|
+
# rule 'alternatives' => %w[alternatives separator quantifiable]
|
358
530
|
def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
|
359
531
|
return theChildren[0] << theChildren[-1]
|
360
532
|
end
|
361
533
|
|
362
|
-
# rule 'alternatives' => %w[alternatives quantifiable]
|
363
|
-
def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
364
|
-
return theChildren[0] << theChildren[-1]
|
365
|
-
end
|
366
|
-
|
367
534
|
# rule 'alternatives' => 'quantifiable'
|
368
|
-
def
|
535
|
+
def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
369
536
|
return [theChildren.last]
|
370
537
|
end
|
371
|
-
|
538
|
+
|
372
539
|
# rule 'grouping' => %w[LPAREN pattern RPAREN]
|
373
540
|
def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
|
374
|
-
return Regex::NonCapturingGroup.new(theChildren[1])
|
541
|
+
return Regex::NonCapturingGroup.new(theChildren[1])
|
375
542
|
end
|
376
543
|
|
544
|
+
# rule 'capturing_group' => %w[CAPTURE assertable]
|
545
|
+
def reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
|
546
|
+
return Regex::CapturingGroup.new(theChildren[1])
|
547
|
+
end
|
548
|
+
|
549
|
+
# rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
550
|
+
def reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
|
551
|
+
group = Regex::CapturingGroup.new(theChildren[1])
|
552
|
+
return Regex::Concatenation.new(group, theChildren[3])
|
553
|
+
end
|
554
|
+
|
555
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
556
|
+
def reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
|
557
|
+
name = theChildren[3].token.lexeme.dup
|
558
|
+
return Regex::CapturingGroup.new(theChildren[1], name)
|
559
|
+
end
|
560
|
+
|
561
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
562
|
+
def reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
|
563
|
+
name = theChildren[3].token.lexeme.dup
|
564
|
+
group = Regex::CapturingGroup.new(theChildren[1], name)
|
565
|
+
return Regex::Concatenation.new(group, theChildren[5])
|
566
|
+
end
|
567
|
+
|
377
568
|
# rule 'quantifier' => %w[EXACTLY count TIMES]
|
378
569
|
def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
|
379
570
|
count = theChildren[1].token.lexeme.to_i
|
380
571
|
multiplicity(count, count)
|
381
|
-
end
|
572
|
+
end
|
382
573
|
|
383
574
|
# rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
|
384
575
|
def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
|
@@ -9,6 +9,8 @@ module SRL
|
|
9
9
|
add_terminals('LPAREN', 'RPAREN', 'COMMA')
|
10
10
|
add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
|
11
11
|
add_terminals('LITERALLY', 'STRING_LIT')
|
12
|
+
add_terminals('BEGIN', 'STARTS', 'WITH')
|
13
|
+
add_terminals('MUST', 'END')
|
12
14
|
add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
|
13
15
|
add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
|
14
16
|
add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
|
@@ -17,16 +19,42 @@ module SRL
|
|
17
19
|
add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
|
18
20
|
add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
|
19
21
|
add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
|
22
|
+
add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
|
23
|
+
add_terminals('ALREADY', 'HAD')
|
24
|
+
add_terminals('CAPTURE', 'AS', 'UNTIL')
|
25
|
+
add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
|
26
|
+
add_terminals('LAZY')
|
20
27
|
|
21
|
-
rule 'srl' => '
|
22
|
-
rule '
|
23
|
-
rule '
|
28
|
+
rule 'srl' => 'expression'
|
29
|
+
rule 'expression' => %w[pattern separator flags]
|
30
|
+
rule 'expression' => 'pattern'
|
31
|
+
rule 'pattern' => %w[pattern separator quantifiable]
|
24
32
|
rule 'pattern' => 'quantifiable'
|
25
|
-
rule '
|
26
|
-
rule '
|
33
|
+
rule 'separator' => 'COMMA'
|
34
|
+
rule 'separator' => []
|
35
|
+
rule 'flags' => %[flags separator single_flag]
|
36
|
+
rule 'single_flag' => %w[CASE INSENSITIVE]
|
37
|
+
rule 'single_flag' => %w[MULTI LINE]
|
38
|
+
rule 'single_flag' => %w[ALL LAZY]
|
39
|
+
rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
40
|
+
rule 'quantifiable' => %w[begin_anchor anchorable]
|
41
|
+
rule 'quantifiable' => %w[anchorable end_anchor]
|
42
|
+
rule 'quantifiable' => 'anchorable'
|
43
|
+
rule 'begin_anchor' => %w[STARTS WITH]
|
44
|
+
rule 'begin_anchor' => %w[BEGIN WITH]
|
45
|
+
rule 'end_anchor' => %w[MUST END]
|
46
|
+
rule 'anchorable' => 'assertable'
|
47
|
+
rule 'anchorable' => %w[assertable assertion]
|
48
|
+
rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
49
|
+
rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
50
|
+
rule 'assertion' => %w[IF ALREADY HAD assertable]
|
51
|
+
rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
52
|
+
rule 'assertable' => 'term'
|
53
|
+
rule 'assertable' => %w[term quantifier]
|
27
54
|
rule 'term' => 'atom'
|
28
55
|
rule 'term' => 'alternation'
|
29
56
|
rule 'term' => 'grouping'
|
57
|
+
rule 'term' => 'capturing_group'
|
30
58
|
rule 'atom' => 'letter_range'
|
31
59
|
rule 'atom' => 'digit_range'
|
32
60
|
rule 'atom' => 'character_class'
|
@@ -49,10 +77,14 @@ module SRL
|
|
49
77
|
rule 'special_char' => %w[NEW LINE]
|
50
78
|
rule 'literal' => %w[LITERALLY STRING_LIT]
|
51
79
|
rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
|
52
|
-
rule 'alternatives' => %w[alternatives
|
53
|
-
rule 'alternatives' => %w[alternatives quantifiable]
|
80
|
+
rule 'alternatives' => %w[alternatives separator quantifiable]
|
54
81
|
rule 'alternatives' => 'quantifiable'
|
55
82
|
rule 'grouping' => %w[LPAREN pattern RPAREN]
|
83
|
+
rule 'capturing_group' => %w[CAPTURE assertable]
|
84
|
+
rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
85
|
+
rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
86
|
+
rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
87
|
+
rule 'var_name' => 'STRING_LIT'
|
56
88
|
rule 'quantifier' => 'ONCE'
|
57
89
|
rule 'quantifier' => 'TWICE'
|
58
90
|
rule 'quantifier' => %w[EXACTLY count TIMES]
|
@@ -13,10 +13,11 @@ class Alternation < PolyadicExpression
|
|
13
13
|
super(theChildren)
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
protected
|
17
|
+
|
17
18
|
# Conversion method re-definition.
|
18
19
|
# Purpose: Return the String representation of the concatenated expressions.
|
19
|
-
def
|
20
|
+
def text_repr()
|
20
21
|
result_children = children.map { |aChild| aChild.to_str() }
|
21
22
|
result = '(?:' + result_children.join('|') + ')'
|
22
23
|
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# File: anchor.rb
|
2
|
+
|
3
|
+
require_relative "atomic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# An anchor is a zero-width assertion based on the current position.
|
7
|
+
class Anchor < AtomicExpression
|
8
|
+
# A Hash for converting a lexeme to a symbolic value
|
9
|
+
AnchorToSymbol = {
|
10
|
+
# Lexeme => Symbol value
|
11
|
+
'^' => :soLine, # Start of line
|
12
|
+
'$' => :eoLine, # End of line
|
13
|
+
'\A' => :soSubject,
|
14
|
+
'\b' => :wordBoundary,
|
15
|
+
'\B' => :nonAtWordBoundary,
|
16
|
+
'\G' => :firstMatch,
|
17
|
+
'\z' => :eoSubject,
|
18
|
+
'\Z' => :eoSubjectOrBeforeNLAtEnd
|
19
|
+
}
|
20
|
+
|
21
|
+
# A symbolic value that identifies the type of assertion to perform
|
22
|
+
attr_reader(:kind)
|
23
|
+
|
24
|
+
# Constructor
|
25
|
+
# @param aKind [String] Lexeme representation of the anchor
|
26
|
+
def initialize(aKind)
|
27
|
+
@kind = valid_kind(aKind)
|
28
|
+
end
|
29
|
+
|
30
|
+
public
|
31
|
+
|
32
|
+
# Conversion method re-definition.
|
33
|
+
# Purpose: Return the String representation of the expression.
|
34
|
+
def to_str()
|
35
|
+
return AnchorToSymbol.rassoc(kind).first()
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Return the symbolic value corresponding to the given lexeme.
|
41
|
+
def valid_kind(aKind)
|
42
|
+
return AnchorToSymbol[aKind]
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class
|
46
|
+
end # module
|
47
|
+
|
48
|
+
# End of file
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# File: capturing_group.rb
|
2
|
+
|
3
|
+
require_relative "monadic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# An association between a capture variable and an expression
|
8
|
+
# whose matching text is assigned to that variable.
|
9
|
+
class CapturingGroup < MonadicExpression
|
10
|
+
# The capture variable id. It is a Fixnum when the capture group gets a sequence number,
|
11
|
+
# a String when it is a user-defined name
|
12
|
+
attr_reader(:id)
|
13
|
+
|
14
|
+
# When true, then capturing group forbids backtracking requests from its parent expression.
|
15
|
+
attr_reader(:no_backtrack)
|
16
|
+
|
17
|
+
# Constructor.
|
18
|
+
# [aChildExpression] A sub-expression to match. When successful the matching text is assigned to the capture variable.
|
19
|
+
# [theId] The id of the capture variable.
|
20
|
+
# [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
|
21
|
+
def initialize(aChildExpression, theId = nil, noBacktrack = false)
|
22
|
+
super(aChildExpression)
|
23
|
+
@id = theId
|
24
|
+
@no_backtrack = noBacktrack
|
25
|
+
end
|
26
|
+
|
27
|
+
public
|
28
|
+
# Return true iff the capturing group has a name (and not a sequence number)
|
29
|
+
def named?()
|
30
|
+
return id.kind_of?(String)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Conversion method re-definition.
|
34
|
+
# Purpose: Return the String representation of the captured expression.
|
35
|
+
def to_str()
|
36
|
+
prefix = named? ? "?<#{id}>" : ''
|
37
|
+
atomic = no_backtrack ? '?>' : ''
|
38
|
+
if child.is_a?(Regex::NonCapturingGroup)
|
39
|
+
# Minor optimization
|
40
|
+
result = '(' + atomic + prefix + child.child.to_str + ")"
|
41
|
+
else
|
42
|
+
result = '(' + atomic + prefix + child.to_str + ")"
|
43
|
+
end
|
44
|
+
return result
|
45
|
+
end
|
46
|
+
|
47
|
+
end # class
|
48
|
+
end # module
|
49
|
+
|
50
|
+
# End of file
|
@@ -18,10 +18,11 @@ module Regex # This module is used as a namespace
|
|
18
18
|
@negated = to_negate
|
19
19
|
end
|
20
20
|
|
21
|
-
|
21
|
+
protected
|
22
|
+
|
22
23
|
# Conversion method re-definition.
|
23
24
|
# Purpose: Return the String representation of the character class.
|
24
|
-
def
|
25
|
+
def text_repr()
|
25
26
|
result_children = children.inject('') do |subResult, aChild|
|
26
27
|
if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
|
27
28
|
subResult << "\\" # Escape meta-character...
|
@@ -26,11 +26,13 @@ public
|
|
26
26
|
# Return the upper bound of the range.
|
27
27
|
def upper()
|
28
28
|
return children.last
|
29
|
-
end
|
29
|
+
end
|
30
|
+
|
31
|
+
protected
|
30
32
|
|
31
33
|
# Conversion method re-definition.
|
32
34
|
# Purpose: Return the String representation of the concatenated expressions.
|
33
|
-
def
|
35
|
+
def text_repr()
|
34
36
|
result = lower.to_str() + '-' + upper.to_str()
|
35
37
|
|
36
38
|
return result
|
@@ -28,10 +28,11 @@ module Regex # This module is used as a namespace
|
|
28
28
|
@shortname = valid_shortname(aShortname)
|
29
29
|
end
|
30
30
|
|
31
|
-
|
31
|
+
protected
|
32
|
+
|
32
33
|
# Conversion method re-definition.
|
33
34
|
# Purpose: Return the String representation of the expression.
|
34
|
-
def
|
35
|
+
def text_repr()
|
35
36
|
return "\\#{shortname}"
|
36
37
|
end
|
37
38
|
|
@@ -106,20 +106,6 @@ public
|
|
106
106
|
self.class.codepoint2char(@codepoint)
|
107
107
|
end
|
108
108
|
|
109
|
-
# Conversion method re-definition.
|
110
|
-
# Purpose: Return the String representation of the expression.
|
111
|
-
# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
|
112
|
-
# Otherwise the character corresponding to the codepoint is returned.
|
113
|
-
def to_str()
|
114
|
-
if lexeme.nil?
|
115
|
-
result = char()
|
116
|
-
else
|
117
|
-
result = lexeme.dup()
|
118
|
-
end
|
119
|
-
|
120
|
-
return result
|
121
|
-
end
|
122
|
-
|
123
109
|
# Returns true iff this Character and parameter 'another' represent the same character.
|
124
110
|
# [another] any Object. The way the equality is tested depends on the another's class
|
125
111
|
# Example:
|
@@ -152,6 +138,22 @@ public
|
|
152
138
|
def explain()
|
153
139
|
return "the character '#{to_str()}'"
|
154
140
|
end
|
141
|
+
|
142
|
+
protected
|
143
|
+
|
144
|
+
# Conversion method re-definition.
|
145
|
+
# Purpose: Return the String representation of the expression.
|
146
|
+
# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
|
147
|
+
# Otherwise the character corresponding to the codepoint is returned.
|
148
|
+
def text_repr()
|
149
|
+
if lexeme.nil?
|
150
|
+
result = char()
|
151
|
+
else
|
152
|
+
result = lexeme.dup()
|
153
|
+
end
|
154
|
+
|
155
|
+
return result
|
156
|
+
end
|
155
157
|
|
156
158
|
private
|
157
159
|
# Conversion method that returns a codepoint for the given two characters (digram) escape sequence.
|
@@ -4,24 +4,25 @@ require_relative 'polyadic_expression' # Access the superclass
|
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
|
7
|
-
# Abstract class. A n-ary matching operator.
|
7
|
+
# Abstract class. A n-ary matching operator.
|
8
8
|
# It succeeds when each child succeeds to match the subject text in the same
|
9
9
|
# serial arrangement than defined by this concatenation.
|
10
10
|
class Concatenation < PolyadicExpression
|
11
|
-
|
11
|
+
|
12
12
|
# Constructor.
|
13
13
|
def initialize(*theChildren)
|
14
14
|
super(theChildren)
|
15
15
|
end
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
protected
|
18
|
+
|
18
19
|
# Conversion method re-definition.
|
19
20
|
# Purpose: Return the String representation of the concatenated expressions.
|
20
|
-
def
|
21
|
-
result = children.inject('') { |result, aChild|
|
21
|
+
def text_repr()
|
22
|
+
result = children.inject('') { |result, aChild|
|
22
23
|
result << aChild.to_str()
|
23
24
|
}
|
24
|
-
|
25
|
+
|
25
26
|
return result
|
26
27
|
end
|
27
28
|
|
@@ -5,7 +5,11 @@ require_relative 'abstract_method'
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
|
7
7
|
# Abstract class. The generalization of any valid regular (sub)expression.
|
8
|
-
class Expression
|
8
|
+
class Expression
|
9
|
+
attr_accessor :begin_anchor
|
10
|
+
attr_accessor :end_anchor
|
11
|
+
|
12
|
+
# Constructor
|
9
13
|
def initialize()
|
10
14
|
end
|
11
15
|
|
@@ -20,20 +24,37 @@ public
|
|
20
24
|
def cardinality(theParentOptions) abstract_method
|
21
25
|
end
|
22
26
|
|
23
|
-
protected
|
24
27
|
# Determine the matching options to apply to this object, given the options coming from the parent
|
25
28
|
# and options that are local to this object. Local options take precedence.
|
26
|
-
#
|
29
|
+
# @param theParentOptions [Hash] matching options. They are overridden by options with same name
|
27
30
|
# that are bound to this object.
|
28
31
|
def options(theParentOptions)
|
29
32
|
resulting_options = theParentOptions.merge(@local_options)
|
30
33
|
return resulting_options
|
31
34
|
end
|
32
35
|
|
33
|
-
#
|
36
|
+
# Template method.
|
34
37
|
# Purpose: Return the String representation of the expression.
|
35
|
-
def to_str()
|
38
|
+
def to_str()
|
39
|
+
result = ''
|
40
|
+
result << prefix
|
41
|
+
result << text_repr
|
42
|
+
result << suffix
|
43
|
+
|
44
|
+
return result
|
36
45
|
end
|
46
|
+
|
47
|
+
protected
|
48
|
+
|
49
|
+
def prefix()
|
50
|
+
begin_anchor ? begin_anchor.to_str : ''
|
51
|
+
end
|
52
|
+
|
53
|
+
def suffix()
|
54
|
+
end_anchor ? end_anchor.to_str : ''
|
55
|
+
end
|
56
|
+
|
57
|
+
|
37
58
|
|
38
59
|
end # class
|
39
60
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# File: Lookaround.rb
|
2
|
+
|
3
|
+
########################
|
4
|
+
# TODO: make it a binary expression
|
5
|
+
########################
|
6
|
+
|
7
|
+
|
8
|
+
require_relative 'polyadic_expression' # Access the superclass
|
9
|
+
|
10
|
+
module Regex # This module is used as a namespace
|
11
|
+
# Lookaround is a zero-width assertion just like the start and end of line anchors.
|
12
|
+
# The difference is that lookarounds will actually match characters, but only return the result of the match: match or no match.
|
13
|
+
# That is why they are called "assertions". They do not consume characters from the subject,
|
14
|
+
# but only assert whether a match is possible or not.
|
15
|
+
class Lookaround < PolyadicExpression
|
16
|
+
# The "direction" of the lookaround. Can be ahead or behind. It specifies the relative position of the
|
17
|
+
# expression to match compared to the current 'position' in the subject text.
|
18
|
+
attr_reader(:dir)
|
19
|
+
|
20
|
+
# The kind indicates whether the assertion is positive (succeeds when there is a match) or negative
|
21
|
+
# (assertion succeeds when there is NO match).
|
22
|
+
attr_reader(:kind)
|
23
|
+
|
24
|
+
# Constructor.
|
25
|
+
# [assertedExpression] A sub-expression to match.
|
26
|
+
# [theDir] One of the following values: [ :ahead, :behind ]
|
27
|
+
# [theKind] One of the following values: [ :positive, :negative ]
|
28
|
+
def initialize(assertedExpression, theDir, theKind)
|
29
|
+
super([assertedExpression])
|
30
|
+
@dir, @kind = theDir, theKind
|
31
|
+
end
|
32
|
+
|
33
|
+
public
|
34
|
+
# Conversion method re-definition.
|
35
|
+
# Purpose: Return the String representation of the lookaround assertion.
|
36
|
+
def to_str()
|
37
|
+
result = children[0].to_str
|
38
|
+
dir_syntax = (dir == :ahead) ? '' : '<'
|
39
|
+
kind_syntax = (kind == :positive)? '=' : '!'
|
40
|
+
result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ")"
|
41
|
+
return result
|
42
|
+
end
|
43
|
+
|
44
|
+
end # class
|
45
|
+
end # module
|
46
|
+
|
47
|
+
# End of file
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# File: MatchOption.rb
|
2
|
+
|
3
|
+
module Regex # This module is used as a namespace
|
4
|
+
|
5
|
+
# Represents an option that influences the way a regular (sub)expression can perform its matching.
|
6
|
+
class MatchOption
|
7
|
+
# The symbolic name of the option
|
8
|
+
attr_reader(:name)
|
9
|
+
|
10
|
+
# An indicator that tells whether the option is turned on or off
|
11
|
+
attr_reader(:setting)
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
def initialize(theName, theSetting)
|
15
|
+
@name, @setting = theName, theSetting
|
16
|
+
end
|
17
|
+
|
18
|
+
public
|
19
|
+
# Equality operator
|
20
|
+
def ==(another)
|
21
|
+
return true if self.object_id == another.object_id
|
22
|
+
|
23
|
+
if another.kind_of?(MatchOption)
|
24
|
+
isEqual = ((name == another.name) && (setting == another.setting))
|
25
|
+
else
|
26
|
+
isEqual = false
|
27
|
+
end
|
28
|
+
|
29
|
+
return isEqual
|
30
|
+
end
|
31
|
+
|
32
|
+
end # class
|
33
|
+
|
34
|
+
end # module
|
35
|
+
|
36
|
+
# End of file
|
@@ -14,10 +14,11 @@ module Regex # This module is used as a namespace
|
|
14
14
|
super(aChildExpression)
|
15
15
|
end
|
16
16
|
|
17
|
-
|
17
|
+
protected
|
18
|
+
|
18
19
|
# Conversion method re-definition.
|
19
20
|
# Purpose: Return the String representation of the non-capturing group.
|
20
|
-
def
|
21
|
+
def text_repr()
|
21
22
|
result = '(?:' + all_child_text() + ")"
|
22
23
|
return result
|
23
24
|
end
|
@@ -16,10 +16,11 @@ class Repetition < MonadicExpression
|
|
16
16
|
@multiplicity = aMultiplicity
|
17
17
|
end
|
18
18
|
|
19
|
-
|
19
|
+
protected
|
20
|
+
|
20
21
|
# Conversion method re-definition.
|
21
22
|
# Purpose: Return the String representation of the child expression followed by its multiplicity.
|
22
|
-
def
|
23
|
+
def text_repr()
|
23
24
|
result = all_child_text() + multiplicity.to_str()
|
24
25
|
return result
|
25
26
|
end
|
@@ -8,3 +8,6 @@ require_relative './regex/char_shorthand'
|
|
8
8
|
require_relative './regex/wildcard'
|
9
9
|
require_relative './regex/alternation'
|
10
10
|
require_relative './regex/non_capturing_group'
|
11
|
+
require_relative './regex/anchor'
|
12
|
+
require_relative './regex/lookaround'
|
13
|
+
require_relative './regex/capturing_group'
|
@@ -22,41 +22,60 @@ module SRL
|
|
22
22
|
')' => 'RPAREN',
|
23
23
|
',' => 'COMMA'
|
24
24
|
}.freeze
|
25
|
-
|
25
|
+
|
26
26
|
# Here are all the SRL keywords (in uppercase)
|
27
27
|
@@keywords = %w[
|
28
|
+
ALL
|
29
|
+
ALREADY
|
28
30
|
AND
|
29
31
|
ANY
|
30
32
|
ANYTHING
|
33
|
+
AS
|
31
34
|
AT
|
32
35
|
BACKSLASH
|
36
|
+
BEGIN
|
33
37
|
BETWEEN
|
38
|
+
BY
|
39
|
+
CAPTURE
|
40
|
+
CASE
|
34
41
|
CHARACTER
|
35
42
|
DIGIT
|
43
|
+
END
|
36
44
|
EXACTLY
|
45
|
+
FOLLOWED
|
37
46
|
FROM
|
47
|
+
HAD
|
48
|
+
IF
|
49
|
+
INSENSITIVE
|
50
|
+
LAZY
|
38
51
|
LEAST
|
39
52
|
LETTER
|
40
53
|
LINE
|
41
54
|
LITERALLY
|
42
55
|
MORE
|
56
|
+
MULTI
|
57
|
+
MUST
|
43
58
|
NEVER
|
44
59
|
NEW
|
45
60
|
NO
|
61
|
+
NOT
|
46
62
|
NUMBER
|
47
63
|
OF
|
48
64
|
ONCE
|
49
65
|
ONE
|
50
66
|
OPTIONAL
|
51
67
|
OR
|
68
|
+
STARTS
|
52
69
|
TAB
|
53
70
|
TIMES
|
54
71
|
TO
|
55
72
|
TWICE
|
73
|
+
UNTIL
|
56
74
|
UPPERCASE
|
57
75
|
WHITESPACE
|
76
|
+
WITH
|
58
77
|
].map { |x| [x, x] } .to_h
|
59
|
-
|
78
|
+
|
60
79
|
class ScanError < StandardError; end
|
61
80
|
|
62
81
|
def initialize(source, aGrammar)
|
@@ -81,16 +100,16 @@ module SRL
|
|
81
100
|
skip_whitespaces
|
82
101
|
curr_ch = scanner.peek(1)
|
83
102
|
return nil if curr_ch.nil? || curr_ch.empty?
|
84
|
-
|
103
|
+
|
85
104
|
token = nil
|
86
105
|
|
87
106
|
if '(),'.include? curr_ch
|
88
107
|
# Delimiters, separators => single character token
|
89
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
108
|
+
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
90
109
|
elsif (lexeme = scanner.scan(/[0-9]{2,}/))
|
91
110
|
token = build_token('INTEGER', lexeme) # An integer has two or more digits
|
92
111
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
93
|
-
token = build_token('DIGIT_LIT', lexeme)
|
112
|
+
token = build_token('DIGIT_LIT', lexeme)
|
94
113
|
elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
|
95
114
|
token = build_token(@@keywords[lexeme.upcase], lexeme)
|
96
115
|
# TODO: handle case unknown identifier
|
@@ -111,7 +130,7 @@ module SRL
|
|
111
130
|
|
112
131
|
return token
|
113
132
|
end
|
114
|
-
|
133
|
+
|
115
134
|
def build_token(aSymbolName, aLexeme)
|
116
135
|
token_type = name2symbol[aSymbolName]
|
117
136
|
begin
|
@@ -120,7 +139,7 @@ module SRL
|
|
120
139
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
121
140
|
raise ex
|
122
141
|
end
|
123
|
-
|
142
|
+
|
124
143
|
return token
|
125
144
|
end
|
126
145
|
|
@@ -151,7 +151,6 @@ describe 'Integration tests:' do
|
|
151
151
|
end
|
152
152
|
end # context
|
153
153
|
|
154
|
-
|
155
154
|
context 'Parsing special character declarations:' do
|
156
155
|
it "should parse 'tab' syntax" do
|
157
156
|
result = parse('tab')
|
@@ -197,11 +196,16 @@ describe 'Integration tests:' do
|
|
197
196
|
message_prefix = /Premature end of input after ','/
|
198
197
|
expect(result.failure_reason.message).to match(message_prefix)
|
199
198
|
end
|
199
|
+
|
200
|
+
it 'should parse concatenation' do
|
201
|
+
result = parse('any of (literally "sample", (digit once or more))')
|
202
|
+
expect(result).to be_success
|
203
|
+
|
204
|
+
regexp = regexp_repr(result)
|
205
|
+
expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
|
206
|
+
end
|
200
207
|
|
201
|
-
it "should parse a sequence of patterns" do
|
202
|
-
#
|
203
|
-
# DEBUG When I put a comma at the end ... looping endlessly
|
204
|
-
#
|
208
|
+
it "should parse a long sequence of patterns" do
|
205
209
|
source = <<-ENDS
|
206
210
|
any of (any character, one of "._%-+") once or more,
|
207
211
|
literally "@",
|
@@ -289,6 +293,145 @@ ENDS
|
|
289
293
|
expect(regexp.to_str).to eq('[p-t]{10,}')
|
290
294
|
end
|
291
295
|
end # context
|
296
|
+
|
297
|
+
context 'Parsing lookaround:' do
|
298
|
+
it 'should parse positive lookahead' do
|
299
|
+
result = parse('letter if followed by (anything once or more, digit)')
|
300
|
+
expect(result).to be_success
|
301
|
+
|
302
|
+
regexp = regexp_repr(result)
|
303
|
+
expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
|
304
|
+
end
|
305
|
+
|
306
|
+
it 'should parse negative lookahead' do
|
307
|
+
result = parse('letter if not followed by (anything once or more, digit)')
|
308
|
+
expect(result).to be_success
|
309
|
+
|
310
|
+
regexp = regexp_repr(result)
|
311
|
+
expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
|
312
|
+
end
|
313
|
+
|
314
|
+
it 'should parse positive lookbehind' do
|
315
|
+
result = parse('literally "bar" if already had literally "foo"')
|
316
|
+
expect(result).to be_success
|
317
|
+
|
318
|
+
regexp = regexp_repr(result)
|
319
|
+
expect(regexp.to_str).to eq('bar(?<=foo)')
|
320
|
+
end
|
321
|
+
|
322
|
+
it 'should parse negative lookbehind' do
|
323
|
+
result = parse('literally "bar" if not already had literally "foo"')
|
324
|
+
expect(result).to be_success
|
325
|
+
|
326
|
+
regexp = regexp_repr(result)
|
327
|
+
expect(regexp.to_str).to eq('bar(?<!foo)')
|
328
|
+
end
|
329
|
+
end # context
|
330
|
+
|
331
|
+
context 'Parsing capturing group:' do
|
332
|
+
it 'should parse simple anonymous capturing group' do
|
333
|
+
result = parse('capture(literally "sample")')
|
334
|
+
expect(result).to be_success
|
335
|
+
|
336
|
+
regexp = regexp_repr(result)
|
337
|
+
expect(regexp.to_str).to eq('(sample)')
|
338
|
+
end
|
339
|
+
|
340
|
+
it 'should parse complex anonymous capturing group' do
|
341
|
+
result = parse('capture(any of (literally "sample", (digit once or more)))')
|
342
|
+
expect(result).to be_success
|
343
|
+
|
344
|
+
regexp = regexp_repr(result)
|
345
|
+
expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
|
346
|
+
end
|
347
|
+
|
348
|
+
it 'should parse simple anonymous until capturing group' do
|
349
|
+
result = parse('capture anything once or more until literally "!"')
|
350
|
+
expect(result).to be_success
|
351
|
+
|
352
|
+
regexp = regexp_repr(result)
|
353
|
+
expect(regexp.to_str).to eq('(.+)!')
|
354
|
+
end
|
355
|
+
|
356
|
+
it 'should parse complex named capturing group' do
|
357
|
+
result = parse('capture(any of (literally "sample", (digit once or more))) as "foo"')
|
358
|
+
expect(result).to be_success
|
359
|
+
|
360
|
+
regexp = regexp_repr(result)
|
361
|
+
expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
|
362
|
+
end
|
363
|
+
|
364
|
+
it 'should parse a sequence with named capturing groups' do
|
365
|
+
source = <<-ENDS
|
366
|
+
capture (anything once or more) as "first",
|
367
|
+
literally " - ",
|
368
|
+
capture literally "second part" as "second"
|
369
|
+
ENDS
|
370
|
+
result = parse(source)
|
371
|
+
expect(result).to be_success
|
372
|
+
|
373
|
+
regexp = regexp_repr(result)
|
374
|
+
expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
|
375
|
+
end
|
376
|
+
|
377
|
+
it 'should parse complex named until capturing group' do
|
378
|
+
result = parse('capture (anything once or more) as "foo" until literally "m"')
|
379
|
+
expect(result).to be_success
|
380
|
+
|
381
|
+
regexp = regexp_repr(result)
|
382
|
+
expect(regexp.to_str).to eq('(?<foo>.+)m')
|
383
|
+
end
|
384
|
+
|
385
|
+
end # context
|
386
|
+
|
387
|
+
context 'Parsing anchors:' do
|
388
|
+
it 'should parse begin anchors' do
|
389
|
+
result = parse('starts with literally "match"')
|
390
|
+
expect(result).to be_success
|
391
|
+
|
392
|
+
regexp = regexp_repr(result)
|
393
|
+
expect(regexp.to_str).to eq('^match')
|
394
|
+
end
|
395
|
+
|
396
|
+
it 'should parse begin anchors (alternative syntax)' do
|
397
|
+
result = parse('begin with literally "match"')
|
398
|
+
expect(result).to be_success
|
399
|
+
|
400
|
+
regexp = regexp_repr(result)
|
401
|
+
expect(regexp.to_str).to eq('^match')
|
402
|
+
end
|
403
|
+
|
404
|
+
it 'should parse end anchors' do
|
405
|
+
result = parse('literally "match" must end')
|
406
|
+
expect(result).to be_success
|
407
|
+
|
408
|
+
regexp = regexp_repr(result)
|
409
|
+
expect(regexp.to_str).to eq('match$')
|
410
|
+
end
|
411
|
+
|
412
|
+
it 'should parse combination of begin and end anchors' do
|
413
|
+
result = parse('starts with literally "match" must end')
|
414
|
+
expect(result).to be_success
|
415
|
+
|
416
|
+
regexp = regexp_repr(result)
|
417
|
+
expect(regexp.to_str).to eq('^match$')
|
418
|
+
end
|
419
|
+
|
420
|
+
it "should accept anchor with a sequence of patterns" do
|
421
|
+
source = <<-ENDS
|
422
|
+
begin with any of (digit, letter, one of ".-") once or more,
|
423
|
+
literally ".",
|
424
|
+
letter at least 2 times must end
|
425
|
+
ENDS
|
426
|
+
|
427
|
+
result = parse(source)
|
428
|
+
expect(result).to be_success
|
429
|
+
|
430
|
+
regexp = regexp_repr(result)
|
431
|
+
# SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
432
|
+
expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
433
|
+
end
|
434
|
+
end # context
|
292
435
|
end # describe
|
293
436
|
|
294
437
|
|
data/lib/rley/constants.rb
CHANGED
@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
|
|
135
135
|
process_middle_entry(anEntry, anIndex)
|
136
136
|
end
|
137
137
|
else
|
138
|
-
$stderr.puts "Internal Errore '#{anEvent}'"
|
138
|
+
$stderr.puts "Internal Errore '#{anEvent}', entry: #{anEntry}, index: #{anIndex}"
|
139
139
|
raise NotImplementedError
|
140
140
|
end
|
141
141
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -149,7 +149,9 @@ files:
|
|
149
149
|
- examples/general/SRL/lib/parser.rb
|
150
150
|
- examples/general/SRL/lib/regex/abstract_method.rb
|
151
151
|
- examples/general/SRL/lib/regex/alternation.rb
|
152
|
+
- examples/general/SRL/lib/regex/anchor.rb
|
152
153
|
- examples/general/SRL/lib/regex/atomic_expression.rb
|
154
|
+
- examples/general/SRL/lib/regex/capturing_group.rb
|
153
155
|
- examples/general/SRL/lib/regex/char_class.rb
|
154
156
|
- examples/general/SRL/lib/regex/char_range.rb
|
155
157
|
- examples/general/SRL/lib/regex/char_shorthand.rb
|
@@ -157,6 +159,8 @@ files:
|
|
157
159
|
- examples/general/SRL/lib/regex/compound_expression.rb
|
158
160
|
- examples/general/SRL/lib/regex/concatenation.rb
|
159
161
|
- examples/general/SRL/lib/regex/expression.rb
|
162
|
+
- examples/general/SRL/lib/regex/lookaround.rb
|
163
|
+
- examples/general/SRL/lib/regex/match_option.rb
|
160
164
|
- examples/general/SRL/lib/regex/monadic_expression.rb
|
161
165
|
- examples/general/SRL/lib/regex/multiplicity.rb
|
162
166
|
- examples/general/SRL/lib/regex/non_capturing_group.rb
|