rley 0.5.11 → 0.5.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/examples/general/SRL/lib/ast_builder.rb +229 -38
- data/examples/general/SRL/lib/grammar.rb +39 -7
- data/examples/general/SRL/lib/regex/alternation.rb +3 -2
- data/examples/general/SRL/lib/regex/anchor.rb +48 -0
- data/examples/general/SRL/lib/regex/capturing_group.rb +50 -0
- data/examples/general/SRL/lib/regex/char_class.rb +3 -2
- data/examples/general/SRL/lib/regex/char_range.rb +4 -2
- data/examples/general/SRL/lib/regex/char_shorthand.rb +3 -2
- data/examples/general/SRL/lib/regex/character.rb +16 -14
- data/examples/general/SRL/lib/regex/concatenation.rb +8 -7
- data/examples/general/SRL/lib/regex/expression.rb +26 -5
- data/examples/general/SRL/lib/regex/lookaround.rb +47 -0
- data/examples/general/SRL/lib/regex/match_option.rb +36 -0
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +3 -2
- data/examples/general/SRL/lib/regex/repetition.rb +3 -2
- data/examples/general/SRL/lib/regex/wildcard.rb +3 -2
- data/examples/general/SRL/lib/regex_repr.rb +3 -0
- data/examples/general/SRL/lib/tokenizer.rb +26 -7
- data/examples/general/SRL/spec/integration_spec.rb +148 -5
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78c5a2a83d1691c6c470f2fb4bf347f7dca44cb6
|
4
|
+
data.tar.gz: 9c1f62cdf775e71e33ceecfc99db58298b4e3b82
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 662e52aa9aae198f6eb5e9ed1750a1bfa873969ad9bedea4ca7225babef3d9fa648b709c4bef4959432d62cf8e1733d556ce911efec9885a0d59e6d4972c0f50
|
7
|
+
data.tar.gz: ab114da248a85b5e78fdb2ba9affca357a52b0cbe45cbaa97b1ad41becf71b0fa89db6e1f4487c0a23c59b9b9b83db5101e511f2b711a0d4f9a324ac2c3f3a2d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.5.12 / 2018-02-03
|
2
|
+
* [CHANGE] Simple Regex Language is fully supported!...
|
3
|
+
* [CHANGE] File `examples/general/SRL/lib/grammar.rb`: added the missing rule productions for Simple Regex Language.
|
4
|
+
* [CHANGE] File `examples/general/SRL/lib/ast_builder.rb`: added transformation rules for the missing regular expression features.
|
5
|
+
* [CHANGE] File `examples/general/SRL/spec/integration_spec.rb`: added tests covering SRL.
|
6
|
+
* [NEW] File `examples/general/SRL/lib/regex/anchor.rb`: added support for anchors in regular expressions.
|
7
|
+
* [NEW] File `examples/general/SRL/lib/regex/capturing_group.rb`: added support for capturing groups in regular expressions.
|
8
|
+
* [NEW] File `examples/general/SRL/lib/regex/lookaround.rb`: added support for lookaround in regular expressions.
|
9
|
+
|
1
10
|
### 0.5.11 / 2018-01-25
|
2
11
|
* [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
|
3
12
|
* [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).
|
@@ -14,6 +14,8 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
14
14
|
|
15
15
|
Terminal2NodeClass = { }.freeze
|
16
16
|
|
17
|
+
attr_reader :options
|
18
|
+
|
17
19
|
protected
|
18
20
|
|
19
21
|
# Overriding method.
|
@@ -36,33 +38,109 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
36
38
|
# @param theChildren [Array] Children nodes (one per rhs symbol)
|
37
39
|
def new_parent_node(aProduction, aRange, theTokens, theChildren)
|
38
40
|
node = case aProduction.name
|
39
|
-
when 'srl_0' # rule 'srl' => '
|
41
|
+
when 'srl_0' # rule 'srl' => 'expression'
|
40
42
|
return_first_child(aRange, theTokens, theChildren)
|
41
|
-
|
42
|
-
when '
|
43
|
+
|
44
|
+
when 'expression_0' # rule 'expression' => %w[pattern separator flags]
|
45
|
+
reduce_expression_0(aProduction, aRange, theTokens, theChildren)
|
46
|
+
|
47
|
+
when 'expression_1' # rule 'expression' => 'pattern'
|
48
|
+
return_first_child(aRange, theTokens, theChildren)
|
49
|
+
|
50
|
+
when 'pattern_0' # rule 'pattern' => %w[pattern separator quantifiable]
|
43
51
|
reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
|
44
52
|
|
45
|
-
when 'pattern_1' # rule 'pattern' =>
|
46
|
-
reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
|
47
|
-
|
48
|
-
when 'pattern_2' # rule 'pattern' => 'quantifiable'
|
53
|
+
when 'pattern_1' # rule 'pattern' => 'quantifiable'
|
49
54
|
return_first_child(aRange, theTokens, theChildren)
|
50
55
|
|
51
|
-
when '
|
56
|
+
when 'separator_0' # rule 'separator' => 'COMMA'
|
52
57
|
return_first_child(aRange, theTokens, theChildren)
|
53
58
|
|
54
|
-
when '
|
59
|
+
when 'separator_1' # rule 'separator' => []
|
60
|
+
nil
|
61
|
+
|
62
|
+
when 'flags_0' # rule 'flags' => %[flags separator single_flag]
|
63
|
+
### NEW
|
64
|
+
reduce_flags_0(aProduction, aRange, theTokens, theChildren)
|
65
|
+
|
66
|
+
when 'single_flag_0' # rule 'single_flag' => %w[CASE INSENSITIVE]
|
67
|
+
### NEW
|
68
|
+
reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
|
69
|
+
|
70
|
+
when 'single_flag_1' # rule 'single_flag' => %w[MULTI LINE]
|
71
|
+
### NEW
|
72
|
+
reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
|
73
|
+
|
74
|
+
when 'single_flag_2' # rule 'single_flag' => %w[ALL LAZY]
|
75
|
+
### NEW
|
76
|
+
reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
|
77
|
+
|
78
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
79
|
+
when 'quantifiable_0'
|
80
|
+
reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
|
81
|
+
|
82
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable]
|
83
|
+
when 'quantifiable_1'
|
55
84
|
reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
|
56
85
|
|
86
|
+
# rule 'quantifiable' => %w[anchorable end_anchor]
|
87
|
+
when 'quantifiable_2'
|
88
|
+
reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
|
89
|
+
|
90
|
+
when 'quantifiable_3' # rule 'quantifiable' => 'anchorable'
|
91
|
+
return_first_child(aRange, theTokens, theChildren)
|
92
|
+
|
93
|
+
# rule 'begin_anchor' => %w[STARTS WITH]
|
94
|
+
# rule 'begin_anchor' => %w[BEGIN WITH]
|
95
|
+
when 'begin_anchor_0', 'begin_anchor_1'
|
96
|
+
reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
|
97
|
+
|
98
|
+
when 'end_anchor_0' # rule 'end_anchor' => %w[MUST END]
|
99
|
+
reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
|
100
|
+
|
101
|
+
when 'anchorable_0' # rule 'anchorable' => 'assertable'
|
102
|
+
return_first_child(aRange, theTokens, theChildren)
|
103
|
+
|
104
|
+
when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
|
105
|
+
reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
106
|
+
|
107
|
+
when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
|
108
|
+
reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
109
|
+
|
110
|
+
# rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
111
|
+
when 'assertion_0'
|
112
|
+
reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
|
113
|
+
|
114
|
+
# rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
115
|
+
when 'assertion_1'
|
116
|
+
reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
|
117
|
+
|
118
|
+
# rule 'assertion' => %w[IF ALREADY HAD assertable]
|
119
|
+
when 'assertion_2'
|
120
|
+
reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
|
121
|
+
|
122
|
+
# rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
123
|
+
when 'assertion_3'
|
124
|
+
reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
|
125
|
+
|
126
|
+
when 'assertable_0' # rule 'assertable' => 'term'
|
127
|
+
return_first_child(aRange, theTokens, theChildren)
|
128
|
+
|
129
|
+
when 'assertable_1' # rule 'assertable' => %w[term quantifier]
|
130
|
+
reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
|
131
|
+
|
57
132
|
when 'term_0' # rule 'term' => 'atom'
|
58
133
|
return_first_child(aRange, theTokens, theChildren)
|
59
134
|
|
60
135
|
when 'term_1' # rule 'term' => 'alternation'
|
61
136
|
return_first_child(aRange, theTokens, theChildren)
|
62
|
-
|
137
|
+
|
63
138
|
when 'term_2' # rule 'term' => 'grouping'
|
64
139
|
return_first_child(aRange, theTokens, theChildren)
|
65
140
|
|
141
|
+
when 'term_3' # rule 'term' => 'capturing_group'
|
142
|
+
return_first_child(aRange, theTokens, theChildren)
|
143
|
+
|
66
144
|
when 'atom_0' # rule 'atom' => 'letter_range'
|
67
145
|
return_first_child(aRange, theTokens, theChildren)
|
68
146
|
|
@@ -133,19 +211,34 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
133
211
|
when 'alternation_0'
|
134
212
|
reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
|
135
213
|
|
136
|
-
# rule 'alternatives' => %w[alternatives
|
214
|
+
# rule 'alternatives' => %w[alternatives separator quantifiable]
|
137
215
|
when 'alternatives_0'
|
138
216
|
reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
|
139
217
|
|
140
|
-
# rule 'alternatives' =>
|
141
|
-
when 'alternatives_1'
|
218
|
+
when 'alternatives_1' # rule 'alternatives' => 'quantifiable'
|
142
219
|
reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
143
220
|
|
144
|
-
when '
|
145
|
-
|
221
|
+
when 'grouping_0' # rule 'grouping' => %w[LPAREN pattern RPAREN]
|
222
|
+
reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
|
223
|
+
|
224
|
+
# rule 'capturing_group' => %w[CAPTURE assertable]
|
225
|
+
when 'capturing_group_0'
|
226
|
+
reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
|
227
|
+
|
228
|
+
# rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
229
|
+
when 'capturing_group_1'
|
230
|
+
reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
|
231
|
+
|
232
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
233
|
+
when 'capturing_group_2'
|
234
|
+
reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
|
235
|
+
|
236
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
237
|
+
when 'capturing_group_3'
|
238
|
+
reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
|
146
239
|
|
147
|
-
when '
|
148
|
-
|
240
|
+
when 'var_name_0' # rule 'var_name' => 'STRING_LIT'
|
241
|
+
return_first_child(aRange, theTokens, theChildren)
|
149
242
|
|
150
243
|
when 'quantifier_0' # rule 'quantifier' => 'ONCE'
|
151
244
|
multiplicity(1, 1)
|
@@ -205,11 +298,11 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
205
298
|
result = Regex::Concatenation.new(*chars)
|
206
299
|
else
|
207
300
|
if to_escape && Regex::Character::MetaChars.include?(aString)
|
208
|
-
result = Regex::Concatenation.new(Regex::Character.new("\\"),
|
301
|
+
result = Regex::Concatenation.new(Regex::Character.new("\\"),
|
209
302
|
Regex::Character.new(aString))
|
210
303
|
else
|
211
304
|
result = Regex::Character.new(aString)
|
212
|
-
end
|
305
|
+
end
|
213
306
|
end
|
214
307
|
|
215
308
|
return result
|
@@ -237,21 +330,100 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
237
330
|
def repetition(expressionToRepeat, aMultiplicity)
|
238
331
|
return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
|
239
332
|
end
|
240
|
-
|
241
|
-
# rule '
|
333
|
+
|
334
|
+
# rule 'expression' => %w[pattern separator flags]
|
335
|
+
def reduce_expression_0(aProduction, aRange, theTokens, theChildren)
|
336
|
+
@options = theChildren[2] if theChildren[2]
|
337
|
+
return_first_child(aRange, theTokens, theChildren)
|
338
|
+
end
|
339
|
+
|
340
|
+
# rule 'pattern' => %w[pattern separator quantifiable]
|
242
341
|
def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
|
243
342
|
return Regex::Concatenation.new(theChildren[0], theChildren[2])
|
244
343
|
end
|
245
344
|
|
246
|
-
# rule '
|
247
|
-
def
|
248
|
-
|
345
|
+
# rule 'flags' => %[flags separator single_flag]
|
346
|
+
def reduce_flags_0(aProduction, aRange, theTokens, theChildren)
|
347
|
+
theChildren[0] << theChildren[2]
|
249
348
|
end
|
250
349
|
|
251
|
-
# rule '
|
350
|
+
# rule 'single_flag' => %w[CASE INSENSITIVE]
|
351
|
+
def reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
|
352
|
+
return [ Regex::MatchOption.new(:IGNORECASE, true) ]
|
353
|
+
end
|
354
|
+
|
355
|
+
# rule 'single_flag' => %w[MULTI LINE]
|
356
|
+
def reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
|
357
|
+
return [ Regex::MatchOption.new(:MULTILINE, true) ]
|
358
|
+
end
|
359
|
+
|
360
|
+
# rule 'single_flag' => %w[ALL LAZY]
|
361
|
+
def reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
|
362
|
+
return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
|
363
|
+
end
|
364
|
+
|
365
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
366
|
+
def reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
|
367
|
+
theChildren[1].begin_anchor = theChildren[0]
|
368
|
+
theChildren[1].end_anchor = theChildren[2]
|
369
|
+
return theChildren[1]
|
370
|
+
end
|
371
|
+
|
372
|
+
# rule 'quantifiable' => %w[begin_anchor anchorable]
|
252
373
|
def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
|
253
|
-
|
254
|
-
|
374
|
+
theChildren[1].begin_anchor = theChildren[0]
|
375
|
+
return theChildren[1]
|
376
|
+
end
|
377
|
+
|
378
|
+
# rule 'quantifiable' => %w[anchorable end_anchor]
|
379
|
+
def reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
|
380
|
+
theChildren[0].end_anchor = theChildren[1]
|
381
|
+
return theChildren[0]
|
382
|
+
end
|
383
|
+
|
384
|
+
# rule 'begin_anchor' => %w[STARTS WITH]
|
385
|
+
# rule 'begin_anchor' => %w[BEGIN WITH]
|
386
|
+
def reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
|
387
|
+
return Regex::Anchor.new('^')
|
388
|
+
end
|
389
|
+
|
390
|
+
# rule 'end_anchor' => %w[MUST END]
|
391
|
+
def reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
|
392
|
+
return Regex::Anchor.new('$')
|
393
|
+
end
|
394
|
+
|
395
|
+
|
396
|
+
# rule 'anchorable' => %w[assertable assertion]
|
397
|
+
def reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
|
398
|
+
assertion = theChildren.last
|
399
|
+
assertion.children.unshift(theChildren[0])
|
400
|
+
return assertion
|
401
|
+
end
|
402
|
+
|
403
|
+
# rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
404
|
+
def reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
|
405
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
|
406
|
+
end
|
407
|
+
|
408
|
+
# rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
409
|
+
def reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
|
410
|
+
return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
|
411
|
+
end
|
412
|
+
|
413
|
+
# rule 'assertion' => %w[IF ALREADY HAD assertable]
|
414
|
+
def reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
|
415
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :positive)
|
416
|
+
end
|
417
|
+
|
418
|
+
# rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
419
|
+
def reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
|
420
|
+
return Regex::Lookaround.new(theChildren.last, :behind, :negative)
|
421
|
+
end
|
422
|
+
|
423
|
+
# rule 'assertable' => %w[term quantifier]
|
424
|
+
def reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
|
425
|
+
quantifier = theChildren[1]
|
426
|
+
term = theChildren[0]
|
255
427
|
repetition(term, quantifier)
|
256
428
|
end
|
257
429
|
|
@@ -348,37 +520,56 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
|
|
348
520
|
raw_literal = theChildren[-1].token.lexeme.dup
|
349
521
|
return string_literal(raw_literal)
|
350
522
|
end
|
351
|
-
|
523
|
+
|
352
524
|
# rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
|
353
525
|
def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
|
354
526
|
return Regex::Alternation.new(*theChildren[3])
|
355
527
|
end
|
356
528
|
|
357
|
-
# rule 'alternatives' => %w[alternatives
|
529
|
+
# rule 'alternatives' => %w[alternatives separator quantifiable]
|
358
530
|
def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
|
359
531
|
return theChildren[0] << theChildren[-1]
|
360
532
|
end
|
361
533
|
|
362
|
-
# rule 'alternatives' => %w[alternatives quantifiable]
|
363
|
-
def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
364
|
-
return theChildren[0] << theChildren[-1]
|
365
|
-
end
|
366
|
-
|
367
534
|
# rule 'alternatives' => 'quantifiable'
|
368
|
-
def
|
535
|
+
def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
|
369
536
|
return [theChildren.last]
|
370
537
|
end
|
371
|
-
|
538
|
+
|
372
539
|
# rule 'grouping' => %w[LPAREN pattern RPAREN]
|
373
540
|
def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
|
374
|
-
return Regex::NonCapturingGroup.new(theChildren[1])
|
541
|
+
return Regex::NonCapturingGroup.new(theChildren[1])
|
375
542
|
end
|
376
543
|
|
544
|
+
# rule 'capturing_group' => %w[CAPTURE assertable]
|
545
|
+
def reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
|
546
|
+
return Regex::CapturingGroup.new(theChildren[1])
|
547
|
+
end
|
548
|
+
|
549
|
+
# rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
550
|
+
def reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
|
551
|
+
group = Regex::CapturingGroup.new(theChildren[1])
|
552
|
+
return Regex::Concatenation.new(group, theChildren[3])
|
553
|
+
end
|
554
|
+
|
555
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
556
|
+
def reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
|
557
|
+
name = theChildren[3].token.lexeme.dup
|
558
|
+
return Regex::CapturingGroup.new(theChildren[1], name)
|
559
|
+
end
|
560
|
+
|
561
|
+
# rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
562
|
+
def reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
|
563
|
+
name = theChildren[3].token.lexeme.dup
|
564
|
+
group = Regex::CapturingGroup.new(theChildren[1], name)
|
565
|
+
return Regex::Concatenation.new(group, theChildren[5])
|
566
|
+
end
|
567
|
+
|
377
568
|
# rule 'quantifier' => %w[EXACTLY count TIMES]
|
378
569
|
def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
|
379
570
|
count = theChildren[1].token.lexeme.to_i
|
380
571
|
multiplicity(count, count)
|
381
|
-
end
|
572
|
+
end
|
382
573
|
|
383
574
|
# rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
|
384
575
|
def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
|
@@ -9,6 +9,8 @@ module SRL
|
|
9
9
|
add_terminals('LPAREN', 'RPAREN', 'COMMA')
|
10
10
|
add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
|
11
11
|
add_terminals('LITERALLY', 'STRING_LIT')
|
12
|
+
add_terminals('BEGIN', 'STARTS', 'WITH')
|
13
|
+
add_terminals('MUST', 'END')
|
12
14
|
add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
|
13
15
|
add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
|
14
16
|
add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
|
@@ -17,16 +19,42 @@ module SRL
|
|
17
19
|
add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
|
18
20
|
add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
|
19
21
|
add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
|
22
|
+
add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
|
23
|
+
add_terminals('ALREADY', 'HAD')
|
24
|
+
add_terminals('CAPTURE', 'AS', 'UNTIL')
|
25
|
+
add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
|
26
|
+
add_terminals('LAZY')
|
20
27
|
|
21
|
-
rule 'srl' => '
|
22
|
-
rule '
|
23
|
-
rule '
|
28
|
+
rule 'srl' => 'expression'
|
29
|
+
rule 'expression' => %w[pattern separator flags]
|
30
|
+
rule 'expression' => 'pattern'
|
31
|
+
rule 'pattern' => %w[pattern separator quantifiable]
|
24
32
|
rule 'pattern' => 'quantifiable'
|
25
|
-
rule '
|
26
|
-
rule '
|
33
|
+
rule 'separator' => 'COMMA'
|
34
|
+
rule 'separator' => []
|
35
|
+
rule 'flags' => %[flags separator single_flag]
|
36
|
+
rule 'single_flag' => %w[CASE INSENSITIVE]
|
37
|
+
rule 'single_flag' => %w[MULTI LINE]
|
38
|
+
rule 'single_flag' => %w[ALL LAZY]
|
39
|
+
rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
|
40
|
+
rule 'quantifiable' => %w[begin_anchor anchorable]
|
41
|
+
rule 'quantifiable' => %w[anchorable end_anchor]
|
42
|
+
rule 'quantifiable' => 'anchorable'
|
43
|
+
rule 'begin_anchor' => %w[STARTS WITH]
|
44
|
+
rule 'begin_anchor' => %w[BEGIN WITH]
|
45
|
+
rule 'end_anchor' => %w[MUST END]
|
46
|
+
rule 'anchorable' => 'assertable'
|
47
|
+
rule 'anchorable' => %w[assertable assertion]
|
48
|
+
rule 'assertion' => %w[IF FOLLOWED BY assertable]
|
49
|
+
rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
|
50
|
+
rule 'assertion' => %w[IF ALREADY HAD assertable]
|
51
|
+
rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
|
52
|
+
rule 'assertable' => 'term'
|
53
|
+
rule 'assertable' => %w[term quantifier]
|
27
54
|
rule 'term' => 'atom'
|
28
55
|
rule 'term' => 'alternation'
|
29
56
|
rule 'term' => 'grouping'
|
57
|
+
rule 'term' => 'capturing_group'
|
30
58
|
rule 'atom' => 'letter_range'
|
31
59
|
rule 'atom' => 'digit_range'
|
32
60
|
rule 'atom' => 'character_class'
|
@@ -49,10 +77,14 @@ module SRL
|
|
49
77
|
rule 'special_char' => %w[NEW LINE]
|
50
78
|
rule 'literal' => %w[LITERALLY STRING_LIT]
|
51
79
|
rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
|
52
|
-
rule 'alternatives' => %w[alternatives
|
53
|
-
rule 'alternatives' => %w[alternatives quantifiable]
|
80
|
+
rule 'alternatives' => %w[alternatives separator quantifiable]
|
54
81
|
rule 'alternatives' => 'quantifiable'
|
55
82
|
rule 'grouping' => %w[LPAREN pattern RPAREN]
|
83
|
+
rule 'capturing_group' => %w[CAPTURE assertable]
|
84
|
+
rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
|
85
|
+
rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
|
86
|
+
rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
|
87
|
+
rule 'var_name' => 'STRING_LIT'
|
56
88
|
rule 'quantifier' => 'ONCE'
|
57
89
|
rule 'quantifier' => 'TWICE'
|
58
90
|
rule 'quantifier' => %w[EXACTLY count TIMES]
|
@@ -13,10 +13,11 @@ class Alternation < PolyadicExpression
|
|
13
13
|
super(theChildren)
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
protected
|
17
|
+
|
17
18
|
# Conversion method re-definition.
|
18
19
|
# Purpose: Return the String representation of the concatenated expressions.
|
19
|
-
def
|
20
|
+
def text_repr()
|
20
21
|
result_children = children.map { |aChild| aChild.to_str() }
|
21
22
|
result = '(?:' + result_children.join('|') + ')'
|
22
23
|
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# File: anchor.rb
|
2
|
+
|
3
|
+
require_relative "atomic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
# An anchor is a zero-width assertion based on the current position.
|
7
|
+
class Anchor < AtomicExpression
|
8
|
+
# A Hash for converting a lexeme to a symbolic value
|
9
|
+
AnchorToSymbol = {
|
10
|
+
# Lexeme => Symbol value
|
11
|
+
'^' => :soLine, # Start of line
|
12
|
+
'$' => :eoLine, # End of line
|
13
|
+
'\A' => :soSubject,
|
14
|
+
'\b' => :wordBoundary,
|
15
|
+
'\B' => :nonAtWordBoundary,
|
16
|
+
'\G' => :firstMatch,
|
17
|
+
'\z' => :eoSubject,
|
18
|
+
'\Z' => :eoSubjectOrBeforeNLAtEnd
|
19
|
+
}
|
20
|
+
|
21
|
+
# A symbolic value that identifies the type of assertion to perform
|
22
|
+
attr_reader(:kind)
|
23
|
+
|
24
|
+
# Constructor
|
25
|
+
# @param aKind [String] Lexeme representation of the anchor
|
26
|
+
def initialize(aKind)
|
27
|
+
@kind = valid_kind(aKind)
|
28
|
+
end
|
29
|
+
|
30
|
+
public
|
31
|
+
|
32
|
+
# Conversion method re-definition.
|
33
|
+
# Purpose: Return the String representation of the expression.
|
34
|
+
def to_str()
|
35
|
+
return AnchorToSymbol.rassoc(kind).first()
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Return the symbolic value corresponding to the given lexeme.
|
41
|
+
def valid_kind(aKind)
|
42
|
+
return AnchorToSymbol[aKind]
|
43
|
+
end
|
44
|
+
|
45
|
+
end # class
|
46
|
+
end # module
|
47
|
+
|
48
|
+
# End of file
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# File: capturing_group.rb
|
2
|
+
|
3
|
+
require_relative "monadic_expression" # Access the superclass
|
4
|
+
|
5
|
+
module Regex # This module is used as a namespace
|
6
|
+
|
7
|
+
# An association between a capture variable and an expression
|
8
|
+
# the subject text in the same serial arrangement
|
9
|
+
class CapturingGroup < MonadicExpression
|
10
|
+
# The capture variable id. It is a Fixnum when the capture group gets a sequence number,
|
11
|
+
# a String when it is a user-defined name
|
12
|
+
attr_reader(:id)
|
13
|
+
|
14
|
+
# When true, then capturing group forbids backtracking requests from its parent expression.
|
15
|
+
attr_reader(:no_backtrack)
|
16
|
+
|
17
|
+
# Constructor.
|
18
|
+
# [aChildExpression] A sub-expression to match. When successful the matching text is assigned to the capture variable.
|
19
|
+
# [theId] The id of the capture variable.
|
20
|
+
# [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
|
21
|
+
def initialize(aChildExpression, theId = nil, noBacktrack = false)
|
22
|
+
super(aChildExpression)
|
23
|
+
@id = theId
|
24
|
+
@no_backtrack = noBacktrack
|
25
|
+
end
|
26
|
+
|
27
|
+
public
|
28
|
+
# Return true iff the capturing group has a name (and not a sequence number)
|
29
|
+
def named?()
|
30
|
+
return id.kind_of?(String)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Conversion method re-definition.
|
34
|
+
# Purpose: Return the String representation of the captured expression.
|
35
|
+
def to_str()
|
36
|
+
prefix = named? ? "?<#{id}>" : ''
|
37
|
+
atomic = no_backtrack ? '?>' : ''
|
38
|
+
if child.is_a?(Regex::NonCapturingGroup)
|
39
|
+
# Minor optimization
|
40
|
+
result = '(' + atomic + prefix + child.child.to_str + ")"
|
41
|
+
else
|
42
|
+
result = '(' + atomic + prefix + child.to_str + ")"
|
43
|
+
end
|
44
|
+
return result
|
45
|
+
end
|
46
|
+
|
47
|
+
end # class
|
48
|
+
end # module
|
49
|
+
|
50
|
+
# End of file
|
@@ -18,10 +18,11 @@ module Regex # This module is used as a namespace
|
|
18
18
|
@negated = to_negate
|
19
19
|
end
|
20
20
|
|
21
|
-
|
21
|
+
protected
|
22
|
+
|
22
23
|
# Conversion method re-definition.
|
23
24
|
# Purpose: Return the String representation of the character class.
|
24
|
-
def
|
25
|
+
def text_repr()
|
25
26
|
result_children = children.inject('') do |subResult, aChild|
|
26
27
|
if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
|
27
28
|
subResult << "\\" # Escape meta-character...
|
@@ -26,11 +26,13 @@ public
|
|
26
26
|
# Return the upper bound of the range.
|
27
27
|
def upper()
|
28
28
|
return children.last
|
29
|
-
end
|
29
|
+
end
|
30
|
+
|
31
|
+
protected
|
30
32
|
|
31
33
|
# Conversion method re-definition.
|
32
34
|
# Purpose: Return the String representation of the concatenated expressions.
|
33
|
-
def
|
35
|
+
def text_repr()
|
34
36
|
result = lower.to_str() + '-' + upper.to_str()
|
35
37
|
|
36
38
|
return result
|
@@ -28,10 +28,11 @@ module Regex # This module is used as a namespace
|
|
28
28
|
@shortname = valid_shortname(aShortname)
|
29
29
|
end
|
30
30
|
|
31
|
-
|
31
|
+
protected
|
32
|
+
|
32
33
|
# Conversion method re-definition.
|
33
34
|
# Purpose: Return the String representation of the expression.
|
34
|
-
def
|
35
|
+
def text_repr()
|
35
36
|
return "\\#{shortname}"
|
36
37
|
end
|
37
38
|
|
@@ -106,20 +106,6 @@ public
|
|
106
106
|
self.class.codepoint2char(@codepoint)
|
107
107
|
end
|
108
108
|
|
109
|
-
# Conversion method re-definition.
|
110
|
-
# Purpose: Return the String representation of the expression.
|
111
|
-
# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
|
112
|
-
# Otherwise the character corresponding to the codepoint is returned.
|
113
|
-
def to_str()
|
114
|
-
if lexeme.nil?
|
115
|
-
result = char()
|
116
|
-
else
|
117
|
-
result = lexeme.dup()
|
118
|
-
end
|
119
|
-
|
120
|
-
return result
|
121
|
-
end
|
122
|
-
|
123
109
|
# Returns true iff this Character and parameter 'another' represent the same character.
|
124
110
|
# [another] any Object. The way the equality is tested depends on the another's class
|
125
111
|
# Example:
|
@@ -152,6 +138,22 @@ public
|
|
152
138
|
def explain()
|
153
139
|
return "the character '#{to_str()}'"
|
154
140
|
end
|
141
|
+
|
142
|
+
protected
|
143
|
+
|
144
|
+
# Conversion method re-definition.
|
145
|
+
# Purpose: Return the String representation of the expression.
|
146
|
+
# If the Character was initially from a text (the lexeme), then the lexeme is returned back.
|
147
|
+
# Otherwise the character corresponding to the codepoint is returned.
|
148
|
+
def text_repr()
|
149
|
+
if lexeme.nil?
|
150
|
+
result = char()
|
151
|
+
else
|
152
|
+
result = lexeme.dup()
|
153
|
+
end
|
154
|
+
|
155
|
+
return result
|
156
|
+
end
|
155
157
|
|
156
158
|
private
|
157
159
|
# Conversion method that returns a codepoint for the given two characters (digram) escape sequence.
|
@@ -4,24 +4,25 @@ require_relative 'polyadic_expression' # Access the superclass
|
|
4
4
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
|
7
|
-
# Abstract class. A n-ary matching operator.
|
7
|
+
# Abstract class. A n-ary matching operator.
|
8
8
|
# It succeeds when each child succeeds to match the subject text in the same
|
9
9
|
# serial arrangement as defined by this concatenation.
|
10
10
|
class Concatenation < PolyadicExpression
|
11
|
-
|
11
|
+
|
12
12
|
# Constructor.
|
13
13
|
def initialize(*theChildren)
|
14
14
|
super(theChildren)
|
15
15
|
end
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
protected
|
18
|
+
|
18
19
|
# Conversion method re-definition.
|
19
20
|
# Purpose: Return the String representation of the concatenated expressions.
|
20
|
-
def
|
21
|
-
result = children.inject('') { |result, aChild|
|
21
|
+
def text_repr()
|
22
|
+
result = children.inject('') { |result, aChild|
|
22
23
|
result << aChild.to_str()
|
23
24
|
}
|
24
|
-
|
25
|
+
|
25
26
|
return result
|
26
27
|
end
|
27
28
|
|
@@ -5,7 +5,11 @@ require_relative 'abstract_method'
|
|
5
5
|
module Regex # This module is used as a namespace
|
6
6
|
|
7
7
|
# Abstract class. The generalization of any valid regular (sub)expression.
|
8
|
-
class Expression
|
8
|
+
class Expression
|
9
|
+
attr_accessor :begin_anchor
|
10
|
+
attr_accessor :end_anchor
|
11
|
+
|
12
|
+
# Constructor
|
9
13
|
def initialize()
|
10
14
|
end
|
11
15
|
|
@@ -20,20 +24,37 @@ public
|
|
20
24
|
def cardinality(theParentOptions) abstract_method
|
21
25
|
end
|
22
26
|
|
23
|
-
protected
|
24
27
|
# Determine the matching options to apply to this object, given the options coming from the parent
|
25
28
|
# and options that are local to this object. Local options take precedence.
|
26
|
-
#
|
29
|
+
# @param theParentOptions [Hash] matching options. They are overridden by options with same name
|
27
30
|
# that are bound to this object.
|
28
31
|
def options(theParentOptions)
|
29
32
|
resulting_options = theParentOptions.merge(@local_options)
|
30
33
|
return resulting_options
|
31
34
|
end
|
32
35
|
|
33
|
-
#
|
36
|
+
# Template method.
|
34
37
|
# Purpose: Return the String representation of the expression.
|
35
|
-
def to_str()
|
38
|
+
def to_str()
|
39
|
+
result = ''
|
40
|
+
result << prefix
|
41
|
+
result << text_repr
|
42
|
+
result << suffix
|
43
|
+
|
44
|
+
return result
|
36
45
|
end
|
46
|
+
|
47
|
+
protected
|
48
|
+
|
49
|
+
def prefix()
|
50
|
+
begin_anchor ? begin_anchor.to_str : ''
|
51
|
+
end
|
52
|
+
|
53
|
+
def suffix()
|
54
|
+
end_anchor ? end_anchor.to_str : ''
|
55
|
+
end
|
56
|
+
|
57
|
+
|
37
58
|
|
38
59
|
end # class
|
39
60
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# File: Lookaround.rb
|
2
|
+
|
3
|
+
########################
|
4
|
+
# TODO: make it a binary expression
|
5
|
+
########################
|
6
|
+
|
7
|
+
|
8
|
+
require_relative 'polyadic_expression' # Access the superclass
|
9
|
+
|
10
|
+
module Regex # This module is used as a namespace
|
11
|
+
# Lookaround is a zero-width assertion just like the start and end of line anchors.
|
12
|
+
# The difference is that lookarounds will actually match characters, but only return the result of the match: match or no match.
|
13
|
+
# That is why they are called "assertions". They do not consume characters from the subject,
|
14
|
+
# but only assert whether a match is possible or not.
|
15
|
+
class Lookaround < PolyadicExpression
|
16
|
+
# The "direction" of the lookaround. Can be ahead or behind. It specifies the relative position of the
|
17
|
+
# expression to match compared to the current 'position' in the subject text.
|
18
|
+
attr_reader(:dir)
|
19
|
+
|
20
|
+
# The kind indicates whether the assertion is positive (succeeds when there is a match) or negative
|
21
|
+
# (assertion succeeds when there is NO match).
|
22
|
+
attr_reader(:kind)
|
23
|
+
|
24
|
+
# Constructor.
|
25
|
+
# [assertedExpression] A sub-expression to match.
|
26
|
+
# [theDir] One of the following values: [ :ahead, :behind ]
|
27
|
+
# [theKind] One of the following values: [ :positive, :negative ]
|
28
|
+
def initialize(assertedExpression, theDir, theKind)
|
29
|
+
super([assertedExpression])
|
30
|
+
@dir, @kind = theDir, theKind
|
31
|
+
end
|
32
|
+
|
33
|
+
public
|
34
|
+
# Conversion method re-definition.
|
35
|
+
# Purpose: Return the String representation of the captured expression.
|
36
|
+
def to_str()
|
37
|
+
result = children[0].to_str
|
38
|
+
dir_syntax = (dir == :ahead) ? '' : '<'
|
39
|
+
kind_syntax = (kind == :positive)? '=' : '!'
|
40
|
+
result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ")"
|
41
|
+
return result
|
42
|
+
end
|
43
|
+
|
44
|
+
end # class
|
45
|
+
end # module
|
46
|
+
|
47
|
+
# End of file
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# File: MatchOption.rb
|
2
|
+
|
3
|
+
module Regex # This module is used as a namespace
|
4
|
+
|
5
|
+
# Represents an option that influences the way a regular (sub)expression can perform its matching.
|
6
|
+
class MatchOption
|
7
|
+
# The symbolic name of the option
|
8
|
+
attr_reader(:name)
|
9
|
+
|
10
|
+
# An indicator that tells whether the option is turned on or off
|
11
|
+
attr_reader(:setting)
|
12
|
+
|
13
|
+
# Constructor.
|
14
|
+
def initialize(theName, theSetting)
|
15
|
+
@name, @setting = theName, theSetting
|
16
|
+
end
|
17
|
+
|
18
|
+
public
|
19
|
+
# Equality operator
|
20
|
+
def ==(another)
|
21
|
+
return true if self.object_id == another.object_id
|
22
|
+
|
23
|
+
if another.kind_of?(MatchOption)
|
24
|
+
isEqual = ((name == another.name) && (setting == another.setting))
|
25
|
+
else
|
26
|
+
isEqual = false
|
27
|
+
end
|
28
|
+
|
29
|
+
return isEqual
|
30
|
+
end
|
31
|
+
|
32
|
+
end # class
|
33
|
+
|
34
|
+
end # module
|
35
|
+
|
36
|
+
# End of file
|
@@ -14,10 +14,11 @@ module Regex # This module is used as a namespace
|
|
14
14
|
super(aChildExpression)
|
15
15
|
end
|
16
16
|
|
17
|
-
|
17
|
+
protected
|
18
|
+
|
18
19
|
# Conversion method re-definition.
|
19
20
|
# Purpose: Return the String representation of the captured expression.
|
20
|
-
def to_str()
|
21
|
+
def text_repr()
|
21
22
|
result = '(?:' + all_child_text() + ")"
|
22
23
|
return result
|
23
24
|
end
|
@@ -16,10 +16,11 @@ class Repetition < MonadicExpression
|
|
16
16
|
@multiplicity = aMultiplicity
|
17
17
|
end
|
18
18
|
|
19
|
-
|
19
|
+
protected
|
20
|
+
|
20
21
|
# Conversion method re-definition.
|
21
22
|
# Purpose: Return the String representation of the concatenated expressions.
|
22
|
-
def to_str()
|
23
|
+
def text_repr()
|
23
24
|
result = all_child_text() + multiplicity.to_str()
|
24
25
|
return result
|
25
26
|
end
|
@@ -8,3 +8,6 @@ require_relative './regex/char_shorthand'
|
|
8
8
|
require_relative './regex/wildcard'
|
9
9
|
require_relative './regex/alternation'
|
10
10
|
require_relative './regex/non_capturing_group'
|
11
|
+
require_relative './regex/anchor'
|
12
|
+
require_relative './regex/lookaround'
|
13
|
+
require_relative './regex/capturing_group'
|
@@ -22,41 +22,60 @@ module SRL
|
|
22
22
|
')' => 'RPAREN',
|
23
23
|
',' => 'COMMA'
|
24
24
|
}.freeze
|
25
|
-
|
25
|
+
|
26
26
|
# Here are all the SRL keywords (in uppercase)
|
27
27
|
@@keywords = %w[
|
28
|
+
ALL
|
29
|
+
ALREADY
|
28
30
|
AND
|
29
31
|
ANY
|
30
32
|
ANYTHING
|
33
|
+
AS
|
31
34
|
AT
|
32
35
|
BACKSLASH
|
36
|
+
BEGIN
|
33
37
|
BETWEEN
|
38
|
+
BY
|
39
|
+
CAPTURE
|
40
|
+
CASE
|
34
41
|
CHARACTER
|
35
42
|
DIGIT
|
43
|
+
END
|
36
44
|
EXACTLY
|
45
|
+
FOLLOWED
|
37
46
|
FROM
|
47
|
+
HAD
|
48
|
+
IF
|
49
|
+
INSENSITIVE
|
50
|
+
LAZY
|
38
51
|
LEAST
|
39
52
|
LETTER
|
40
53
|
LINE
|
41
54
|
LITERALLY
|
42
55
|
MORE
|
56
|
+
MULTI
|
57
|
+
MUST
|
43
58
|
NEVER
|
44
59
|
NEW
|
45
60
|
NO
|
61
|
+
NOT
|
46
62
|
NUMBER
|
47
63
|
OF
|
48
64
|
ONCE
|
49
65
|
ONE
|
50
66
|
OPTIONAL
|
51
67
|
OR
|
68
|
+
STARTS
|
52
69
|
TAB
|
53
70
|
TIMES
|
54
71
|
TO
|
55
72
|
TWICE
|
73
|
+
UNTIL
|
56
74
|
UPPERCASE
|
57
75
|
WHITESPACE
|
76
|
+
WITH
|
58
77
|
].map { |x| [x, x] } .to_h
|
59
|
-
|
78
|
+
|
60
79
|
class ScanError < StandardError; end
|
61
80
|
|
62
81
|
def initialize(source, aGrammar)
|
@@ -81,16 +100,16 @@ module SRL
|
|
81
100
|
skip_whitespaces
|
82
101
|
curr_ch = scanner.peek(1)
|
83
102
|
return nil if curr_ch.nil? || curr_ch.empty?
|
84
|
-
|
103
|
+
|
85
104
|
token = nil
|
86
105
|
|
87
106
|
if '(),'.include? curr_ch
|
88
107
|
# Delimiters, separators => single character token
|
89
|
-
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
108
|
+
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
90
109
|
elsif (lexeme = scanner.scan(/[0-9]{2,}/))
|
91
110
|
token = build_token('INTEGER', lexeme) # An integer has two or more digits
|
92
111
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
93
|
-
token = build_token('DIGIT_LIT', lexeme)
|
112
|
+
token = build_token('DIGIT_LIT', lexeme)
|
94
113
|
elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
|
95
114
|
token = build_token(@@keywords[lexeme.upcase], lexeme)
|
96
115
|
# TODO: handle case unknown identifier
|
@@ -111,7 +130,7 @@ module SRL
|
|
111
130
|
|
112
131
|
return token
|
113
132
|
end
|
114
|
-
|
133
|
+
|
115
134
|
def build_token(aSymbolName, aLexeme)
|
116
135
|
token_type = name2symbol[aSymbolName]
|
117
136
|
begin
|
@@ -120,7 +139,7 @@ module SRL
|
|
120
139
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
121
140
|
raise ex
|
122
141
|
end
|
123
|
-
|
142
|
+
|
124
143
|
return token
|
125
144
|
end
|
126
145
|
|
@@ -151,7 +151,6 @@ describe 'Integration tests:' do
|
|
151
151
|
end
|
152
152
|
end # context
|
153
153
|
|
154
|
-
|
155
154
|
context 'Parsing special character declarations:' do
|
156
155
|
it "should parse 'tab' syntax" do
|
157
156
|
result = parse('tab')
|
@@ -197,11 +196,16 @@ describe 'Integration tests:' do
|
|
197
196
|
message_prefix = /Premature end of input after ','/
|
198
197
|
expect(result.failure_reason.message).to match(message_prefix)
|
199
198
|
end
|
199
|
+
|
200
|
+
it 'should parse concatenation' do
|
201
|
+
result = parse('any of (literally "sample", (digit once or more))')
|
202
|
+
expect(result).to be_success
|
203
|
+
|
204
|
+
regexp = regexp_repr(result)
|
205
|
+
expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
|
206
|
+
end
|
200
207
|
|
201
|
-
it "should parse a sequence of patterns" do
|
202
|
-
#
|
203
|
-
# DEBUG When I put a comma at the end ... looping endlessly
|
204
|
-
#
|
208
|
+
it "should parse a long sequence of patterns" do
|
205
209
|
source = <<-ENDS
|
206
210
|
any of (any character, one of "._%-+") once or more,
|
207
211
|
literally "@",
|
@@ -289,6 +293,145 @@ ENDS
|
|
289
293
|
expect(regexp.to_str).to eq('[p-t]{10,}')
|
290
294
|
end
|
291
295
|
end # context
|
296
|
+
|
297
|
+
context 'Parsing lookaround:' do
|
298
|
+
it 'should parse positive lookahead' do
|
299
|
+
result = parse('letter if followed by (anything once or more, digit)')
|
300
|
+
expect(result).to be_success
|
301
|
+
|
302
|
+
regexp = regexp_repr(result)
|
303
|
+
expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
|
304
|
+
end
|
305
|
+
|
306
|
+
it 'should parse negative lookahead' do
|
307
|
+
result = parse('letter if not followed by (anything once or more, digit)')
|
308
|
+
expect(result).to be_success
|
309
|
+
|
310
|
+
regexp = regexp_repr(result)
|
311
|
+
expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
|
312
|
+
end
|
313
|
+
|
314
|
+
it 'should parse positive lookbehind' do
|
315
|
+
result = parse('literally "bar" if already had literally "foo"')
|
316
|
+
expect(result).to be_success
|
317
|
+
|
318
|
+
regexp = regexp_repr(result)
|
319
|
+
expect(regexp.to_str).to eq('bar(?<=foo)')
|
320
|
+
end
|
321
|
+
|
322
|
+
it 'should parse negative lookbehind' do
|
323
|
+
result = parse('literally "bar" if not already had literally "foo"')
|
324
|
+
expect(result).to be_success
|
325
|
+
|
326
|
+
regexp = regexp_repr(result)
|
327
|
+
expect(regexp.to_str).to eq('bar(?<!foo)')
|
328
|
+
end
|
329
|
+
end # context
|
330
|
+
|
331
|
+
context 'Parsing capturing group:' do
|
332
|
+
it 'should parse simple anonymous capturing group' do
|
333
|
+
result = parse('capture(literally "sample")')
|
334
|
+
expect(result).to be_success
|
335
|
+
|
336
|
+
regexp = regexp_repr(result)
|
337
|
+
expect(regexp.to_str).to eq('(sample)')
|
338
|
+
end
|
339
|
+
|
340
|
+
it 'should parse complex anonymous capturing group' do
|
341
|
+
result = parse('capture(any of (literally "sample", (digit once or more)))')
|
342
|
+
expect(result).to be_success
|
343
|
+
|
344
|
+
regexp = regexp_repr(result)
|
345
|
+
expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
|
346
|
+
end
|
347
|
+
|
348
|
+
it 'should parse simple anonymous until capturing group' do
|
349
|
+
result = parse('capture anything once or more until literally "!"')
|
350
|
+
expect(result).to be_success
|
351
|
+
|
352
|
+
regexp = regexp_repr(result)
|
353
|
+
expect(regexp.to_str).to eq('(.+)!')
|
354
|
+
end
|
355
|
+
|
356
|
+
it 'should parse complex named capturing group' do
|
357
|
+
result = parse('capture(any of (literally "sample", (digit once or more))) as "foo"')
|
358
|
+
expect(result).to be_success
|
359
|
+
|
360
|
+
regexp = regexp_repr(result)
|
361
|
+
expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
|
362
|
+
end
|
363
|
+
|
364
|
+
it 'should parse a sequence with named capturing groups' do
|
365
|
+
source = <<-ENDS
|
366
|
+
capture (anything once or more) as "first",
|
367
|
+
literally " - ",
|
368
|
+
capture literally "second part" as "second"
|
369
|
+
ENDS
|
370
|
+
result = parse(source)
|
371
|
+
expect(result).to be_success
|
372
|
+
|
373
|
+
regexp = regexp_repr(result)
|
374
|
+
expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
|
375
|
+
end
|
376
|
+
|
377
|
+
it 'should parse complex named until capturing group' do
|
378
|
+
result = parse('capture (anything once or more) as "foo" until literally "m"')
|
379
|
+
expect(result).to be_success
|
380
|
+
|
381
|
+
regexp = regexp_repr(result)
|
382
|
+
expect(regexp.to_str).to eq('(?<foo>.+)m')
|
383
|
+
end
|
384
|
+
|
385
|
+
end # context
|
386
|
+
|
387
|
+
context 'Parsing anchors:' do
|
388
|
+
it 'should parse begin anchors' do
|
389
|
+
result = parse('starts with literally "match"')
|
390
|
+
expect(result).to be_success
|
391
|
+
|
392
|
+
regexp = regexp_repr(result)
|
393
|
+
expect(regexp.to_str).to eq('^match')
|
394
|
+
end
|
395
|
+
|
396
|
+
it 'should parse begin anchors (alternative syntax)' do
|
397
|
+
result = parse('begin with literally "match"')
|
398
|
+
expect(result).to be_success
|
399
|
+
|
400
|
+
regexp = regexp_repr(result)
|
401
|
+
expect(regexp.to_str).to eq('^match')
|
402
|
+
end
|
403
|
+
|
404
|
+
it 'should parse end anchors' do
|
405
|
+
result = parse('literally "match" must end')
|
406
|
+
expect(result).to be_success
|
407
|
+
|
408
|
+
regexp = regexp_repr(result)
|
409
|
+
expect(regexp.to_str).to eq('match$')
|
410
|
+
end
|
411
|
+
|
412
|
+
it 'should parse combination of begin and end anchors' do
|
413
|
+
result = parse('starts with literally "match" must end')
|
414
|
+
expect(result).to be_success
|
415
|
+
|
416
|
+
regexp = regexp_repr(result)
|
417
|
+
expect(regexp.to_str).to eq('^match$')
|
418
|
+
end
|
419
|
+
|
420
|
+
it "should accept anchor with a sequence of patterns" do
|
421
|
+
source = <<-ENDS
|
422
|
+
begin with any of (digit, letter, one of ".-") once or more,
|
423
|
+
literally ".",
|
424
|
+
letter at least 2 times must end
|
425
|
+
ENDS
|
426
|
+
|
427
|
+
result = parse(source)
|
428
|
+
expect(result).to be_success
|
429
|
+
|
430
|
+
regexp = regexp_repr(result)
|
431
|
+
# SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
432
|
+
expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
433
|
+
end
|
434
|
+
end # context
|
292
435
|
end # describe
|
293
436
|
|
294
437
|
|
data/lib/rley/constants.rb
CHANGED
@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
|
|
135
135
|
process_middle_entry(anEntry, anIndex)
|
136
136
|
end
|
137
137
|
else
|
138
|
-
$stderr.puts "Internal Errore '#{anEvent}'"
|
138
|
+
$stderr.puts "Internal Errore '#{anEvent}', entry: #{anEntry}, index: #{anIndex}"
|
139
139
|
raise NotImplementedError
|
140
140
|
end
|
141
141
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.11
|
4
|
+
version: 0.5.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|
@@ -149,7 +149,9 @@ files:
|
|
149
149
|
- examples/general/SRL/lib/parser.rb
|
150
150
|
- examples/general/SRL/lib/regex/abstract_method.rb
|
151
151
|
- examples/general/SRL/lib/regex/alternation.rb
|
152
|
+
- examples/general/SRL/lib/regex/anchor.rb
|
152
153
|
- examples/general/SRL/lib/regex/atomic_expression.rb
|
154
|
+
- examples/general/SRL/lib/regex/capturing_group.rb
|
153
155
|
- examples/general/SRL/lib/regex/char_class.rb
|
154
156
|
- examples/general/SRL/lib/regex/char_range.rb
|
155
157
|
- examples/general/SRL/lib/regex/char_shorthand.rb
|
@@ -157,6 +159,8 @@ files:
|
|
157
159
|
- examples/general/SRL/lib/regex/compound_expression.rb
|
158
160
|
- examples/general/SRL/lib/regex/concatenation.rb
|
159
161
|
- examples/general/SRL/lib/regex/expression.rb
|
162
|
+
- examples/general/SRL/lib/regex/lookaround.rb
|
163
|
+
- examples/general/SRL/lib/regex/match_option.rb
|
160
164
|
- examples/general/SRL/lib/regex/monadic_expression.rb
|
161
165
|
- examples/general/SRL/lib/regex/multiplicity.rb
|
162
166
|
- examples/general/SRL/lib/regex/non_capturing_group.rb
|