rley 0.5.11 → 0.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0ec06612f299302b861fbaeb04b75c0040a026cf
4
- data.tar.gz: d68438efcbacceb2ae4319ac268492e93db35265
3
+ metadata.gz: 78c5a2a83d1691c6c470f2fb4bf347f7dca44cb6
4
+ data.tar.gz: 9c1f62cdf775e71e33ceecfc99db58298b4e3b82
5
5
  SHA512:
6
- metadata.gz: ec3be765a424028c986ea4812cf6f1485f04285beb2b9d8fffc774fc0b61108d4d6758a09a648132562752ab25904fb38f8ee57ecff90d0a70bca253150ed130
7
- data.tar.gz: 2463def65eecbefed2bbfffc61e63e88dca2d0498078e83bc742811e540718e95e75f3896fa31b5bdc9068f5420906f389615470a86831dbcb5025824645775d
6
+ metadata.gz: 662e52aa9aae198f6eb5e9ed1750a1bfa873969ad9bedea4ca7225babef3d9fa648b709c4bef4959432d62cf8e1733d556ce911efec9885a0d59e6d4972c0f50
7
+ data.tar.gz: ab114da248a85b5e78fdb2ba9affca357a52b0cbe45cbaa97b1ad41becf71b0fa89db6e1f4487c0a23c59b9b9b83db5101e511f2b711a0d4f9a324ac2c3f3a2d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ### 0.5.12 / 2018-02-03
2
+ * [CHANGE] Simple Regex Language is fully supported!...
3
+ * [CHANGE] File `examples/general/SRL/grammar.rb`: added missing rule productions for Simple Regex Language.
4
+ * [CHANGE] File `examples/general/SRL/ast_builder.rb`: added transformation rules for the missing regular expression features.
5
+ * [CHANGE] File `examples/general/SRL/spec/integration_spec.rb`: added tests covering SRL.
6
+ * [NEW] File `examples/general/SRL/lib/anchor.rb`: added support for anchors in regular expressions.
7
+ * [NEW] File `examples/general/SRL/lib/capturing_group.rb`: added support for capturing groups in regular expressions.
8
+ * [NEW] File `examples/general/SRL/lib/lookaround.rb`: added support for lookaround in regular expressions.
9
+
1
10
  ### 0.5.11 / 2018-01-25
2
11
  * [NEW] File `left.rb` added in `examples/general` folder for showing use of left-recursive rules.
3
12
  * [NEW] File `right.rb` added in `examples/general` folder for showing use of right-recursive rules (less performant).
@@ -14,6 +14,8 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
14
14
 
15
15
  Terminal2NodeClass = { }.freeze
16
16
 
17
+ attr_reader :options
18
+
17
19
  protected
18
20
 
19
21
  # Overriding method.
@@ -36,33 +38,109 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
36
38
  # @param theChildren [Array] Children nodes (one per rhs symbol)
37
39
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
38
40
  node = case aProduction.name
39
- when 'srl_0' # rule 'srl' => 'pattern'
41
+ when 'srl_0' # rule 'srl' => 'expression'
40
42
  return_first_child(aRange, theTokens, theChildren)
41
-
42
- when 'pattern_0' # rule 'pattern' => %w[pattern COMMA quantifiable]
43
+
44
+ when 'expression_0' # rule 'expression' => %w[pattern separator flags]
45
+ reduce_expression_0(aProduction, aRange, theTokens, theChildren)
46
+
47
+ when 'expression_1' # rule 'expression' => 'pattern'
48
+ return_first_child(aRange, theTokens, theChildren)
49
+
50
+ when 'pattern_0' # rule 'pattern' => %w[pattern separator quantifiable]
43
51
  reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
44
52
 
45
- when 'pattern_1' # rule 'pattern' => %w[pattern quantifiable]
46
- reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
47
-
48
- when 'pattern_2' # rule 'pattern' => 'quantifiable'
53
+ when 'pattern_1' # rule 'pattern' => 'quantifiable'
49
54
  return_first_child(aRange, theTokens, theChildren)
50
55
 
51
- when 'quantifiable_0' # rule 'quantifiable' => 'term'
56
+ when 'separator_0' # rule 'separator' => 'COMMA'
52
57
  return_first_child(aRange, theTokens, theChildren)
53
58
 
54
- when 'quantifiable_1' # rule 'quantifiable' = %w[term quantifier]
59
+ when 'separator_1' # rule 'separator' => []
60
+ nil
61
+
62
+ when 'flags_0' # rule 'flags' => %[flags separator single_flag]
63
+ ### NEW
64
+ reduce_flags_0(aProduction, aRange, theTokens, theChildren)
65
+
66
+ when 'single_flag_0' # rule 'single_flag' => %w[CASE INSENSITIVE]
67
+ ### NEW
68
+ reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
69
+
70
+ when 'single_flag_1' # rule 'single_flag' => %w[MULTI LINE]
71
+ ### NEW
72
+ reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
73
+
74
+ when 'single_flag_2' # rule 'single_flag' => %w[ALL LAZY]
75
+ ### NEW
76
+ reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
77
+
78
+ # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
79
+ when 'quantifiable_0'
80
+ reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
81
+
82
+ # rule 'quantifiable' => %w[begin_anchor anchorable]
83
+ when 'quantifiable_1'
55
84
  reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
56
85
 
86
+ # rule 'quantifiable' => %w[anchorable end_anchor]
87
+ when 'quantifiable_2'
88
+ reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
89
+
90
+ when 'quantifiable_3' # rule 'quantifiable' => 'anchorable'
91
+ return_first_child(aRange, theTokens, theChildren)
92
+
93
+ # rule 'begin_anchor' => %w[STARTS WITH]
94
+ # rule 'begin_anchor' => %w[BEGIN WITH]
95
+ when 'begin_anchor_0', 'begin_anchor_1'
96
+ reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
97
+
98
+ when 'end_anchor_0' # rule 'end_anchor' => %w[MUST END]
99
+ reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
100
+
101
+ when 'anchorable_0' # rule 'anchorable' => 'assertable'
102
+ return_first_child(aRange, theTokens, theChildren)
103
+
104
+ when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
105
+ reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
106
+
107
+ when 'anchorable_1' # rule 'anchorable' => %w[assertable assertion]
108
+ reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
109
+
110
+ # rule 'assertion' => %w[IF FOLLOWED BY assertable]
111
+ when 'assertion_0'
112
+ reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
113
+
114
+ # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
115
+ when 'assertion_1'
116
+ reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
117
+
118
+ # rule 'assertion' => %w[IF ALREADY HAD assertable]
119
+ when 'assertion_2'
120
+ reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
121
+
122
+ # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
123
+ when 'assertion_3'
124
+ reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
125
+
126
+ when 'assertable_0' # rule 'assertable' => 'term'
127
+ return_first_child(aRange, theTokens, theChildren)
128
+
129
+ when 'assertable_1' # rule 'assertable' => %w[term quantifier]
130
+ reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
131
+
57
132
  when 'term_0' # rule 'term' => 'atom'
58
133
  return_first_child(aRange, theTokens, theChildren)
59
134
 
60
135
  when 'term_1' # rule 'term' => 'alternation'
61
136
  return_first_child(aRange, theTokens, theChildren)
62
-
137
+
63
138
  when 'term_2' # rule 'term' => 'grouping'
64
139
  return_first_child(aRange, theTokens, theChildren)
65
140
 
141
+ when 'term_3' # rule 'term' => 'capturing_group'
142
+ return_first_child(aRange, theTokens, theChildren)
143
+
66
144
  when 'atom_0' # rule 'atom' => 'letter_range'
67
145
  return_first_child(aRange, theTokens, theChildren)
68
146
 
@@ -133,19 +211,34 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
133
211
  when 'alternation_0'
134
212
  reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
135
213
 
136
- # rule 'alternatives' => %w[alternatives COMMA quantifiable]
214
+ # rule 'alternatives' => %w[alternatives separator quantifiable]
137
215
  when 'alternatives_0'
138
216
  reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
139
217
 
140
- # rule 'alternatives' => %w[alternatives quantifiable]
141
- when 'alternatives_1'
218
+ when 'alternatives_1' # rule 'alternatives' => 'quantifiable'
142
219
  reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
143
220
 
144
- when 'alternatives_2' # rule 'alternatives' => 'quantifiable'
145
- reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
221
+ when 'grouping_0' # rule 'grouping' => %w[LPAREN pattern RPAREN]
222
+ reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
223
+
224
+ # rule 'capturing_group' => %w[CAPTURE assertable]
225
+ when 'capturing_group_0'
226
+ reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
227
+
228
+ # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
229
+ when 'capturing_group_1'
230
+ reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
231
+
232
+ # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
233
+ when 'capturing_group_2'
234
+ reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
235
+
236
+ # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
237
+ when 'capturing_group_3'
238
+ reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
146
239
 
147
- when 'grouping' # rule 'grouping' => %w[LPAREN pattern RPAREN]
148
- reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
240
+ when 'var_name_0' # rule 'var_name' => 'STRING_LIT'
241
+ return_first_child(aRange, theTokens, theChildren)
149
242
 
150
243
  when 'quantifier_0' # rule 'quantifier' => 'ONCE'
151
244
  multiplicity(1, 1)
@@ -205,11 +298,11 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
205
298
  result = Regex::Concatenation.new(*chars)
206
299
  else
207
300
  if to_escape && Regex::Character::MetaChars.include?(aString)
208
- result = Regex::Concatenation.new(Regex::Character.new("\\"),
301
+ result = Regex::Concatenation.new(Regex::Character.new("\\"),
209
302
  Regex::Character.new(aString))
210
303
  else
211
304
  result = Regex::Character.new(aString)
212
- end
305
+ end
213
306
  end
214
307
 
215
308
  return result
@@ -237,21 +330,100 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
237
330
  def repetition(expressionToRepeat, aMultiplicity)
238
331
  return Regex::Repetition.new(expressionToRepeat, aMultiplicity)
239
332
  end
240
-
241
- # rule 'pattern' => %w[pattern COMMA quantifiable]
333
+
334
+ # rule 'expression' => %w[pattern separator flags]
335
+ def reduce_expression_0(aProduction, aRange, theTokens, theChildren)
336
+ @options = theChildren[2] if theChildren[2]
337
+ return_first_child(aRange, theTokens, theChildren)
338
+ end
339
+
340
+ # rule 'pattern' => %w[pattern separator quantifiable]
242
341
  def reduce_pattern_0(aProduction, aRange, theTokens, theChildren)
243
342
  return Regex::Concatenation.new(theChildren[0], theChildren[2])
244
343
  end
245
344
 
246
- # rule 'pattern' => %w[pattern quantifiable]
247
- def reduce_pattern_1(aProduction, aRange, theTokens, theChildren)
248
- return Regex::Concatenation.new(theChildren[0], theChildren[1])
345
+ # rule 'flags' => %[flags separator single_flag]
346
+ def reduce_flags_0(aProduction, aRange, theTokens, theChildren)
347
+ theChildren[0] << theChildren[2]
249
348
  end
250
349
 
251
- # rule 'quantifiable' => %w[term quantifier]
350
+ # rule 'single_flag' => %w[CASE INSENSITIVE]
351
+ def reduce_single_flag_0(aProduction, aRange, theTokens, theChildren)
352
+ return [ Regex::MatchOption.new(:IGNORECASE, true) ]
353
+ end
354
+
355
+ # rule 'single_flag' => %w[MULTI LINE]
356
+ def reduce_single_flag_1(aProduction, aRange, theTokens, theChildren)
357
+ return [ Regex::MatchOption.new(:MULTILINE, true) ]
358
+ end
359
+
360
+ # rule 'single_flag' => %w[ALL LAZY]
361
+ def reduce_single_flag_2(aProduction, aRange, theTokens, theChildren)
362
+ return [ Regex::MatchOption.new(:ALL_LAZY, true) ]
363
+ end
364
+
365
+ # rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
366
+ def reduce_quantifiable_0(aProduction, aRange, theTokens, theChildren)
367
+ theChildren[1].begin_anchor = theChildren[0]
368
+ theChildren[1].end_anchor = theChildren[2]
369
+ return theChildren[1]
370
+ end
371
+
372
+ # rule 'quantifiable' => %w[begin_anchor anchorable]
252
373
  def reduce_quantifiable_1(aProduction, aRange, theTokens, theChildren)
253
- quantifier = theChildren.last
254
- term = theChildren.first
374
+ theChildren[1].begin_anchor = theChildren[0]
375
+ return theChildren[1]
376
+ end
377
+
378
+ # rule 'quantifiable' => %w[anchorable end_anchor]
379
+ def reduce_quantifiable_2(aProduction, aRange, theTokens, theChildren)
380
+ theChildren[0].end_anchor = theChildren[1]
381
+ return theChildren[0]
382
+ end
383
+
384
+ # rule 'begin_anchor' => %w[STARTS WITH]
385
+ # rule 'begin_anchor' => %w[BEGIN WITH]
386
+ def reduce_begin_anchor_0(aProduction, aRange, theTokens, theChildren)
387
+ return Regex::Anchor.new('^')
388
+ end
389
+
390
+ # rule 'end_anchor' => %w[MUST END]
391
+ def reduce_end_anchor_0(aProduction, aRange, theTokens, theChildren)
392
+ return Regex::Anchor.new('$')
393
+ end
394
+
395
+
396
+ # rule 'anchorable' => %w[assertable assertion]
397
+ def reduce_anchorable_1(aProduction, aRange, theTokens, theChildren)
398
+ assertion = theChildren.last
399
+ assertion.children.unshift(theChildren[0])
400
+ return assertion
401
+ end
402
+
403
+ # rule 'assertion' => %w[IF FOLLOWED BY assertable]
404
+ def reduce_assertion_0(aProduction, aRange, theTokens, theChildren)
405
+ return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
406
+ end
407
+
408
+ # rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
409
+ def reduce_assertion_1(aProduction, aRange, theTokens, theChildren)
410
+ return Regex::Lookaround.new(theChildren.last, :ahead, :negative)
411
+ end
412
+
413
+ # rule 'assertion' => %w[IF ALREADY HAD assertable]
414
+ def reduce_assertion_2(aProduction, aRange, theTokens, theChildren)
415
+ return Regex::Lookaround.new(theChildren.last, :behind, :positive)
416
+ end
417
+
418
+ # rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
419
+ def reduce_assertion_3(aProduction, aRange, theTokens, theChildren)
420
+ return Regex::Lookaround.new(theChildren.last, :behind, :negative)
421
+ end
422
+
423
+ # rule 'assertable' => %w[term quantifier]
424
+ def reduce_assertable_1(aProduction, aRange, theTokens, theChildren)
425
+ quantifier = theChildren[1]
426
+ term = theChildren[0]
255
427
  repetition(term, quantifier)
256
428
  end
257
429
 
@@ -348,37 +520,56 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
348
520
  raw_literal = theChildren[-1].token.lexeme.dup
349
521
  return string_literal(raw_literal)
350
522
  end
351
-
523
+
352
524
  # rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
353
525
  def reduce_alternation_0(aProduction, aRange, theTokens, theChildren)
354
526
  return Regex::Alternation.new(*theChildren[3])
355
527
  end
356
528
 
357
- # rule 'alternatives' => %w[alternatives COMMA quantifiable]
529
+ # rule 'alternatives' => %w[alternatives separator quantifiable]
358
530
  def reduce_alternatives_0(aProduction, aRange, theTokens, theChildren)
359
531
  return theChildren[0] << theChildren[-1]
360
532
  end
361
533
 
362
- # rule 'alternatives' => %w[alternatives quantifiable]
363
- def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
364
- return theChildren[0] << theChildren[-1]
365
- end
366
-
367
534
  # rule 'alternatives' => 'quantifiable'
368
- def reduce_alternatives_2(aProduction, aRange, theTokens, theChildren)
535
+ def reduce_alternatives_1(aProduction, aRange, theTokens, theChildren)
369
536
  return [theChildren.last]
370
537
  end
371
-
538
+
372
539
  # rule 'grouping' => %w[LPAREN pattern RPAREN]
373
540
  def reduce_grouping_0(aProduction, aRange, theTokens, theChildren)
374
- return Regex::NonCapturingGroup.new(theChildren[1])
541
+ return Regex::NonCapturingGroup.new(theChildren[1])
375
542
  end
376
543
 
544
+ # rule 'capturing_group' => %w[CAPTURE assertable]
545
+ def reduce_capturing_group_0(aProduction, aRange, theTokens, theChildren)
546
+ return Regex::CapturingGroup.new(theChildren[1])
547
+ end
548
+
549
+ # rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
550
+ def reduce_capturing_group_1(aProduction, aRange, theTokens, theChildren)
551
+ group = Regex::CapturingGroup.new(theChildren[1])
552
+ return Regex::Concatenation.new(group, theChildren[3])
553
+ end
554
+
555
+ # rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
556
+ def reduce_capturing_group_2(aProduction, aRange, theTokens, theChildren)
557
+ name = theChildren[3].token.lexeme.dup
558
+ return Regex::CapturingGroup.new(theChildren[1], name)
559
+ end
560
+
561
+ # rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
562
+ def reduce_capturing_group_3(aProduction, aRange, theTokens, theChildren)
563
+ name = theChildren[3].token.lexeme.dup
564
+ group = Regex::CapturingGroup.new(theChildren[1], name)
565
+ return Regex::Concatenation.new(group, theChildren[5])
566
+ end
567
+
377
568
  # rule 'quantifier' => %w[EXACTLY count TIMES]
378
569
  def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
379
570
  count = theChildren[1].token.lexeme.to_i
380
571
  multiplicity(count, count)
381
- end
572
+ end
382
573
 
383
574
  # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
384
575
  def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
@@ -9,6 +9,8 @@ module SRL
9
9
  add_terminals('LPAREN', 'RPAREN', 'COMMA')
10
10
  add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
11
11
  add_terminals('LITERALLY', 'STRING_LIT')
12
+ add_terminals('BEGIN', 'STARTS', 'WITH')
13
+ add_terminals('MUST', 'END')
12
14
  add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
13
15
  add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
14
16
  add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
@@ -17,16 +19,42 @@ module SRL
17
19
  add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
18
20
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
19
21
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
22
+ add_terminals('IF', 'FOLLOWED', 'BY', 'NOT')
23
+ add_terminals('ALREADY', 'HAD')
24
+ add_terminals('CAPTURE', 'AS', 'UNTIL')
25
+ add_terminals('CASE', 'INSENSITIVE', 'MULTI', 'ALL')
26
+ add_terminals('LAZY')
20
27
 
21
- rule 'srl' => 'pattern'
22
- rule 'pattern' => %w[pattern COMMA quantifiable]
23
- rule 'pattern' => %w[pattern quantifiable]
28
+ rule 'srl' => 'expression'
29
+ rule 'expression' => %w[pattern separator flags]
30
+ rule 'expression' => 'pattern'
31
+ rule 'pattern' => %w[pattern separator quantifiable]
24
32
  rule 'pattern' => 'quantifiable'
25
- rule 'quantifiable' => 'term'
26
- rule 'quantifiable' => %w[term quantifier]
33
+ rule 'separator' => 'COMMA'
34
+ rule 'separator' => []
35
+ rule 'flags' => %[flags separator single_flag]
36
+ rule 'single_flag' => %w[CASE INSENSITIVE]
37
+ rule 'single_flag' => %w[MULTI LINE]
38
+ rule 'single_flag' => %w[ALL LAZY]
39
+ rule 'quantifiable' => %w[begin_anchor anchorable end_anchor]
40
+ rule 'quantifiable' => %w[begin_anchor anchorable]
41
+ rule 'quantifiable' => %w[anchorable end_anchor]
42
+ rule 'quantifiable' => 'anchorable'
43
+ rule 'begin_anchor' => %w[STARTS WITH]
44
+ rule 'begin_anchor' => %w[BEGIN WITH]
45
+ rule 'end_anchor' => %w[MUST END]
46
+ rule 'anchorable' => 'assertable'
47
+ rule 'anchorable' => %w[assertable assertion]
48
+ rule 'assertion' => %w[IF FOLLOWED BY assertable]
49
+ rule 'assertion' => %w[IF NOT FOLLOWED BY assertable]
50
+ rule 'assertion' => %w[IF ALREADY HAD assertable]
51
+ rule 'assertion' => %w[IF NOT ALREADY HAD assertable]
52
+ rule 'assertable' => 'term'
53
+ rule 'assertable' => %w[term quantifier]
27
54
  rule 'term' => 'atom'
28
55
  rule 'term' => 'alternation'
29
56
  rule 'term' => 'grouping'
57
+ rule 'term' => 'capturing_group'
30
58
  rule 'atom' => 'letter_range'
31
59
  rule 'atom' => 'digit_range'
32
60
  rule 'atom' => 'character_class'
@@ -49,10 +77,14 @@ module SRL
49
77
  rule 'special_char' => %w[NEW LINE]
50
78
  rule 'literal' => %w[LITERALLY STRING_LIT]
51
79
  rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
52
- rule 'alternatives' => %w[alternatives COMMA quantifiable]
53
- rule 'alternatives' => %w[alternatives quantifiable]
80
+ rule 'alternatives' => %w[alternatives separator quantifiable]
54
81
  rule 'alternatives' => 'quantifiable'
55
82
  rule 'grouping' => %w[LPAREN pattern RPAREN]
83
+ rule 'capturing_group' => %w[CAPTURE assertable]
84
+ rule 'capturing_group' => %w[CAPTURE assertable UNTIL assertable]
85
+ rule 'capturing_group' => %w[CAPTURE assertable AS var_name]
86
+ rule 'capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]
87
+ rule 'var_name' => 'STRING_LIT'
56
88
  rule 'quantifier' => 'ONCE'
57
89
  rule 'quantifier' => 'TWICE'
58
90
  rule 'quantifier' => %w[EXACTLY count TIMES]
@@ -13,10 +13,11 @@ class Alternation < PolyadicExpression
13
13
  super(theChildren)
14
14
  end
15
15
 
16
- public
16
+ protected
17
+
17
18
  # Conversion method re-definition.
18
19
  # Purpose: Return the String representation of the alternation.
19
- def to_str()
20
+ def text_repr()
20
21
  result_children = children.map { |aChild| aChild.to_str() }
21
22
  result = '(?:' + result_children.join('|') + ')'
22
23
 
@@ -0,0 +1,48 @@
1
+ # File: anchor.rb
2
+
3
+ require_relative "atomic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+ # An anchor is a zero-width assertion based on the current position.
7
+ class Anchor < AtomicExpression
8
+ # A Hash for converting a lexeme to a symbolic value
9
+ AnchorToSymbol = {
10
+ # Lexeme => Symbol value
11
+ '^' => :soLine, # Start of line
12
+ '$' => :eoLine, # End of line
13
+ '\A' => :soSubject,
14
+ '\b' => :wordBoundary,
15
+ '\B' => :nonAtWordBoundary,
16
+ '\G' => :firstMatch,
17
+ '\z' => :eoSubject,
18
+ '\Z' => :eoSubjectOrBeforeNLAtEnd
19
+ }
20
+
21
+ # A symbolic value that identifies the type of assertion to perform
22
+ attr_reader(:kind)
23
+
24
+ # Constructor
25
+ # @param aKind [String] Lexeme representation of the anchor
26
+ def initialize(aKind)
27
+ @kind = valid_kind(aKind)
28
+ end
29
+
30
+ public
31
+
32
+ # Conversion method re-definition.
33
+ # Purpose: Return the String representation of the expression.
34
+ def to_str()
35
+ return AnchorToSymbol.rassoc(kind).first()
36
+ end
37
+
38
+ private
39
+
40
+ # Return the symbolic value corresponding to the given lexeme.
41
+ def valid_kind(aKind)
42
+ return AnchorToSymbol[aKind]
43
+ end
44
+
45
+ end # class
46
+ end # module
47
+
48
+ # End of file
@@ -0,0 +1,50 @@
1
+ # File: capturing_group.rb
2
+
3
+ require_relative "monadic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # An association between a capture variable and an expression
8
+ # whose matching text is assigned to that variable.
9
+ class CapturingGroup < MonadicExpression
10
+ # The capture variable id. It is a Fixnum when the capture group gets a sequence number,
11
+ # a String when it is a user-defined name
12
+ attr_reader(:id)
13
+
14
+ # When true, then capturing group forbids backtracking requests from its parent expression.
15
+ attr_reader(:no_backtrack)
16
+
17
+ # Constructor.
18
+ # [aChildExpression] A sub-expression to match. When successful the matching text is assigned to the capture variable.
19
+ # [theId] The id of the capture variable.
20
+ # [noBacktrack] A flag that specifies whether the capturing group forbids backtracking requests from its parent expression.
21
+ def initialize(aChildExpression, theId = nil, noBacktrack = false)
22
+ super(aChildExpression)
23
+ @id = theId
24
+ @no_backtrack = noBacktrack
25
+ end
26
+
27
+ public
28
+ # Return true iff the capturing group has a name (and not a sequence number)
29
+ def named?()
30
+ return id.kind_of?(String)
31
+ end
32
+
33
+ # Conversion method re-definition.
34
+ # Purpose: Return the String representation of the captured expression.
35
+ def to_str()
36
+ prefix = named? ? "?<#{id}>" : ''
37
+ atomic = no_backtrack ? '?>' : ''
38
+ if child.is_a?(Regex::NonCapturingGroup)
39
+ # Minor optimization
40
+ result = '(' + atomic + prefix + child.child.to_str + ")"
41
+ else
42
+ result = '(' + atomic + prefix + child.to_str + ")"
43
+ end
44
+ return result
45
+ end
46
+
47
+ end # class
48
+ end # module
49
+
50
+ # End of file
@@ -18,10 +18,11 @@ module Regex # This module is used as a namespace
18
18
  @negated = to_negate
19
19
  end
20
20
 
21
- public
21
+ protected
22
+
22
23
  # Conversion method re-definition.
23
24
  # Purpose: Return the String representation of the character class.
24
- def to_str()
25
+ def text_repr()
25
26
  result_children = children.inject('') do |subResult, aChild|
26
27
  if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
27
28
  subResult << "\\" # Escape meta-character...
@@ -26,11 +26,13 @@ public
26
26
  # Return the upper bound of the range.
27
27
  def upper()
28
28
  return children.last
29
- end
29
+ end
30
+
31
+ protected
30
32
 
31
33
  # Conversion method re-definition.
32
34
  # Purpose: Return the String representation of the character range.
33
- def to_str()
35
+ def text_repr()
34
36
  result = lower.to_str() + '-' + upper.to_str()
35
37
 
36
38
  return result
@@ -28,10 +28,11 @@ module Regex # This module is used as a namespace
28
28
  @shortname = valid_shortname(aShortname)
29
29
  end
30
30
 
31
- public
31
+ protected
32
+
32
33
  # Conversion method re-definition.
33
34
  # Purpose: Return the String representation of the expression.
34
- def to_str()
35
+ def text_repr()
35
36
  return "\\#{shortname}"
36
37
  end
37
38
 
@@ -106,20 +106,6 @@ public
106
106
  self.class.codepoint2char(@codepoint)
107
107
  end
108
108
 
109
- # Conversion method re-definition.
110
- # Purpose: Return the String representation of the expression.
111
- # If the Character was initially from a text (the lexeme), then the lexeme is returned back.
112
- # Otherwise the character corresponding to the codepoint is returned.
113
- def to_str()
114
- if lexeme.nil?
115
- result = char()
116
- else
117
- result = lexeme.dup()
118
- end
119
-
120
- return result
121
- end
122
-
123
109
  # Returns true iff this Character and parameter 'another' represent the same character.
124
110
  # [another] any Object. The way the equality is tested depends on the another's class
125
111
  # Example:
@@ -152,6 +138,22 @@ public
152
138
  def explain()
153
139
  return "the character '#{to_str()}'"
154
140
  end
141
+
142
+ protected
143
+
144
+ # Conversion method re-definition.
145
+ # Purpose: Return the String representation of the expression.
146
+ # If the Character was initially from a text (the lexeme), then the lexeme is returned back.
147
+ # Otherwise the character corresponding to the codepoint is returned.
148
+ def text_repr()
149
+ if lexeme.nil?
150
+ result = char()
151
+ else
152
+ result = lexeme.dup()
153
+ end
154
+
155
+ return result
156
+ end
155
157
 
156
158
  private
157
159
  # Conversion method that returns a codepoint for the given two characters (digram) escape sequence.
@@ -4,24 +4,25 @@ require_relative 'polyadic_expression' # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
6
 
7
- # Abstract class. A n-ary matching operator.
7
+ # Abstract class. A n-ary matching operator.
8
8
  # It succeeds when each child succeeds to match the subject text in the same
9
9
  # serial arrangement as defined by this concatenation.
10
10
  class Concatenation < PolyadicExpression
11
-
11
+
12
12
  # Constructor.
13
13
  def initialize(*theChildren)
14
14
  super(theChildren)
15
15
  end
16
-
17
- public
16
+
17
+ protected
18
+
18
19
  # Conversion method re-definition.
19
20
  # Purpose: Return the String representation of the concatenated expressions.
20
- def to_str()
21
- result = children.inject('') { |result, aChild|
21
+ def text_repr()
22
+ result = children.inject('') { |result, aChild|
22
23
  result << aChild.to_str()
23
24
  }
24
-
25
+
25
26
  return result
26
27
  end
27
28
 
@@ -5,7 +5,11 @@ require_relative 'abstract_method'
5
5
  module Regex # This module is used as a namespace
6
6
 
7
7
  # Abstract class. The generalization of any valid regular (sub)expression.
8
- class Expression
8
+ class Expression
9
+ attr_accessor :begin_anchor
10
+ attr_accessor :end_anchor
11
+
12
+ # Constructor
9
13
  def initialize()
10
14
  end
11
15
 
@@ -20,20 +24,37 @@ public
20
24
  def cardinality(theParentOptions) abstract_method
21
25
  end
22
26
 
23
- protected
24
27
  # Determine the matching options to apply to this object, given the options coming from the parent
25
28
  # and options that are local to this object. Local options take precedence.
26
- # [theParentOptions] a Hash of matching options. They are overridden by options with same name
29
+ # @param theParentOptions [Hash] matching options. They are overridden by options with same name
27
30
  # that are bound to this object.
28
31
  def options(theParentOptions)
29
32
  resulting_options = theParentOptions.merge(@local_options)
30
33
  return resulting_options
31
34
  end
32
35
 
33
- # Abstract conversion method.
36
+ # Template method.
34
37
  # Purpose: Return the String representation of the expression.
35
- def to_str() abstract_method
38
+ def to_str()
39
+ result = ''
40
+ result << prefix
41
+ result << text_repr
42
+ result << suffix
43
+
44
+ return result
36
45
  end
46
+
47
+ protected
48
+
49
+ def prefix()
50
+ begin_anchor ? begin_anchor.to_str : ''
51
+ end
52
+
53
+ def suffix()
54
+ end_anchor ? end_anchor.to_str : ''
55
+ end
56
+
57
+
37
58
 
38
59
  end # class
39
60
 
@@ -0,0 +1,47 @@
1
+ # File: Lookaround.rb
2
+
3
+ ########################
4
+ # TODO: make it a binary expression
5
+ ########################
6
+
7
+
8
+ require_relative 'polyadic_expression' # Access the superclass
9
+
10
+ module Regex # This module is used as a namespace
11
+ # Lookaround is a zero-width assertion just like the start and end of line anchors.
12
+ # The difference is that lookarounds will actually match characters, but only return the result of the match: match or no match.
13
+ # That is why they are called "assertions". They do not consume characters from the subject,
14
+ # but only assert whether a match is possible or not.
15
+ class Lookaround < PolyadicExpression
16
+ # The "direction" of the lookaround. Can be ahead or behind. It specifies the relative position of the
17
+ # expression to match compared to the current 'position' in the subject text.
18
+ attr_reader(:dir)
19
+
20
+ # The kind indicates whether the assertion is positive (succeeds when there is a match) or negative
21
+ # (assertion succeeds when there is NO match).
22
+ attr_reader(:kind)
23
+
24
+ # Constructor.
25
+ # [assertedExpression] A sub-expression to match.
26
+ # [theDir] One of the following values: [ :ahead, :behind ]
27
+ # [theKind] One of the following values: [ :positive, :negative ]
28
+ def initialize(assertedExpression, theDir, theKind)
29
+ super([assertedExpression])
30
+ @dir, @kind = theDir, theKind
31
+ end
32
+
33
+ public
34
+ # Conversion method re-definition.
35
+ # Purpose: Return the String representation of the lookaround assertion.
36
+ def to_str()
37
+ result = children[0].to_str
38
+ dir_syntax = (dir == :ahead) ? '' : '<'
39
+ kind_syntax = (kind == :positive)? '=' : '!'
40
+ result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ")"
41
+ return result
42
+ end
43
+
44
+ end # class
45
+ end # module
46
+
47
+ # End of file
@@ -0,0 +1,36 @@
1
+ # File: match_option.rb
2
+
3
+ module Regex # This module is used as a namespace
4
+
5
+ # Represents an option that influences the way a regular (sub)expression can perform its matching.
6
+ class MatchOption
7
+ # The symbolic name of the option
8
+ attr_reader(:name)
9
+
10
+ # An indicator that tells whether the option is turned on or off
11
+ attr_reader(:setting)
12
+
13
+ # Constructor.
14
+ def initialize(theName, theSetting)
15
+ @name, @setting = theName, theSetting
16
+ end
17
+
18
+ public
19
+ # Equality operator
20
+ def ==(another)
21
+ return true if self.object_id == another.object_id
22
+
23
+ if another.kind_of?(MatchOption)
24
+ isEqual = ((name == another.name) && (setting == another.setting))
25
+ else
26
+ isEqual = false
27
+ end
28
+
29
+ return isEqual
30
+ end
31
+
32
+ end # class
33
+
34
+ end # module
35
+
36
+ # End of file
@@ -14,10 +14,11 @@ module Regex # This module is used as a namespace
14
14
  super(aChildExpression)
15
15
  end
16
16
 
17
- public
17
+ protected
18
+
18
19
  # Conversion method re-definition.
19
20
  # Purpose: Return the String representation of the captured expression.
20
- def to_str()
21
+ def text_repr()
21
22
  result = '(?:' + all_child_text() + ")"
22
23
  return result
23
24
  end
@@ -16,10 +16,11 @@ class Repetition < MonadicExpression
16
16
  @multiplicity = aMultiplicity
17
17
  end
18
18
 
19
- public
19
+ protected
20
+
20
21
  # Conversion method re-definition.
21
22
  # Purpose: Return the String representation of the repeated expression.
22
- def to_str()
23
+ def text_repr()
23
24
  result = all_child_text() + multiplicity.to_str()
24
25
  return result
25
26
  end
@@ -12,10 +12,11 @@ class Wildcard < AtomicExpression
12
12
  super
13
13
  end
14
14
 
15
- public
15
+ protected
16
+
16
17
  # Conversion method re-definition.
17
18
  # Purpose: Return the String representation of the expression.
18
- def to_str()
19
+ def text_repr()
19
20
  return '.'
20
21
  end
21
22
 
@@ -8,3 +8,6 @@ require_relative './regex/char_shorthand'
8
8
  require_relative './regex/wildcard'
9
9
  require_relative './regex/alternation'
10
10
  require_relative './regex/non_capturing_group'
11
+ require_relative './regex/anchor'
12
+ require_relative './regex/lookaround'
13
+ require_relative './regex/capturing_group'
@@ -22,41 +22,60 @@ module SRL
22
22
  ')' => 'RPAREN',
23
23
  ',' => 'COMMA'
24
24
  }.freeze
25
-
25
+
26
26
  # Here are all the SRL keywords (in uppercase)
27
27
  @@keywords = %w[
28
+ ALL
29
+ ALREADY
28
30
  AND
29
31
  ANY
30
32
  ANYTHING
33
+ AS
31
34
  AT
32
35
  BACKSLASH
36
+ BEGIN
33
37
  BETWEEN
38
+ BY
39
+ CAPTURE
40
+ CASE
34
41
  CHARACTER
35
42
  DIGIT
43
+ END
36
44
  EXACTLY
45
+ FOLLOWED
37
46
  FROM
47
+ HAD
48
+ IF
49
+ INSENSITIVE
50
+ LAZY
38
51
  LEAST
39
52
  LETTER
40
53
  LINE
41
54
  LITERALLY
42
55
  MORE
56
+ MULTI
57
+ MUST
43
58
  NEVER
44
59
  NEW
45
60
  NO
61
+ NOT
46
62
  NUMBER
47
63
  OF
48
64
  ONCE
49
65
  ONE
50
66
  OPTIONAL
51
67
  OR
68
+ STARTS
52
69
  TAB
53
70
  TIMES
54
71
  TO
55
72
  TWICE
73
+ UNTIL
56
74
  UPPERCASE
57
75
  WHITESPACE
76
+ WITH
58
77
  ].map { |x| [x, x] } .to_h
59
-
78
+
60
79
  class ScanError < StandardError; end
61
80
 
62
81
  def initialize(source, aGrammar)
@@ -81,16 +100,16 @@ module SRL
81
100
  skip_whitespaces
82
101
  curr_ch = scanner.peek(1)
83
102
  return nil if curr_ch.nil? || curr_ch.empty?
84
-
103
+
85
104
  token = nil
86
105
 
87
106
  if '(),'.include? curr_ch
88
107
  # Delimiters, separators => single character token
89
- token = build_token(@@lexeme2name[curr_ch], scanner.getch)
108
+ token = build_token(@@lexeme2name[curr_ch], scanner.getch)
90
109
  elsif (lexeme = scanner.scan(/[0-9]{2,}/))
91
110
  token = build_token('INTEGER', lexeme) # An integer has two or more digits
92
111
  elsif (lexeme = scanner.scan(/[0-9]/))
93
- token = build_token('DIGIT_LIT', lexeme)
112
+ token = build_token('DIGIT_LIT', lexeme)
94
113
  elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
95
114
  token = build_token(@@keywords[lexeme.upcase], lexeme)
96
115
  # TODO: handle case unknown identifier
@@ -111,7 +130,7 @@ module SRL
111
130
 
112
131
  return token
113
132
  end
114
-
133
+
115
134
  def build_token(aSymbolName, aLexeme)
116
135
  token_type = name2symbol[aSymbolName]
117
136
  begin
@@ -120,7 +139,7 @@ module SRL
120
139
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
121
140
  raise ex
122
141
  end
123
-
142
+
124
143
  return token
125
144
  end
126
145
 
@@ -151,7 +151,6 @@ describe 'Integration tests:' do
151
151
  end
152
152
  end # context
153
153
 
154
-
155
154
  context 'Parsing special character declarations:' do
156
155
  it "should parse 'tab' syntax" do
157
156
  result = parse('tab')
@@ -197,11 +196,16 @@ describe 'Integration tests:' do
197
196
  message_prefix = /Premature end of input after ','/
198
197
  expect(result.failure_reason.message).to match(message_prefix)
199
198
  end
199
+
200
+ it 'should parse concatenation' do
201
+ result = parse('any of (literally "sample", (digit once or more))')
202
+ expect(result).to be_success
203
+
204
+ regexp = regexp_repr(result)
205
+ expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
206
+ end
200
207
 
201
- it "should parse a sequence of patterns" do
202
- #
203
- # DEBUG When I put a comma at the end ... looping endlessly
204
- #
208
+ it "should parse a long sequence of patterns" do
205
209
  source = <<-ENDS
206
210
  any of (any character, one of "._%-+") once or more,
207
211
  literally "@",
@@ -289,6 +293,145 @@ ENDS
289
293
  expect(regexp.to_str).to eq('[p-t]{10,}')
290
294
  end
291
295
  end # context
296
+
297
+ context 'Parsing lookaround:' do
298
+ it 'should parse positive lookahead' do
299
+ result = parse('letter if followed by (anything once or more, digit)')
300
+ expect(result).to be_success
301
+
302
+ regexp = regexp_repr(result)
303
+ expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
304
+ end
305
+
306
+ it 'should parse negative lookahead' do
307
+ result = parse('letter if not followed by (anything once or more, digit)')
308
+ expect(result).to be_success
309
+
310
+ regexp = regexp_repr(result)
311
+ expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
312
+ end
313
+
314
+ it 'should parse positive lookbehind' do
315
+ result = parse('literally "bar" if already had literally "foo"')
316
+ expect(result).to be_success
317
+
318
+ regexp = regexp_repr(result)
319
+ expect(regexp.to_str).to eq('bar(?<=foo)')
320
+ end
321
+
322
+ it 'should parse negative lookbehind' do
323
+ result = parse('literally "bar" if not already had literally "foo"')
324
+ expect(result).to be_success
325
+
326
+ regexp = regexp_repr(result)
327
+ expect(regexp.to_str).to eq('bar(?<!foo)')
328
+ end
329
+ end # context
330
+
331
+ context 'Parsing capturing group:' do
332
+ it 'should parse simple anonymous capturing group' do
333
+ result = parse('capture(literally "sample")')
334
+ expect(result).to be_success
335
+
336
+ regexp = regexp_repr(result)
337
+ expect(regexp.to_str).to eq('(sample)')
338
+ end
339
+
340
+ it 'should parse complex anonymous capturing group' do
341
+ result = parse('capture(any of (literally "sample", (digit once or more)))')
342
+ expect(result).to be_success
343
+
344
+ regexp = regexp_repr(result)
345
+ expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
346
+ end
347
+
348
+ it 'should parse simple anonymous until capturing group' do
349
+ result = parse('capture anything once or more until literally "!"')
350
+ expect(result).to be_success
351
+
352
+ regexp = regexp_repr(result)
353
+ expect(regexp.to_str).to eq('(.+)!')
354
+ end
355
+
356
+ it 'should parse complex named capturing group' do
357
+ result = parse('capture(any of (literally "sample", (digit once or more))) as "foo"')
358
+ expect(result).to be_success
359
+
360
+ regexp = regexp_repr(result)
361
+ expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
362
+ end
363
+
364
+ it 'should parse a sequence with named capturing groups' do
365
+ source = <<-ENDS
366
+ capture (anything once or more) as "first",
367
+ literally " - ",
368
+ capture literally "second part" as "second"
369
+ ENDS
370
+ result = parse(source)
371
+ expect(result).to be_success
372
+
373
+ regexp = regexp_repr(result)
374
+ expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
375
+ end
376
+
377
+ it 'should parse complex named until capturing group' do
378
+ result = parse('capture (anything once or more) as "foo" until literally "m"')
379
+ expect(result).to be_success
380
+
381
+ regexp = regexp_repr(result)
382
+ expect(regexp.to_str).to eq('(?<foo>.+)m')
383
+ end
384
+
385
+ end # context
386
+
387
+ context 'Parsing anchors:' do
388
+ it 'should parse begin anchors' do
389
+ result = parse('starts with literally "match"')
390
+ expect(result).to be_success
391
+
392
+ regexp = regexp_repr(result)
393
+ expect(regexp.to_str).to eq('^match')
394
+ end
395
+
396
+ it 'should parse begin anchors (alternative syntax)' do
397
+ result = parse('begin with literally "match"')
398
+ expect(result).to be_success
399
+
400
+ regexp = regexp_repr(result)
401
+ expect(regexp.to_str).to eq('^match')
402
+ end
403
+
404
+ it 'should parse end anchors' do
405
+ result = parse('literally "match" must end')
406
+ expect(result).to be_success
407
+
408
+ regexp = regexp_repr(result)
409
+ expect(regexp.to_str).to eq('match$')
410
+ end
411
+
412
+ it 'should parse combination of begin and end anchors' do
413
+ result = parse('starts with literally "match" must end')
414
+ expect(result).to be_success
415
+
416
+ regexp = regexp_repr(result)
417
+ expect(regexp.to_str).to eq('^match$')
418
+ end
419
+
420
+ it "should accept anchor with a sequence of patterns" do
421
+ source = <<-ENDS
422
+ begin with any of (digit, letter, one of ".-") once or more,
423
+ literally ".",
424
+ letter at least 2 times must end
425
+ ENDS
426
+
427
+ result = parse(source)
428
+ expect(result).to be_success
429
+
430
+ regexp = regexp_repr(result)
431
+ # SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
432
+ expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
433
+ end
434
+ end # context
292
435
  end # describe
293
436
 
294
437
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.11'.freeze
6
+ Version = '0.5.12'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
135
135
  process_middle_entry(anEntry, anIndex)
136
136
  end
137
137
  else
138
- $stderr.puts "Internal Errore '#{anEvent}'"
138
+ $stderr.puts "Internal Errore '#{anEvent}', entry: #{anEntry}, index: #{anIndex}"
139
139
  raise NotImplementedError
140
140
  end
141
141
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.11
4
+ version: 0.5.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-25 00:00:00.000000000 Z
11
+ date: 2018-02-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: coveralls
@@ -149,7 +149,9 @@ files:
149
149
  - examples/general/SRL/lib/parser.rb
150
150
  - examples/general/SRL/lib/regex/abstract_method.rb
151
151
  - examples/general/SRL/lib/regex/alternation.rb
152
+ - examples/general/SRL/lib/regex/anchor.rb
152
153
  - examples/general/SRL/lib/regex/atomic_expression.rb
154
+ - examples/general/SRL/lib/regex/capturing_group.rb
153
155
  - examples/general/SRL/lib/regex/char_class.rb
154
156
  - examples/general/SRL/lib/regex/char_range.rb
155
157
  - examples/general/SRL/lib/regex/char_shorthand.rb
@@ -157,6 +159,8 @@ files:
157
159
  - examples/general/SRL/lib/regex/compound_expression.rb
158
160
  - examples/general/SRL/lib/regex/concatenation.rb
159
161
  - examples/general/SRL/lib/regex/expression.rb
162
+ - examples/general/SRL/lib/regex/lookaround.rb
163
+ - examples/general/SRL/lib/regex/match_option.rb
160
164
  - examples/general/SRL/lib/regex/monadic_expression.rb
161
165
  - examples/general/SRL/lib/regex/multiplicity.rb
162
166
  - examples/general/SRL/lib/regex/non_capturing_group.rb