rley 0.5.10 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +2 -1
  5. data/appveyor.yml +6 -5
  6. data/examples/NLP/engtagger.rb +176 -0
  7. data/examples/general/SRL/lib/ast_builder.rb +217 -21
  8. data/examples/general/SRL/lib/grammar.rb +33 -5
  9. data/examples/general/SRL/lib/regex/alternation.rb +30 -0
  10. data/examples/general/SRL/lib/regex/char_class.rb +28 -22
  11. data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
  12. data/examples/general/SRL/lib/regex/character.rb +5 -3
  13. data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
  14. data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
  15. data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
  16. data/examples/general/SRL/lib/regex_repr.rb +5 -0
  17. data/examples/general/SRL/lib/tokenizer.rb +28 -3
  18. data/examples/general/SRL/spec/integration_spec.rb +151 -8
  19. data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
  20. data/examples/general/left.rb +36 -0
  21. data/examples/general/right.rb +36 -0
  22. data/lib/rley/constants.rb +1 -1
  23. data/lib/rley/gfg/edge.rb +12 -1
  24. data/lib/rley/gfg/grm_flow_graph.rb +21 -1
  25. data/lib/rley/gfg/item_vertex.rb +1 -1
  26. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  27. data/lib/rley/gfg/start_vertex.rb +1 -0
  28. data/lib/rley/gfg/vertex.rb +27 -0
  29. data/lib/rley/lexical/token.rb +1 -0
  30. data/lib/rley/parser/error_reason.rb +2 -1
  31. data/lib/rley/parser/gfg_chart.rb +14 -0
  32. data/lib/rley/parser/gfg_earley_parser.rb +0 -1
  33. data/lib/rley/parser/gfg_parsing.rb +4 -3
  34. data/lib/rley/parser/parse_entry.rb +33 -3
  35. data/lib/rley/parser/parse_entry_set.rb +14 -2
  36. data/lib/rley/parser/parse_tree_builder.rb +1 -1
  37. data/lib/rley/parser/parse_walker_factory.rb +0 -1
  38. data/lib/rley/syntax/grm_symbol.rb +2 -0
  39. data/lib/rley/syntax/production.rb +15 -3
  40. data/lib/rley/syntax/symbol_seq.rb +16 -1
  41. data/spec/rley/gfg/end_vertex_spec.rb +9 -1
  42. data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
  43. data/spec/rley/gfg/item_vertex_spec.rb +9 -0
  44. data/spec/rley/gfg/start_vertex_spec.rb +9 -1
  45. data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
  46. data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
  47. data/spec/rley/parser/parse_entry_spec.rb +24 -13
  48. data/spec/rley/parser/parse_tracer_spec.rb +1 -1
  49. data/spec/rley/syntax/production_spec.rb +10 -0
  50. data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
  51. metadata +10 -2
@@ -6,25 +6,53 @@ module SRL
6
6
  # This is a very partial grammar of SRL.
7
7
  # It will be expanded with the coming versions of Rley
8
8
  builder = Rley::Syntax::GrammarBuilder.new do
9
+ add_terminals('LPAREN', 'RPAREN', 'COMMA')
9
10
  add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
11
+ add_terminals('LITERALLY', 'STRING_LIT')
10
12
  add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
11
- add_terminals('DIGIT', 'NUMBER')
13
+ add_terminals('DIGIT', 'NUMBER', 'ANY', 'NO')
14
+ add_terminals('CHARACTER', 'WHITESPACE', 'ANYTHING')
15
+ add_terminals('TAB', 'BACKSLASH', 'NEW', 'LINE')
16
+ add_terminals('OF', 'ONE')
12
17
  add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
13
18
  add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
14
19
  add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
15
20
 
16
- # For the moment one focuses on quantifier syntax only...
17
- rule 'srl' => 'term'
21
+ rule 'srl' => 'pattern'
22
+ rule 'pattern' => %w[pattern COMMA quantifiable]
23
+ rule 'pattern' => %w[pattern quantifiable]
24
+ rule 'pattern' => 'quantifiable'
25
+ rule 'quantifiable' => 'term'
26
+ rule 'quantifiable' => %w[term quantifier]
18
27
  rule 'term' => 'atom'
19
- rule 'term' => %w[atom quantifier]
28
+ rule 'term' => 'alternation'
29
+ rule 'term' => 'grouping'
20
30
  rule 'atom' => 'letter_range'
21
31
  rule 'atom' => 'digit_range'
32
+ rule 'atom' => 'character_class'
33
+ rule 'atom' => 'special_char'
34
+ rule 'atom' => 'literal'
22
35
  rule 'letter_range' => %w[LETTER FROM LETTER_LIT TO LETTER_LIT]
23
36
  rule 'letter_range' => %w[UPPERCASE LETTER FROM LETTER_LIT TO LETTER_LIT]
24
37
  rule 'letter_range' => 'LETTER'
25
38
  rule 'letter_range' => %w[UPPERCASE LETTER]
26
39
  rule 'digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]
27
- rule 'digit_range' => 'digit_or_number'
40
+ rule 'digit_range' => 'digit_or_number'
41
+ rule 'character_class' => %w[ANY CHARACTER]
42
+ rule 'character_class' => %w[NO CHARACTER]
43
+ rule 'character_class' => 'WHITESPACE'
44
+ rule 'character_class' => %w[NO WHITESPACE]
45
+ rule 'character_class' => 'ANYTHING'
46
+ rule 'character_class' => %w[ONE OF STRING_LIT]
47
+ rule 'special_char' => 'TAB'
48
+ rule 'special_char' => 'BACKSLASH'
49
+ rule 'special_char' => %w[NEW LINE]
50
+ rule 'literal' => %w[LITERALLY STRING_LIT]
51
+ rule 'alternation' => %w[ANY OF LPAREN alternatives RPAREN]
52
+ rule 'alternatives' => %w[alternatives COMMA quantifiable]
53
+ rule 'alternatives' => %w[alternatives quantifiable]
54
+ rule 'alternatives' => 'quantifiable'
55
+ rule 'grouping' => %w[LPAREN pattern RPAREN]
28
56
  rule 'quantifier' => 'ONCE'
29
57
  rule 'quantifier' => 'TWICE'
30
58
  rule 'quantifier' => %w[EXACTLY count TIMES]
@@ -0,0 +1,30 @@
1
+ # File: alternation.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. A n-ary matching operator.
8
+ # It succeeds when one child expression succeeds to match the subject text
9
+ class Alternation < PolyadicExpression
10
+
11
+ # Constructor.
12
+ def initialize(*theChildren)
13
+ super(theChildren)
14
+ end
15
+
16
+ public
17
+ # Conversion method re-definition.
18
+ # Purpose: Return the String representation of the alternation (child expressions joined by '|').
19
+ def to_str()
20
+ result_children = children.map { |aChild| aChild.to_str() }
21
+ result = '(?:' + result_children.join('|') + ')'
22
+
23
+ return result
24
+ end
25
+
26
+ end # class
27
+
28
+ end # module
29
+
30
+ # End of file
@@ -4,30 +4,36 @@ require_relative "polyadic_expression" # Access the superclass
4
4
 
5
5
  module Regex # This module is used as a namespace
6
6
 
7
- # Abstract class. A n-ary matching operator.
8
- # It succeeds when one child expression succeeds to match the subject text
9
- # than defined by this concatenation.
10
- class CharClass < PolyadicExpression
11
- # A flag that indicates whether the character is negated
12
- attr_reader(:negated)
13
-
14
- # Constructor.
15
- def initialize(to_negate,*theChildren)
16
- super(theChildren)
17
- @negated = to_negate
18
- end
7
+ # Abstract class. A n-ary matching operator.
8
+ # It succeeds when one child expression succeeds to match the subject text.
9
+ class CharClass < PolyadicExpression
10
+ # These are characters with special meaning in character classes
11
+ Metachars = ']\^-'.codepoints
12
+ # A flag that indicates whether the character is negated
13
+ attr_reader(:negated)
14
+
15
+ # Constructor.
16
+ def initialize(to_negate,*theChildren)
17
+ super(theChildren)
18
+ @negated = to_negate
19
+ end
19
20
 
20
- public
21
- # Conversion method re-definition.
22
- # Purpose: Return the String representation of the concatented expressions.
23
- def to_str()
24
- result_children = children.inject('') { |subResult, aChild| subResult << aChild.to_str() }
25
- result = '['+ (negated ? '^' : '') + result_children + ']'
26
-
27
- return result
28
- end
21
+ public
22
+ # Conversion method re-definition.
23
+ # Purpose: Return the String representation of the character class.
24
+ def to_str()
25
+ result_children = children.inject('') do |subResult, aChild|
26
+ if aChild.kind_of?(Regex::Character) && Metachars.include?(aChild.codepoint)
27
+ subResult << "\\" # Escape meta-character...
28
+ end
29
+ subResult << aChild.to_str()
30
+ end
31
+ result = '['+ (negated ? '^' : '') + result_children + ']'
32
+
33
+ return result
34
+ end
29
35
 
30
- end # class
36
+ end # class
31
37
 
32
38
  end # module
33
39
 
@@ -0,0 +1,50 @@
1
+ # File: char_shorthand.rb
2
+
3
+ require_relative "atomic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A pre-defined character class is in essence a name for a built-in, standard character class.
8
+ class CharShorthand < AtomicExpression
9
+ # A constant Hash that defines all the predefined character shorthands.
10
+ # It contains pairs of the form:
11
+ # a pre-defined character shorthand letter => the equivalent character class (as a String)
12
+ StandardCClasses = {
13
+ 'd' => '[0-9]',
14
+ 'D' => '[^0-9]',
15
+ 'h' => '[0-9a-fA-F]',
16
+ 'H' => '[^0-9a-fA-F]',
17
+ 's' => '[ \t\r\n\f]',
18
+ 'S' => '[^ \t\r\n\f]',
19
+ 'w' => '[0-9a-zA-Z_]',
20
+ 'W' => '[^0-9a-zA-Z_]'
21
+ }
22
+
23
+ # A one-letter abbreviation
24
+ attr_reader(:shortname)
25
+
26
+ # Constructor
27
+ def initialize(aShortname)
28
+ @shortname = valid_shortname(aShortname)
29
+ end
30
+
31
+ public
32
+ # Conversion method re-definition.
33
+ # Purpose: Return the String representation of the expression.
34
+ def to_str()
35
+ return "\\#{shortname}"
36
+ end
37
+
38
+ private
39
+ # Return the validated short name.
40
+ def valid_shortname(aShortname)
41
+ raise StandardError, "Unknown predefined character class \\#{aShortname}" unless StandardCClasses.include? aShortname
42
+
43
+ return aShortname
44
+ end
45
+
46
+ end # class
47
+
48
+ end # module
49
+
50
+ # End of file
@@ -25,6 +25,8 @@ class Character < AtomicExpression
25
25
  "\\6" => 6,
26
26
  "\\7" => 7
27
27
  }
28
+
29
+ MetaChars = '\^$+?.'
28
30
 
29
31
  # The integer value that uniquely identifies the character.
30
32
  attr_reader(:codepoint)
@@ -63,7 +65,7 @@ class Character < AtomicExpression
63
65
  end
64
66
  @lexeme = aValue
65
67
 
66
- when Fixnum
68
+ when Integer
67
69
  @codepoint = aValue
68
70
  else
69
71
  raise StandardError, "Cannot initialize a Character with a '#{aValue}'."
@@ -125,14 +127,14 @@ public
125
127
  # newOne == newOne # true. Identity
126
128
  # newOne == Character.new(?\u03a3) # true. Both have same codepoint
127
129
  # newOne == ?\u03a3 # true. The single character String match exactly the char attribute.
128
- # newOne == 0x03a3 # true. The Fixnum is compared to the codepoint value.
130
+ # newOne == 0x03a3 # true. The Integer is compared to the codepoint value.
129
131
  # Will test equality with any Object that knows the to_s method
130
132
  def ==(another)
131
133
  result = case another
132
134
  when Character
133
135
  self.to_str == another.to_str
134
136
 
135
- when Fixnum
137
+ when Integer
136
138
  self.codepoint == another
137
139
 
138
140
  when String
@@ -0,0 +1,32 @@
1
+ # File: concatenation.rb
2
+
3
+ require_relative 'polyadic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # Abstract class. A n-ary matching operator.
8
+ # It succeeds when each child succeeds to match the subject text in the same
9
+ # serial arrangement as defined by this concatenation.
10
+ class Concatenation < PolyadicExpression
11
+
12
+ # Constructor.
13
+ def initialize(*theChildren)
14
+ super(theChildren)
15
+ end
16
+
17
+ public
18
+ # Conversion method re-definition.
19
+ # Purpose: Return the String representation of the concatenated expressions.
20
+ def to_str()
21
+ result = children.inject('') { |result, aChild|
22
+ result << aChild.to_str()
23
+ }
24
+
25
+ return result
26
+ end
27
+
28
+ end # class
29
+
30
+ end # module
31
+
32
+ # End of file
@@ -0,0 +1,29 @@
1
+ # File: non_capturing_group.rb
2
+
3
+ require_relative "monadic_expression" # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A non-capturing group; in other words, it is a pure grouping of sub-expressions
8
+ class NonCapturingGroup < MonadicExpression
9
+
10
+ # Constructor.
11
+ # [aChildExpression] A sub-expression to match. When successful
12
+ # the matching text is grouped but not captured.
13
+ def initialize(aChildExpression)
14
+ super(aChildExpression)
15
+ end
16
+
17
+ public
18
+ # Conversion method re-definition.
19
+ # Purpose: Return the String representation of the non-capturing group.
20
+ def to_str()
21
+ result = '(?:' + all_child_text() + ")"
22
+ return result
23
+ end
24
+
25
+ end # class
26
+
27
+ end # module
28
+
29
+ # End of file
@@ -0,0 +1,26 @@
1
+ # File: wildcard.rb
2
+
3
+ require_relative 'atomic_expression' # Access the superclass
4
+
5
+ module Regex # This module is used as a namespace
6
+
7
+ # A wildcard matches any character (except for the newline).
8
+ class Wildcard < AtomicExpression
9
+
10
+ # Constructor
11
+ def initialize()
12
+ super
13
+ end
14
+
15
+ public
16
+ # Conversion method re-definition.
17
+ # Purpose: Return the String representation of the expression.
18
+ def to_str()
19
+ return '.'
20
+ end
21
+
22
+ end # class
23
+
24
+ end # module
25
+
26
+ # End of file
@@ -1,5 +1,10 @@
1
1
  require_relative './regex/character'
2
2
  require_relative './regex/char_range'
3
+ require_relative './regex/concatenation'
3
4
  require_relative './regex/multiplicity'
4
5
  require_relative './regex/repetition'
5
6
  require_relative './regex/char_class'
7
+ require_relative './regex/char_shorthand'
8
+ require_relative './regex/wildcard'
9
+ require_relative './regex/alternation'
10
+ require_relative './regex/non_capturing_group'
@@ -26,23 +26,35 @@ module SRL
26
26
  # Here are all the SRL keywords (in uppercase)
27
27
  @@keywords = %w[
28
28
  AND
29
+ ANY
30
+ ANYTHING
29
31
  AT
32
+ BACKSLASH
30
33
  BETWEEN
34
+ CHARACTER
31
35
  DIGIT
32
36
  EXACTLY
33
37
  FROM
34
38
  LEAST
35
39
  LETTER
40
+ LINE
41
+ LITERALLY
36
42
  MORE
37
43
  NEVER
44
+ NEW
45
+ NO
38
46
  NUMBER
47
+ OF
39
48
  ONCE
49
+ ONE
40
50
  OPTIONAL
41
51
  OR
52
+ TAB
42
53
  TIMES
43
54
  TO
44
55
  TWICE
45
56
  UPPERCASE
57
+ WHITESPACE
46
58
  ].map { |x| [x, x] } .to_h
47
59
 
48
60
  class ScanError < StandardError; end
@@ -68,7 +80,7 @@ module SRL
68
80
  def _next_token()
69
81
  skip_whitespaces
70
82
  curr_ch = scanner.peek(1)
71
- return nil if curr_ch.nil?
83
+ return nil if curr_ch.nil? || curr_ch.empty?
72
84
 
73
85
  token = nil
74
86
 
@@ -83,7 +95,13 @@ module SRL
83
95
  token = build_token(@@keywords[lexeme.upcase], lexeme)
84
96
  # TODO: handle case unknown identifier
85
97
  elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
86
- token = build_token('LETTER_LIT', lexeme)
98
+ token = build_token('LETTER_LIT', lexeme)
99
+ elsif (lexeme = scanner.scan(/"([^"]|\\")*"/)) # Double quotes literal?
100
+ unquoted = lexeme.gsub(/(^")|("$)/, '')
101
+ token = build_token('STRING_LIT', unquoted)
102
+ elsif (lexeme = scanner.scan(/'([^']|\\')*'/)) # Single quotes literal?
103
+ unquoted = lexeme.gsub(/(^')|('$)/, '')
104
+ token = build_token('STRING_LIT', unquoted)
87
105
  else # Unknown token
88
106
  erroneous = curr_ch.nil? ? '' : curr_ch
89
107
  sequel = scanner.scan(/.{1,20}/)
@@ -96,7 +114,14 @@ module SRL
96
114
 
97
115
  def build_token(aSymbolName, aLexeme)
98
116
  token_type = name2symbol[aSymbolName]
99
- return Rley::Lexical::Token.new(aLexeme, token_type)
117
+ begin
118
+ token = Rley::Lexical::Token.new(aLexeme, token_type)
119
+ rescue Exception => ex
120
+ puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
121
+ raise ex
122
+ end
123
+
124
+ return token
100
125
  end
101
126
 
102
127
  def skip_whitespaces()
@@ -16,7 +16,6 @@ describe 'Integration tests:' do
16
16
  end
17
17
 
18
18
  context 'Parsing character ranges:' do
19
-
20
19
  it "should parse 'letter from ... to ...' syntax" do
21
20
  result = parse('letter from a to f')
22
21
  expect(result).to be_success
@@ -56,13 +55,41 @@ describe 'Integration tests:' do
56
55
  regexp = regexp_repr(result)
57
56
  expect(regexp.to_str).to eq('[1-4]')
58
57
  end
58
+ end # context
59
+
60
+ context 'Parsing string literals:' do
61
+ it 'should parse double quotes literal string' do
62
+ result = parse('literally "hello"')
63
+ expect(result).to be_success
64
+
65
+ regexp = regexp_repr(result)
66
+ expect(regexp.to_str).to eq('hello')
67
+ end
68
+
69
+ it 'should parse single quotes literal string' do
70
+ result = parse("literally 'hello'")
71
+ expect(result).to be_success
72
+
73
+ regexp = regexp_repr(result)
74
+ expect(regexp.to_str).to eq('hello')
75
+ end
76
+
77
+ it 'should escape special characters' do
78
+ result = parse("literally '.'")
79
+ expect(result).to be_success
80
+
81
+ regexp = regexp_repr(result)
82
+ expect(regexp.to_str).to eq('\.')
83
+ end
84
+ end
59
85
 
86
+ context 'Parsing character classes:' do
60
87
  it "should parse 'digit' syntax" do
61
88
  result = parse('digit')
62
89
  expect(result).to be_success
63
90
 
64
91
  regexp = regexp_repr(result)
65
- expect(regexp.to_str).to eq('[0-9]')
92
+ expect(regexp.to_str).to eq('\d')
66
93
  end
67
94
 
68
95
  it "should parse 'number' syntax" do
@@ -70,9 +97,126 @@ describe 'Integration tests:' do
70
97
  expect(result).to be_success
71
98
 
72
99
  regexp = regexp_repr(result)
73
- expect(regexp.to_str).to eq('[0-9]')
100
+ expect(regexp.to_str).to eq('\d')
101
+ end
102
+
103
+ it "should parse 'any character' syntax" do
104
+ result = parse('any character')
105
+ expect(result).to be_success
106
+
107
+ regexp = regexp_repr(result)
108
+ expect(regexp.to_str).to eq('\w')
109
+ end
110
+
111
+ it "should parse 'no character' syntax" do
112
+ result = parse('no character')
113
+ expect(result).to be_success
114
+
115
+ regexp = regexp_repr(result)
116
+ expect(regexp.to_str).to eq('\W')
117
+ end
118
+
119
+ it "should parse 'whitespace' syntax" do
120
+ result = parse('whitespace')
121
+ expect(result).to be_success
122
+
123
+ regexp = regexp_repr(result)
124
+ expect(regexp.to_str).to eq('\s')
125
+ end
126
+
127
+ it "should parse 'no whitespace' syntax" do
128
+ result = parse('no whitespace')
129
+ expect(result).to be_success
130
+
131
+ regexp = regexp_repr(result)
132
+ expect(regexp.to_str).to eq('\S')
74
133
  end
75
134
 
135
+ it "should parse 'anything' syntax" do
136
+ result = parse('anything')
137
+ expect(result).to be_success
138
+
139
+ regexp = regexp_repr(result)
140
+ expect(regexp.to_str).to eq('.')
141
+ end
142
+
143
+ it "should parse 'one of' syntax" do
144
+ result = parse('one of "._%+-"')
145
+ expect(result).to be_success
146
+
147
+ regexp = regexp_repr(result)
148
+ # Remark: reference implementation less readable
149
+ # (escapes more characters than required)
150
+ expect(regexp.to_str).to eq('[._%+\-]')
151
+ end
152
+ end # context
153
+
154
+
155
+ context 'Parsing special character declarations:' do
156
+ it "should parse 'tab' syntax" do
157
+ result = parse('tab')
158
+ expect(result).to be_success
159
+
160
+ regexp = regexp_repr(result)
161
+ expect(regexp.to_str).to eq('\t')
162
+ end
163
+
164
+ it "should parse 'backslash' syntax" do
165
+ result = parse('backslash')
166
+ expect(result).to be_success
167
+
168
+ regexp = regexp_repr(result)
169
+ expect(regexp.to_str).to eq('\\')
170
+ end
171
+
172
+ it "should parse 'new line' syntax" do
173
+ result = parse('new line')
174
+ expect(result).to be_success
175
+
176
+ regexp = regexp_repr(result)
177
+ expect(regexp.to_str).to eq('\n')
178
+ end
179
+ end # context
180
+
181
+ context 'Parsing alternations:' do
182
+ it "should parse 'any of' syntax" do
183
+ source = 'any of (any character, one of "._%-+")'
184
+ result = parse(source)
185
+ expect(result).to be_success
186
+
187
+ regexp = regexp_repr(result)
188
+ expect(regexp.to_str).to eq('(?:\w|[._%\-+])')
189
+ end
190
+ end # context
191
+
192
+ context 'Parsing concatenation:' do
193
+ it "should reject dangling comma" do
194
+ source = 'literally "a",'
195
+ result = parse(source)
196
+ expect(result).not_to be_success
197
+ message_prefix = /Premature end of input after ','/
198
+ expect(result.failure_reason.message).to match(message_prefix)
199
+ end
200
+
201
+ it "should parse a sequence of patterns" do
202
+ #
203
+ # DEBUG When I put a comma at the end ... looping endlessly
204
+ #
205
+ source = <<-ENDS
206
+ any of (any character, one of "._%-+") once or more,
207
+ literally "@",
208
+ any of (digit, letter, one of ".-") once or more,
209
+ literally ".",
210
+ letter at least 2 times
211
+ ENDS
212
+
213
+ result = parse(source)
214
+ expect(result).to be_success
215
+
216
+ regexp = regexp_repr(result)
217
+ # SRL expect: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
218
+ expect(regexp.to_str).to eq('(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}')
219
+ end
76
220
  end # context
77
221
 
78
222
  context 'Parsing quantifiers:' do
@@ -87,19 +231,19 @@ describe 'Integration tests:' do
87
231
  end
88
232
 
89
233
  it "should parse 'twice' syntax" do
90
- result = parse(prefix + 'twice')
234
+ result = parse('digit twice')
91
235
  expect(result).to be_success
92
236
 
93
237
  regexp = regexp_repr(result)
94
- expect(regexp.to_str).to eq('[p-t]{2}')
238
+ expect(regexp.to_str).to eq('\d{2}')
95
239
  end
96
240
 
97
241
  it "should parse 'optional' syntax" do
98
- result = parse(prefix + 'optional')
242
+ result = parse('anything optional')
99
243
  expect(result).to be_success
100
244
 
101
245
  regexp = regexp_repr(result)
102
- expect(regexp.to_str).to eq('[p-t]?')
246
+ expect(regexp.to_str).to eq('.?')
103
247
  end
104
248
 
105
249
  it "should parse 'exactly ... times' syntax" do
@@ -121,7 +265,6 @@ describe 'Integration tests:' do
121
265
  expect(regexp.to_str).to eq('[p-t]{2,4}')
122
266
  end
123
267
 
124
-
125
268
  it "should parse 'once or more' syntax" do
126
269
  result = parse(prefix + 'once or more')
127
270
  expect(result).to be_success