srl_ruby 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d5c1623523fb8a959a54590b407208b61b0019ef
4
- data.tar.gz: 96d647c8fc0b91dc4e02aad405c870e14d3dce39
3
+ metadata.gz: 3642f7f1361cb920e2a9e42dcd8969d5ec75c7fe
4
+ data.tar.gz: 480c7561fd6972a8872e472166d07d0905c4612d
5
5
  SHA512:
6
- metadata.gz: f8d99780790b039088b4bc320d21dfa3d8e33866d34b203b95b650034ca8c2d29a79d953969bbf89eac45b1a2dd862af3e8f2e3aae156f5e62aef1ba3b62a0b3
7
- data.tar.gz: 188f144c3bad7eae1f6a21c9f30b674326bc596aa943ee087c3f6c0c5bcbf3837f16af5f387f6c8e39fb78e0a9f9b581834c4a2645dae59b0c557b763f04f9e5
6
+ metadata.gz: d60a8e33fbf0b2fafdacf03127fe1502efd60bc5fc69de9d97fa9ddd07d0505d3c6577f0fb078277980b6a024f76f88887fcd01e0798f02f6b55c41ebef65bbe
7
+ data.tar.gz: 838e7c67fa30ac58d2fefffd701394db75a632087c2440aaaac0d2fb3b349857c79eefd446089b6ccb6878232ed56c987dcd669847b1b4aff2cf40025e0df20e
data/CHANGELOG.md CHANGED
@@ -6,6 +6,21 @@
6
6
  ### Fixed
7
7
  ### Security
8
8
 
9
+ ## [0.2.4] - 2018-04-02
10
+ SrlRuby passes 10 tests out of 15 standard SRL tests in total.
11
+ ### Changed
12
+ - File `lib/srl_ruby/grammar.rb` grammar refactoring. Added support for new 'no digit' SRL expression.
13
+ - Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
14
+ - Class `Regex::Lookaround` refactored: now inherits from `Regex::MonadicExpression`
15
+ - File `spec/integration_spec` renamed to `spec/srl_ruby_spec.rb`, systematic use of the API of SrlRuby module.
16
+
17
+ ### Fixed
18
+ - Method `SrlRuby::ASTBuilder#reduce_one_of` now escapes characters inside a character class.
19
+
20
+ ## [0.2.3] - 2018-03-15
21
+ ### Fixed
22
+ - Fixed a number of Yard warnings.
23
+
9
24
  ## [0.2.2] - 2018-03-15
10
25
  ### Fixed
11
26
  - Fixed rley version dependency
@@ -25,7 +25,8 @@ module Regex # This module is used as a namespace
25
25
  "\\7" => 7
26
26
  }.freeze
27
27
 
28
- MetaChars = '\^$+?.'.freeze
28
+ MetaChars = '\^$.|+?*()[]{}'.freeze
29
+ MetaCharsInClass = '\^[]-'.freeze # Characters with special meaning in char. class
29
30
 
30
31
  # The integer value that uniquely identifies the character.
31
32
  attr_reader(:codepoint)
@@ -5,7 +5,7 @@
5
5
  ########################
6
6
 
7
7
 
8
- require_relative 'polyadic_expression' # Access the superclass
8
+ require_relative 'monadic_expression' # Access the superclass
9
9
 
10
10
  module Regex # This module is used as a namespace
11
11
  # Lookaround is a zero-width assertion just like the start and end of line
@@ -14,7 +14,7 @@ module Regex # This module is used as a namespace
14
14
  # return the result of the match: match or no match.
15
15
  # That is why they are called "assertions". They do not consume characters
16
16
  # from the subject, but only assert whether a match is possible or not.
17
- class Lookaround < PolyadicExpression
17
+ class Lookaround < MonadicExpression
18
18
  # The "direction" of the lookaround. Can be ahead or behind. It specifies
19
19
  # the relative position of the expression to match compared to
20
20
  # the current 'position' in the subject text.
@@ -30,7 +30,7 @@ module Regex # This module is used as a namespace
30
30
  # [theDir] One of the following values: [ :ahead, :behind ]
31
31
  # [theKind] One of the following values: [ :positive, :negative ]
32
32
  def initialize(assertedExpression, theDir, theKind)
33
- super([assertedExpression])
33
+ super(assertedExpression)
34
34
  @dir = theDir
35
35
  @kind = theKind
36
36
  end
@@ -38,10 +38,9 @@ module Regex # This module is used as a namespace
38
38
  # Conversion method re-definition.
39
39
  # Purpose: Return the String representation of the captured expression.
40
40
  def to_str()
41
- result = children[0].to_str
42
41
  dir_syntax = (dir == :ahead) ? '' : '<'
43
42
  kind_syntax = (kind == :positive) ? '=' : '!'
44
- result << '(?' + dir_syntax + kind_syntax + children[1].to_str + ')'
43
+ result = '(?' + dir_syntax + kind_syntax + child.to_str + ')'
45
44
  return result
46
45
  end
47
46
  end # class
@@ -91,16 +91,26 @@ module SrlRuby
91
91
  return_first_child(aRange, theTokens, theChildren)
92
92
  end
93
93
 
94
- # rule('pattern' => %w[pattern separator quantifiable]).as 'pattern_sequence'
94
+ # rule('pattern' => %w[pattern separator sub_pattern]).as 'pattern_sequence'
95
95
  def reduce_pattern_sequence(_production, _range, _tokens, theChildren)
96
96
  return Regex::Concatenation.new(theChildren[0], theChildren[2])
97
97
  end
98
98
 
99
+ # rule('pattern' => 'sub_pattern').as 'basic_pattern'
100
+ def reduce_basic_pattern(_production, aRange, theTokens, theChildren)
101
+ return_first_child(aRange, theTokens, theChildren)
102
+ end
103
+
104
+ # rule('sub_pattern' => 'assertion').as 'assertion_sub_pattern'
105
+ def reduce_assertion_sub_pattern(_production, aRange, theTokens, theChildren)
106
+ return_first_child(aRange, theTokens, theChildren)
107
+ end
108
+
99
109
  # rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
100
110
  def reduce_flag_sequence(_production, _range, _tokens, theChildren)
101
111
  theChildren[0] << theChildren[2]
102
112
  end
103
-
113
+
104
114
  # rule('flags' => %w[separator single_flag]).as 'flag_simple'
105
115
  def reduce_flag_simple(_production, _range, _tokens, theChildren)
106
116
  [theChildren.last]
@@ -151,13 +161,6 @@ module SrlRuby
151
161
  return Regex::Anchor.new('$')
152
162
  end
153
163
 
154
- # rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
155
- def reduce_asserted_anchorable(_production, _range, _tokens, theChildren)
156
- assertion = theChildren.last
157
- assertion.children.unshift(theChildren[0])
158
- return assertion
159
- end
160
-
161
164
  # rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
162
165
  def reduce_if_followed(_production, _range, _tokens, theChildren)
163
166
  return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
@@ -218,11 +221,6 @@ module SrlRuby
218
221
  reduce_lowercase_from_to(aProduction, aRange, theTokens, theChildren)
219
222
  end
220
223
 
221
- # rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
222
- def reduce_simple_digit_range(_production, _range, _tokens, _children)
223
- char_shorthand('d')
224
- end
225
-
226
224
  # rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
227
225
  def reduce_any_character(_production, _range, _tokens, _children)
228
226
  char_shorthand('w')
@@ -233,6 +231,16 @@ module SrlRuby
233
231
  char_shorthand('W')
234
232
  end
235
233
 
234
+ # rule('character_class' => 'digit_or_number').as 'digit'
235
+ def reduce_digit(_production, _range, _tokens, _children)
236
+ char_shorthand('d')
237
+ end
238
+
239
+ # rule('character_class' => %w[NO DIGIT]).as 'non_digit'
240
+ def reduce_non_digit(_production, _range, _tokens, _children)
241
+ char_shorthand('D')
242
+ end
243
+
236
244
  # rule('character_class' => 'WHITESPACE').as 'whitespace'
237
245
  def reduce_whitespace(_production, _range, _tokens, _children)
238
246
  char_shorthand('s')
@@ -248,10 +256,18 @@ module SrlRuby
248
256
  wildcard
249
257
  end
250
258
 
251
- # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
259
+ # rule('character_class' => %w[ONE OF STRING_LIT]).as 'one_of'
252
260
  def reduce_one_of(_production, _range, _tokens, theChildren)
253
261
  raw_literal = theChildren[-1].token.lexeme.dup
254
- alternatives = raw_literal.chars.map { |ch| Regex::Character.new(ch) }
262
+ alternatives = raw_literal.chars.map do |ch|
263
+ if Regex::Character::MetaCharsInClass.include?(ch)
264
+ chars = [Regex::Character.new("\\"), Regex::Character.new(ch)]
265
+ Regex::Concatenation.new(*chars)
266
+ else
267
+ Regex::Character.new(ch)
268
+ end
269
+ end
270
+
255
271
  # TODO check other implementations
256
272
  return Regex::CharClass.new(false, *alternatives)
257
273
  end
@@ -263,7 +279,7 @@ module SrlRuby
263
279
 
264
280
  # rule('special_char' => 'BACKSLASH').as 'backslash'
265
281
  def reduce_backslash(_production, _range, _tokens, _children)
266
- # Double the basckslash (because of escaping)
282
+ # Double the backslash (because of escaping)
267
283
  string_literal("\\", true)
268
284
  end
269
285
 
@@ -283,7 +299,17 @@ module SrlRuby
283
299
 
284
300
  # rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
285
301
  def reduce_any_of(_production, _range, _tokens, theChildren)
286
- return Regex::Alternation.new(*theChildren[3])
302
+ first_alternative = theChildren[3].first
303
+ result = nil
304
+
305
+ # Ugly: in SRL, comma is a dummy separator except in any of construct...
306
+ if theChildren[3].size == 1 && first_alternative.kind_of?(Regex::Concatenation)
307
+ result = Regex::Alternation.new(*first_alternative.children)
308
+ else
309
+ result = Regex::Alternation.new(*theChildren[3])
310
+ end
311
+
312
+ return result
287
313
  end
288
314
 
289
315
  # rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
@@ -6,7 +6,7 @@ module SrlRuby
6
6
  builder = Rley::Syntax::GrammarBuilder.new do
7
7
  add_terminals('LPAREN', 'RPAREN', 'COMMA')
8
8
  add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
9
- add_terminals('LITERALLY', 'STRING_LIT')
9
+ add_terminals('LITERALLY', 'STRING_LIT', 'IDENTIFIER')
10
10
  add_terminals('BEGIN', 'STARTS', 'WITH')
11
11
  add_terminals('MUST', 'END')
12
12
  add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
@@ -26,8 +26,10 @@ module SrlRuby
26
26
  rule('srl' => 'expression').as 'start_rule'
27
27
  rule('expression' => %w[pattern flags]).as 'flagged_expr'
28
28
  rule('expression' => 'pattern').as 'simple_expr'
29
- rule('pattern' => %w[pattern separator quantifiable]).as 'pattern_sequence'
30
- rule('pattern' => 'quantifiable').as 'basic_pattern'
29
+ rule('pattern' => %w[pattern separator sub_pattern]).as 'pattern_sequence'
30
+ rule('pattern' => 'sub_pattern').as 'basic_pattern'
31
+ rule('sub_pattern' => 'quantifiable').as 'quantifiable_sub_pattern'
32
+ rule('sub_pattern' => 'assertion').as 'assertion_sub_pattern'
31
33
  rule('separator' => 'COMMA').as 'comma_separator'
32
34
  rule('separator' => []).as 'void_separator'
33
35
  rule('flags' => %w[flags separator single_flag]).as 'flag_sequence'
@@ -43,7 +45,6 @@ module SrlRuby
43
45
  rule('begin_anchor' => %w[BEGIN WITH]).as 'begin_with'
44
46
  rule('end_anchor' => %w[separator MUST END]).as 'end_anchor'
45
47
  rule('anchorable' => 'assertable').as 'simple_anchorable'
46
- rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
47
48
  rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
48
49
  rule('assertion' => %w[IF NOT FOLLOWED BY assertable]).as 'if_not_followed'
49
50
  rule('assertion' => %w[IF ALREADY HAD assertable]).as 'if_had'
@@ -64,9 +65,10 @@ module SrlRuby
64
65
  rule('letter_range' => 'LETTER').as 'any_lowercase'
65
66
  rule('letter_range' => %w[UPPERCASE LETTER]).as 'any_uppercase'
66
67
  rule('digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]).as 'digits_from_to'
67
- rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
68
68
  rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
69
69
  rule('character_class' => %w[NO CHARACTER]).as 'no_character'
70
+ rule('character_class' => 'digit_or_number').as 'digit'
71
+ rule('character_class' => %w[NO DIGIT]).as 'non_digit'
70
72
  rule('character_class' => 'WHITESPACE').as 'whitespace'
71
73
  rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
72
74
  rule('character_class' => 'ANYTHING').as 'anything'
@@ -84,6 +86,7 @@ module SrlRuby
84
86
  rule('capturing_group' => %w[CAPTURE assertable AS var_name]).as 'named_capture'
85
87
  rule('capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]).as 'named_capture_until'
86
88
  rule('var_name' => 'STRING_LIT').as 'var_name'
89
+ rule('var_name' => 'IDENTIFIER').as 'var_ident' # capture name not enclosed between quotes
87
90
  rule('quantifier' => 'ONCE').as 'once'
88
91
  rule('quantifier' => 'TWICE').as 'twice'
89
92
  rule('quantifier' => %w[EXACTLY count TIMES]).as 'exactly'
@@ -111,9 +111,10 @@ module SrlRuby
111
111
  token = build_token('INTEGER', lexeme) # An integer has 2..* digits
112
112
  elsif (lexeme = scanner.scan(/[0-9]/))
113
113
  token = build_token('DIGIT_LIT', lexeme)
114
- elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
115
- token = build_token(@@keywords[lexeme.upcase], lexeme)
116
- # TODO: handle case unknown identifier
114
+ elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
115
+ keyw = @@keywords[lexeme.upcase]
116
+ tok_type = keyw ? keyw : 'IDENTIFIER'
117
+ token = build_token(tok_type, lexeme)
117
118
  elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
118
119
  token = build_token('LETTER_LIT', lexeme)
119
120
  elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
@@ -126,7 +127,7 @@ module SrlRuby
126
127
  erroneous = curr_ch.nil? ? '' : curr_ch
127
128
  sequel = scanner.scan(/.{1,20}/)
128
129
  erroneous += sequel unless sequel.nil?
129
- raise ScanError.new("Unknown token #{erroneous}")
130
+ raise ScanError.new("Unknown token #{erroneous} on line #{lineno}")
130
131
  end
131
132
 
132
133
  return token
@@ -137,7 +138,7 @@ module SrlRuby
137
138
  col = scanner.pos - aLexeme.size - @line_start + 1
138
139
  pos = Position.new(@lineno, col)
139
140
  token = SrlToken.new(aLexeme, aSymbolName, pos)
140
- rescue StandardError => exc
141
+ rescue Exception => exc
141
142
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
142
143
  raise exc
143
144
  end
@@ -1,3 +1,3 @@
1
1
  module SrlRuby
2
- VERSION = '0.2.2'.freeze
2
+ VERSION = '0.2.4'.freeze
3
3
  end
data/lib/srl_ruby.rb CHANGED
@@ -21,7 +21,7 @@ module SrlRuby # This module is used as a namespace
21
21
  # @return [Regexp]
22
22
  def self.parse(source)
23
23
  # Create a Rley facade object
24
- engine = Rley::Engine.new
24
+ engine = Rley::Engine.new { |cfg| cfg.diagnose = true }
25
25
 
26
26
  # Step 1. Load SRL grammar
27
27
  engine.use_grammar(SrlRuby::Grammar)
@@ -3,8 +3,12 @@ require_relative './support/rule_file_parser'
3
3
  require_relative '../../lib/srl_ruby'
4
4
 
5
5
  ##############################
6
- # Understand how parser fails when first rule begins with %[...] instead of %w[...]
7
- ##############################
6
+ # Some rule files contain undocumented and unsupported SRL expressions:
7
+ # | File name | unrecognized input |
8
+ # | no_word.rule | 'no word' |
9
+ # | none_of.rule | 'none of abcd' |
10
+ # | word.rule | '(word)' |
11
+
8
12
 
9
13
  RSpec.describe Acceptance do
10
14
  def rule_path
@@ -17,10 +21,10 @@ RSpec.describe Acceptance do
17
21
 
18
22
  def test_rule_file(aRuleFileRepr)
19
23
  regex = SrlRuby::parse(aRuleFileRepr.srl.value)
20
- expect(regex).not_to be_nil
21
-
24
+ expect(regex).to be_kind_of(Regexp)
25
+
22
26
  aRuleFileRepr.match_tests.each do |test|
23
- expect(regex.match(test.test_string.value)).not_to be_nil
27
+ expect(test.test_string.value).to match(regex)
24
28
  end
25
29
  aRuleFileRepr.no_match_tests.each do |test|
26
30
  expect(regex.match(test.test_string.value)).to be_nil
@@ -42,21 +46,26 @@ RSpec.describe Acceptance do
42
46
  rule_file_repr = load_file('backslash.rule')
43
47
  test_rule_file(rule_file_repr)
44
48
  end
45
-
49
+
46
50
  it 'should support named capture group' do
47
51
  rule_file_repr = load_file('basename_capture_group.rule')
48
52
  test_rule_file(rule_file_repr)
49
53
  end
50
-
54
+
51
55
  it 'should match uppercase letter(s)' do
52
56
  rule_file_repr = load_file('issue_17_uppercase_letter.rule')
53
57
  test_rule_file(rule_file_repr)
54
- end
55
-
58
+ end
59
+
56
60
  it 'should not trim literal strings' do
57
61
  rule_file_repr = load_file('literally_spaces.rule')
58
62
  test_rule_file(rule_file_repr)
59
- end
63
+ end
64
+
65
+ it 'should match non digit pattern' do
66
+ rule_file_repr = load_file('nondigit.rule')
67
+ test_rule_file(rule_file_repr)
68
+ end
60
69
 
61
70
  it 'should match a tab' do
62
71
  rule_file_repr = load_file('tab.rule')
@@ -72,4 +81,14 @@ RSpec.describe Acceptance do
72
81
  rule_file_repr = load_file('website_example_lookahead.rule')
73
82
  test_rule_file(rule_file_repr)
74
83
  end
84
+
85
+ it 'should not trim literal strings' do
86
+ rule_file_repr = load_file('website_example_password.rule')
87
+ test_rule_file(rule_file_repr)
88
+ end
89
+
90
+ it 'should' do
91
+ rule_file_repr = load_file('website_example_url.rule')
92
+ test_rule_file(rule_file_repr)
93
+ end
75
94
  end
@@ -17,22 +17,22 @@ builder = Rley::Syntax::GrammarBuilder.new do
17
17
  add_terminals('INTEGER', 'STRING_LIT')
18
18
  add_terminals('IDENTIFIER', 'SRL_SOURCE')
19
19
 
20
- rule('rule_file' => %w[srl_heading srl_tests]).as 'start_rule'
21
- rule('srl_heading' => %w[SRL: SRL_SOURCE]).as 'srl_source'
22
- rule('srl_tests' => %w[srl_tests single_test]).as 'test_list'
20
+ rule('rule_file' => 'srl_heading srl_tests').as 'start_rule'
21
+ rule('srl_heading' => 'SRL: SRL_SOURCE').as 'srl_source'
22
+ rule('srl_tests' => 'srl_tests single_test').as 'test_list'
23
23
  rule('srl_tests' => 'single_test').as 'one_test'
24
24
  rule('single_test' => 'atomic_test').as 'single_atomic_test'
25
25
  rule('single_test' => 'compound_test').as 'single_compound_test'
26
26
  rule('atomic_test' => 'match_test').as 'atomic_match'
27
27
  rule('atomic_test' => 'no_match_test').as 'atomic_no_match'
28
28
  rule('compound_test' => 'capture_test').as 'compound_capture'
29
- rule('match_test' => %w[MATCH: STRING_LIT]).as 'match_string'
30
- rule('no_match_test' => %w[NO MATCH: STRING_LIT]).as 'no_match_string'
31
- rule('capture_test' => %w[capture_heading capture_expectations]).as 'capture_test'
32
- rule('capture_heading' => %w[CAPTURE FOR STRING_LIT COLON]).as 'capture_string'
33
- rule('capture_expectations' => %w[capture_expectations single_expectation]).as 'assertion_list'
29
+ rule('match_test' => 'MATCH: STRING_LIT').as 'match_string'
30
+ rule('no_match_test' => 'NO MATCH: STRING_LIT').as 'no_match_string'
31
+ rule('capture_test' => 'capture_heading capture_expectations').as 'capture_test'
32
+ rule('capture_heading' => 'CAPTURE FOR STRING_LIT COLON').as 'capture_string'
33
+ rule('capture_expectations' => 'capture_expectations single_expectation').as 'assertion_list'
34
34
  rule('capture_expectations' => 'single_expectation').as 'one_expectation'
35
- rule('single_expectation' => %w[DASH INTEGER COLON capture_variable COLON STRING_LIT]).as 'capture_expectation'
35
+ rule('single_expectation' => 'DASH INTEGER COLON capture_variable COLON STRING_LIT').as 'capture_expectation'
36
36
  rule('capture_variable' => 'INTEGER').as 'var_integer'
37
37
  rule('capture_variable' => 'IDENTIFIER').as 'var_identifier'
38
38
  end
@@ -0,0 +1,310 @@
1
+ require_relative 'spec_helper' # Use the RSpec framework
2
+ require_relative '../lib/srl_ruby'
3
+
4
+ describe SrlRuby do
5
+ context 'Parsing character ranges:' do
6
+ it "should parse 'letter from ... to ...' syntax" do
7
+ regexp = SrlRuby.parse('letter from a to f')
8
+ expect(regexp.source).to eq('[a-f]')
9
+ end
10
+
11
+ it "should parse 'uppercase letter from ... to ...' syntax" do
12
+ regexp = SrlRuby.parse('UPPERCASE letter from A to F')
13
+ expect(regexp.source).to eq('[A-F]')
14
+ end
15
+
16
+ it "should parse 'letter' syntax" do
17
+ regexp = SrlRuby.parse('letter')
18
+ expect(regexp.source).to eq('[a-z]')
19
+ end
20
+
21
+ it "should parse 'uppercase letter' syntax" do
22
+ regexp = SrlRuby.parse('uppercase letter')
23
+ expect(regexp.source).to eq('[A-Z]')
24
+ end
25
+
26
+ it "should parse 'digit from ... to ...' syntax" do
27
+ regexp = SrlRuby.parse('digit from 1 to 4')
28
+ expect(regexp.source).to eq('[1-4]')
29
+ end
30
+ end # context
31
+
32
+ context 'Parsing string literals:' do
33
+ it 'should parse double quotes literal string' do
34
+ regexp = SrlRuby.parse('literally "hello"')
35
+ expect(regexp.source).to eq('hello')
36
+ end
37
+
38
+ it 'should parse single quotes literal string' do
39
+ regexp = SrlRuby.parse("literally 'hello'")
40
+ expect(regexp.source).to eq('hello')
41
+ end
42
+
43
+ it 'should escape special characters' do
44
+ regexp = SrlRuby.parse("literally '.'")
45
+ expect(regexp.source).to eq('\.')
46
+ end
47
+ end # context
48
+
49
+ context 'Parsing character classes:' do
50
+ it "should parse 'digit' syntax" do
51
+ regexp = SrlRuby.parse('digit')
52
+ expect(regexp.source).to eq('\d')
53
+ end
54
+
55
+ it "should parse 'number' syntax" do
56
+ regexp = SrlRuby.parse('number')
57
+ expect(regexp.source).to eq('\d')
58
+ end
59
+
60
+ it "should parse 'no digit' syntax" do
61
+ regexp = SrlRuby.parse('no digit')
62
+ expect(regexp.source).to eq('\D')
63
+ end
64
+
65
+ it "should parse 'any character' syntax" do
66
+ regexp = SrlRuby.parse('any character')
67
+ expect(regexp.source).to eq('\w')
68
+ end
69
+
70
+ it "should parse 'no character' syntax" do
71
+ regexp = SrlRuby.parse('no character')
72
+ expect(regexp.source).to eq('\W')
73
+ end
74
+
75
+ it "should parse 'whitespace' syntax" do
76
+ regexp = SrlRuby.parse('whitespace')
77
+ expect(regexp.source).to eq('\s')
78
+ end
79
+
80
+ it "should parse 'no whitespace' syntax" do
81
+ regexp = SrlRuby.parse('no whitespace')
82
+ expect(regexp.source).to eq('\S')
83
+ end
84
+
85
+ it "should parse 'anything' syntax" do
86
+ regexp = SrlRuby.parse('anything')
87
+ expect(regexp.source).to eq('.')
88
+ end
89
+
90
+ it "should parse 'one of' syntax" do
91
+ regexp = SrlRuby.parse('one of "._%+-"')
92
+ # Remark: reference implementation less readable
93
+ # (escapes more characters than required)
94
+ expect(regexp.source).to eq('[._%+\-]')
95
+ end
96
+ end # context
97
+
98
+ context 'Parsing special character declarations:' do
99
+ it "should parse 'tab' syntax" do
100
+ regexp = SrlRuby.parse('tab')
101
+ expect(regexp.source).to eq('\t')
102
+ end
103
+
104
+ it "should parse 'backslash' syntax" do
105
+ regexp = SrlRuby.parse('backslash')
106
+ expect(regexp.source).to eq('\\\\')
107
+ end
108
+
109
+ it "should parse 'new line' syntax" do
110
+ regexp = SrlRuby.parse('new line')
111
+ expect(regexp.source).to eq('\n')
112
+ end
113
+ end # context
114
+
115
+ context 'Parsing alternations:' do
116
+ it "should parse 'any of' syntax" do
117
+ source = 'any of (any character, one of "._%-+")'
118
+ regexp = SrlRuby.parse(source)
119
+ expect(regexp.source).to eq('(?:\w|[._%\-+])')
120
+ end
121
+
122
+ it 'should anchor as alternative' do
123
+ regexp = SrlRuby.parse('any of (literally "?", must end)')
124
+ expect(regexp.source).to eq('(?:\\?|$)')
125
+ end
126
+ end # context
127
+
128
+ context 'Parsing concatenation:' do
129
+ it 'should reject dangling comma' do
130
+ source = 'literally "a",'
131
+ err = StandardError
132
+ msg_pattern = /Premature end of input after ',' at position line 1, column 14/
133
+ expect { SrlRuby.parse(source) }.to raise_error(err, msg_pattern)
134
+ end
135
+
136
+ it 'should parse concatenation' do
137
+ regexp = SrlRuby.parse('any of (literally "sample", (digit once or more))')
138
+ expect(regexp.source).to eq('(?:sample|(?:\d+))')
139
+ end
140
+
141
+ it 'should parse a long sequence of patterns' do
142
+ source = <<-END_SRL
143
+ any of (any character, one of "._%-+") once or more,
144
+ literally "@",
145
+ any of (digit, letter, one of ".-") once or more,
146
+ literally ".",
147
+ letter at least 2 times
148
+ END_SRL
149
+
150
+ regexp = SrlRuby.parse(source)
151
+ # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
152
+ expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
153
+ expect(regexp.source).to eq(expectation)
154
+ end
155
+ end # context
156
+
157
+ context 'Parsing quantifiers:' do
158
+ let(:prefix) { 'letter from p to t ' }
159
+
160
+ it "should parse 'once' syntax" do
161
+ regexp = SrlRuby.parse(prefix + 'once')
162
+ expect(regexp.source).to eq('[p-t]{1}')
163
+ end
164
+
165
+ it "should parse 'twice' syntax" do
166
+ regexp = SrlRuby.parse('digit twice')
167
+ expect(regexp.source).to eq('\d{2}')
168
+ end
169
+
170
+ it "should parse 'optional' syntax" do
171
+ regexp = SrlRuby.parse('anything optional')
172
+ expect(regexp.source).to eq('.?')
173
+ end
174
+
175
+ it "should parse 'exactly ... times' syntax" do
176
+ regexp = SrlRuby.parse('letter from a to f exactly 4 times')
177
+ expect(regexp.source).to eq('[a-f]{4}')
178
+ end
179
+
180
+ it "should parse 'between ... and ... times' syntax" do
181
+ regexp = SrlRuby.parse(prefix + 'between 2 and 4 times')
182
+ expect(regexp.source).to eq('[p-t]{2,4}')
183
+
184
+ # Dropping 'times' keyword is a shorter alternative syntax
185
+ regexp = SrlRuby.parse(prefix + 'between 2 and 4')
186
+ expect(regexp.source).to eq('[p-t]{2,4}')
187
+ end
188
+
189
+ it "should parse 'once or more' syntax" do
190
+ regexp = SrlRuby.parse(prefix + 'once or more')
191
+ expect(regexp.source).to eq('[p-t]+')
192
+ end
193
+
194
+ it "should parse 'never or more' syntax" do
195
+ regexp = SrlRuby.parse(prefix + 'never or more')
196
+ expect(regexp.source).to eq('[p-t]*')
197
+ end
198
+
199
+ it "should parse 'at least ... times' syntax" do
200
+ regexp = SrlRuby.parse(prefix + 'at least 10 times')
201
+ expect(regexp.source).to eq('[p-t]{10,}')
202
+ end
203
+ end # context
204
+
205
+ context 'Parsing lookaround:' do
206
+ it 'should parse positive lookahead' do
207
+ regexp = SrlRuby.parse('letter if followed by (anything once or more, digit)')
208
+ expect(regexp.source).to eq('[a-z](?=(?:.+\d))')
209
+ end
210
+
211
+ it 'should parse negative lookahead' do
212
+ regexp = SrlRuby.parse('letter if not followed by (anything once or more, digit)')
213
+ expect(regexp.source).to eq('[a-z](?!(?:.+\d))')
214
+ end
215
+
216
+ it 'should parse positive lookbehind' do
217
+ regexp = SrlRuby.parse('literally "bar" if already had literally "foo"')
218
+ expect(regexp.source).to eq('bar(?<=foo)')
219
+ end
220
+
221
+ it 'should parse negative lookbehind' do
222
+ regexp = SrlRuby.parse('literally "bar" if not already had literally "foo"')
223
+ expect(regexp.source).to eq('bar(?<!foo)')
224
+ end
225
+ end # context
226
+
227
+ context 'Parsing capturing group:' do
228
+ it 'should parse simple anonymous capturing group' do
229
+ regexp = SrlRuby.parse('capture(literally "sample")')
230
+ expect(regexp.source).to eq('(sample)')
231
+ end
232
+
233
+ it 'should parse complex anonymous capturing group' do
234
+ source = 'capture(any of (literally "sample", (digit once or more)))'
235
+ regexp = SrlRuby.parse(source)
236
+ expect(regexp.source).to eq('((?:sample|(?:\d+)))')
237
+ end
238
+
239
+ it 'should parse simple anonymous until capturing group' do
240
+ regexp = SrlRuby.parse('capture anything once or more until literally "!"')
241
+ expect(regexp.source).to eq('(.+)!')
242
+ end
243
+
244
+ it 'should parse unquoted named capturing group' do
245
+ source = 'capture (anything once or more) as first, must end'
246
+ regexp = SrlRuby.parse(source)
247
+ expect(regexp.source).to eq('(?<first>.+)$')
248
+ end
249
+
250
+ it 'should parse complex named capturing group' do
251
+ source = <<-END_SRL
252
+ capture(any of (literally "sample", (digit once or more)))
253
+ as "foo"
254
+ END_SRL
255
+ regexp = SrlRuby.parse(source)
256
+ expect(regexp.source).to eq('(?<foo>(?:sample|(?:\d+)))')
257
+ end
258
+
259
+ it 'should parse a sequence with named capturing groups' do
260
+ source = <<-END_SRL
261
+ capture (anything once or more) as "first",
262
+ literally " - ",
263
+ capture literally "second part" as "second"
264
+ END_SRL
265
+ regexp = SrlRuby.parse(source)
266
+ expect(regexp.source).to eq('(?<first>.+) - (?<second>second part)')
267
+ end
268
+
269
+ it 'should parse complex named until capturing group' do
270
+ source = 'capture (anything once or more) as "foo" until literally "m"'
271
+ regexp = SrlRuby.parse(source)
272
+ expect(regexp.source).to eq('(?<foo>.+)m')
273
+ end
274
+ end # context
275
+
276
+ context 'Parsing anchors:' do
277
+ it 'should parse begin anchors' do
278
+ regexp = SrlRuby.parse('starts with literally "match"')
279
+ expect(regexp.source).to eq('^match')
280
+ end
281
+
282
+ it 'should parse begin anchors (alternative syntax)' do
283
+ regexp = SrlRuby.parse('begin with literally "match"')
284
+ expect(regexp.source).to eq('^match')
285
+ end
286
+
287
+ it 'should parse end anchors' do
288
+ regexp = SrlRuby.parse('literally "match" must end')
289
+ expect(regexp.source).to eq('match$')
290
+ end
291
+
292
+ it 'should parse combination of begin and end anchors' do
293
+ regexp = SrlRuby.parse('starts with literally "match" must end')
294
+ expect(regexp.source).to eq('^match$')
295
+ end
296
+
297
+ it 'should accept anchor with a sequence of patterns' do
298
+ source = <<-END_SRL
299
+ begin with any of (digit, letter, one of ".-") once or more,
300
+ literally ".",
301
+ letter at least 2 times must end
302
+ END_SRL
303
+
304
+ regexp = SrlRuby.parse(source)
305
+ # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
306
+ expect(regexp.source).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
307
+ end
308
+ end # context
309
+ end # describe
310
+ # End of file
data/srl_ruby.gemspec CHANGED
@@ -55,7 +55,7 @@ END_DESCR
55
55
  spec.required_ruby_version = '>= 2.1.0'
56
56
 
57
57
  # Runtime dependencies
58
- spec.add_dependency 'rley', '~> 0.6.03'
58
+ spec.add_dependency 'rley', '~> 0.6.05'
59
59
 
60
60
  # Development dependencies
61
61
  spec.add_development_dependency 'bundler', '~> 1.16'
@@ -1,4 +1,4 @@
1
- srl: if followed by (anything never or more, letter), if followed by (anything never or more, uppercase letter), if followed by (anything never or more, digit), if followed by (anything never or more, one of "!@#$%^&*[]\"';:_-<>., =+/\\"), anything at least 8 time
1
+ srl: if followed by (anything never or more, letter), if followed by (anything never or more, uppercase letter), if followed by (anything never or more, digit), if followed by (anything never or more, one of "!@#$%^&*[]\"';:_-<>., =+/\\"), anything at least 8 times
2
2
  match: "P@sSword1"
3
3
  match: "Pass-w0rd"
4
4
  match: "Th1s is Secure"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srl_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-15 00:00:00.000000000 Z
11
+ date: 2018-04-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rley
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.6.03
19
+ version: 0.6.05
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.6.03
26
+ version: 0.6.05
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bundler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -120,12 +120,12 @@ files:
120
120
  - spec/acceptance/support/rule_file_parser.rb
121
121
  - spec/acceptance/support/rule_file_token.rb
122
122
  - spec/acceptance/support/rule_file_tokenizer.rb
123
- - spec/integration_spec.rb
124
123
  - spec/regex/character_spec.rb
125
124
  - spec/regex/multiplicity_spec.rb
126
125
  - spec/spec_helper.rb
127
126
  - spec/srl_ruby/srl_ruby_spec.rb
128
127
  - spec/srl_ruby/tokenizer_spec.rb
128
+ - spec/srl_ruby_spec.rb
129
129
  - srl_ruby.gemspec
130
130
  - srl_test/README.md
131
131
  - srl_test/Test-Rules/README.md
@@ -173,8 +173,8 @@ summary: srl_ruby is a gem implementing a parser for Simple Regex Language (SRL)
173
173
  literals.
174
174
  test_files:
175
175
  - spec/acceptance/srl_test_suite_spec.rb
176
- - spec/integration_spec.rb
177
176
  - spec/regex/character_spec.rb
178
177
  - spec/regex/multiplicity_spec.rb
179
178
  - spec/srl_ruby/srl_ruby_spec.rb
180
179
  - spec/srl_ruby/tokenizer_spec.rb
180
+ - spec/srl_ruby_spec.rb
@@ -1,451 +0,0 @@
1
- require_relative 'spec_helper' # Use the RSpec framework
2
- require_relative '../lib/srl_ruby/tokenizer'
3
- require_relative '../lib/srl_ruby/grammar'
4
- require_relative '../lib/srl_ruby/ast_builder'
5
-
6
- module SrlRuby
7
- describe 'Integration tests:' do
8
- def parse(someSRL)
9
- tokenizer = SrlRuby::Tokenizer.new(someSRL)
10
- @engine.parse(tokenizer.tokens)
11
- end
12
-
13
- def regexp_repr(aResult)
14
- # Generate an abstract syntax parse tree from the parse result
15
- tree = @engine.convert(aResult)
16
- tree.root
17
- end
18
-
19
- before(:each) do
20
- @engine = Rley::Engine.new do |config|
21
- config.repr_builder = ASTBuilder
22
- end
23
- @engine.use_grammar(SrlRuby::Grammar)
24
- end
25
-
26
- context 'Parsing character ranges:' do
27
- it "should parse 'letter from ... to ...' syntax" do
28
- result = parse('letter from a to f')
29
- expect(result).to be_success
30
-
31
- regexp = regexp_repr(result)
32
- expect(regexp.to_str).to eq('[a-f]')
33
- end
34
-
35
- it "should parse 'uppercase letter from ... to ...' syntax" do
36
- result = parse('UPPERCASE letter from A to F')
37
- expect(result).to be_success
38
-
39
- regexp = regexp_repr(result)
40
- expect(regexp.to_str).to eq('[A-F]')
41
- end
42
-
43
- it "should parse 'letter' syntax" do
44
- result = parse('letter')
45
- expect(result).to be_success
46
-
47
- regexp = regexp_repr(result)
48
- expect(regexp.to_str).to eq('[a-z]')
49
- end
50
-
51
- it "should parse 'uppercase letter' syntax" do
52
- result = parse('uppercase letter')
53
- expect(result).to be_success
54
-
55
- regexp = regexp_repr(result)
56
- expect(regexp.to_str).to eq('[A-Z]')
57
- end
58
-
59
- it "should parse 'digit from ... to ...' syntax" do
60
- result = parse('digit from 1 to 4')
61
- expect(result).to be_success
62
-
63
- regexp = regexp_repr(result)
64
- expect(regexp.to_str).to eq('[1-4]')
65
- end
66
- end # context
67
-
68
- context 'Parsing string literals:' do
69
- it 'should parse double quotes literal string' do
70
- result = parse('literally "hello"')
71
- expect(result).to be_success
72
-
73
- regexp = regexp_repr(result)
74
- expect(regexp.to_str).to eq('hello')
75
- end
76
-
77
- it 'should parse single quotes literal string' do
78
- result = parse("literally 'hello'")
79
- expect(result).to be_success
80
-
81
- regexp = regexp_repr(result)
82
- expect(regexp.to_str).to eq('hello')
83
- end
84
-
85
- it 'should escape special characters' do
86
- result = parse("literally '.'")
87
- expect(result).to be_success
88
-
89
- regexp = regexp_repr(result)
90
- expect(regexp.to_str).to eq('\.')
91
- end
92
- end
93
-
94
- context 'Parsing character classes:' do
95
- it "should parse 'digit' syntax" do
96
- result = parse('digit')
97
- expect(result).to be_success
98
-
99
- regexp = regexp_repr(result)
100
- expect(regexp.to_str).to eq('\d')
101
- end
102
-
103
- it "should parse 'number' syntax" do
104
- result = parse('number')
105
- expect(result).to be_success
106
-
107
- regexp = regexp_repr(result)
108
- expect(regexp.to_str).to eq('\d')
109
- end
110
-
111
- it "should parse 'any character' syntax" do
112
- result = parse('any character')
113
- expect(result).to be_success
114
-
115
- regexp = regexp_repr(result)
116
- expect(regexp.to_str).to eq('\w')
117
- end
118
-
119
- it "should parse 'no character' syntax" do
120
- result = parse('no character')
121
- expect(result).to be_success
122
-
123
- regexp = regexp_repr(result)
124
- expect(regexp.to_str).to eq('\W')
125
- end
126
-
127
- it "should parse 'whitespace' syntax" do
128
- result = parse('whitespace')
129
- expect(result).to be_success
130
-
131
- regexp = regexp_repr(result)
132
- expect(regexp.to_str).to eq('\s')
133
- end
134
-
135
- it "should parse 'no whitespace' syntax" do
136
- result = parse('no whitespace')
137
- expect(result).to be_success
138
-
139
- regexp = regexp_repr(result)
140
- expect(regexp.to_str).to eq('\S')
141
- end
142
-
143
- it "should parse 'anything' syntax" do
144
- result = parse('anything')
145
- expect(result).to be_success
146
-
147
- regexp = regexp_repr(result)
148
- expect(regexp.to_str).to eq('.')
149
- end
150
-
151
- it "should parse 'one of' syntax" do
152
- result = parse('one of "._%+-"')
153
- expect(result).to be_success
154
-
155
- regexp = regexp_repr(result)
156
- # Remark: reference implementation less readable
157
- # (escapes more characters than required)
158
- expect(regexp.to_str).to eq('[._%+\-]')
159
- end
160
- end # context
161
-
162
- context 'Parsing special character declarations:' do
163
- it "should parse 'tab' syntax" do
164
- result = parse('tab')
165
- expect(result).to be_success
166
-
167
- regexp = regexp_repr(result)
168
- expect(regexp.to_str).to eq('\t')
169
- end
170
-
171
- it "should parse 'backslash' syntax" do
172
- result = parse('backslash')
173
- expect(result).to be_success
174
-
175
- regexp = regexp_repr(result)
176
- expect(regexp.to_str).to eq('\\\\')
177
- end
178
-
179
- it "should parse 'new line' syntax" do
180
- result = parse('new line')
181
- expect(result).to be_success
182
-
183
- regexp = regexp_repr(result)
184
- expect(regexp.to_str).to eq('\n')
185
- end
186
- end # context
187
-
188
- context 'Parsing alternations:' do
189
- it "should parse 'any of' syntax" do
190
- source = 'any of (any character, one of "._%-+")'
191
- result = parse(source)
192
- expect(result).to be_success
193
-
194
- regexp = regexp_repr(result)
195
- expect(regexp.to_str).to eq('(?:\w|[._%\-+])')
196
- end
197
- end # context
198
-
199
- context 'Parsing concatenation:' do
200
- it 'should reject dangling comma' do
201
- source = 'literally "a",'
202
- result = parse(source)
203
- expect(result).not_to be_success
204
- message_prefix = /Premature end of input after ','/
205
- expect(result.failure_reason.message).to match(message_prefix)
206
- end
207
-
208
- it 'should parse concatenation' do
209
- result = parse('any of (literally "sample", (digit once or more))')
210
- expect(result).to be_success
211
-
212
- regexp = regexp_repr(result)
213
- expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
214
- end
215
-
216
- it 'should parse a long sequence of patterns' do
217
- source = <<-ENDS
218
- any of (any character, one of "._%-+") once or more,
219
- literally "@",
220
- any of (digit, letter, one of ".-") once or more,
221
- literally ".",
222
- letter at least 2 times
223
- ENDS
224
-
225
- result = parse(source)
226
- expect(result).to be_success
227
-
228
- regexp = regexp_repr(result)
229
- # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
230
- expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
231
- expect(regexp.to_str).to eq(expectation)
232
- end
233
- end # context
234
-
235
- context 'Parsing quantifiers:' do
236
- let(:prefix) { 'letter from p to t ' }
237
-
238
- it "should parse 'once' syntax" do
239
- result = parse(prefix + 'once')
240
- expect(result).to be_success
241
-
242
- regexp = regexp_repr(result)
243
- expect(regexp.to_str).to eq('[p-t]{1}')
244
- end
245
-
246
- it "should parse 'twice' syntax" do
247
- result = parse('digit twice')
248
- expect(result).to be_success
249
-
250
- regexp = regexp_repr(result)
251
- expect(regexp.to_str).to eq('\d{2}')
252
- end
253
-
254
- it "should parse 'optional' syntax" do
255
- result = parse('anything optional')
256
- expect(result).to be_success
257
-
258
- regexp = regexp_repr(result)
259
- expect(regexp.to_str).to eq('.?')
260
- end
261
-
262
- it "should parse 'exactly ... times' syntax" do
263
- result = parse('letter from a to f exactly 4 times')
264
- expect(result).to be_success
265
-
266
- regexp = regexp_repr(result)
267
- expect(regexp.to_str).to eq('[a-f]{4}')
268
- end
269
-
270
- it "should parse 'between ... and ... times' syntax" do
271
- result = parse(prefix + 'between 2 and 4 times')
272
- expect(result).to be_success
273
-
274
- # Dropping 'times' keyword is shorter syntax
275
- expect(parse(prefix + 'between 2 and 4')).to be_success
276
-
277
- regexp = regexp_repr(result)
278
- expect(regexp.to_str).to eq('[p-t]{2,4}')
279
- end
280
-
281
- it "should parse 'once or more' syntax" do
282
- result = parse(prefix + 'once or more')
283
- expect(result).to be_success
284
-
285
- regexp = regexp_repr(result)
286
- expect(regexp.to_str).to eq('[p-t]+')
287
- end
288
-
289
- it "should parse 'never or more' syntax" do
290
- result = parse(prefix + 'never or more')
291
- expect(result).to be_success
292
-
293
- regexp = regexp_repr(result)
294
- expect(regexp.to_str).to eq('[p-t]*')
295
- end
296
-
297
- it "should parse 'at least ... times' syntax" do
298
- result = parse(prefix + 'at least 10 times')
299
- expect(result).to be_success
300
-
301
- regexp = regexp_repr(result)
302
- expect(regexp.to_str).to eq('[p-t]{10,}')
303
- end
304
- end # context
305
-
306
- context 'Parsing lookaround:' do
307
- it 'should parse positive lookahead' do
308
- result = parse('letter if followed by (anything once or more, digit)')
309
- expect(result).to be_success
310
-
311
- regexp = regexp_repr(result)
312
- expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
313
- end
314
-
315
- it 'should parse negative lookahead' do
316
- result = parse('letter if not followed by (anything once or more, digit)')
317
- expect(result).to be_success
318
-
319
- regexp = regexp_repr(result)
320
- expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
321
- end
322
-
323
- it 'should parse positive lookbehind' do
324
- result = parse('literally "bar" if already had literally "foo"')
325
- expect(result).to be_success
326
-
327
- regexp = regexp_repr(result)
328
- expect(regexp.to_str).to eq('bar(?<=foo)')
329
- end
330
-
331
- it 'should parse negative lookbehind' do
332
- result = parse('literally "bar" if not already had literally "foo"')
333
- expect(result).to be_success
334
-
335
- regexp = regexp_repr(result)
336
- expect(regexp.to_str).to eq('bar(?<!foo)')
337
- end
338
- end # context
339
-
340
- context 'Parsing capturing group:' do
341
- it 'should parse simple anonymous capturing group' do
342
- result = parse('capture(literally "sample")')
343
- expect(result).to be_success
344
-
345
- regexp = regexp_repr(result)
346
- expect(regexp.to_str).to eq('(sample)')
347
- end
348
-
349
- it 'should parse complex anonymous capturing group' do
350
- source = 'capture(any of (literally "sample", (digit once or more)))'
351
- result = parse(source)
352
- expect(result).to be_success
353
-
354
- regexp = regexp_repr(result)
355
- expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
356
- end
357
-
358
- it 'should parse simple anonymous until capturing group' do
359
- result = parse('capture anything once or more until literally "!"')
360
- expect(result).to be_success
361
-
362
- regexp = regexp_repr(result)
363
- expect(regexp.to_str).to eq('(.+)!')
364
- end
365
-
366
- it 'should parse complex named capturing group' do
367
- source = <<-END_SRL
368
- capture(any of (literally "sample", (digit once or more)))
369
- as "foo"
370
- END_SRL
371
- result = parse(source)
372
- expect(result).to be_success
373
-
374
- regexp = regexp_repr(result)
375
- expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
376
- end
377
-
378
- it 'should parse a sequence with named capturing groups' do
379
- source = <<-ENDS
380
- capture (anything once or more) as "first",
381
- literally " - ",
382
- capture literally "second part" as "second"
383
- ENDS
384
- result = parse(source)
385
- expect(result).to be_success
386
-
387
- regexp = regexp_repr(result)
388
- expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
389
- end
390
-
391
- it 'should parse complex named until capturing group' do
392
- source = 'capture (anything once or more) as "foo" until literally "m"'
393
- result = parse(source)
394
- expect(result).to be_success
395
-
396
- regexp = regexp_repr(result)
397
- expect(regexp.to_str).to eq('(?<foo>.+)m')
398
- end
399
- end # context
400
-
401
- context 'Parsing anchors:' do
402
- it 'should parse begin anchors' do
403
- result = parse('starts with literally "match"')
404
- expect(result).to be_success
405
-
406
- regexp = regexp_repr(result)
407
- expect(regexp.to_str).to eq('^match')
408
- end
409
-
410
- it 'should parse begin anchors (alternative syntax)' do
411
- result = parse('begin with literally "match"')
412
- expect(result).to be_success
413
-
414
- regexp = regexp_repr(result)
415
- expect(regexp.to_str).to eq('^match')
416
- end
417
-
418
- it 'should parse end anchors' do
419
- result = parse('literally "match" must end')
420
- expect(result).to be_success
421
-
422
- regexp = regexp_repr(result)
423
- expect(regexp.to_str).to eq('match$')
424
- end
425
-
426
- it 'should parse combination of begin and end anchors' do
427
- result = parse('starts with literally "match" must end')
428
- expect(result).to be_success
429
-
430
- regexp = regexp_repr(result)
431
- expect(regexp.to_str).to eq('^match$')
432
- end
433
-
434
- it 'should accept anchor with a sequence of patterns' do
435
- source = <<-ENDS
436
- begin with any of (digit, letter, one of ".-") once or more,
437
- literally ".",
438
- letter at least 2 times must end
439
- ENDS
440
-
441
- result = parse(source)
442
- expect(result).to be_success
443
-
444
- regexp = regexp_repr(result)
445
- # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
446
- expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
447
- end
448
- end # context
449
- end # describe
450
- end # module
451
- # End of file