srl_ruby 0.2.2 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/lib/regex/character.rb +2 -1
- data/lib/regex/lookaround.rb +4 -5
- data/lib/srl_ruby/ast_builder.rb +44 -18
- data/lib/srl_ruby/grammar.rb +8 -5
- data/lib/srl_ruby/tokenizer.rb +6 -5
- data/lib/srl_ruby/version.rb +1 -1
- data/lib/srl_ruby.rb +1 -1
- data/spec/acceptance/srl_test_suite_spec.rb +29 -10
- data/spec/acceptance/support/rule_file_grammar.rb +9 -9
- data/spec/srl_ruby_spec.rb +310 -0
- data/srl_ruby.gemspec +1 -1
- data/srl_test/Test-Rules/website_example_password.rule +1 -1
- metadata +6 -6
- data/spec/integration_spec.rb +0 -451
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3642f7f1361cb920e2a9e42dcd8969d5ec75c7fe
|
4
|
+
data.tar.gz: 480c7561fd6972a8872e472166d07d0905c4612d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d60a8e33fbf0b2fafdacf03127fe1502efd60bc5fc69de9d97fa9ddd07d0505d3c6577f0fb078277980b6a024f76f88887fcd01e0798f02f6b55c41ebef65bbe
|
7
|
+
data.tar.gz: 838e7c67fa30ac58d2fefffd701394db75a632087c2440aaaac0d2fb3b349857c79eefd446089b6ccb6878232ed56c987dcd669847b1b4aff2cf40025e0df20e
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,21 @@
|
|
6
6
|
### Fixed
|
7
7
|
### Security
|
8
8
|
|
9
|
+
## [0.2.4] - 2018-04-02
|
10
|
+
SrlRuby passes 10 tests out of 15 standard SRL tests in total.
|
11
|
+
### Changed
|
12
|
+
- File `lib/srl_ruby/grammar.rb` grammar refactoring. Added support for new 'no digit' SRL expression.
|
13
|
+
- Class `SrlRuby::ASTBuilder` updates to reflect changes in the grammar.
|
14
|
+
- Class `Regex::Lookaround` refactored: now inherits from `Regex::MonadicExpression`
|
15
|
+
- File `spec/integration_spec.rb` renamed to `spec/srl_ruby_spec.rb`; it now makes systematic use of the API of the SrlRuby module.
|
16
|
+
|
17
|
+
### Fixed
|
18
|
+
- Method `SrlRuby::ASTBuilder#reduce_one_of` now escapes special characters inside a character class.
|
19
|
+
|
20
|
+
## [0.2.3] - 2018-03-15
|
21
|
+
### Fixed
|
22
|
+
- Fixed a number of Yard warnings.
|
23
|
+
|
9
24
|
## [0.2.2] - 2018-03-15
|
10
25
|
### Fixed
|
11
26
|
- Fixed rley version dependency
|
data/lib/regex/character.rb
CHANGED
@@ -25,7 +25,8 @@ module Regex # This module is used as a namespace
|
|
25
25
|
"\\7" => 7
|
26
26
|
}.freeze
|
27
27
|
|
28
|
-
MetaChars = '
|
28
|
+
MetaChars = '\^$.|+?*()[]{}'.freeze
|
29
|
+
MetaCharsInClass = '\^[]-'.freeze # Characters with special meaning in char. class
|
29
30
|
|
30
31
|
# The integer value that uniquely identifies the character.
|
31
32
|
attr_reader(:codepoint)
|
data/lib/regex/lookaround.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
########################
|
6
6
|
|
7
7
|
|
8
|
-
require_relative '
|
8
|
+
require_relative 'monadic_expression' # Access the superclass
|
9
9
|
|
10
10
|
module Regex # This module is used as a namespace
|
11
11
|
# Lookaround is a zero-width assertion just like the start and end of line
|
@@ -14,7 +14,7 @@ module Regex # This module is used as a namespace
|
|
14
14
|
# return the result of the match: match or no match.
|
15
15
|
# That is why they are called "assertions". They do not consume characters
|
16
16
|
# from the subject, but only assert whether a match is possible or not.
|
17
|
-
class Lookaround <
|
17
|
+
class Lookaround < MonadicExpression
|
18
18
|
# The "direction" of the lookaround. Can be ahead or behind. It specifies
|
19
19
|
# the relative position of the expression to match compared to
|
20
20
|
# the current 'position' in the subject text.
|
@@ -30,7 +30,7 @@ module Regex # This module is used as a namespace
|
|
30
30
|
# [theDir] One of the following values: [ :ahead, :behind ]
|
31
31
|
# [theKind] One of the following values: [ :positive, :negative ]
|
32
32
|
def initialize(assertedExpression, theDir, theKind)
|
33
|
-
super(
|
33
|
+
super(assertedExpression)
|
34
34
|
@dir = theDir
|
35
35
|
@kind = theKind
|
36
36
|
end
|
@@ -38,10 +38,9 @@ module Regex # This module is used as a namespace
|
|
38
38
|
# Conversion method re-definition.
|
39
39
|
# Purpose: Return the String representation of the captured expression.
|
40
40
|
def to_str()
|
41
|
-
result = children[0].to_str
|
42
41
|
dir_syntax = (dir == :ahead) ? '' : '<'
|
43
42
|
kind_syntax = (kind == :positive) ? '=' : '!'
|
44
|
-
result
|
43
|
+
result = '(?' + dir_syntax + kind_syntax + child.to_str + ')'
|
45
44
|
return result
|
46
45
|
end
|
47
46
|
end # class
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -91,16 +91,26 @@ module SrlRuby
|
|
91
91
|
return_first_child(aRange, theTokens, theChildren)
|
92
92
|
end
|
93
93
|
|
94
|
-
# rule('pattern' => %w[pattern separator
|
94
|
+
# rule('pattern' => %w[pattern separator sub_pattern]).as 'pattern_sequence'
|
95
95
|
def reduce_pattern_sequence(_production, _range, _tokens, theChildren)
|
96
96
|
return Regex::Concatenation.new(theChildren[0], theChildren[2])
|
97
97
|
end
|
98
98
|
|
99
|
+
# rule('pattern' => 'sub_pattern').as 'basic_pattern'
|
100
|
+
def reduce_basic_pattern(_production, aRange, theTokens, theChildren)
|
101
|
+
return_first_child(aRange, theTokens, theChildren)
|
102
|
+
end
|
103
|
+
|
104
|
+
# rule('sub_pattern' => 'assertion').as 'assertion_sub_pattern'
|
105
|
+
def reduce_assertion_sub_pattern(_production, aRange, theTokens, theChildren)
|
106
|
+
return_first_child(aRange, theTokens, theChildren)
|
107
|
+
end
|
108
|
+
|
99
109
|
# rule('flags' => %[flags separator single_flag]).as 'flag_sequence'
|
100
110
|
def reduce_flag_sequence(_production, _range, _tokens, theChildren)
|
101
111
|
theChildren[0] << theChildren[2]
|
102
112
|
end
|
103
|
-
|
113
|
+
|
104
114
|
# rule('flags' => %w[separator single_flag]).as 'flag_simple'
|
105
115
|
def reduce_flag_simple(_production, _range, _tokens, theChildren)
|
106
116
|
[theChildren.last]
|
@@ -151,13 +161,6 @@ module SrlRuby
|
|
151
161
|
return Regex::Anchor.new('$')
|
152
162
|
end
|
153
163
|
|
154
|
-
# rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
|
155
|
-
def reduce_asserted_anchorable(_production, _range, _tokens, theChildren)
|
156
|
-
assertion = theChildren.last
|
157
|
-
assertion.children.unshift(theChildren[0])
|
158
|
-
return assertion
|
159
|
-
end
|
160
|
-
|
161
164
|
# rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
|
162
165
|
def reduce_if_followed(_production, _range, _tokens, theChildren)
|
163
166
|
return Regex::Lookaround.new(theChildren.last, :ahead, :positive)
|
@@ -218,11 +221,6 @@ module SrlRuby
|
|
218
221
|
reduce_lowercase_from_to(aProduction, aRange, theTokens, theChildren)
|
219
222
|
end
|
220
223
|
|
221
|
-
# rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
|
222
|
-
def reduce_simple_digit_range(_production, _range, _tokens, _children)
|
223
|
-
char_shorthand('d')
|
224
|
-
end
|
225
|
-
|
226
224
|
# rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
|
227
225
|
def reduce_any_character(_production, _range, _tokens, _children)
|
228
226
|
char_shorthand('w')
|
@@ -233,6 +231,16 @@ module SrlRuby
|
|
233
231
|
char_shorthand('W')
|
234
232
|
end
|
235
233
|
|
234
|
+
# rule('character_class' => 'digit_or_number').as 'digit'
|
235
|
+
def reduce_digit(_production, _range, _tokens, _children)
|
236
|
+
char_shorthand('d')
|
237
|
+
end
|
238
|
+
|
239
|
+
# rule('character_class' => %w[NO DIGIT]).as 'non_digit'
|
240
|
+
def reduce_non_digit(_production, _range, _tokens, _children)
|
241
|
+
char_shorthand('D')
|
242
|
+
end
|
243
|
+
|
236
244
|
# rule('character_class' => 'WHITESPACE').as 'whitespace'
|
237
245
|
def reduce_whitespace(_production, _range, _tokens, _children)
|
238
246
|
char_shorthand('s')
|
@@ -248,10 +256,18 @@ module SrlRuby
|
|
248
256
|
wildcard
|
249
257
|
end
|
250
258
|
|
251
|
-
# rule('
|
259
|
+
# rule('character_class' => %w[ONE OF STRING_LIT]).as 'one_of'
|
252
260
|
def reduce_one_of(_production, _range, _tokens, theChildren)
|
253
261
|
raw_literal = theChildren[-1].token.lexeme.dup
|
254
|
-
alternatives = raw_literal.chars.map
|
262
|
+
alternatives = raw_literal.chars.map do |ch|
|
263
|
+
if Regex::Character::MetaCharsInClass.include?(ch)
|
264
|
+
chars = [Regex::Character.new("\\"), Regex::Character.new(ch)]
|
265
|
+
Regex::Concatenation.new(*chars)
|
266
|
+
else
|
267
|
+
Regex::Character.new(ch)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
255
271
|
# TODO check other implementations
|
256
272
|
return Regex::CharClass.new(false, *alternatives)
|
257
273
|
end
|
@@ -263,7 +279,7 @@ module SrlRuby
|
|
263
279
|
|
264
280
|
# rule('special_char' => 'BACKSLASH').as 'backslash'
|
265
281
|
def reduce_backslash(_production, _range, _tokens, _children)
|
266
|
-
# Double the
|
282
|
+
# Double the backslash (because of escaping)
|
267
283
|
string_literal("\\", true)
|
268
284
|
end
|
269
285
|
|
@@ -283,7 +299,17 @@ module SrlRuby
|
|
283
299
|
|
284
300
|
# rule('alternation' => %w[ANY OF LPAREN alternatives RPAREN]).as 'any_of'
|
285
301
|
def reduce_any_of(_production, _range, _tokens, theChildren)
|
286
|
-
|
302
|
+
first_alternative = theChildren[3].first
|
303
|
+
result = nil
|
304
|
+
|
305
|
+
# Ugly: in SRL, comma is a dummy separator except in any of construct...
|
306
|
+
if theChildren[3].size == 1 && first_alternative.kind_of?(Regex::Concatenation)
|
307
|
+
result = Regex::Alternation.new(*first_alternative.children)
|
308
|
+
else
|
309
|
+
result = Regex::Alternation.new(*theChildren[3])
|
310
|
+
end
|
311
|
+
|
312
|
+
return result
|
287
313
|
end
|
288
314
|
|
289
315
|
# rule('alternatives' => %w[alternatives separator quantifiable]).as 'alternative_list'
|
data/lib/srl_ruby/grammar.rb
CHANGED
@@ -6,7 +6,7 @@ module SrlRuby
|
|
6
6
|
builder = Rley::Syntax::GrammarBuilder.new do
|
7
7
|
add_terminals('LPAREN', 'RPAREN', 'COMMA')
|
8
8
|
add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
|
9
|
-
add_terminals('LITERALLY', 'STRING_LIT')
|
9
|
+
add_terminals('LITERALLY', 'STRING_LIT', 'IDENTIFIER')
|
10
10
|
add_terminals('BEGIN', 'STARTS', 'WITH')
|
11
11
|
add_terminals('MUST', 'END')
|
12
12
|
add_terminals('UPPERCASE', 'LETTER', 'FROM', 'TO')
|
@@ -26,8 +26,10 @@ module SrlRuby
|
|
26
26
|
rule('srl' => 'expression').as 'start_rule'
|
27
27
|
rule('expression' => %w[pattern flags]).as 'flagged_expr'
|
28
28
|
rule('expression' => 'pattern').as 'simple_expr'
|
29
|
-
rule('pattern' => %w[pattern separator
|
30
|
-
rule('pattern' => '
|
29
|
+
rule('pattern' => %w[pattern separator sub_pattern]).as 'pattern_sequence'
|
30
|
+
rule('pattern' => 'sub_pattern').as 'basic_pattern'
|
31
|
+
rule('sub_pattern' => 'quantifiable').as 'quantifiable_sub_pattern'
|
32
|
+
rule('sub_pattern' => 'assertion').as 'assertion_sub_pattern'
|
31
33
|
rule('separator' => 'COMMA').as 'comma_separator'
|
32
34
|
rule('separator' => []).as 'void_separator'
|
33
35
|
rule('flags' => %w[flags separator single_flag]).as 'flag_sequence'
|
@@ -43,7 +45,6 @@ module SrlRuby
|
|
43
45
|
rule('begin_anchor' => %w[BEGIN WITH]).as 'begin_with'
|
44
46
|
rule('end_anchor' => %w[separator MUST END]).as 'end_anchor'
|
45
47
|
rule('anchorable' => 'assertable').as 'simple_anchorable'
|
46
|
-
rule('anchorable' => %w[assertable assertion]).as 'asserted_anchorable'
|
47
48
|
rule('assertion' => %w[IF FOLLOWED BY assertable]).as 'if_followed'
|
48
49
|
rule('assertion' => %w[IF NOT FOLLOWED BY assertable]).as 'if_not_followed'
|
49
50
|
rule('assertion' => %w[IF ALREADY HAD assertable]).as 'if_had'
|
@@ -64,9 +65,10 @@ module SrlRuby
|
|
64
65
|
rule('letter_range' => 'LETTER').as 'any_lowercase'
|
65
66
|
rule('letter_range' => %w[UPPERCASE LETTER]).as 'any_uppercase'
|
66
67
|
rule('digit_range' => %w[digit_or_number FROM DIGIT_LIT TO DIGIT_LIT]).as 'digits_from_to'
|
67
|
-
rule('digit_range' => 'digit_or_number').as 'simple_digit_range'
|
68
68
|
rule('character_class' => %w[ANY CHARACTER]).as 'any_character'
|
69
69
|
rule('character_class' => %w[NO CHARACTER]).as 'no_character'
|
70
|
+
rule('character_class' => 'digit_or_number').as 'digit'
|
71
|
+
rule('character_class' => %w[NO DIGIT]).as 'non_digit'
|
70
72
|
rule('character_class' => 'WHITESPACE').as 'whitespace'
|
71
73
|
rule('character_class' => %w[NO WHITESPACE]).as 'no_whitespace'
|
72
74
|
rule('character_class' => 'ANYTHING').as 'anything'
|
@@ -84,6 +86,7 @@ module SrlRuby
|
|
84
86
|
rule('capturing_group' => %w[CAPTURE assertable AS var_name]).as 'named_capture'
|
85
87
|
rule('capturing_group' => %w[CAPTURE assertable AS var_name UNTIL assertable]).as 'named_capture_until'
|
86
88
|
rule('var_name' => 'STRING_LIT').as 'var_name'
|
89
|
+
rule('var_name' => 'IDENTIFIER').as 'var_ident' # capture name not enclosed between quotes
|
87
90
|
rule('quantifier' => 'ONCE').as 'once'
|
88
91
|
rule('quantifier' => 'TWICE').as 'twice'
|
89
92
|
rule('quantifier' => %w[EXACTLY count TIMES]).as 'exactly'
|
data/lib/srl_ruby/tokenizer.rb
CHANGED
@@ -111,9 +111,10 @@ module SrlRuby
|
|
111
111
|
token = build_token('INTEGER', lexeme) # An integer has 2..* digits
|
112
112
|
elsif (lexeme = scanner.scan(/[0-9]/))
|
113
113
|
token = build_token('DIGIT_LIT', lexeme)
|
114
|
-
elsif (lexeme = scanner.scan(/[a-zA-
|
115
|
-
|
116
|
-
|
114
|
+
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]+/))
|
115
|
+
keyw = @@keywords[lexeme.upcase]
|
116
|
+
tok_type = keyw ? keyw : 'IDENTIFIER'
|
117
|
+
token = build_token(tok_type, lexeme)
|
117
118
|
elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
|
118
119
|
token = build_token('LETTER_LIT', lexeme)
|
119
120
|
elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
|
@@ -126,7 +127,7 @@ module SrlRuby
|
|
126
127
|
erroneous = curr_ch.nil? ? '' : curr_ch
|
127
128
|
sequel = scanner.scan(/.{1,20}/)
|
128
129
|
erroneous += sequel unless sequel.nil?
|
129
|
-
raise ScanError.new("Unknown token #{erroneous}")
|
130
|
+
raise ScanError.new("Unknown token #{erroneous} on line #{lineno}")
|
130
131
|
end
|
131
132
|
|
132
133
|
return token
|
@@ -137,7 +138,7 @@ module SrlRuby
|
|
137
138
|
col = scanner.pos - aLexeme.size - @line_start + 1
|
138
139
|
pos = Position.new(@lineno, col)
|
139
140
|
token = SrlToken.new(aLexeme, aSymbolName, pos)
|
140
|
-
rescue
|
141
|
+
rescue Exception => exc
|
141
142
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
142
143
|
raise exc
|
143
144
|
end
|
data/lib/srl_ruby/version.rb
CHANGED
data/lib/srl_ruby.rb
CHANGED
@@ -21,7 +21,7 @@ module SrlRuby # This module is used as a namespace
|
|
21
21
|
# @return [Regexp]
|
22
22
|
def self.parse(source)
|
23
23
|
# Create a Rley facade object
|
24
|
-
engine = Rley::Engine.new
|
24
|
+
engine = Rley::Engine.new { |cfg| cfg.diagnose = true }
|
25
25
|
|
26
26
|
# Step 1. Load SRL grammar
|
27
27
|
engine.use_grammar(SrlRuby::Grammar)
|
@@ -3,8 +3,12 @@ require_relative './support/rule_file_parser'
|
|
3
3
|
require_relative '../../lib/srl_ruby'
|
4
4
|
|
5
5
|
##############################
|
6
|
-
#
|
7
|
-
|
6
|
+
# Some rule files contain undocumented and unsupported SRL expressions:
|
7
|
+
# | File name | unrecognized input |
|
8
|
+
# | no_word.rule | 'no word' |
|
9
|
+
# | none_of.rule | 'none of abcd' |
|
10
|
+
# | word.rule | '(word)' |
|
11
|
+
|
8
12
|
|
9
13
|
RSpec.describe Acceptance do
|
10
14
|
def rule_path
|
@@ -17,10 +21,10 @@ RSpec.describe Acceptance do
|
|
17
21
|
|
18
22
|
def test_rule_file(aRuleFileRepr)
|
19
23
|
regex = SrlRuby::parse(aRuleFileRepr.srl.value)
|
20
|
-
expect(regex).
|
21
|
-
|
24
|
+
expect(regex).to be_kind_of(Regexp)
|
25
|
+
|
22
26
|
aRuleFileRepr.match_tests.each do |test|
|
23
|
-
expect(
|
27
|
+
expect(test.test_string.value).to match(regex)
|
24
28
|
end
|
25
29
|
aRuleFileRepr.no_match_tests.each do |test|
|
26
30
|
expect(regex.match(test.test_string.value)).to be_nil
|
@@ -42,21 +46,26 @@ RSpec.describe Acceptance do
|
|
42
46
|
rule_file_repr = load_file('backslash.rule')
|
43
47
|
test_rule_file(rule_file_repr)
|
44
48
|
end
|
45
|
-
|
49
|
+
|
46
50
|
it 'should support named capture group' do
|
47
51
|
rule_file_repr = load_file('basename_capture_group.rule')
|
48
52
|
test_rule_file(rule_file_repr)
|
49
53
|
end
|
50
|
-
|
54
|
+
|
51
55
|
it 'should match uppercase letter(s)' do
|
52
56
|
rule_file_repr = load_file('issue_17_uppercase_letter.rule')
|
53
57
|
test_rule_file(rule_file_repr)
|
54
|
-
end
|
55
|
-
|
58
|
+
end
|
59
|
+
|
56
60
|
it 'should not trim literal strings' do
|
57
61
|
rule_file_repr = load_file('literally_spaces.rule')
|
58
62
|
test_rule_file(rule_file_repr)
|
59
|
-
end
|
63
|
+
end
|
64
|
+
|
65
|
+
it 'should match non digit pattern' do
|
66
|
+
rule_file_repr = load_file('nondigit.rule')
|
67
|
+
test_rule_file(rule_file_repr)
|
68
|
+
end
|
60
69
|
|
61
70
|
it 'should match a tab' do
|
62
71
|
rule_file_repr = load_file('tab.rule')
|
@@ -72,4 +81,14 @@ RSpec.describe Acceptance do
|
|
72
81
|
rule_file_repr = load_file('website_example_lookahead.rule')
|
73
82
|
test_rule_file(rule_file_repr)
|
74
83
|
end
|
84
|
+
|
85
|
+
it 'should not trim literal strings' do
|
86
|
+
rule_file_repr = load_file('website_example_password.rule')
|
87
|
+
test_rule_file(rule_file_repr)
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'should' do
|
91
|
+
rule_file_repr = load_file('website_example_url.rule')
|
92
|
+
test_rule_file(rule_file_repr)
|
93
|
+
end
|
75
94
|
end
|
@@ -17,22 +17,22 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
17
17
|
add_terminals('INTEGER', 'STRING_LIT')
|
18
18
|
add_terminals('IDENTIFIER', 'SRL_SOURCE')
|
19
19
|
|
20
|
-
rule('rule_file' =>
|
21
|
-
rule('srl_heading' =>
|
22
|
-
rule('srl_tests' =>
|
20
|
+
rule('rule_file' => 'srl_heading srl_tests').as 'start_rule'
|
21
|
+
rule('srl_heading' => 'SRL: SRL_SOURCE').as 'srl_source'
|
22
|
+
rule('srl_tests' => 'srl_tests single_test').as 'test_list'
|
23
23
|
rule('srl_tests' => 'single_test').as 'one_test'
|
24
24
|
rule('single_test' => 'atomic_test').as 'single_atomic_test'
|
25
25
|
rule('single_test' => 'compound_test').as 'single_compound_test'
|
26
26
|
rule('atomic_test' => 'match_test').as 'atomic_match'
|
27
27
|
rule('atomic_test' => 'no_match_test').as 'atomic_no_match'
|
28
28
|
rule('compound_test' => 'capture_test').as 'compound_capture'
|
29
|
-
rule('match_test' =>
|
30
|
-
rule('no_match_test' =>
|
31
|
-
rule('capture_test' =>
|
32
|
-
rule('capture_heading' =>
|
33
|
-
rule('capture_expectations' =>
|
29
|
+
rule('match_test' => 'MATCH: STRING_LIT').as 'match_string'
|
30
|
+
rule('no_match_test' => 'NO MATCH: STRING_LIT').as 'no_match_string'
|
31
|
+
rule('capture_test' => 'capture_heading capture_expectations').as 'capture_test'
|
32
|
+
rule('capture_heading' => 'CAPTURE FOR STRING_LIT COLON').as 'capture_string'
|
33
|
+
rule('capture_expectations' => 'capture_expectations single_expectation').as 'assertion_list'
|
34
34
|
rule('capture_expectations' => 'single_expectation').as 'one_expectation'
|
35
|
-
rule('single_expectation' =>
|
35
|
+
rule('single_expectation' => 'DASH INTEGER COLON capture_variable COLON STRING_LIT').as 'capture_expectation'
|
36
36
|
rule('capture_variable' => 'INTEGER').as 'var_integer'
|
37
37
|
rule('capture_variable' => 'IDENTIFIER').as 'var_identifier'
|
38
38
|
end
|
@@ -0,0 +1,310 @@
|
|
1
|
+
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
+
require_relative '../lib/srl_ruby'
|
3
|
+
|
4
|
+
describe SrlRuby do
|
5
|
+
context 'Parsing character ranges:' do
|
6
|
+
it "should parse 'letter from ... to ...' syntax" do
|
7
|
+
regexp = SrlRuby.parse('letter from a to f')
|
8
|
+
expect(regexp.source).to eq('[a-f]')
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should parse 'uppercase letter from ... to ...' syntax" do
|
12
|
+
regexp = SrlRuby.parse('UPPERCASE letter from A to F')
|
13
|
+
expect(regexp.source).to eq('[A-F]')
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should parse 'letter' syntax" do
|
17
|
+
regexp = SrlRuby.parse('letter')
|
18
|
+
expect(regexp.source).to eq('[a-z]')
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should parse 'uppercase letter' syntax" do
|
22
|
+
regexp = SrlRuby.parse('uppercase letter')
|
23
|
+
expect(regexp.source).to eq('[A-Z]')
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should parse 'digit from ... to ...' syntax" do
|
27
|
+
regexp = SrlRuby.parse('digit from 1 to 4')
|
28
|
+
expect(regexp.source).to eq('[1-4]')
|
29
|
+
end
|
30
|
+
end # context
|
31
|
+
|
32
|
+
context 'Parsing string literals:' do
|
33
|
+
it 'should parse double quotes literal string' do
|
34
|
+
regexp = SrlRuby.parse('literally "hello"')
|
35
|
+
expect(regexp.source).to eq('hello')
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'should parse single quotes literal string' do
|
39
|
+
regexp = SrlRuby.parse("literally 'hello'")
|
40
|
+
expect(regexp.source).to eq('hello')
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should escape special characters' do
|
44
|
+
regexp = SrlRuby.parse("literally '.'")
|
45
|
+
expect(regexp.source).to eq('\.')
|
46
|
+
end
|
47
|
+
end # context
|
48
|
+
|
49
|
+
context 'Parsing character classes:' do
|
50
|
+
it "should parse 'digit' syntax" do
|
51
|
+
regexp = SrlRuby.parse('digit')
|
52
|
+
expect(regexp.source).to eq('\d')
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should parse 'number' syntax" do
|
56
|
+
regexp = SrlRuby.parse('number')
|
57
|
+
expect(regexp.source).to eq('\d')
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should parse 'no digit' syntax" do
|
61
|
+
regexp = SrlRuby.parse('no digit')
|
62
|
+
expect(regexp.source).to eq('\D')
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should parse 'any character' syntax" do
|
66
|
+
regexp = SrlRuby.parse('any character')
|
67
|
+
expect(regexp.source).to eq('\w')
|
68
|
+
end
|
69
|
+
|
70
|
+
it "should parse 'no character' syntax" do
|
71
|
+
regexp = SrlRuby.parse('no character')
|
72
|
+
expect(regexp.source).to eq('\W')
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should parse 'whitespace' syntax" do
|
76
|
+
regexp = SrlRuby.parse('whitespace')
|
77
|
+
expect(regexp.source).to eq('\s')
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should parse 'no whitespace' syntax" do
|
81
|
+
regexp = SrlRuby.parse('no whitespace')
|
82
|
+
expect(regexp.source).to eq('\S')
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should parse 'anything' syntax" do
|
86
|
+
regexp = SrlRuby.parse('anything')
|
87
|
+
expect(regexp.source).to eq('.')
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should parse 'one of' syntax" do
|
91
|
+
regexp = SrlRuby.parse('one of "._%+-"')
|
92
|
+
# Remark: reference implementation less readable
|
93
|
+
# (escapes more characters than required)
|
94
|
+
expect(regexp.source).to eq('[._%+\-]')
|
95
|
+
end
|
96
|
+
end # context
|
97
|
+
|
98
|
+
context 'Parsing special character declarations:' do
|
99
|
+
it "should parse 'tab' syntax" do
|
100
|
+
regexp = SrlRuby.parse('tab')
|
101
|
+
expect(regexp.source).to eq('\t')
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should parse 'backslash' syntax" do
|
105
|
+
regexp = SrlRuby.parse('backslash')
|
106
|
+
expect(regexp.source).to eq('\\\\')
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should parse 'new line' syntax" do
|
110
|
+
regexp = SrlRuby.parse('new line')
|
111
|
+
expect(regexp.source).to eq('\n')
|
112
|
+
end
|
113
|
+
end # context
|
114
|
+
|
115
|
+
context 'Parsing alternations:' do
|
116
|
+
it "should parse 'any of' syntax" do
|
117
|
+
source = 'any of (any character, one of "._%-+")'
|
118
|
+
regexp = SrlRuby.parse(source)
|
119
|
+
expect(regexp.source).to eq('(?:\w|[._%\-+])')
|
120
|
+
end
|
121
|
+
|
122
|
+
it 'should anchor as alternative' do
|
123
|
+
regexp = SrlRuby.parse('any of (literally "?", must end)')
|
124
|
+
expect(regexp.source).to eq('(?:\\?|$)')
|
125
|
+
end
|
126
|
+
end # context
|
127
|
+
|
128
|
+
context 'Parsing concatenation:' do
|
129
|
+
it 'should reject dangling comma' do
|
130
|
+
source = 'literally "a",'
|
131
|
+
err = StandardError
|
132
|
+
msg_pattern = /Premature end of input after ',' at position line 1, column 14/
|
133
|
+
expect { SrlRuby.parse(source) }.to raise_error(err, msg_pattern)
|
134
|
+
end
|
135
|
+
|
136
|
+
it 'should parse concatenation' do
|
137
|
+
regexp = SrlRuby.parse('any of (literally "sample", (digit once or more))')
|
138
|
+
expect(regexp.source).to eq('(?:sample|(?:\d+))')
|
139
|
+
end
|
140
|
+
|
141
|
+
it 'should parse a long sequence of patterns' do
|
142
|
+
source = <<-END_SRL
|
143
|
+
any of (any character, one of "._%-+") once or more,
|
144
|
+
literally "@",
|
145
|
+
any of (digit, letter, one of ".-") once or more,
|
146
|
+
literally ".",
|
147
|
+
letter at least 2 times
|
148
|
+
END_SRL
|
149
|
+
|
150
|
+
regexp = SrlRuby.parse(source)
|
151
|
+
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
152
|
+
expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
|
153
|
+
expect(regexp.source).to eq(expectation)
|
154
|
+
end
|
155
|
+
end # context
|
156
|
+
|
157
|
+
context 'Parsing quantifiers:' do
|
158
|
+
let(:prefix) { 'letter from p to t ' }
|
159
|
+
|
160
|
+
it "should parse 'once' syntax" do
|
161
|
+
regexp = SrlRuby.parse(prefix + 'once')
|
162
|
+
expect(regexp.source).to eq('[p-t]{1}')
|
163
|
+
end
|
164
|
+
|
165
|
+
it "should parse 'twice' syntax" do
|
166
|
+
regexp = SrlRuby.parse('digit twice')
|
167
|
+
expect(regexp.source).to eq('\d{2}')
|
168
|
+
end
|
169
|
+
|
170
|
+
it "should parse 'optional' syntax" do
|
171
|
+
regexp = SrlRuby.parse('anything optional')
|
172
|
+
expect(regexp.source).to eq('.?')
|
173
|
+
end
|
174
|
+
|
175
|
+
it "should parse 'exactly ... times' syntax" do
|
176
|
+
regexp = SrlRuby.parse('letter from a to f exactly 4 times')
|
177
|
+
expect(regexp.source).to eq('[a-f]{4}')
|
178
|
+
end
|
179
|
+
|
180
|
+
it "should parse 'between ... and ... times' syntax" do
|
181
|
+
regexp = SrlRuby.parse(prefix + 'between 2 and 4 times')
|
182
|
+
expect(regexp.source).to eq('[p-t]{2,4}')
|
183
|
+
|
184
|
+
# Dropping 'times' keyword is a shorter alternative syntax
|
185
|
+
regexp = SrlRuby.parse(prefix + 'between 2 and 4')
|
186
|
+
expect(regexp.source).to eq('[p-t]{2,4}')
|
187
|
+
end
|
188
|
+
|
189
|
+
it "should parse 'once or more' syntax" do
|
190
|
+
regexp = SrlRuby.parse(prefix + 'once or more')
|
191
|
+
expect(regexp.source).to eq('[p-t]+')
|
192
|
+
end
|
193
|
+
|
194
|
+
it "should parse 'never or more' syntax" do
|
195
|
+
regexp = SrlRuby.parse(prefix + 'never or more')
|
196
|
+
expect(regexp.source).to eq('[p-t]*')
|
197
|
+
end
|
198
|
+
|
199
|
+
it "should parse 'at least ... times' syntax" do
|
200
|
+
regexp = SrlRuby.parse(prefix + 'at least 10 times')
|
201
|
+
expect(regexp.source).to eq('[p-t]{10,}')
|
202
|
+
end
|
203
|
+
end # context
|
204
|
+
|
205
|
+
context 'Parsing lookaround:' do
|
206
|
+
it 'should parse positive lookahead' do
|
207
|
+
regexp = SrlRuby.parse('letter if followed by (anything once or more, digit)')
|
208
|
+
expect(regexp.source).to eq('[a-z](?=(?:.+\d))')
|
209
|
+
end
|
210
|
+
|
211
|
+
it 'should parse negative lookahead' do
|
212
|
+
regexp = SrlRuby.parse('letter if not followed by (anything once or more, digit)')
|
213
|
+
expect(regexp.source).to eq('[a-z](?!(?:.+\d))')
|
214
|
+
end
|
215
|
+
|
216
|
+
it 'should parse positive lookbehind' do
|
217
|
+
regexp = SrlRuby.parse('literally "bar" if already had literally "foo"')
|
218
|
+
expect(regexp.source).to eq('bar(?<=foo)')
|
219
|
+
end
|
220
|
+
|
221
|
+
it 'should parse negative lookbehind' do
|
222
|
+
regexp = SrlRuby.parse('literally "bar" if not already had literally "foo"')
|
223
|
+
expect(regexp.source).to eq('bar(?<!foo)')
|
224
|
+
end
|
225
|
+
end # context
|
226
|
+
|
227
|
+
context 'Parsing capturing group:' do
|
228
|
+
it 'should parse simple anonymous capturing group' do
|
229
|
+
regexp = SrlRuby.parse('capture(literally "sample")')
|
230
|
+
expect(regexp.source).to eq('(sample)')
|
231
|
+
end
|
232
|
+
|
233
|
+
it 'should parse complex anonymous capturing group' do
|
234
|
+
source = 'capture(any of (literally "sample", (digit once or more)))'
|
235
|
+
regexp = SrlRuby.parse(source)
|
236
|
+
expect(regexp.source).to eq('((?:sample|(?:\d+)))')
|
237
|
+
end
|
238
|
+
|
239
|
+
it 'should parse simple anonymous until capturing group' do
|
240
|
+
regexp = SrlRuby.parse('capture anything once or more until literally "!"')
|
241
|
+
expect(regexp.source).to eq('(.+)!')
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'should parse unquoted named capturing group' do
|
245
|
+
source = 'capture (anything once or more) as first, must end'
|
246
|
+
regexp = SrlRuby.parse(source)
|
247
|
+
expect(regexp.source).to eq('(?<first>.+)$')
|
248
|
+
end
|
249
|
+
|
250
|
+
it 'should parse complex named capturing group' do
|
251
|
+
source = <<-END_SRL
|
252
|
+
capture(any of (literally "sample", (digit once or more)))
|
253
|
+
as "foo"
|
254
|
+
END_SRL
|
255
|
+
regexp = SrlRuby.parse(source)
|
256
|
+
expect(regexp.source).to eq('(?<foo>(?:sample|(?:\d+)))')
|
257
|
+
end
|
258
|
+
|
259
|
+
it 'should parse a sequence with named capturing groups' do
|
260
|
+
source = <<-END_SRL
|
261
|
+
capture (anything once or more) as "first",
|
262
|
+
literally " - ",
|
263
|
+
capture literally "second part" as "second"
|
264
|
+
END_SRL
|
265
|
+
regexp = SrlRuby.parse(source)
|
266
|
+
expect(regexp.source).to eq('(?<first>.+) - (?<second>second part)')
|
267
|
+
end
|
268
|
+
|
269
|
+
it 'should parse complex named until capturing group' do
|
270
|
+
source = 'capture (anything once or more) as "foo" until literally "m"'
|
271
|
+
regexp = SrlRuby.parse(source)
|
272
|
+
expect(regexp.source).to eq('(?<foo>.+)m')
|
273
|
+
end
|
274
|
+
end # context
|
275
|
+
|
276
|
+
context 'Parsing anchors:' do
|
277
|
+
it 'should parse begin anchors' do
|
278
|
+
regexp = SrlRuby.parse('starts with literally "match"')
|
279
|
+
expect(regexp.source).to eq('^match')
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'should parse begin anchors (alternative syntax)' do
|
283
|
+
regexp = SrlRuby.parse('begin with literally "match"')
|
284
|
+
expect(regexp.source).to eq('^match')
|
285
|
+
end
|
286
|
+
|
287
|
+
it 'should parse end anchors' do
|
288
|
+
regexp = SrlRuby.parse('literally "match" must end')
|
289
|
+
expect(regexp.source).to eq('match$')
|
290
|
+
end
|
291
|
+
|
292
|
+
it 'should parse combination of begin and end anchors' do
|
293
|
+
regexp = SrlRuby.parse('starts with literally "match" must end')
|
294
|
+
expect(regexp.source).to eq('^match$')
|
295
|
+
end
|
296
|
+
|
297
|
+
it 'should accept anchor with a sequence of patterns' do
|
298
|
+
source = <<-END_SRL
|
299
|
+
begin with any of (digit, letter, one of ".-") once or more,
|
300
|
+
literally ".",
|
301
|
+
letter at least 2 times must end
|
302
|
+
END_SRL
|
303
|
+
|
304
|
+
regexp = SrlRuby.parse(source)
|
305
|
+
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
306
|
+
expect(regexp.source).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
307
|
+
end
|
308
|
+
end # context
|
309
|
+
end # describe
|
310
|
+
# End of file
|
data/srl_ruby.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
srl: if followed by (anything never or more, letter), if followed by (anything never or more, uppercase letter), if followed by (anything never or more, digit), if followed by (anything never or more, one of "!@#$%^&*[]\"';:_-<>., =+/\\"), anything at least 8
|
1
|
+
srl: if followed by (anything never or more, letter), if followed by (anything never or more, uppercase letter), if followed by (anything never or more, digit), if followed by (anything never or more, one of "!@#$%^&*[]\"';:_-<>., =+/\\"), anything at least 8 times
|
2
2
|
match: "P@sSword1"
|
3
3
|
match: "Pass-w0rd"
|
4
4
|
match: "Th1s is Secure"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.6.
|
19
|
+
version: 0.6.05
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.6.
|
26
|
+
version: 0.6.05
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -120,12 +120,12 @@ files:
|
|
120
120
|
- spec/acceptance/support/rule_file_parser.rb
|
121
121
|
- spec/acceptance/support/rule_file_token.rb
|
122
122
|
- spec/acceptance/support/rule_file_tokenizer.rb
|
123
|
-
- spec/integration_spec.rb
|
124
123
|
- spec/regex/character_spec.rb
|
125
124
|
- spec/regex/multiplicity_spec.rb
|
126
125
|
- spec/spec_helper.rb
|
127
126
|
- spec/srl_ruby/srl_ruby_spec.rb
|
128
127
|
- spec/srl_ruby/tokenizer_spec.rb
|
128
|
+
- spec/srl_ruby_spec.rb
|
129
129
|
- srl_ruby.gemspec
|
130
130
|
- srl_test/README.md
|
131
131
|
- srl_test/Test-Rules/README.md
|
@@ -173,8 +173,8 @@ summary: srl_ruby is a gem implementing a parser for Simple Regex Language (SRL)
|
|
173
173
|
literals.
|
174
174
|
test_files:
|
175
175
|
- spec/acceptance/srl_test_suite_spec.rb
|
176
|
-
- spec/integration_spec.rb
|
177
176
|
- spec/regex/character_spec.rb
|
178
177
|
- spec/regex/multiplicity_spec.rb
|
179
178
|
- spec/srl_ruby/srl_ruby_spec.rb
|
180
179
|
- spec/srl_ruby/tokenizer_spec.rb
|
180
|
+
- spec/srl_ruby_spec.rb
|
data/spec/integration_spec.rb
DELETED
@@ -1,451 +0,0 @@
|
|
1
|
-
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
-
require_relative '../lib/srl_ruby/tokenizer'
|
3
|
-
require_relative '../lib/srl_ruby/grammar'
|
4
|
-
require_relative '../lib/srl_ruby/ast_builder'
|
5
|
-
|
6
|
-
module SrlRuby
|
7
|
-
describe 'Integration tests:' do
|
8
|
-
def parse(someSRL)
|
9
|
-
tokenizer = SrlRuby::Tokenizer.new(someSRL)
|
10
|
-
@engine.parse(tokenizer.tokens)
|
11
|
-
end
|
12
|
-
|
13
|
-
def regexp_repr(aResult)
|
14
|
-
# Generate an abstract syntax parse tree from the parse result
|
15
|
-
tree = @engine.convert(aResult)
|
16
|
-
tree.root
|
17
|
-
end
|
18
|
-
|
19
|
-
before(:each) do
|
20
|
-
@engine = Rley::Engine.new do |config|
|
21
|
-
config.repr_builder = ASTBuilder
|
22
|
-
end
|
23
|
-
@engine.use_grammar(SrlRuby::Grammar)
|
24
|
-
end
|
25
|
-
|
26
|
-
context 'Parsing character ranges:' do
|
27
|
-
it "should parse 'letter from ... to ...' syntax" do
|
28
|
-
result = parse('letter from a to f')
|
29
|
-
expect(result).to be_success
|
30
|
-
|
31
|
-
regexp = regexp_repr(result)
|
32
|
-
expect(regexp.to_str).to eq('[a-f]')
|
33
|
-
end
|
34
|
-
|
35
|
-
it "should parse 'uppercase letter from ... to ...' syntax" do
|
36
|
-
result = parse('UPPERCASE letter from A to F')
|
37
|
-
expect(result).to be_success
|
38
|
-
|
39
|
-
regexp = regexp_repr(result)
|
40
|
-
expect(regexp.to_str).to eq('[A-F]')
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should parse 'letter' syntax" do
|
44
|
-
result = parse('letter')
|
45
|
-
expect(result).to be_success
|
46
|
-
|
47
|
-
regexp = regexp_repr(result)
|
48
|
-
expect(regexp.to_str).to eq('[a-z]')
|
49
|
-
end
|
50
|
-
|
51
|
-
it "should parse 'uppercase letter' syntax" do
|
52
|
-
result = parse('uppercase letter')
|
53
|
-
expect(result).to be_success
|
54
|
-
|
55
|
-
regexp = regexp_repr(result)
|
56
|
-
expect(regexp.to_str).to eq('[A-Z]')
|
57
|
-
end
|
58
|
-
|
59
|
-
it "should parse 'digit from ... to ...' syntax" do
|
60
|
-
result = parse('digit from 1 to 4')
|
61
|
-
expect(result).to be_success
|
62
|
-
|
63
|
-
regexp = regexp_repr(result)
|
64
|
-
expect(regexp.to_str).to eq('[1-4]')
|
65
|
-
end
|
66
|
-
end # context
|
67
|
-
|
68
|
-
context 'Parsing string literals:' do
|
69
|
-
it 'should parse double quotes literal string' do
|
70
|
-
result = parse('literally "hello"')
|
71
|
-
expect(result).to be_success
|
72
|
-
|
73
|
-
regexp = regexp_repr(result)
|
74
|
-
expect(regexp.to_str).to eq('hello')
|
75
|
-
end
|
76
|
-
|
77
|
-
it 'should parse single quotes literal string' do
|
78
|
-
result = parse("literally 'hello'")
|
79
|
-
expect(result).to be_success
|
80
|
-
|
81
|
-
regexp = regexp_repr(result)
|
82
|
-
expect(regexp.to_str).to eq('hello')
|
83
|
-
end
|
84
|
-
|
85
|
-
it 'should escape special characters' do
|
86
|
-
result = parse("literally '.'")
|
87
|
-
expect(result).to be_success
|
88
|
-
|
89
|
-
regexp = regexp_repr(result)
|
90
|
-
expect(regexp.to_str).to eq('\.')
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
context 'Parsing character classes:' do
|
95
|
-
it "should parse 'digit' syntax" do
|
96
|
-
result = parse('digit')
|
97
|
-
expect(result).to be_success
|
98
|
-
|
99
|
-
regexp = regexp_repr(result)
|
100
|
-
expect(regexp.to_str).to eq('\d')
|
101
|
-
end
|
102
|
-
|
103
|
-
it "should parse 'number' syntax" do
|
104
|
-
result = parse('number')
|
105
|
-
expect(result).to be_success
|
106
|
-
|
107
|
-
regexp = regexp_repr(result)
|
108
|
-
expect(regexp.to_str).to eq('\d')
|
109
|
-
end
|
110
|
-
|
111
|
-
it "should parse 'any character' syntax" do
|
112
|
-
result = parse('any character')
|
113
|
-
expect(result).to be_success
|
114
|
-
|
115
|
-
regexp = regexp_repr(result)
|
116
|
-
expect(regexp.to_str).to eq('\w')
|
117
|
-
end
|
118
|
-
|
119
|
-
it "should parse 'no character' syntax" do
|
120
|
-
result = parse('no character')
|
121
|
-
expect(result).to be_success
|
122
|
-
|
123
|
-
regexp = regexp_repr(result)
|
124
|
-
expect(regexp.to_str).to eq('\W')
|
125
|
-
end
|
126
|
-
|
127
|
-
it "should parse 'whitespace' syntax" do
|
128
|
-
result = parse('whitespace')
|
129
|
-
expect(result).to be_success
|
130
|
-
|
131
|
-
regexp = regexp_repr(result)
|
132
|
-
expect(regexp.to_str).to eq('\s')
|
133
|
-
end
|
134
|
-
|
135
|
-
it "should parse 'no whitespace' syntax" do
|
136
|
-
result = parse('no whitespace')
|
137
|
-
expect(result).to be_success
|
138
|
-
|
139
|
-
regexp = regexp_repr(result)
|
140
|
-
expect(regexp.to_str).to eq('\S')
|
141
|
-
end
|
142
|
-
|
143
|
-
it "should parse 'anything' syntax" do
|
144
|
-
result = parse('anything')
|
145
|
-
expect(result).to be_success
|
146
|
-
|
147
|
-
regexp = regexp_repr(result)
|
148
|
-
expect(regexp.to_str).to eq('.')
|
149
|
-
end
|
150
|
-
|
151
|
-
it "should parse 'one of' syntax" do
|
152
|
-
result = parse('one of "._%+-"')
|
153
|
-
expect(result).to be_success
|
154
|
-
|
155
|
-
regexp = regexp_repr(result)
|
156
|
-
# Remark: reference implementation less readable
|
157
|
-
# (escapes more characters than required)
|
158
|
-
expect(regexp.to_str).to eq('[._%+\-]')
|
159
|
-
end
|
160
|
-
end # context
|
161
|
-
|
162
|
-
context 'Parsing special character declarations:' do
|
163
|
-
it "should parse 'tab' syntax" do
|
164
|
-
result = parse('tab')
|
165
|
-
expect(result).to be_success
|
166
|
-
|
167
|
-
regexp = regexp_repr(result)
|
168
|
-
expect(regexp.to_str).to eq('\t')
|
169
|
-
end
|
170
|
-
|
171
|
-
it "should parse 'backslash' syntax" do
|
172
|
-
result = parse('backslash')
|
173
|
-
expect(result).to be_success
|
174
|
-
|
175
|
-
regexp = regexp_repr(result)
|
176
|
-
expect(regexp.to_str).to eq('\\\\')
|
177
|
-
end
|
178
|
-
|
179
|
-
it "should parse 'new line' syntax" do
|
180
|
-
result = parse('new line')
|
181
|
-
expect(result).to be_success
|
182
|
-
|
183
|
-
regexp = regexp_repr(result)
|
184
|
-
expect(regexp.to_str).to eq('\n')
|
185
|
-
end
|
186
|
-
end # context
|
187
|
-
|
188
|
-
context 'Parsing alternations:' do
|
189
|
-
it "should parse 'any of' syntax" do
|
190
|
-
source = 'any of (any character, one of "._%-+")'
|
191
|
-
result = parse(source)
|
192
|
-
expect(result).to be_success
|
193
|
-
|
194
|
-
regexp = regexp_repr(result)
|
195
|
-
expect(regexp.to_str).to eq('(?:\w|[._%\-+])')
|
196
|
-
end
|
197
|
-
end # context
|
198
|
-
|
199
|
-
context 'Parsing concatenation:' do
|
200
|
-
it 'should reject dangling comma' do
|
201
|
-
source = 'literally "a",'
|
202
|
-
result = parse(source)
|
203
|
-
expect(result).not_to be_success
|
204
|
-
message_prefix = /Premature end of input after ','/
|
205
|
-
expect(result.failure_reason.message).to match(message_prefix)
|
206
|
-
end
|
207
|
-
|
208
|
-
it 'should parse concatenation' do
|
209
|
-
result = parse('any of (literally "sample", (digit once or more))')
|
210
|
-
expect(result).to be_success
|
211
|
-
|
212
|
-
regexp = regexp_repr(result)
|
213
|
-
expect(regexp.to_str).to eq('(?:sample|(?:\d+))')
|
214
|
-
end
|
215
|
-
|
216
|
-
it 'should parse a long sequence of patterns' do
|
217
|
-
source = <<-ENDS
|
218
|
-
any of (any character, one of "._%-+") once or more,
|
219
|
-
literally "@",
|
220
|
-
any of (digit, letter, one of ".-") once or more,
|
221
|
-
literally ".",
|
222
|
-
letter at least 2 times
|
223
|
-
ENDS
|
224
|
-
|
225
|
-
result = parse(source)
|
226
|
-
expect(result).to be_success
|
227
|
-
|
228
|
-
regexp = regexp_repr(result)
|
229
|
-
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
230
|
-
expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
|
231
|
-
expect(regexp.to_str).to eq(expectation)
|
232
|
-
end
|
233
|
-
end # context
|
234
|
-
|
235
|
-
context 'Parsing quantifiers:' do
|
236
|
-
let(:prefix) { 'letter from p to t ' }
|
237
|
-
|
238
|
-
it "should parse 'once' syntax" do
|
239
|
-
result = parse(prefix + 'once')
|
240
|
-
expect(result).to be_success
|
241
|
-
|
242
|
-
regexp = regexp_repr(result)
|
243
|
-
expect(regexp.to_str).to eq('[p-t]{1}')
|
244
|
-
end
|
245
|
-
|
246
|
-
it "should parse 'twice' syntax" do
|
247
|
-
result = parse('digit twice')
|
248
|
-
expect(result).to be_success
|
249
|
-
|
250
|
-
regexp = regexp_repr(result)
|
251
|
-
expect(regexp.to_str).to eq('\d{2}')
|
252
|
-
end
|
253
|
-
|
254
|
-
it "should parse 'optional' syntax" do
|
255
|
-
result = parse('anything optional')
|
256
|
-
expect(result).to be_success
|
257
|
-
|
258
|
-
regexp = regexp_repr(result)
|
259
|
-
expect(regexp.to_str).to eq('.?')
|
260
|
-
end
|
261
|
-
|
262
|
-
it "should parse 'exactly ... times' syntax" do
|
263
|
-
result = parse('letter from a to f exactly 4 times')
|
264
|
-
expect(result).to be_success
|
265
|
-
|
266
|
-
regexp = regexp_repr(result)
|
267
|
-
expect(regexp.to_str).to eq('[a-f]{4}')
|
268
|
-
end
|
269
|
-
|
270
|
-
it "should parse 'between ... and ... times' syntax" do
|
271
|
-
result = parse(prefix + 'between 2 and 4 times')
|
272
|
-
expect(result).to be_success
|
273
|
-
|
274
|
-
# Dropping 'times' keyword is shorter syntax
|
275
|
-
expect(parse(prefix + 'between 2 and 4')).to be_success
|
276
|
-
|
277
|
-
regexp = regexp_repr(result)
|
278
|
-
expect(regexp.to_str).to eq('[p-t]{2,4}')
|
279
|
-
end
|
280
|
-
|
281
|
-
it "should parse 'once or more' syntax" do
|
282
|
-
result = parse(prefix + 'once or more')
|
283
|
-
expect(result).to be_success
|
284
|
-
|
285
|
-
regexp = regexp_repr(result)
|
286
|
-
expect(regexp.to_str).to eq('[p-t]+')
|
287
|
-
end
|
288
|
-
|
289
|
-
it "should parse 'never or more' syntax" do
|
290
|
-
result = parse(prefix + 'never or more')
|
291
|
-
expect(result).to be_success
|
292
|
-
|
293
|
-
regexp = regexp_repr(result)
|
294
|
-
expect(regexp.to_str).to eq('[p-t]*')
|
295
|
-
end
|
296
|
-
|
297
|
-
it "should parse 'at least ... times' syntax" do
|
298
|
-
result = parse(prefix + 'at least 10 times')
|
299
|
-
expect(result).to be_success
|
300
|
-
|
301
|
-
regexp = regexp_repr(result)
|
302
|
-
expect(regexp.to_str).to eq('[p-t]{10,}')
|
303
|
-
end
|
304
|
-
end # context
|
305
|
-
|
306
|
-
context 'Parsing lookaround:' do
|
307
|
-
it 'should parse positive lookahead' do
|
308
|
-
result = parse('letter if followed by (anything once or more, digit)')
|
309
|
-
expect(result).to be_success
|
310
|
-
|
311
|
-
regexp = regexp_repr(result)
|
312
|
-
expect(regexp.to_str).to eq('[a-z](?=(?:.+\d))')
|
313
|
-
end
|
314
|
-
|
315
|
-
it 'should parse negative lookahead' do
|
316
|
-
result = parse('letter if not followed by (anything once or more, digit)')
|
317
|
-
expect(result).to be_success
|
318
|
-
|
319
|
-
regexp = regexp_repr(result)
|
320
|
-
expect(regexp.to_str).to eq('[a-z](?!(?:.+\d))')
|
321
|
-
end
|
322
|
-
|
323
|
-
it 'should parse positive lookbehind' do
|
324
|
-
result = parse('literally "bar" if already had literally "foo"')
|
325
|
-
expect(result).to be_success
|
326
|
-
|
327
|
-
regexp = regexp_repr(result)
|
328
|
-
expect(regexp.to_str).to eq('bar(?<=foo)')
|
329
|
-
end
|
330
|
-
|
331
|
-
it 'should parse negative lookbehind' do
|
332
|
-
result = parse('literally "bar" if not already had literally "foo"')
|
333
|
-
expect(result).to be_success
|
334
|
-
|
335
|
-
regexp = regexp_repr(result)
|
336
|
-
expect(regexp.to_str).to eq('bar(?<!foo)')
|
337
|
-
end
|
338
|
-
end # context
|
339
|
-
|
340
|
-
context 'Parsing capturing group:' do
|
341
|
-
it 'should parse simple anonymous capturing group' do
|
342
|
-
result = parse('capture(literally "sample")')
|
343
|
-
expect(result).to be_success
|
344
|
-
|
345
|
-
regexp = regexp_repr(result)
|
346
|
-
expect(regexp.to_str).to eq('(sample)')
|
347
|
-
end
|
348
|
-
|
349
|
-
it 'should parse complex anonymous capturing group' do
|
350
|
-
source = 'capture(any of (literally "sample", (digit once or more)))'
|
351
|
-
result = parse(source)
|
352
|
-
expect(result).to be_success
|
353
|
-
|
354
|
-
regexp = regexp_repr(result)
|
355
|
-
expect(regexp.to_str).to eq('((?:sample|(?:\d+)))')
|
356
|
-
end
|
357
|
-
|
358
|
-
it 'should parse simple anonymous until capturing group' do
|
359
|
-
result = parse('capture anything once or more until literally "!"')
|
360
|
-
expect(result).to be_success
|
361
|
-
|
362
|
-
regexp = regexp_repr(result)
|
363
|
-
expect(regexp.to_str).to eq('(.+)!')
|
364
|
-
end
|
365
|
-
|
366
|
-
it 'should parse complex named capturing group' do
|
367
|
-
source = <<-END_SRL
|
368
|
-
capture(any of (literally "sample", (digit once or more)))
|
369
|
-
as "foo"
|
370
|
-
END_SRL
|
371
|
-
result = parse(source)
|
372
|
-
expect(result).to be_success
|
373
|
-
|
374
|
-
regexp = regexp_repr(result)
|
375
|
-
expect(regexp.to_str).to eq('(?<foo>(?:sample|(?:\d+)))')
|
376
|
-
end
|
377
|
-
|
378
|
-
it 'should parse a sequence with named capturing groups' do
|
379
|
-
source = <<-ENDS
|
380
|
-
capture (anything once or more) as "first",
|
381
|
-
literally " - ",
|
382
|
-
capture literally "second part" as "second"
|
383
|
-
ENDS
|
384
|
-
result = parse(source)
|
385
|
-
expect(result).to be_success
|
386
|
-
|
387
|
-
regexp = regexp_repr(result)
|
388
|
-
expect(regexp.to_str).to eq('(?<first>.+) - (?<second>second part)')
|
389
|
-
end
|
390
|
-
|
391
|
-
it 'should parse complex named until capturing group' do
|
392
|
-
source = 'capture (anything once or more) as "foo" until literally "m"'
|
393
|
-
result = parse(source)
|
394
|
-
expect(result).to be_success
|
395
|
-
|
396
|
-
regexp = regexp_repr(result)
|
397
|
-
expect(regexp.to_str).to eq('(?<foo>.+)m')
|
398
|
-
end
|
399
|
-
end # context
|
400
|
-
|
401
|
-
context 'Parsing anchors:' do
|
402
|
-
it 'should parse begin anchors' do
|
403
|
-
result = parse('starts with literally "match"')
|
404
|
-
expect(result).to be_success
|
405
|
-
|
406
|
-
regexp = regexp_repr(result)
|
407
|
-
expect(regexp.to_str).to eq('^match')
|
408
|
-
end
|
409
|
-
|
410
|
-
it 'should parse begin anchors (alternative syntax)' do
|
411
|
-
result = parse('begin with literally "match"')
|
412
|
-
expect(result).to be_success
|
413
|
-
|
414
|
-
regexp = regexp_repr(result)
|
415
|
-
expect(regexp.to_str).to eq('^match')
|
416
|
-
end
|
417
|
-
|
418
|
-
it 'should parse end anchors' do
|
419
|
-
result = parse('literally "match" must end')
|
420
|
-
expect(result).to be_success
|
421
|
-
|
422
|
-
regexp = regexp_repr(result)
|
423
|
-
expect(regexp.to_str).to eq('match$')
|
424
|
-
end
|
425
|
-
|
426
|
-
it 'should parse combination of begin and end anchors' do
|
427
|
-
result = parse('starts with literally "match" must end')
|
428
|
-
expect(result).to be_success
|
429
|
-
|
430
|
-
regexp = regexp_repr(result)
|
431
|
-
expect(regexp.to_str).to eq('^match$')
|
432
|
-
end
|
433
|
-
|
434
|
-
it 'should accept anchor with a sequence of patterns' do
|
435
|
-
source = <<-ENDS
|
436
|
-
begin with any of (digit, letter, one of ".-") once or more,
|
437
|
-
literally ".",
|
438
|
-
letter at least 2 times must end
|
439
|
-
ENDS
|
440
|
-
|
441
|
-
result = parse(source)
|
442
|
-
expect(result).to be_success
|
443
|
-
|
444
|
-
regexp = regexp_repr(result)
|
445
|
-
# SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
|
446
|
-
expect(regexp.to_str).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
|
447
|
-
end
|
448
|
-
end # context
|
449
|
-
end # describe
|
450
|
-
end # module
|
451
|
-
# End of file
|