rley 0.6.01 → 0.6.02
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -1
- data/examples/NLP/engtagger.rb +58 -60
- data/lib/rley/constants.rb +1 -1
- metadata +2 -33
- data/examples/general/SRL/lib/ast_builder.rb +0 -382
- data/examples/general/SRL/lib/grammar.rb +0 -106
- data/examples/general/SRL/lib/regex/abstract_method.rb +0 -35
- data/examples/general/SRL/lib/regex/alternation.rb +0 -27
- data/examples/general/SRL/lib/regex/anchor.rb +0 -45
- data/examples/general/SRL/lib/regex/atomic_expression.rb +0 -16
- data/examples/general/SRL/lib/regex/capturing_group.rb +0 -51
- data/examples/general/SRL/lib/regex/char_class.rb +0 -38
- data/examples/general/SRL/lib/regex/char_range.rb +0 -51
- data/examples/general/SRL/lib/regex/char_shorthand.rb +0 -50
- data/examples/general/SRL/lib/regex/character.rb +0 -204
- data/examples/general/SRL/lib/regex/compound_expression.rb +0 -57
- data/examples/general/SRL/lib/regex/concatenation.rb +0 -29
- data/examples/general/SRL/lib/regex/expression.rb +0 -60
- data/examples/general/SRL/lib/regex/lookaround.rb +0 -50
- data/examples/general/SRL/lib/regex/match_option.rb +0 -34
- data/examples/general/SRL/lib/regex/monadic_expression.rb +0 -28
- data/examples/general/SRL/lib/regex/multiplicity.rb +0 -91
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +0 -27
- data/examples/general/SRL/lib/regex/polyadic_expression.rb +0 -60
- data/examples/general/SRL/lib/regex/quantifiable.rb +0 -22
- data/examples/general/SRL/lib/regex/repetition.rb +0 -29
- data/examples/general/SRL/lib/regex/wildcard.rb +0 -23
- data/examples/general/SRL/lib/regex_repr.rb +0 -13
- data/examples/general/SRL/lib/tokenizer.rb +0 -147
- data/examples/general/SRL/spec/integration_spec.rb +0 -448
- data/examples/general/SRL/spec/regex/character_spec.rb +0 -166
- data/examples/general/SRL/spec/regex/multiplicity_spec.rb +0 -79
- data/examples/general/SRL/spec/spec_helper.rb +0 -25
- data/examples/general/SRL/spec/tokenizer_spec.rb +0 -148
- data/examples/general/SRL/srl_demo.rb +0 -75
@@ -1,22 +0,0 @@
|
|
1
|
-
# File: quantifiable.rb
|
2
|
-
|
3
|
-
require_relative 'multiplicity'
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # Mixin giving an object an optional quantifier.
  # The quantifier is kept in the @quantifier instance variable of the
  # including object; this module never assigns it on its own.
  module Quantifiable
    # Reader and writer for the quantifier attached to the receiver.
    # The reader returns nil as long as no quantifier has been assigned.
    attr_accessor :quantifier

    # Tell whether a quantifier is attached to the receiver.
    # @return [Boolean] true when @quantifier is not nil, false otherwise
    def quantified?
      !@quantifier.nil?
    end
  end # module
end # module
|
21
|
-
|
22
|
-
# End of file
|
@@ -1,29 +0,0 @@
|
|
1
|
-
# File: repetition.rb
|
2
|
-
|
3
|
-
require_relative 'monadic_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # A unary matching operator: it associates one child expression
  # with a multiplicity.
  # It succeeds when the specified repetition of the child expression
  # succeeds to match the subject text in the same serial arrangement.
  class Repetition < MonadicExpression
    # The multiplicity object received by the constructor
    attr_reader :multiplicity

    # Constructor.
    # @param childExpressionToRepeat [Object] handed over, unchanged,
    #   to the superclass constructor
    # @param aMultiplicity [#to_str] how often the child may repeat;
    #   its to_str result is appended to the text representation
    def initialize(childExpressionToRepeat, aMultiplicity)
      super(childExpressionToRepeat)
      @multiplicity = aMultiplicity
    end

    protected

    # Conversion method re-definition.
    # Purpose: Return the text of the child(ren) immediately followed
    # by the text of the multiplicity.
    def text_repr
      all_child_text + multiplicity.to_str
    end
  end # class
end # module
|
28
|
-
|
29
|
-
# End of file
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# File: wildcard.rb
|
2
|
-
|
3
|
-
require_relative 'atomic_expression' # Access the superclass
|
4
|
-
|
5
|
-
module Regex # This module is used as a namespace
  # A wildcard matches any character (except for the newline).
  class Wildcard < AtomicExpression
    # Constructor. Takes no argument and delegates to the superclass
    # constructor without passing any argument.
    def initialize
      super()
    end

    protected

    # Conversion method re-definition.
    # Purpose: Return the String representation of the expression,
    # that is, a single dot.
    def text_repr
      '.'
    end
  end # class
end # module
|
22
|
-
|
23
|
-
# End of file
|
@@ -1,13 +0,0 @@
|
|
1
|
-
require_relative './regex/character'
|
2
|
-
require_relative './regex/char_range'
|
3
|
-
require_relative './regex/concatenation'
|
4
|
-
require_relative './regex/multiplicity'
|
5
|
-
require_relative './regex/repetition'
|
6
|
-
require_relative './regex/char_class'
|
7
|
-
require_relative './regex/char_shorthand'
|
8
|
-
require_relative './regex/wildcard'
|
9
|
-
require_relative './regex/alternation'
|
10
|
-
require_relative './regex/non_capturing_group'
|
11
|
-
require_relative './regex/anchor'
|
12
|
-
require_relative './regex/lookaround'
|
13
|
-
require_relative './regex/capturing_group'
|
@@ -1,147 +0,0 @@
|
|
1
|
-
# File: tokenizer.rb
|
2
|
-
# Tokenizer for SRL (Simple Regex Language)
|
3
|
-
require 'strscan'
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
module SRL
  # Tokenizer for SRL (Simple Regex Language) source text.
  # It recognizes:
  #   Keywords (words of two letters or more, matched case-insensitively)
  #   Integer literals (two digits or more) and single digit literals
  #   Single letter literals (a letter followed by whitespace or end of line)
  #   String literals (delimited by double or single quotes)
  #   Delimiters: parentheses '(' and ')'
  #   Separators: comma
  class Tokenizer
    # The StringScanner that walks over the source text
    attr_reader(:scanner)

    # Set to 1 by the constructor; the code in this class never updates it
    attr_reader(:lineno)

    # Never assigned by the code in this class (reads as nil)
    attr_reader(:line_start)

    # Mapping: single-character lexeme => name of its terminal symbol
    LEXEME2NAME = {
      '(' => 'LPAREN',
      ')' => 'RPAREN',
      ',' => 'COMMA'
    }.freeze

    # All the SRL keywords (in uppercase), each one mapped to itself
    KEYWORDS = %w[
      ALL
      ALREADY
      AND
      ANY
      ANYTHING
      AS
      AT
      BACKSLASH
      BEGIN
      BETWEEN
      BY
      CAPTURE
      CASE
      CHARACTER
      DIGIT
      END
      EXACTLY
      FOLLOWED
      FROM
      HAD
      IF
      INSENSITIVE
      LAZY
      LEAST
      LETTER
      LINE
      LITERALLY
      MORE
      MULTI
      MUST
      NEVER
      NEW
      NO
      NOT
      NUMBER
      OF
      ONCE
      ONE
      OPTIONAL
      OR
      STARTS
      TAB
      TIMES
      TO
      TWICE
      UNTIL
      UPPERCASE
      WHITESPACE
      WITH
    ].map { |word| [word, word] }.to_h.freeze

    private_constant :LEXEME2NAME, :KEYWORDS

    # Raised when the text at the scanning position matches no token pattern
    class ScanError < StandardError; end

    # @param source [String] the SRL text to tokenize
    def initialize(source)
      @scanner = StringScanner.new(source)
      @lineno = 1
    end

    # Scan the remaining source text completely.
    # @return [Array] the tokens, in the order they occur in the text
    # @raise [ScanError] when an unrecognized piece of text is met
    def tokens
      tok_sequence = []
      until @scanner.eos?
        token = _next_token
        tok_sequence << token unless token.nil?
      end

      tok_sequence
    end

    private

    # Skip leading whitespace then build the token found at the
    # scanning position.
    # @return [Object, nil] the token, or nil when only whitespace was left
    # @raise [ScanError] when no token pattern matches
    def _next_token
      skip_whitespaces
      curr_ch = scanner.peek(1)
      return nil if curr_ch.empty?

      if '(),'.include? curr_ch
        # Delimiters, separators => single character token
        build_token(LEXEME2NAME[curr_ch], scanner.getch)
      elsif (lexeme = scanner.scan(/[0-9]{2,}/))
        build_token('INTEGER', lexeme) # An integer has 2..* digits
      elsif (lexeme = scanner.scan(/[0-9]/))
        build_token('DIGIT_LIT', lexeme)
      elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
        # TODO: handle case unknown identifier (the lookup then yields nil)
        build_token(KEYWORDS[lexeme.upcase], lexeme)
      elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
        build_token('LETTER_LIT', lexeme)
      elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
        # The escaped-quote alternative comes first: otherwise the
        # backslash is consumed alone and the escape is never recognized.
        build_token('STRING_LIT', lexeme[1..-2]) # Drop both delimiters
      elsif (lexeme = scanner.scan(/'(?:\\'|[^'])*'/)) # Single quotes literal?
        build_token('STRING_LIT', lexeme[1..-2]) # Drop both delimiters
      else # Unknown token
        # The scanned excerpt already begins with the offending character,
        # so that character must not be prepended a second time.
        erroneous = scanner.scan(/.{1,20}/) || curr_ch
        raise ScanError, "Unknown token #{erroneous}"
      end
    end

    # Create a token object for the given lexeme.
    # @param aSymbolName [String, nil] name of the terminal symbol
    # @param aLexeme [String] the text of the token
    # @return [Rley::Lexical::Token]
    # @raise [StandardError] whatever the token constructor raised; the
    #   failing arguments are printed before the error is propagated
    def build_token(aSymbolName, aLexeme)
      Rley::Lexical::Token.new(aLexeme, aSymbolName)
    rescue StandardError
      puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
      raise # Propagate the rescued error itself
    end

    # Move the scanning position past spaces, tabs, form feeds and
    # line terminators.
    def skip_whitespaces
      scanner.scan(/[ \t\f\n\r]+/)
    end
  end # class
end # module
|
@@ -1,448 +0,0 @@
|
|
1
|
-
require_relative 'spec_helper' # Use the RSpec framework
|
2
|
-
require_relative '../lib/tokenizer'
|
3
|
-
require_relative '../lib/grammar'
|
4
|
-
require_relative '../lib/ast_builder'
|
5
|
-
|
6
|
-
describe 'Integration tests:' do
  # Tokenize the given SRL text and let the engine parse the tokens
  def parse(someSRL)
    @engine.parse(SRL::Tokenizer.new(someSRL).tokens)
  end

  # Let the engine convert the parse result, then return the root
  # of the resulting tree
  def regexp_repr(aResult)
    @engine.convert(aResult).root
  end

  # Parse the given SRL text, expect the parse to be successful and
  # return the text (to_str) of the root produced by the conversion
  def regexp_text(someSRL)
    outcome = parse(someSRL)
    expect(outcome).to be_success

    regexp_repr(outcome).to_str
  end

  before(:each) do
    @engine = Rley::Engine.new { |cfg| cfg.repr_builder = ASTBuilder }
    @engine.use_grammar(SRL::Grammar)
  end

  context 'Parsing character ranges:' do
    it "should parse 'letter from ... to ...' syntax" do
      expect(regexp_text('letter from a to f')).to eq('[a-f]')
    end

    it "should parse 'uppercase letter from ... to ...' syntax" do
      expect(regexp_text('UPPERCASE letter from A to F')).to eq('[A-F]')
    end

    it "should parse 'letter' syntax" do
      expect(regexp_text('letter')).to eq('[a-z]')
    end

    it "should parse 'uppercase letter' syntax" do
      expect(regexp_text('uppercase letter')).to eq('[A-Z]')
    end

    it "should parse 'digit from ... to ...' syntax" do
      expect(regexp_text('digit from 1 to 4')).to eq('[1-4]')
    end
  end # context

  context 'Parsing string literals:' do
    it 'should parse double quotes literal string' do
      expect(regexp_text('literally "hello"')).to eq('hello')
    end

    it 'should parse single quotes literal string' do
      expect(regexp_text("literally 'hello'")).to eq('hello')
    end

    it 'should escape special characters' do
      expect(regexp_text("literally '.'")).to eq('\.')
    end
  end

  context 'Parsing character classes:' do
    it "should parse 'digit' syntax" do
      expect(regexp_text('digit')).to eq('\d')
    end

    it "should parse 'number' syntax" do
      expect(regexp_text('number')).to eq('\d')
    end

    it "should parse 'any character' syntax" do
      expect(regexp_text('any character')).to eq('\w')
    end

    it "should parse 'no character' syntax" do
      expect(regexp_text('no character')).to eq('\W')
    end

    it "should parse 'whitespace' syntax" do
      expect(regexp_text('whitespace')).to eq('\s')
    end

    it "should parse 'no whitespace' syntax" do
      expect(regexp_text('no whitespace')).to eq('\S')
    end

    it "should parse 'anything' syntax" do
      expect(regexp_text('anything')).to eq('.')
    end

    it "should parse 'one of' syntax" do
      # Remark: reference implementation less readable
      # (escapes more characters than required)
      expect(regexp_text('one of "._%+-"')).to eq('[._%+\-]')
    end
  end # context

  context 'Parsing special character declarations:' do
    it "should parse 'tab' syntax" do
      expect(regexp_text('tab')).to eq('\t')
    end

    it "should parse 'backslash' syntax" do
      expect(regexp_text('backslash')).to eq('\\')
    end

    it "should parse 'new line' syntax" do
      expect(regexp_text('new line')).to eq('\n')
    end
  end # context

  context 'Parsing alternations:' do
    it "should parse 'any of' syntax" do
      srl_text = 'any of (any character, one of "._%-+")'
      expect(regexp_text(srl_text)).to eq('(?:\w|[._%\-+])')
    end
  end # context

  context 'Parsing concatenation:' do
    it 'should reject dangling comma' do
      outcome = parse('literally "a",')
      expect(outcome).not_to be_success

      expected_msg = /Premature end of input after ','/
      expect(outcome.failure_reason.message).to match(expected_msg)
    end

    it 'should parse concatenation' do
      srl_text = 'any of (literally "sample", (digit once or more))'
      expect(regexp_text(srl_text)).to eq('(?:sample|(?:\d+))')
    end

    it 'should parse a long sequence of patterns' do
      srl_text = <<-ENDS
      any of (any character, one of "._%-+") once or more,
      literally "@",
      any of (digit, letter, one of ".-") once or more,
      literally ".",
      letter at least 2 times
      ENDS

      # SRL: (?:\w|[\._%\-\+])+(?:@)(?:[0-9]|[a-z]|[\.\-])+(?:\.)[a-z]{2,}
      expectation = '(?:\w|[._%\-+])+@(?:\d|[a-z]|[.\-])+\.[a-z]{2,}'
      expect(regexp_text(srl_text)).to eq(expectation)
    end
  end # context

  context 'Parsing quantifiers:' do
    let(:prefix) { 'letter from p to t ' }

    it "should parse 'once' syntax" do
      expect(regexp_text(prefix + 'once')).to eq('[p-t]{1}')
    end

    it "should parse 'twice' syntax" do
      expect(regexp_text('digit twice')).to eq('\d{2}')
    end

    it "should parse 'optional' syntax" do
      expect(regexp_text('anything optional')).to eq('.?')
    end

    it "should parse 'exactly ... times' syntax" do
      expect(regexp_text('letter from a to f exactly 4 times')).to eq('[a-f]{4}')
    end

    it "should parse 'between ... and ... times' syntax" do
      outcome = parse(prefix + 'between 2 and 4 times')
      expect(outcome).to be_success

      # Dropping 'times' keyword is shorter syntax
      expect(parse(prefix + 'between 2 and 4')).to be_success

      expect(regexp_repr(outcome).to_str).to eq('[p-t]{2,4}')
    end

    it "should parse 'once or more' syntax" do
      expect(regexp_text(prefix + 'once or more')).to eq('[p-t]+')
    end

    it "should parse 'never or more' syntax" do
      expect(regexp_text(prefix + 'never or more')).to eq('[p-t]*')
    end

    it "should parse 'at least ... times' syntax" do
      expect(regexp_text(prefix + 'at least 10 times')).to eq('[p-t]{10,}')
    end
  end # context

  context 'Parsing lookaround:' do
    it 'should parse positive lookahead' do
      srl_text = 'letter if followed by (anything once or more, digit)'
      expect(regexp_text(srl_text)).to eq('[a-z](?=(?:.+\d))')
    end

    it 'should parse negative lookahead' do
      srl_text = 'letter if not followed by (anything once or more, digit)'
      expect(regexp_text(srl_text)).to eq('[a-z](?!(?:.+\d))')
    end

    it 'should parse positive lookbehind' do
      srl_text = 'literally "bar" if already had literally "foo"'
      expect(regexp_text(srl_text)).to eq('bar(?<=foo)')
    end

    it 'should parse negative lookbehind' do
      srl_text = 'literally "bar" if not already had literally "foo"'
      expect(regexp_text(srl_text)).to eq('bar(?<!foo)')
    end
  end # context

  context 'Parsing capturing group:' do
    it 'should parse simple anonymous capturing group' do
      expect(regexp_text('capture(literally "sample")')).to eq('(sample)')
    end

    it 'should parse complex anonymous capturing group' do
      srl_text = 'capture(any of (literally "sample", (digit once or more)))'
      expect(regexp_text(srl_text)).to eq('((?:sample|(?:\d+)))')
    end

    it 'should parse simple anonymous until capturing group' do
      srl_text = 'capture anything once or more until literally "!"'
      expect(regexp_text(srl_text)).to eq('(.+)!')
    end

    it 'should parse complex named capturing group' do
      srl_text = <<-END_SRL
      capture(any of (literally "sample", (digit once or more)))
      as "foo"
      END_SRL

      expect(regexp_text(srl_text)).to eq('(?<foo>(?:sample|(?:\d+)))')
    end

    it 'should parse a sequence with named capturing groups' do
      srl_text = <<-ENDS
      capture (anything once or more) as "first",
      literally " - ",
      capture literally "second part" as "second"
      ENDS

      expect(regexp_text(srl_text)).to eq('(?<first>.+) - (?<second>second part)')
    end

    it 'should parse complex named until capturing group' do
      srl_text = 'capture (anything once or more) as "foo" until literally "m"'
      expect(regexp_text(srl_text)).to eq('(?<foo>.+)m')
    end
  end # context

  context 'Parsing anchors:' do
    it 'should parse begin anchors' do
      expect(regexp_text('starts with literally "match"')).to eq('^match')
    end

    it 'should parse begin anchors (alternative syntax)' do
      expect(regexp_text('begin with literally "match"')).to eq('^match')
    end

    it 'should parse end anchors' do
      expect(regexp_text('literally "match" must end')).to eq('match$')
    end

    it 'should parse combination of begin and end anchors' do
      srl_text = 'starts with literally "match" must end'
      expect(regexp_text(srl_text)).to eq('^match$')
    end

    it 'should accept anchor with a sequence of patterns' do
      srl_text = <<-ENDS
      begin with any of (digit, letter, one of ".-") once or more,
      literally ".",
      letter at least 2 times must end
      ENDS

      expect(regexp_text(srl_text)).to eq('^(?:\d|[a-z]|[.\-])+\.[a-z]{2,}$')
    end
  end # context
end # describe
|