srl_ruby 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/regex/non_capturing_group.rb +5 -3
- data/lib/srl_ruby/ast_builder.rb +25 -0
- data/lib/srl_ruby/version.rb +1 -1
- data/spec/acceptance/srl_test_suite_spec.rb +48 -11
- data/spec/srl_ruby_spec.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17b59ca17f397a24c01b84bd9d1a4a67743e2f73
|
4
|
+
data.tar.gz: 8b6b397b9d5e95bf1f5a6f6b5cdd866d7a8e966d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b7e2d0a99199a431eb2c381f566171161e7c7aaa6098211d9740e3f4bbc86568aafa60d6f41a5e334a35c81cddffc2260e11c1a6a8392e39f8095f7cba05571
|
7
|
+
data.tar.gz: 6d7cd5fe49e6277558f2cfaa2601ee515cb14556fc5d8c4b831d853f4f77f52301758166618b0cf3e7f5ae6cf9cab957052ad7cb0f5f53a323726c4bb307824d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## [0.3.0] - 2018-04-04
|
2
|
+
Version bump: SrlRuby passes the complete official SRL test suite!
|
3
|
+
### Changed
|
4
|
+
- File `acceptance/srl_test_suite_spec.rb`. 15 test files from the official test suite are passing.
|
5
|
+
|
6
|
+
### Fixed
|
7
|
+
- Class `SrlRuby::ASTBuilder`: fixed the capture...until semantics.
|
8
|
+
If the last captured subexpression is a repetition, then it is made lazy (instead of greedy).
|
9
|
+
|
10
|
+
|
1
11
|
## [0.2.6] - 2018-04-03
|
2
12
|
SrlRuby passes 13 tests out of 15 from standard SRL test suite.
|
3
13
|
### Changed
|
data/lib/regex/non_capturing_group.rb
CHANGED
@@ -7,10 +7,12 @@ module Regex # This module is used as a namespace
|
|
7
7
|
# of sub-expressions
|
8
8
|
class NonCapturingGroup < MonadicExpression
|
9
9
|
# Constructor.
|
10
|
-
# [
|
10
|
+
# [aChild] A sub-expression to match. When successful
|
11
11
|
# the matching text is assigned to the capture variable.
|
12
|
-
def initialize(
|
13
|
-
|
12
|
+
def initialize(aChild)
|
13
|
+
# If necessary get rid of nested non-capturing groups
|
14
|
+
effective_child = aChild.kind_of?(self.class) ? aChild.child : aChild
|
15
|
+
super(effective_child)
|
14
16
|
end
|
15
17
|
|
16
18
|
protected
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -367,9 +367,31 @@ module SrlRuby
|
|
367
367
|
return Regex::CapturingGroup.new(theChildren[1])
|
368
368
|
end
|
369
369
|
|
370
|
+
# If the rightmost (sub)expression is a repetition, then make it lazy
|
371
|
+
def make_last_repetition_lazy(anExpr)
|
372
|
+
sub_expr = anExpr
|
373
|
+
loop do
|
374
|
+
if sub_expr.is_a?(Regex::Repetition)
|
375
|
+
# Make repetition lazy
|
376
|
+
cardinality = sub_expr.multiplicity
|
377
|
+
cardinality.instance_variable_set(:@policy, :lazy)
|
378
|
+
break
|
379
|
+
elsif sub_expr.kind_of?(Regex::PolyadicExpression)
|
380
|
+
sub_expr = sub_expr.children.last
|
381
|
+
elsif sub_expr.kind_of?(Regex::MonadicExpression)
|
382
|
+
sub_expr = sub_expr.child
|
383
|
+
elsif sub_expr.kind_of?(Regex::AtomicExpression)
|
384
|
+
break
|
385
|
+
end
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
370
389
|
# rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as
|
371
390
|
# 'capture_until'
|
372
391
|
def reduce_capture_until(_production, _range, _tokens, theChildren)
|
392
|
+
# Until semantic requires that the last pattern in capture to be lazy
|
393
|
+
make_last_repetition_lazy(theChildren[1])
|
394
|
+
|
373
395
|
group = Regex::CapturingGroup.new(theChildren[1])
|
374
396
|
return Regex::Concatenation.new(group, theChildren[3])
|
375
397
|
end
|
@@ -384,6 +406,9 @@ module SrlRuby
|
|
384
406
|
# rule('capturing_group' => %w[CAPTURE assertable AS var_name
|
385
407
|
# UNTIL assertable]).as 'named_capture_until'
|
386
408
|
def reduce_named_capture_until(_production, _range, _tokens, theChildren)
|
409
|
+
# Until semantic requires that the last pattern in capture to be lazy
|
410
|
+
make_last_repetition_lazy(theChildren[1])
|
411
|
+
|
387
412
|
name = theChildren[3].token.lexeme.dup
|
388
413
|
group = Regex::CapturingGroup.new(theChildren[1], name)
|
389
414
|
return Regex::Concatenation.new(group, theChildren[5])
|
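data/lib/srl_ruby/version.rb
CHANGED
[hunk missing from this extract; per the file list above: +1 -1, presumably the version string 0.2.6 → 0.3.0]
data/spec/acceptance/srl_test_suite_spec.rb
CHANGED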
@@ -10,7 +10,7 @@ require_relative '../../lib/srl_ruby'
|
|
10
10
|
# | word.rule | '(word)' |
|
11
11
|
|
12
12
|
|
13
|
-
RSpec.describe
|
13
|
+
RSpec.describe SrlRuby do
|
14
14
|
def rule_path
|
15
15
|
__FILE__.sub(/spec\/.+$/, 'srl_test/Test-Rules/')
|
16
16
|
end
|
@@ -19,8 +19,12 @@ RSpec.describe Acceptance do
|
|
19
19
|
return Acceptance::RuleFileParser.load_file(rule_path + aFilename)
|
20
20
|
end
|
21
21
|
|
22
|
+
#
|
23
|
+
# CaptureTest = Struct.new(:test_string, :expectations)
|
24
|
+
|
22
25
|
def test_rule_file(aRuleFileRepr)
|
23
26
|
regex = SrlRuby::parse(aRuleFileRepr.srl.value)
|
27
|
+
puts regex.source
|
24
28
|
expect(regex).to be_kind_of(Regexp)
|
25
29
|
|
26
30
|
aRuleFileRepr.match_tests.each do |test|
|
@@ -29,16 +33,39 @@ RSpec.describe Acceptance do
|
|
29
33
|
aRuleFileRepr.no_match_tests.each do |test|
|
30
34
|
expect(regex.match(test.test_string.value)).to be_nil
|
31
35
|
end
|
32
|
-
aRuleFileRepr.capture_tests.each do |
|
33
|
-
|
34
|
-
expect(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
expect(matching.captures[name_index]).to eq(captured)
|
36
|
+
aRuleFileRepr.capture_tests.each do |cp_test|
|
37
|
+
test_string = cp_test.test_string.value
|
38
|
+
expect(test_string).to match(regex)
|
39
|
+
if regex.names.empty?
|
40
|
+
indices = cp_test.expectations.map { |exp| exp.result_index.value.to_s }
|
41
|
+
actual_names = indices.uniq.sort
|
42
|
+
else
|
43
|
+
actual_names = regex.names
|
41
44
|
end
|
45
|
+
|
46
|
+
# CaptureExpectation = Struct.new(:result_index, :var_name, :captured_text)
|
47
|
+
# Compare actual vs. expected capture names
|
48
|
+
cp_test.expectations.each do |expec|
|
49
|
+
expected_name = expec.var_name.value.to_s
|
50
|
+
unless actual_names.empty?
|
51
|
+
expect(actual_names).to be_include(expected_name)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
scan_results = test_string.scan(regex)
|
56
|
+
actual_captures = scan_results.map do |capture_tuples|
|
57
|
+
actual_names.zip(capture_tuples).to_h
|
58
|
+
end
|
59
|
+
|
60
|
+
# Compare actual vs. expected captured texts
|
61
|
+
cp_test.expectations.each do |expec|
|
62
|
+
index = expec.result_index.value
|
63
|
+
var_name = expec.var_name.value.to_s
|
64
|
+
expected_capture = expec.captured_text.value
|
65
|
+
names2val = actual_captures[index]
|
66
|
+
actual = names2val[var_name].nil? ? "" : names2val[var_name]
|
67
|
+
expect(actual).to eq(expected_capture )
|
68
|
+
end
|
42
69
|
end
|
43
70
|
end
|
44
71
|
|
@@ -76,6 +103,11 @@ RSpec.describe Acceptance do
|
|
76
103
|
rule_file_repr = load_file('none_of.rule')
|
77
104
|
test_rule_file(rule_file_repr)
|
78
105
|
end
|
106
|
+
|
107
|
+
it 'should support negative character class' do
|
108
|
+
rule_file_repr = load_file('sample_capture.rule')
|
109
|
+
test_rule_file(rule_file_repr)
|
110
|
+
end
|
79
111
|
|
80
112
|
it 'should match a tab' do
|
81
113
|
rule_file_repr = load_file('tab.rule')
|
@@ -87,6 +119,11 @@ RSpec.describe Acceptance do
|
|
87
119
|
test_rule_file(rule_file_repr)
|
88
120
|
end
|
89
121
|
|
122
|
+
it 'should match mail address' do
|
123
|
+
rule_file_repr = load_file('website_example_email_capture.rule')
|
124
|
+
test_rule_file(rule_file_repr)
|
125
|
+
end
|
126
|
+
|
90
127
|
it 'should support lookahead' do
|
91
128
|
rule_file_repr = load_file('website_example_lookahead.rule')
|
92
129
|
test_rule_file(rule_file_repr)
|
@@ -106,4 +143,4 @@ RSpec.describe Acceptance do
|
|
106
143
|
rule_file_repr = load_file('word.rule')
|
107
144
|
test_rule_file(rule_file_repr)
|
108
145
|
end
|
109
|
-
end
|
146
|
+
end # describe
|
data/spec/srl_ruby_spec.rb
CHANGED
@@ -304,7 +304,7 @@ END_SRL
|
|
304
304
|
|
305
305
|
it 'should parse simple anonymous until capturing group' do
|
306
306
|
regexp = SrlRuby.parse('capture anything once or more until literally "!"')
|
307
|
-
expect(regexp.source).to eq('(.+)!')
|
307
|
+
expect(regexp.source).to eq('(.+?)!')
|
308
308
|
end
|
309
309
|
|
310
310
|
it 'should parse unquoted named capturing group' do
|
@@ -335,7 +335,7 @@ END_SRL
|
|
335
335
|
it 'should parse complex named until capturing group' do
|
336
336
|
source = 'capture (anything once or more) as "foo" until literally "m"'
|
337
337
|
regexp = SrlRuby.parse(source)
|
338
|
-
expect(regexp.source).to eq('(?<foo>.+)m')
|
338
|
+
expect(regexp.source).to eq('(?<foo>.+?)m')
|
339
339
|
end
|
340
340
|
end # context
|
341
341
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|