srl_ruby 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/lib/regex/non_capturing_group.rb +5 -3
- data/lib/srl_ruby/ast_builder.rb +25 -0
- data/lib/srl_ruby/version.rb +1 -1
- data/spec/acceptance/srl_test_suite_spec.rb +48 -11
- data/spec/srl_ruby_spec.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17b59ca17f397a24c01b84bd9d1a4a67743e2f73
|
4
|
+
data.tar.gz: 8b6b397b9d5e95bf1f5a6f6b5cdd866d7a8e966d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b7e2d0a99199a431eb2c381f566171161e7c7aaa6098211d9740e3f4bbc86568aafa60d6f41a5e334a35c81cddffc2260e11c1a6a8392e39f8095f7cba05571
|
7
|
+
data.tar.gz: 6d7cd5fe49e6277558f2cfaa2601ee515cb14556fc5d8c4b831d853f4f77f52301758166618b0cf3e7f5ae6cf9cab957052ad7cb0f5f53a323726c4bb307824d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
## [0.3.0] - 2018-04-04
|
2
|
+
Version bump: SrlRuby passes the complete official SRL test suite!
|
3
|
+
### Changed
|
4
|
+
- File `acceptance/srl_test_suite_spec.rb`. 15 test files from official test suite are passing.
|
5
|
+
|
6
|
+
### Fixed
|
7
|
+
- Class `SrlRuby::ASTBuilder` Fixing the capture...until semantic.
|
8
|
+
If last captured subexpression is a repetition, then it is made lazy (instead of greedy).
|
9
|
+
|
10
|
+
|
1
11
|
## [0.2.6] - 2018-04-03
|
2
12
|
SrlRuby passes 13 tests out of 15 from standard SRL test suite.
|
3
13
|
### Changed
|
data/lib/regex/non_capturing_group.rb
CHANGED
@@ -7,10 +7,12 @@ module Regex # This module is used as a namespace
|
|
7
7
|
# of sub-expressions
|
8
8
|
class NonCapturingGroup < MonadicExpression
|
9
9
|
# Constructor.
|
10
|
-
# [
|
10
|
+
# [aChild] A sub-expression to match. When successful
|
11
11
|
# the matching text is assigned to the capture variable.
|
12
|
-
def initialize(
|
13
|
-
|
12
|
+
def initialize(aChild)
|
13
|
+
# If necessary get rid of nested non-capturing groups
|
14
|
+
effective_child = aChild.kind_of?(self.class) ? aChild.child : aChild
|
15
|
+
super(effective_child)
|
14
16
|
end
|
15
17
|
|
16
18
|
protected
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -367,9 +367,31 @@ module SrlRuby
|
|
367
367
|
return Regex::CapturingGroup.new(theChildren[1])
|
368
368
|
end
|
369
369
|
|
370
|
+
# If the rightmost (sub)expression is a repetition, then make it lazy
|
371
|
+
def make_last_repetition_lazy(anExpr)
|
372
|
+
sub_expr = anExpr
|
373
|
+
loop do
|
374
|
+
if sub_expr.is_a?(Regex::Repetition)
|
375
|
+
# Make repetition lazy
|
376
|
+
cardinality = sub_expr.multiplicity
|
377
|
+
cardinality.instance_variable_set(:@policy, :lazy)
|
378
|
+
break
|
379
|
+
elsif sub_expr.kind_of?(Regex::PolyadicExpression)
|
380
|
+
sub_expr = sub_expr.children.last
|
381
|
+
elsif sub_expr.kind_of?(Regex::MonadicExpression)
|
382
|
+
sub_expr = sub_expr.child
|
383
|
+
elsif sub_expr.kind_of?(Regex::AtomicExpression)
|
384
|
+
break
|
385
|
+
end
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
370
389
|
# rule('capturing_group' => %w[CAPTURE assertable UNTIL assertable]).as
|
371
390
|
# 'capture_until'
|
372
391
|
def reduce_capture_until(_production, _range, _tokens, theChildren)
|
392
|
+
# Until semantic requires that the last pattern in capture to be lazy
|
393
|
+
make_last_repetition_lazy(theChildren[1])
|
394
|
+
|
373
395
|
group = Regex::CapturingGroup.new(theChildren[1])
|
374
396
|
return Regex::Concatenation.new(group, theChildren[3])
|
375
397
|
end
|
@@ -384,6 +406,9 @@ module SrlRuby
|
|
384
406
|
# rule('capturing_group' => %w[CAPTURE assertable AS var_name
|
385
407
|
# UNTIL assertable]).as 'named_capture_until'
|
386
408
|
def reduce_named_capture_until(_production, _range, _tokens, theChildren)
|
409
|
+
# Until semantic requires that the last pattern in capture to be lazy
|
410
|
+
make_last_repetition_lazy(theChildren[1])
|
411
|
+
|
387
412
|
name = theChildren[3].token.lexeme.dup
|
388
413
|
group = Regex::CapturingGroup.new(theChildren[1], name)
|
389
414
|
return Regex::Concatenation.new(group, theChildren[5])
|
data/lib/srl_ruby/version.rb
CHANGED
data/spec/acceptance/srl_test_suite_spec.rb
CHANGED
@@ -10,7 +10,7 @@ require_relative '../../lib/srl_ruby'
|
|
10
10
|
# | word.rule | '(word)' |
|
11
11
|
|
12
12
|
|
13
|
-
RSpec.describe
|
13
|
+
RSpec.describe SrlRuby do
|
14
14
|
def rule_path
|
15
15
|
__FILE__.sub(/spec\/.+$/, 'srl_test/Test-Rules/')
|
16
16
|
end
|
@@ -19,8 +19,12 @@ RSpec.describe Acceptance do
|
|
19
19
|
return Acceptance::RuleFileParser.load_file(rule_path + aFilename)
|
20
20
|
end
|
21
21
|
|
22
|
+
#
|
23
|
+
# CaptureTest = Struct.new(:test_string, :expectations)
|
24
|
+
|
22
25
|
def test_rule_file(aRuleFileRepr)
|
23
26
|
regex = SrlRuby::parse(aRuleFileRepr.srl.value)
|
27
|
+
puts regex.source
|
24
28
|
expect(regex).to be_kind_of(Regexp)
|
25
29
|
|
26
30
|
aRuleFileRepr.match_tests.each do |test|
|
@@ -29,16 +33,39 @@ RSpec.describe Acceptance do
|
|
29
33
|
aRuleFileRepr.no_match_tests.each do |test|
|
30
34
|
expect(regex.match(test.test_string.value)).to be_nil
|
31
35
|
end
|
32
|
-
aRuleFileRepr.capture_tests.each do |
|
33
|
-
|
34
|
-
expect(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
expect(matching.captures[name_index]).to eq(captured)
|
36
|
+
aRuleFileRepr.capture_tests.each do |cp_test|
|
37
|
+
test_string = cp_test.test_string.value
|
38
|
+
expect(test_string).to match(regex)
|
39
|
+
if regex.names.empty?
|
40
|
+
indices = cp_test.expectations.map { |exp| exp.result_index.value.to_s }
|
41
|
+
actual_names = indices.uniq.sort
|
42
|
+
else
|
43
|
+
actual_names = regex.names
|
41
44
|
end
|
45
|
+
|
46
|
+
# CaptureExpectation = Struct.new(:result_index, :var_name, :captured_text)
|
47
|
+
# Compare actual vs. expected capture names
|
48
|
+
cp_test.expectations.each do |expec|
|
49
|
+
expected_name = expec.var_name.value.to_s
|
50
|
+
unless actual_names.empty?
|
51
|
+
expect(actual_names).to be_include(expected_name)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
scan_results = test_string.scan(regex)
|
56
|
+
actual_captures = scan_results.map do |capture_tuples|
|
57
|
+
actual_names.zip(capture_tuples).to_h
|
58
|
+
end
|
59
|
+
|
60
|
+
# Compare actual vs. expected captured texts
|
61
|
+
cp_test.expectations.each do |expec|
|
62
|
+
index = expec.result_index.value
|
63
|
+
var_name = expec.var_name.value.to_s
|
64
|
+
expected_capture = expec.captured_text.value
|
65
|
+
names2val = actual_captures[index]
|
66
|
+
actual = names2val[var_name].nil? ? "" : names2val[var_name]
|
67
|
+
expect(actual).to eq(expected_capture )
|
68
|
+
end
|
42
69
|
end
|
43
70
|
end
|
44
71
|
|
@@ -76,6 +103,11 @@ RSpec.describe Acceptance do
|
|
76
103
|
rule_file_repr = load_file('none_of.rule')
|
77
104
|
test_rule_file(rule_file_repr)
|
78
105
|
end
|
106
|
+
|
107
|
+
it 'should support negative character class' do
|
108
|
+
rule_file_repr = load_file('sample_capture.rule')
|
109
|
+
test_rule_file(rule_file_repr)
|
110
|
+
end
|
79
111
|
|
80
112
|
it 'should match a tab' do
|
81
113
|
rule_file_repr = load_file('tab.rule')
|
@@ -87,6 +119,11 @@ RSpec.describe Acceptance do
|
|
87
119
|
test_rule_file(rule_file_repr)
|
88
120
|
end
|
89
121
|
|
122
|
+
it 'should match mail address' do
|
123
|
+
rule_file_repr = load_file('website_example_email_capture.rule')
|
124
|
+
test_rule_file(rule_file_repr)
|
125
|
+
end
|
126
|
+
|
90
127
|
it 'should support lookahead' do
|
91
128
|
rule_file_repr = load_file('website_example_lookahead.rule')
|
92
129
|
test_rule_file(rule_file_repr)
|
@@ -106,4 +143,4 @@ RSpec.describe Acceptance do
|
|
106
143
|
rule_file_repr = load_file('word.rule')
|
107
144
|
test_rule_file(rule_file_repr)
|
108
145
|
end
|
109
|
-
end
|
146
|
+
end # describe
|
data/spec/srl_ruby_spec.rb
CHANGED
@@ -304,7 +304,7 @@ END_SRL
|
|
304
304
|
|
305
305
|
it 'should parse simple anonymous until capturing group' do
|
306
306
|
regexp = SrlRuby.parse('capture anything once or more until literally "!"')
|
307
|
-
expect(regexp.source).to eq('(
|
307
|
+
expect(regexp.source).to eq('(.+?)!')
|
308
308
|
end
|
309
309
|
|
310
310
|
it 'should parse unquoted named capturing group' do
|
@@ -335,7 +335,7 @@ END_SRL
|
|
335
335
|
it 'should parse complex named until capturing group' do
|
336
336
|
source = 'capture (anything once or more) as "foo" until literally "m"'
|
337
337
|
regexp = SrlRuby.parse(source)
|
338
|
-
expect(regexp.source).to eq('(?<foo
|
338
|
+
expect(regexp.source).to eq('(?<foo>.+?)m')
|
339
339
|
end
|
340
340
|
end # context
|
341
341
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|