regexp_parser 2.0.1 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Gemfile +1 -0
- data/Rakefile +2 -2
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/parser.rb +0 -2
- data/lib/regexp_parser/scanner.rb +612 -674
- data/lib/regexp_parser/scanner/scanner.rl +8 -8
- data/lib/regexp_parser/syntax.rb +4 -4
- data/lib/regexp_parser/syntax/any.rb +2 -2
- data/lib/regexp_parser/syntax/base.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +28 -36
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/quantifiers_spec.rb +1 -0
- data/spec/scanner/sets_spec.rb +15 -3
- data/spec/spec_helper.rb +1 -0
- metadata +2 -2
@@ -37,7 +37,7 @@
|
|
37
37
|
class_posix = ('[:' . '^'? . class_name_posix . ':]');
|
38
38
|
|
39
39
|
|
40
|
-
# these are not supported in ruby
|
40
|
+
# these are not supported in ruby at the moment
|
41
41
|
collating_sequence = '[.' . (alpha | [\-])+ . '.]';
|
42
42
|
character_equivalent = '[=' . alpha . '=]';
|
43
43
|
|
@@ -228,13 +228,13 @@
|
|
228
228
|
emit(type, class_name.to_sym, text)
|
229
229
|
};
|
230
230
|
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
236
|
-
|
237
|
-
};
|
231
|
+
# These are not supported in ruby at the moment. Enable them if they are.
|
232
|
+
# collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
233
|
+
# emit(:set, :collation, copy(data, ts, te))
|
234
|
+
# };
|
235
|
+
# character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
|
236
|
+
# emit(:set, :equivalent, copy(data, ts, te))
|
237
|
+
# };
|
238
238
|
|
239
239
|
meta_char > (set_meta, 1) {
|
240
240
|
emit(:literal, :literal, copy(data, ts, te))
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
class SyntaxError < StandardError; end
|
3
|
+
end
|
4
|
+
|
1
5
|
require File.expand_path('../syntax/tokens', __FILE__)
|
2
6
|
require File.expand_path('../syntax/base', __FILE__)
|
3
7
|
require File.expand_path('../syntax/any', __FILE__)
|
4
8
|
require File.expand_path('../syntax/version_lookup', __FILE__)
|
5
9
|
require File.expand_path('../syntax/versions', __FILE__)
|
6
|
-
|
7
|
-
module Regexp::Syntax
|
8
|
-
class SyntaxError < StandardError; end
|
9
|
-
end
|
@@ -3,13 +3,13 @@ module Regexp::Syntax
|
|
3
3
|
VERSION_REGEXP = /#{VERSION_FORMAT}/
|
4
4
|
VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
|
5
5
|
|
6
|
-
class InvalidVersionNameError < SyntaxError
|
6
|
+
class InvalidVersionNameError < Regexp::Syntax::SyntaxError
|
7
7
|
def initialize(name)
|
8
8
|
super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
class UnknownSyntaxNameError < SyntaxError
|
12
|
+
class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
|
13
13
|
def initialize(name)
|
14
14
|
super "Unknown syntax name '#{name}'."
|
15
15
|
end
|
@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
|
|
32
32
|
}
|
33
33
|
|
34
34
|
root.each_expression do |exp|
|
35
|
-
next unless expected_nesting_level = tests.delete(exp.to_s)
|
35
|
+
next unless (expected_nesting_level = tests.delete(exp.to_s))
|
36
36
|
expect(expected_nesting_level).to eq exp.nesting_level
|
37
37
|
end
|
38
38
|
|
@@ -1,58 +1,50 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('Expression#to_s') do
|
4
|
-
|
5
|
-
pattern
|
4
|
+
def parse_frozen(pattern, ruby_version = nil)
|
5
|
+
IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
|
6
|
+
end
|
6
7
|
|
7
|
-
|
8
|
+
def expect_round_trip(pattern, ruby_version = nil)
|
9
|
+
parsed = parse_frozen(pattern, ruby_version)
|
10
|
+
|
11
|
+
expect(parsed.to_s).to eql(pattern)
|
8
12
|
end
|
9
13
|
|
10
|
-
specify('
|
11
|
-
|
14
|
+
specify('literal alternation') do
|
15
|
+
expect_round_trip('abcd|ghij|klmn|pqur')
|
16
|
+
end
|
12
17
|
|
13
|
-
|
18
|
+
specify('quantified alternations') do
|
19
|
+
expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
|
14
20
|
end
|
15
21
|
|
16
22
|
specify('quantified sets') do
|
17
|
-
|
18
|
-
|
19
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
23
|
+
expect_round_trip('[abc]+|[^def]{3,6}')
|
20
24
|
end
|
21
25
|
|
22
26
|
specify('property sets') do
|
23
|
-
|
24
|
-
|
25
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
27
|
+
expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
|
26
28
|
end
|
27
29
|
|
28
30
|
specify('groups') do
|
29
|
-
|
30
|
-
|
31
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
31
|
+
expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('assertions') do
|
35
|
-
|
36
|
-
|
37
|
-
expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
|
35
|
+
expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
|
38
36
|
end
|
39
37
|
|
40
38
|
specify('comments') do
|
41
|
-
|
42
|
-
|
43
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
39
|
+
expect_round_trip('(?#start)a(?#middle)b(?#end)')
|
44
40
|
end
|
45
41
|
|
46
42
|
specify('options') do
|
47
|
-
|
48
|
-
|
49
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
43
|
+
expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
|
50
44
|
end
|
51
45
|
|
52
46
|
specify('url') do
|
53
|
-
|
54
|
-
|
55
|
-
expect(RP.parse(pattern).to_s).to eq pattern
|
47
|
+
expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
|
56
48
|
end
|
57
49
|
|
58
50
|
specify('multiline source') do
|
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
|
|
64
56
|
\z
|
65
57
|
/x
|
66
58
|
|
67
|
-
expect(
|
59
|
+
expect(parse_frozen(multiline).to_s).to eql(multiline.source)
|
68
60
|
end
|
69
61
|
|
70
62
|
specify('multiline #to_s') do
|
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
|
|
76
68
|
\z
|
77
69
|
/x
|
78
70
|
|
79
|
-
|
71
|
+
expect_round_trip(multiline.to_s)
|
80
72
|
end
|
81
73
|
|
82
74
|
# Free spacing expressions that use spaces between quantifiers and their
|
@@ -93,24 +85,24 @@ RSpec.describe('Expression#to_s') do
|
|
93
85
|
/x
|
94
86
|
|
95
87
|
str = 'bbbcged'
|
96
|
-
root =
|
88
|
+
root = parse_frozen(multiline)
|
97
89
|
|
98
|
-
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to
|
90
|
+
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
|
99
91
|
end
|
100
92
|
|
101
93
|
# special case: implicit groups used for chained quantifiers produce no parens
|
102
94
|
specify 'chained quantifiers #to_s' do
|
103
95
|
pattern = /a+{1}{2}/
|
104
|
-
root =
|
105
|
-
expect(root.to_s).to
|
96
|
+
root = parse_frozen(pattern)
|
97
|
+
expect(root.to_s).to eql('a+{1}{2}')
|
106
98
|
end
|
107
99
|
|
108
100
|
# regression test for https://github.com/ammar/regexp_parser/issues/74
|
109
101
|
specify('non-ascii comment') do
|
110
102
|
pattern = '(?x) 😋 # 😋'
|
111
103
|
root = RP.parse(pattern)
|
112
|
-
expect(root.last).to be_a
|
113
|
-
expect(root.last.to_s).to
|
114
|
-
expect(root.to_s).to
|
104
|
+
expect(root.last).to be_a(Regexp::Expression::Comment)
|
105
|
+
expect(root.last.to_s).to eql('# 😋')
|
106
|
+
expect(root.to_s).to eql(pattern)
|
115
107
|
end
|
116
108
|
end
|
data/spec/parser/errors_spec.rb
CHANGED
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
|
|
9
9
|
.to raise_error(Regexp::Parser::UnknownTokenTypeError)
|
10
10
|
end
|
11
11
|
|
12
|
-
RSpec.shared_examples 'UnknownTokenError' do |type
|
12
|
+
RSpec.shared_examples 'UnknownTokenError' do |type|
|
13
13
|
it "raises for unkown tokens of type #{type}" do
|
14
14
|
expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
|
15
15
|
.to raise_error(Regexp::Parser::UnknownTokenError)
|
data/spec/scanner/sets_spec.rb
CHANGED
@@ -61,9 +61,6 @@ RSpec.describe('Set scanning') do
|
|
61
61
|
include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
|
62
62
|
include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
|
63
63
|
|
64
|
-
include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
|
65
|
-
include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
|
66
|
-
|
67
64
|
include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
|
68
65
|
include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
|
69
66
|
include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
|
@@ -88,6 +85,21 @@ RSpec.describe('Set scanning') do
|
|
88
85
|
8 => [:set, :range, '-', 9, 10],
|
89
86
|
10=> [:set, :close, ']', 11, 12]
|
90
87
|
|
88
|
+
# Collations/collating sequences and character equivalents are not enabled
|
89
|
+
# in Ruby at the moment. If they ever are, enable them in the scanner,
|
90
|
+
# add them to a new syntax version, and handle them in the parser. Until then,
|
91
|
+
# expect them to be scanned as regular subsets containing literals.
|
92
|
+
# include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
|
93
|
+
# include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
|
94
|
+
include_examples 'scan', '[a[.a-b.]c]',
|
95
|
+
2 => [:set, :open, '[', 2, 3],
|
96
|
+
3 => [:literal, :literal, '.', 3, 4],
|
97
|
+
4 => [:literal, :literal, 'a', 4, 5]
|
98
|
+
include_examples 'scan', '[a[=e=]c]',
|
99
|
+
2 => [:set, :open, '[', 2, 3],
|
100
|
+
3 => [:literal, :literal, '=', 3, 4],
|
101
|
+
4 => [:literal, :literal, 'e', 4, 5]
|
102
|
+
|
91
103
|
# multi-byte characters should not affect indices
|
92
104
|
include_examples 'scan', /[れます]/,
|
93
105
|
0 => [:set, :open, '[', 0, 1],
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-12-
|
11
|
+
date: 2020-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|