regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,172 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
require File.expand_path('../syntax/tokens', __FILE__)
|
3
|
+
|
4
|
+
# Root of the error hierarchy used by the syntax layer. Because it is declared
# inside the enclosing namespace it is a different class from Ruby's core
# ::SyntaxError.
class SyntaxError < StandardError
  # what - the message describing the problem.
  def initialize(what)
    super(what)
  end
end
|
9
|
+
|
10
|
+
# Raised for a syntax name that is not recognised.
class UnknownSyntaxNameError < SyntaxError
  # name - the syntax name that was requested.
  def initialize(name)
    message = "Unknown syntax name '#{name}'"
    super(message)
  end
end
|
15
|
+
|
16
|
+
# Raised when no specification file can be found for a syntax name.
class MissingSyntaxSpecError < SyntaxError
  # name - the syntax name whose specification file is missing.
  def initialize(name)
    message = "Missing syntax specification file for '#{name}'"
    super(message)
  end
end
|
21
|
+
|
22
|
+
# Raised when a syntax does not implement a given type/token pair. Declared
# inside the enclosing namespace, so it is distinct from Ruby's core
# ::NotImplementedError.
class NotImplementedError < SyntaxError
  # syntax - the syntax instance that was queried (its class name is reported).
  # type   - the token type that was checked.
  # token  - the token that was checked.
  def initialize(syntax, type, token)
    syntax_name = syntax.class.name
    super("#{syntax_name} does not implement: [#{type}:#{token}]")
  end
end
|
27
|
+
|
28
|
+
SYNTAX_SPEC_ROOT = File.expand_path('../syntax', __FILE__)
|
29
|
+
|
30
|
+
# Loads and instantiates the syntax specification class for the given syntax
# flavor name. The special names 'any' and '*' return an instance of
# Syntax::Any without loading a specification file. See below for more
# details.
#
# name - the syntax name, e.g. 'ruby/1.9' or 'ruby/1.8.7' (String or Symbol).
#
# Returns a new instance of the matching syntax class.
# Raises MissingSyntaxSpecError (via load) when no specification file exists
# for the name, and UnknownSyntaxNameError when a file was loaded but the
# name is not mapped to a class below.
def self.new(name)
  # Compare on the string form everywhere, so Symbol names behave like
  # String names (the 'any' check already did this).
  name = name.to_s

  return Regexp::Syntax::Any.new if ['*', 'any'].include?(name)

  self.load(name)

  case name
  when 'ruby/1.8.6' then Regexp::Syntax::Ruby::V186.new
  when 'ruby/1.8.7' then Regexp::Syntax::Ruby::V187.new

  # alias for the latest 1.8 implementation
  when 'ruby/1.8'   then Regexp::Syntax::Ruby::V18.new

  when 'ruby/1.9.1' then Regexp::Syntax::Ruby::V191.new
  when 'ruby/1.9.2' then Regexp::Syntax::Ruby::V192.new
  when 'ruby/1.9.3' then Regexp::Syntax::Ruby::V193.new

  # alias for the latest 1.9 implementation
  when 'ruby/1.9'   then Regexp::Syntax::Ruby::V19.new

  else
    # The original raised the undefined constant UnknownSyntaxError, which
    # surfaced as a NameError instead of the intended error.
    raise UnknownSyntaxNameError.new(name)
  end
end
|
57
|
+
|
58
|
+
# Checks if the named syntax has a specification class file, and requires
# it if it does. Downcases names, and adds the .rb extension if omitted.
#
# name - the syntax name, with or without a trailing '.rb'.
#
# Returns the result of require (true on first load, false afterwards).
# Raises MissingSyntaxSpecError when the specification file does not exist.
def self.load(name)
  spec = name.to_s.downcase

  # The original tested full[-1, 3], which is only the last character, so a
  # name that already ended in '.rb' had the extension appended a second time.
  spec = "#{spec}.rb" unless spec.end_with?('.rb')

  full = "#{SYNTAX_SPEC_ROOT}/#{spec}"

  raise MissingSyntaxSpecError.new(name) unless File.exist?(full)
  require full
end
|
67
|
+
|
68
|
+
# A lookup map of supported types and tokens in a given syntax
class Base
  # Starts with an empty map and registers the :literal type.
  def initialize
    @implements = {}

    implements :literal, [:literal]
  end

  # Returns the Hash that maps each type to its Array of implemented tokens.
  # A type emptied by #excludes stays in the Hash with a nil value.
  def implementation
    @implements
  end

  # Registers tokens as implemented for type, merging with (and de-duplicating
  # against) tokens already registered for it.
  #
  # type   - the token type.
  # tokens - an Array of tokens, or a single token (wrapped in an Array, the
  #          same way #excludes treats its argument).
  #
  # Returns the Array now stored for type.
  def implements(type, tokens)
    tokens = [tokens] unless tokens.is_a?(Array)

    known = @implements[type]
    @implements[type] = known ? (known + tokens).uniq : tokens
  end

  # Removes tokens from the set implemented for type. When tokens is nil, or
  # when nothing remains after the removal, the entry for type is set to nil.
  #
  # type   - the token type.
  # tokens - an Array of tokens, a single token, or nil for "all of them".
  #
  # Returns nil.
  def excludes(type, tokens)
    return nil unless @implements[type]

    if tokens
      tokens = [tokens] unless tokens.is_a?(Array)
      remaining = @implements[type] - tokens
      @implements[type] = remaining.empty? ? nil : remaining
    else
      @implements[type] = nil
    end

    nil
  end

  # Returns true if token is registered for type, false otherwise.
  def implements?(type, token)
    tokens = @implements[type]
    tokens ? tokens.include?(token) : false
  end

  # Same as #implements?. Written as a delegating method rather than an
  # alias: an alias is bound to this class's method body, so subclasses that
  # override #implements? would not be seen through it.
  def check?(type, token)
    implements?(type, token)
  end

  # Raises NotImplementedError unless token is registered for type.
  # Returns nil otherwise.
  def implements!(type, token)
    raise NotImplementedError.new(self, type, token) unless
      implements?(type, token)
  end

  # Same as #implements!; delegates for the same reason as #check?.
  def check!(type, token)
    implements!(type, token)
  end

  # Maps delimiter-specific token variants onto a common [type, token] pair.
  # Only :group and :backref tokens are rewritten; any other pair is returned
  # unchanged.
  def normalize(type, token)
    case type
    when :group
      normalize_group(type, token)
    when :backref
      normalize_backref(type, token)
    else
      [type, token]
    end
  end

  # Collapses :named_ab and :named_sq into [:group, :named].
  def normalize_group(type, token)
    case token
    when :named_ab, :named_sq
      [:group, :named]
    else
      [type, token]
    end
  end

  # Collapses the _ab / _sq variants of each back-reference and call token
  # into the token name without the suffix.
  def normalize_backref(type, token)
    case token
    when :name_ref_ab, :name_ref_sq
      [:backref, :name_ref]
    when :name_call_ab, :name_call_sq
      [:backref, :name_call]
    when :name_nest_ref_ab, :name_nest_ref_sq
      [:backref, :name_nest_ref]
    when :number_ref_ab, :number_ref_sq
      [:backref, :number_ref]
    when :number_call_ab, :number_call_sq
      [:backref, :number_call]
    when :number_rel_ref_ab, :number_rel_ref_sq
      [:backref, :number_rel_ref]
    when :number_rel_call_ab, :number_rel_call_sq
      [:backref, :number_rel_call]
    when :number_nest_ref_ab, :number_nest_ref_sq
      [:backref, :number_nest_ref]
    else
      [type, token]
    end
  end
end
|
159
|
+
|
160
|
+
# A syntax that always returns true, passing all tokens as implemented. This
# is useful during development, testing, and should be useful for some types
# of transformations as well.
class Any < Base
  # Does not call super: the map holds only the wildcard entry.
  def initialize
    @implements = { :* => [:*] }
  end

  # Every type/token pair is reported as implemented.
  def implements?(type, token)
    true
  end

  # Never raises; always returns true.
  def implements!(type, token)
    true
  end

  # Re-declare the aliases here. Aliases created in the parent class point at
  # the parent's method bodies, so without these lines check? and check!
  # would bypass the two overrides above.
  alias :check? :implements?
  alias :check! :implements!
end
|
171
|
+
|
172
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class Regexp

  module Parser
    # NOTE(review): the package listing for this release says 0.1.0 - confirm
    # whether this constant is meant to match it.
    VERSION = '0.0.1'
  end

  # Member names of Token, in positional order.
  TOKEN_KEYS = [:type, :token, :text, :ts, :te, :depth, :set_depth].freeze

  # A single token, with optional links to neighbouring objects.
  Token = Struct.new(*TOKEN_KEYS) do
    # Returns the [ts, te] pair.
    def offset
      [ts, te]
    end

    # Returns te - ts. This replaces Struct#length on Token.
    def length
      te - ts
    end

    # Returns a Hash of member name (as a Symbol) => member value.
    def to_h
      pairs = members.map { |member| [member.to_sym, send(member.to_sym)] }
      Hash[pairs]
    end

    # Combined reader/writer: with a truthy argument stores and returns it,
    # without one returns the stored value (nil if none was stored).
    def next(exp = nil)
      return @next unless exp
      @next = exp
    end

    # Combined reader/writer, same contract as #next.
    def previous(exp = nil)
      return @previous unless exp
      @previous = exp
    end
  end

end
|
42
|
+
|
43
|
+
%w{ctype scanner syntax lexer parser}.each do |file|
|
44
|
+
require File.expand_path("../regexp_parser/#{file}", __FILE__)
|
45
|
+
end
|
data/test/helpers.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
%w{
|
4
|
+
literals nesting refcalls
|
5
|
+
}.each do|tc|
|
6
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
7
|
+
end
|
8
|
+
|
9
|
+
# Smoke tests for RL.scan. RL is not defined in this file; presumably it
# comes from the shared test helpers required above.
class TestRegexpLexer < Test::Unit::TestCase

  # The lexer hands back an Array.
  def test_lexer_returns_an_array
    result = RL.scan('abc')
    assert_instance_of( Array, result )
  end

  # Every element of that Array is a Regexp::Token.
  def test_lexer_returns_tokens
    tokens = RL.scan('^abc+[^one]{2,3}\b\d\\\C-C$')
    only_tokens = tokens.all? { |token| token.kind_of?(Regexp::Token) }

    assert( only_tokens, "Not all array members are tokens" )
  end

  # A Regexp object is accepted as input as well as a String.
  def test_lexer_token_count
    tokens = RL.scan(/^(one|two){2,3}([^d\]efm-qz\,\-]*)(ghi)+$/i)
    assert_equal( 26, tokens.length )
  end

end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require File.expand_path("../../helpers", __FILE__)
|
4
|
+
|
5
|
+
# Checks how RL.scan splits runs of literal characters, including multi-byte
# ones. Each expectation is a token's array form:
# [type, token, text, ts, te, depth, set_depth].
class LexerLiterals < Test::Unit::TestCase

  # pattern => { index into the token list => expected token }
  cases = {
    # ascii, single byte characters
    'a' => {
      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
    },

    'ab+' => {
      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
      1     => [:literal,     :literal,       'b',        1, 2, 0, 0],
      2     => [:quantifier,  :one_or_more,   '+',        2, 3, 0, 0],
    },


    # 2 byte wide characters, Arabic
    'ا' => {
      0     => [:literal,     :literal,       'ا',        0, 2, 0, 0],
    },

    'aاbبcت' => {
      0     => [:literal,     :literal,       'aاbبcت',   0, 9, 0, 0],
    },

    'aاbبت?' => {
      0     => [:literal,     :literal,       'aاbب',     0, 6, 0, 0],
      1     => [:literal,     :literal,       'ت',        6, 8, 0, 0],
      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0],
    },

    'aا?bبcت+' => {
      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
      1     => [:literal,     :literal,       'ا',        1, 3, 0, 0],
      2     => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0],
      3     => [:literal,     :literal,       'bبc',      4, 8, 0, 0],
      4     => [:literal,     :literal,       'ت',        8, 10, 0, 0],
      5     => [:quantifier,  :one_or_more,   '+',        10, 11, 0, 0],
    },

    'a(اbب+)cت?' => {
      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
      1     => [:group,       :capture,       '(',        1, 2, 0, 0],
      2     => [:literal,     :literal,       'اb',       2, 5, 1, 0],
      3     => [:literal,     :literal,       'ب',        5, 7, 1, 0],
      4     => [:quantifier,  :one_or_more,   '+',        7, 8, 1, 0],
      5     => [:group,       :close,         ')',        8, 9, 0, 0],
      6     => [:literal,     :literal,       'c',        9, 10, 0, 0],
      7     => [:literal,     :literal,       'ت',        10, 12, 0, 0],
      8     => [:quantifier,  :zero_or_one,   '?',        12, 13, 0, 0],
    },


    # 3 byte wide characters, Japanese
    'ab?れます+cd' => {
      0     => [:literal,     :literal,       'a',        0, 1, 0, 0],
      1     => [:literal,     :literal,       'b',        1, 2, 0, 0],
      2     => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0],
      3     => [:literal,     :literal,       'れま',     3, 9, 0, 0],
      4     => [:literal,     :literal,       'す',       9, 12, 0, 0],
      5     => [:quantifier,  :one_or_more,   '+',        12, 13, 0, 0],
      6     => [:literal,     :literal,       'cd',       13, 15, 0, 0],
    },


    # 4 byte wide characters, Osmanya
    '𐒀𐒁?𐒂ab+𐒃' => {
      0     => [:literal,     :literal,       '𐒀',        0, 4, 0, 0],
      1     => [:literal,     :literal,       '𐒁',        4, 8, 0, 0],
      2     => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0],
      3     => [:literal,     :literal,       '𐒂a',       9, 14, 0, 0],
      4     => [:literal,     :literal,       'b',        14, 15, 0, 0],
      5     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0],
      6     => [:literal,     :literal,       '𐒃',        16, 20, 0, 0],
    },

    'mu𝄞?si*𝄫c+' => {
      0     => [:literal,     :literal,       'mu',       0, 2, 0, 0],
      1     => [:literal,     :literal,       '𝄞',        2, 6, 0, 0],
      2     => [:quantifier,  :zero_or_one,   '?',        6, 7, 0, 0],
      3     => [:literal,     :literal,       's',        7, 8, 0, 0],
      4     => [:literal,     :literal,       'i',        8, 9, 0, 0],
      5     => [:quantifier,  :zero_or_more,  '*',        9, 10, 0, 0],
      6     => [:literal,     :literal,       '𝄫',        10, 14, 0, 0],
      7     => [:literal,     :literal,       'c',        14, 15, 0, 0],
      8     => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0],
    },
  }

  # One generated test method per pattern, numbered from 1 in iteration order.
  cases.each_with_index do |(pattern, checks), position|
    define_method "test_lex_literal_runs_#{position + 1}" do
      tokens = RL.scan(pattern)

      checks.each_pair do |index, expected|
        assert_equal( expected, tokens[index].to_a )
      end
    end
  end

  # A quantified multi-byte character yields two tokens: the character and
  # its quantifier.
  def test_lex_single_2_byte_char
    assert_equal( 2, RL.scan('ا+').length )
  end

  def test_lex_single_3_byte_char
    assert_equal( 2, RL.scan('れ+').length )
  end

  def test_lex_single_4_byte_char
    assert_equal( 2, RL.scan('𝄞+').length )
  end

end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
# Checks the depth and set_depth values RL.scan reports for nested groups,
# assertions, comments and character sets. Each expectation is a token's
# array form: [type, token, text, ts, te, depth, set_depth].
class LexerNesting < Test::Unit::TestCase

  # pattern => { index into the token list => expected token }
  cases = {
    '(((b)))' => {
      0     => [:group,       :capture,       '(',        0, 1, 0, 0],
      1     => [:group,       :capture,       '(',        1, 2, 1, 0],
      2     => [:group,       :capture,       '(',        2, 3, 2, 0],
      3     => [:literal,     :literal,       'b',        3, 4, 3, 0],
      4     => [:group,       :close,         ')',        4, 5, 2, 0],
      5     => [:group,       :close,         ')',        5, 6, 1, 0],
      6     => [:group,       :close,         ')',        6, 7, 0, 0],
    },

    '(\((b)\))' => {
      0     => [:group,       :capture,       '(',        0, 1, 0, 0],
      1     => [:escape,      :group_open,    '\(',       1, 3, 1, 0],
      2     => [:group,       :capture,       '(',        3, 4, 1, 0],
      3     => [:literal,     :literal,       'b',        4, 5, 2, 0],
      4     => [:group,       :close,         ')',        5, 6, 1, 0],
      5     => [:escape,      :group_close,   '\)',       6, 8, 1, 0],
      6     => [:group,       :close,         ')',        8, 9, 0, 0],
    },

    '(?>a(?>b(?>c)))' => {
      0     => [:group,       :atomic,        '(?>',      0, 3, 0, 0],
      2     => [:group,       :atomic,        '(?>',      4, 7, 1, 0],
      4     => [:group,       :atomic,        '(?>',      8, 11, 2, 0],
      6     => [:group,       :close,         ')',        12, 13, 2, 0],
      7     => [:group,       :close,         ')',        13, 14, 1, 0],
      8     => [:group,       :close,         ')',        14, 15, 0, 0],
    },

    '(?:a(?:b(?:c)))' => {
      0     => [:group,       :passive,       '(?:',      0, 3, 0, 0],
      2     => [:group,       :passive,       '(?:',      4, 7, 1, 0],
      4     => [:group,       :passive,       '(?:',      8, 11, 2, 0],
      6     => [:group,       :close,         ')',        12, 13, 2, 0],
      7     => [:group,       :close,         ')',        13, 14, 1, 0],
      8     => [:group,       :close,         ')',        14, 15, 0, 0],
    },

    '(?=a(?!b(?<=c(?<!d))))' => {
      0     => [:assertion,   :lookahead,     '(?=',      0, 3, 0, 0],
      2     => [:assertion,   :nlookahead,    '(?!',      4, 7, 1, 0],
      4     => [:assertion,   :lookbehind,    '(?<=',     8, 12, 2, 0],
      6     => [:assertion,   :nlookbehind,   '(?<!',     13, 17, 3, 0],
      8     => [:group,       :close,         ')',        18, 19, 3, 0],
      9     => [:group,       :close,         ')',        19, 20, 2, 0],
      10    => [:group,       :close,         ')',        20, 21, 1, 0],
      11    => [:group,       :close,         ')',        21, 22, 0, 0],
    },

    '((?#a)b(?#c)d(?#e))' => {
      0     => [:group,       :capture,       '(',        0, 1, 0, 0],
      1     => [:group,       :comment,       '(?#a)',    1, 6, 1, 0],
      3     => [:group,       :comment,       '(?#c)',    7, 12, 1, 0],
      5     => [:group,       :comment,       '(?#e)',    13, 18, 1, 0],
      6     => [:group,       :close,         ')',        18, 19, 0, 0],
    },

    'a[b-e]f' => {
      1     => [:set,         :open,          '[',        1, 2, 0, 0],
      2     => [:set,         :range,         'b-e',      2, 5, 0, 1],
      3     => [:set,         :close,         ']',        5, 6, 0, 0],
    },

    '[a-w&&[^c-g]z]' => {
      0     => [:set,         :open,          '[',        0, 1, 0, 0],
      2     => [:set,         :intersection,  '&&',       4, 6, 0, 1],
      3     => [:subset,      :open,          '[',        6, 7, 0, 1],
      4     => [:subset,      :negate,        '^',        7, 8, 0, 2],
      5     => [:subset,      :range,         'c-g',      8, 11, 0, 2],
      6     => [:subset,      :close,         ']',        11, 12, 0, 1],
      8     => [:set,         :close,         ']',        13, 14, 0, 0],
    },

    '[a[b[c[d-g]]]]' => {
      0     => [:set,         :open,          '[',        0, 1, 0, 0],
      1     => [:set,         :member,        'a',        1, 2, 0, 1],
      2     => [:subset,      :open,          '[',        2, 3, 0, 1],
      3     => [:subset,      :member,        'b',        3, 4, 0, 2],
      4     => [:subset,      :open,          '[',        4, 5, 0, 2],
      5     => [:subset,      :member,        'c',        5, 6, 0, 3],
      6     => [:subset,      :open,          '[',        6, 7, 0, 3],
      7     => [:subset,      :range,         'd-g',      7, 10, 0, 4],
      8     => [:subset,      :close,         ']',        10, 11, 0, 3],
      9     => [:subset,      :close,         ']',        11, 12, 0, 2],
      10    => [:subset,      :close,         ']',        12, 13, 0, 1],
      11    => [:set,         :close,         ']',        13, 14, 0, 0],
    },
  }

  # One generated test method per pattern, numbered from 1 in iteration order.
  # Patterns are lexed against the 'ruby/1.9' syntax.
  cases.each_with_index do |(pattern, checks), position|
    define_method "test_lex_nesting_#{position + 1}" do
      tokens = RL.scan(pattern, 'ruby/1.9')

      checks.each_pair do |index, expected|
        assert_equal( expected, tokens[index].to_a )
      end
    end
  end

end
|