regexp_parser 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +57 -0
- data/Gemfile +8 -0
- data/LICENSE +1 -1
- data/README.md +225 -206
- data/Rakefile +9 -3
- data/lib/regexp_parser.rb +7 -11
- data/lib/regexp_parser/expression.rb +72 -14
- data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
- data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
- data/lib/regexp_parser/expression/classes/keep.rb +7 -0
- data/lib/regexp_parser/expression/classes/set.rb +28 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
- data/lib/regexp_parser/expression/methods/tests.rb +116 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
- data/lib/regexp_parser/expression/quantifier.rb +10 -0
- data/lib/regexp_parser/expression/sequence.rb +45 -0
- data/lib/regexp_parser/expression/subexpression.rb +29 -1
- data/lib/regexp_parser/lexer.rb +31 -8
- data/lib/regexp_parser/parser.rb +118 -45
- data/lib/regexp_parser/scanner.rb +1745 -1404
- data/lib/regexp_parser/scanner/property.rl +57 -3
- data/lib/regexp_parser/scanner/scanner.rl +161 -34
- data/lib/regexp_parser/syntax.rb +12 -2
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
- data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
- data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
- data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +19 -2
- data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
- data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
- data/lib/regexp_parser/token.rb +23 -8
- data/lib/regexp_parser/version.rb +5 -0
- data/regexp_parser.gemspec +35 -0
- data/test/expression/test_all.rb +6 -1
- data/test/expression/test_base.rb +19 -0
- data/test/expression/test_conditionals.rb +114 -0
- data/test/expression/test_free_space.rb +33 -0
- data/test/expression/test_set.rb +61 -0
- data/test/expression/test_strfregexp.rb +214 -0
- data/test/expression/test_subexpression.rb +24 -0
- data/test/expression/test_tests.rb +99 -0
- data/test/expression/test_to_h.rb +48 -0
- data/test/expression/test_to_s.rb +46 -0
- data/test/expression/test_traverse.rb +164 -0
- data/test/lexer/test_all.rb +16 -3
- data/test/lexer/test_conditionals.rb +101 -0
- data/test/lexer/test_keep.rb +24 -0
- data/test/lexer/test_literals.rb +51 -51
- data/test/lexer/test_nesting.rb +62 -62
- data/test/lexer/test_refcalls.rb +18 -20
- data/test/parser/test_all.rb +18 -3
- data/test/parser/test_alternation.rb +11 -14
- data/test/parser/test_conditionals.rb +148 -0
- data/test/parser/test_escapes.rb +29 -5
- data/test/parser/test_free_space.rb +139 -0
- data/test/parser/test_groups.rb +40 -0
- data/test/parser/test_keep.rb +21 -0
- data/test/scanner/test_all.rb +8 -2
- data/test/scanner/test_conditionals.rb +166 -0
- data/test/scanner/test_escapes.rb +8 -5
- data/test/scanner/test_free_space.rb +133 -0
- data/test/scanner/test_groups.rb +28 -0
- data/test/scanner/test_keep.rb +33 -0
- data/test/scanner/test_properties.rb +4 -0
- data/test/scanner/test_scripts.rb +71 -1
- data/test/syntax/ruby/test_1.9.3.rb +2 -2
- data/test/syntax/ruby/test_2.0.0.rb +38 -0
- data/test/syntax/ruby/test_2.2.0.rb +38 -0
- data/test/syntax/ruby/test_all.rb +1 -8
- data/test/syntax/ruby/test_files.rb +104 -0
- data/test/test_all.rb +2 -1
- data/test/token/test_all.rb +2 -0
- data/test/token/test_token.rb +109 -0
- metadata +75 -21
- data/VERSION.yml +0 -5
- data/lib/regexp_parser/ctype.rb +0 -48
- data/test/syntax/ruby/test_2.x.rb +0 -46
data/Rakefile
CHANGED
@@ -18,7 +18,9 @@ Bundler::GemHelper.install_tasks
|
|
18
18
|
task :default => [:test]
|
19
19
|
|
20
20
|
Rake::TestTask.new('test') do |t|
|
21
|
-
t.description
|
21
|
+
if t.respond_to?(:description)
|
22
|
+
t.description = "Run all unit tests under the test directory"
|
23
|
+
end
|
22
24
|
|
23
25
|
t.libs << "test"
|
24
26
|
t.test_files = FileList['test/test_all.rb']
|
@@ -27,7 +29,9 @@ end
|
|
27
29
|
namespace :test do
|
28
30
|
%w{scanner lexer parser expression syntax}.each do |component|
|
29
31
|
Rake::TestTask.new(component) do |t|
|
30
|
-
t.description
|
32
|
+
if t.respond_to?(:description)
|
33
|
+
t.description = "Run all #{component} unit tests under the test/#{component} directory"
|
34
|
+
end
|
31
35
|
|
32
36
|
t.libs << "test"
|
33
37
|
t.test_files = ["test/#{component}/test_all.rb"]
|
@@ -35,7 +39,9 @@ namespace :test do
|
|
35
39
|
end
|
36
40
|
|
37
41
|
Rake::TestTask.new('full' => 'ragel:rb') do |t|
|
38
|
-
t.description
|
42
|
+
if t.respond_to?(:description)
|
43
|
+
t.description = "Regenerate the scanner and run all unit tests under the test directory"
|
44
|
+
end
|
39
45
|
|
40
46
|
t.libs << "test"
|
41
47
|
t.test_files = FileList['test/test_all.rb']
|
data/lib/regexp_parser.rb
CHANGED
@@ -1,12 +1,8 @@
|
|
1
|
-
|
1
|
+
# encoding: utf-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
%w{token ctype scanner syntax lexer parser}.each do |file|
|
11
|
-
require File.expand_path("../regexp_parser/#{file}", __FILE__)
|
12
|
-
end
|
3
|
+
require 'regexp_parser/version'
|
4
|
+
require 'regexp_parser/token'
|
5
|
+
require 'regexp_parser/scanner'
|
6
|
+
require 'regexp_parser/syntax'
|
7
|
+
require 'regexp_parser/lexer'
|
8
|
+
require 'regexp_parser/parser'
|
@@ -2,18 +2,21 @@ module Regexp::Expression
|
|
2
2
|
|
3
3
|
class Base
|
4
4
|
attr_accessor :type, :token
|
5
|
-
attr_accessor :
|
5
|
+
attr_accessor :text, :ts
|
6
|
+
attr_accessor :level, :set_level, :conditional_level
|
6
7
|
|
7
8
|
attr_accessor :quantifier
|
8
9
|
attr_accessor :options
|
9
10
|
|
10
11
|
def initialize(token)
|
11
|
-
@type
|
12
|
-
@token
|
13
|
-
@text
|
14
|
-
@ts
|
15
|
-
@level
|
16
|
-
@
|
12
|
+
@type = token.type
|
13
|
+
@token = token.token
|
14
|
+
@text = token.text
|
15
|
+
@ts = token.ts
|
16
|
+
@level = token.level
|
17
|
+
@set_level = token.set_level
|
18
|
+
@conditional_level = token.conditional_level
|
19
|
+
@options = nil
|
17
20
|
end
|
18
21
|
|
19
22
|
def clone
|
@@ -106,6 +109,47 @@ module Regexp::Expression
|
|
106
109
|
end
|
107
110
|
alias :x? :free_spacing?
|
108
111
|
alias :extended? :free_spacing?
|
112
|
+
|
113
|
+
if RUBY_VERSION >= '2.0'
|
114
|
+
def default_classes?
|
115
|
+
(@options and @options[:d]) ? true : false
|
116
|
+
end
|
117
|
+
alias :d? :default_classes?
|
118
|
+
|
119
|
+
def ascii_classes?
|
120
|
+
(@options and @options[:a]) ? true : false
|
121
|
+
end
|
122
|
+
alias :a? :ascii_classes?
|
123
|
+
|
124
|
+
def unicode_classes?
|
125
|
+
(@options and @options[:u]) ? true : false
|
126
|
+
end
|
127
|
+
alias :u? :unicode_classes?
|
128
|
+
end
|
129
|
+
|
130
|
+
def matches?(string)
|
131
|
+
Regexp.new(to_s) =~ string ? true : false
|
132
|
+
end
|
133
|
+
|
134
|
+
# Matches +string+ against the regular expression built from this
# expression's text (to_s), starting the search at +offset+.
#
# string - the String to match against.
# offset - position in +string+ where the search starts. Defaults to 0 so
#          the method can also be invoked with the string alone, which is
#          how the binary =~ operator (an alias of this method) calls it.
#
# Returns whatever Regexp#match returns: a MatchData, or nil if no match.
def match(string, offset = 0)
  Regexp.new(to_s).match(string, offset)
end
|
137
|
+
alias :=~ :match
|
138
|
+
|
139
|
+
def to_h
|
140
|
+
{
|
141
|
+
:type => @type,
|
142
|
+
:token => @token,
|
143
|
+
:text => to_s(:base),
|
144
|
+
:starts_at => @ts,
|
145
|
+
:length => full_length,
|
146
|
+
:level => @level,
|
147
|
+
:set_level => @set_level,
|
148
|
+
:conditional_level => @conditional_level,
|
149
|
+
:options => @options,
|
150
|
+
:quantifier => quantified? ? @quantifier.to_h : nil
|
151
|
+
}
|
152
|
+
end
|
109
153
|
end
|
110
154
|
|
111
155
|
def self.parsed(exp)
|
@@ -125,10 +169,24 @@ module Regexp::Expression
|
|
125
169
|
|
126
170
|
end # module Regexp::Expression
|
127
171
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
172
|
+
require 'regexp_parser/expression/methods/tests'
|
173
|
+
require 'regexp_parser/expression/methods/traverse'
|
174
|
+
require 'regexp_parser/expression/methods/strfregexp'
|
175
|
+
|
176
|
+
require 'regexp_parser/expression/quantifier'
|
177
|
+
require 'regexp_parser/expression/subexpression'
|
178
|
+
require 'regexp_parser/expression/sequence'
|
179
|
+
|
180
|
+
require 'regexp_parser/expression/classes/alternation'
|
181
|
+
require 'regexp_parser/expression/classes/anchor'
|
182
|
+
require 'regexp_parser/expression/classes/backref'
|
183
|
+
require 'regexp_parser/expression/classes/conditional'
|
184
|
+
require 'regexp_parser/expression/classes/escape'
|
185
|
+
require 'regexp_parser/expression/classes/free_space'
|
186
|
+
require 'regexp_parser/expression/classes/group'
|
187
|
+
require 'regexp_parser/expression/classes/keep'
|
188
|
+
require 'regexp_parser/expression/classes/literal'
|
189
|
+
require 'regexp_parser/expression/classes/property'
|
190
|
+
require 'regexp_parser/expression/classes/root'
|
191
|
+
require 'regexp_parser/expression/classes/set'
|
192
|
+
require 'regexp_parser/expression/classes/type'
|
@@ -12,7 +12,7 @@ module Regexp::Expression
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def alternative(exp = nil)
|
15
|
-
@expressions << (exp ? exp :
|
15
|
+
@expressions << (exp ? exp : Alternative.new(level, set_level, conditional_level))
|
16
16
|
end
|
17
17
|
|
18
18
|
def alternatives
|
@@ -28,20 +28,7 @@ module Regexp::Expression
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
# A sequence of expressions, used by
|
32
|
-
|
33
|
-
class Sequence < Regexp::Expression::Subexpression
|
34
|
-
def initialize
|
35
|
-
super Regexp::Token.new(:expression, :sequence, '')
|
36
|
-
end
|
37
|
-
|
38
|
-
def starts_at
|
39
|
-
@expressions.first.starts_at
|
40
|
-
end
|
41
|
-
|
42
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
43
|
-
last.quantify(token, text, min, max, mode)
|
44
|
-
end
|
45
|
-
end
|
31
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
32
|
+
class Alternative < Regexp::Expression::Sequence; end
|
46
33
|
|
47
34
|
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
module Conditional
|
4
|
+
class TooManyBranches < StandardError
|
5
|
+
def initialize
|
6
|
+
super('The conditional expression has more than 2 branches')
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
class Condition < Regexp::Expression::Base; end
|
11
|
+
class Branch < Regexp::Expression::Sequence; end
|
12
|
+
|
13
|
+
class Expression < Regexp::Expression::Subexpression
|
14
|
+
def initialize(token)
|
15
|
+
super(token)
|
16
|
+
|
17
|
+
@condition = nil
|
18
|
+
@branches = []
|
19
|
+
end
|
20
|
+
|
21
|
+
def condition(exp = nil)
|
22
|
+
return @condition unless exp
|
23
|
+
@condition = exp
|
24
|
+
@expressions << exp
|
25
|
+
end
|
26
|
+
|
27
|
+
def <<(exp)
|
28
|
+
@expressions.last << exp
|
29
|
+
end
|
30
|
+
|
31
|
+
# Adds a branch to the conditional and records it in the list of branches.
#
# exp - optional, already built branch to add. When omitted, a new empty
#       Branch is created with this expression's level and set_level, and
#       a conditional_level one deeper than this expression's.
#
# Returns the list of branches.
# Raises TooManyBranches if the conditional already has two branches.
def branch(exp = nil)
  raise TooManyBranches.new if @branches.length == 2

  sequence = exp || Branch.new(level, set_level, conditional_level + 1)

  @expressions << sequence
  @branches << sequence
end
|
39
|
+
|
40
|
+
def branches
|
41
|
+
@branches
|
42
|
+
end
|
43
|
+
|
44
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
45
|
+
branches.last.last.quantify(token, text, min, max, mode)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Returns the text of the whole conditional: the opening text, the
# condition's text, the branches joined with '|', and the closing ')'.
#
# format - accepted so that callers which pass a format, such as
#          to_s(:base) or to_s(:full), do not raise ArgumentError. The
#          output is the same for every format.
def to_s(format = :full)
  s = @text.dup
  s << @condition.text
  s << branches.map{|e| e.to_s}.join('|')
  s << ')'
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
class FreeSpace < Regexp::Expression::Base
|
4
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
5
|
+
raise "Can not quantify a free space object"
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
class Comment < Regexp::Expression::FreeSpace; end
|
10
|
+
|
11
|
+
class WhiteSpace < Regexp::Expression::FreeSpace
|
12
|
+
def merge(exp)
|
13
|
+
@text << exp.text
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
@@ -72,6 +72,33 @@ module Regexp::Expression
|
|
72
72
|
@closed
|
73
73
|
end
|
74
74
|
|
75
|
+
# Returns an array of the members with any shorthand members like \d and \W
|
76
|
+
# expanded to either traditional form or unicode properties.
|
77
|
+
def expand_members(use_properties = false)
|
78
|
+
@members.map do |member|
|
79
|
+
case member
|
80
|
+
when "\\d"
|
81
|
+
use_properties ? '\p{Digit}' : '0-9'
|
82
|
+
when "\\D"
|
83
|
+
use_properties ? '\P{Digit}' : '^0-9'
|
84
|
+
when "\\w"
|
85
|
+
use_properties ? '\p{Word}' : 'A-Za-z0-9_'
|
86
|
+
when "\\W"
|
87
|
+
use_properties ? '\P{Word}' : '^A-Za-z0-9_'
|
88
|
+
when "\\s"
|
89
|
+
use_properties ? '\p{Space}' : ' \t\f\v\n\r'
|
90
|
+
when "\\S"
|
91
|
+
use_properties ? '\P{Space}' : '^ \t\f\v\n\r'
|
92
|
+
when "\\h"
|
93
|
+
use_properties ? '\p{Xdigit}' : '0-9A-Fa-f'
|
94
|
+
when "\\H"
|
95
|
+
use_properties ? '\P{Xdigit}' : '^0-9A-Fa-f'
|
96
|
+
else
|
97
|
+
member
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
75
102
|
def to_s(format = :full)
|
76
103
|
s = ''
|
77
104
|
|
@@ -80,18 +107,12 @@ module Regexp::Expression
|
|
80
107
|
s << @members.join
|
81
108
|
s << ']'
|
82
109
|
|
83
|
-
|
84
|
-
when :base
|
85
|
-
else
|
110
|
+
unless format == :base
|
86
111
|
s << @quantifier.to_s if quantified?
|
87
112
|
end
|
88
113
|
|
89
114
|
s
|
90
115
|
end
|
91
|
-
|
92
|
-
def matches?(input)
|
93
|
-
input =~ /#{to_s}/ ? true : false
|
94
|
-
end
|
95
116
|
end
|
96
117
|
|
97
118
|
class CharacterSubSet < CharacterSet
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
class Base
|
4
|
+
|
5
|
+
# %l Level (depth) of the expression. Returns 'root' for the root
|
6
|
+
# expression, returns zero or higher for all others.
|
7
|
+
#
|
8
|
+
# %> Indentation at expression's level.
|
9
|
+
#
|
10
|
+
# %x Index of the expression at its depth. Available when using
|
11
|
+
# the sprintf_tree method only.
|
12
|
+
#
|
13
|
+
# %s Start offset within the whole expression.
|
14
|
+
# %e End offset within the whole expression.
|
15
|
+
# %S Length of expression.
|
16
|
+
#
|
17
|
+
# %o Coded offset and length, same as '@%s+%S'
|
18
|
+
#
|
19
|
+
# %y Type of expression.
|
20
|
+
# %k Token of expression.
|
21
|
+
# %i ID, same as '%y:%k'
|
22
|
+
# %c Class name
|
23
|
+
#
|
24
|
+
# %q Quantifier info, as {m[,M]}
|
25
|
+
# %Q Quantifier text
|
26
|
+
#
|
27
|
+
# %z Quantifier min
|
28
|
+
# %Z Quantifier max
|
29
|
+
#
|
30
|
+
# %t Base text of the expression (excludes quantifier, if any)
|
31
|
+
# %~t Full text if the expression is terminal, otherwise %i
|
32
|
+
# %T Full text of the expression (includes quantifier, if any)
|
33
|
+
#
|
34
|
+
# %b Basic info, same as '%o %i'
|
35
|
+
# %m Most info, same as '%b %q'
|
36
|
+
# %a All info, same as '%m %t'
|
37
|
+
#
|
38
|
+
# Renders this expression according to a printf-like +format+ string: each
# %-field in the format is replaced by the matching piece of information
# about the expression.
#
# format        - String with %-fields (defaults to '%a').
# indent_offset - number added to the level when building the '%>'
#                 indentation.
# index         - index of the expression at its depth, used for '%x'.
#                 When nil, '%x' is replaced with an empty string.
#
# Returns a new String; +format+ itself is not modified.
def strfregexp(format = '%a', indent_offset = 0, index = nil)
  have_index = index ? true : false

  part = {}

  # Order is important! Fields that use other fields in their
  # definition must appear before the fields they use.
  part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}

  part['>'] = level ? (' ' * (level + indent_offset)) : ''

  part['l'] = level ? "#{'%d' % level}" : 'root'
  part['x'] = "#{'%d' % index}" if have_index

  part['s'] = starts_at
  part['S'] = full_length
  part['e'] = starts_at + full_length
  part['o'] = coded_offset

  part['k'] = token
  part['y'] = type
  part['i'] = '%y:%k'
  part['c'] = self.class.name

  if quantified?
    if quantifier.max == -1
      part['q'] = "{#{quantifier.min}, or-more}"
    else
      part['q'] = "{#{quantifier.min}, #{quantifier.max}}"
    end

    part['Q'] = quantifier.text
    part['z'] = quantifier.min
    part['Z'] = quantifier.max
  else
    part['q'] = '{1}'
    part['Q'] = ''
    part['z'] = '1'
    part['Z'] = '1'
  end

  part['t'] = to_s(:base)
  part['~t'] = terminal? ? to_s : "#{type}:#{token}"
  part['T'] = to_s(:full)

  part['b'] = '%o %i'
  part['m'] = '%b %q'
  part['a'] = '%m %t'

  out = format.dup

  part_keys.each do |k|
    # The block form inserts the value verbatim. A plain replacement string
    # would have its backslash sequences (\1, \\, ...) interpreted by gsub!,
    # corrupting expression text that contains backslashes.
    out.gsub!(/%#{k}/) { part[k].to_s }
  end

  out
end
|
96
|
+
|
97
|
+
alias :strfre :strfregexp
|
98
|
+
end
|
99
|
+
|
100
|
+
class Subexpression < Regexp::Expression::Base
|
101
|
+
def strfregexp_tree(format = '%a', include_self = true, separator = "\n")
|
102
|
+
output = include_self ? [self.strfregexp(format)] : []
|
103
|
+
|
104
|
+
output += map {|exp, index|
|
105
|
+
exp.strfregexp(format, (include_self ? 1 : 0), index)
|
106
|
+
}
|
107
|
+
|
108
|
+
output.join(separator)
|
109
|
+
end
|
110
|
+
|
111
|
+
alias :strfre_tree :strfregexp_tree
|
112
|
+
end
|
113
|
+
end
|