regexp_parser 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +57 -0
- data/Gemfile +8 -0
- data/LICENSE +1 -1
- data/README.md +225 -206
- data/Rakefile +9 -3
- data/lib/regexp_parser.rb +7 -11
- data/lib/regexp_parser/expression.rb +72 -14
- data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
- data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
- data/lib/regexp_parser/expression/classes/keep.rb +7 -0
- data/lib/regexp_parser/expression/classes/set.rb +28 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
- data/lib/regexp_parser/expression/methods/tests.rb +116 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
- data/lib/regexp_parser/expression/quantifier.rb +10 -0
- data/lib/regexp_parser/expression/sequence.rb +45 -0
- data/lib/regexp_parser/expression/subexpression.rb +29 -1
- data/lib/regexp_parser/lexer.rb +31 -8
- data/lib/regexp_parser/parser.rb +118 -45
- data/lib/regexp_parser/scanner.rb +1745 -1404
- data/lib/regexp_parser/scanner/property.rl +57 -3
- data/lib/regexp_parser/scanner/scanner.rl +161 -34
- data/lib/regexp_parser/syntax.rb +12 -2
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
- data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
- data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
- data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +19 -2
- data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
- data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
- data/lib/regexp_parser/token.rb +23 -8
- data/lib/regexp_parser/version.rb +5 -0
- data/regexp_parser.gemspec +35 -0
- data/test/expression/test_all.rb +6 -1
- data/test/expression/test_base.rb +19 -0
- data/test/expression/test_conditionals.rb +114 -0
- data/test/expression/test_free_space.rb +33 -0
- data/test/expression/test_set.rb +61 -0
- data/test/expression/test_strfregexp.rb +214 -0
- data/test/expression/test_subexpression.rb +24 -0
- data/test/expression/test_tests.rb +99 -0
- data/test/expression/test_to_h.rb +48 -0
- data/test/expression/test_to_s.rb +46 -0
- data/test/expression/test_traverse.rb +164 -0
- data/test/lexer/test_all.rb +16 -3
- data/test/lexer/test_conditionals.rb +101 -0
- data/test/lexer/test_keep.rb +24 -0
- data/test/lexer/test_literals.rb +51 -51
- data/test/lexer/test_nesting.rb +62 -62
- data/test/lexer/test_refcalls.rb +18 -20
- data/test/parser/test_all.rb +18 -3
- data/test/parser/test_alternation.rb +11 -14
- data/test/parser/test_conditionals.rb +148 -0
- data/test/parser/test_escapes.rb +29 -5
- data/test/parser/test_free_space.rb +139 -0
- data/test/parser/test_groups.rb +40 -0
- data/test/parser/test_keep.rb +21 -0
- data/test/scanner/test_all.rb +8 -2
- data/test/scanner/test_conditionals.rb +166 -0
- data/test/scanner/test_escapes.rb +8 -5
- data/test/scanner/test_free_space.rb +133 -0
- data/test/scanner/test_groups.rb +28 -0
- data/test/scanner/test_keep.rb +33 -0
- data/test/scanner/test_properties.rb +4 -0
- data/test/scanner/test_scripts.rb +71 -1
- data/test/syntax/ruby/test_1.9.3.rb +2 -2
- data/test/syntax/ruby/test_2.0.0.rb +38 -0
- data/test/syntax/ruby/test_2.2.0.rb +38 -0
- data/test/syntax/ruby/test_all.rb +1 -8
- data/test/syntax/ruby/test_files.rb +104 -0
- data/test/test_all.rb +2 -1
- data/test/token/test_all.rb +2 -0
- data/test/token/test_token.rb +109 -0
- metadata +75 -21
- data/VERSION.yml +0 -5
- data/lib/regexp_parser/ctype.rb +0 -48
- data/test/syntax/ruby/test_2.x.rb +0 -46
@@ -0,0 +1,116 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
|
4
|
+
# Test if this expression has the given test_type, which can be either
|
5
|
+
# a symbol or an array of symbols to check against the expression's type.
|
6
|
+
#
|
7
|
+
# # is it a :group expression
|
8
|
+
# exp.type? :group
|
9
|
+
#
|
10
|
+
# # is it a :set, :subset, or :meta
|
11
|
+
# exp.type? [:set, :subset, :meta]
|
12
|
+
#
|
13
|
+
def type?(test_type)
  # test_type is either one type symbol or a list of them. The :* symbol acts
  # as a wildcard that matches every expression type.
  case test_type
  when Array
    test_type.include?(type) || test_type.include?(:*)
  when Symbol
    type == test_type || test_type == :*
  else
    raise "Array or Symbol expected, #{test_type.class.name} given"
  end
end
|
27
|
+
|
28
|
+
# Test if this expression has the given test_token, and optionally a given
|
29
|
+
# test_type.
|
30
|
+
#
|
31
|
+
# # Any expressions
|
32
|
+
# exp.is? :* # always returns true
|
33
|
+
#
|
34
|
+
# # is it a :capture
|
35
|
+
# exp.is? :capture
|
36
|
+
#
|
37
|
+
# # is it a :character and a :set
|
38
|
+
# exp.is? :character, :set
|
39
|
+
#
|
40
|
+
# # is it a :meta :dot
|
41
|
+
# exp.is? :dot, :meta
|
42
|
+
#
|
43
|
+
# # is it a :meta or :escape :dot
|
44
|
+
# exp.is? :dot, [:meta, :escape]
|
45
|
+
#
|
46
|
+
def is?(test_token, test_type = nil)
  # The :* wildcard matches any expression, whatever test_type says.
  return true if test_token === :*
  return false unless token == test_token

  # Without a type constraint the token match alone decides.
  test_type ? type?(test_type) : true
end
|
50
|
+
|
51
|
+
# Test if this expression matches an entry in the given scope spec.
|
52
|
+
#
|
53
|
+
# A scope spec can be one of:
|
54
|
+
#
|
55
|
+
# . An array: Interpreted as a set of tokens, tested for inclusion
|
56
|
+
# of the expression's token.
|
57
|
+
#
|
58
|
+
# . A hash: Where the key is interpreted as the expression type
|
59
|
+
# and the value is either a symbol or an array. In this
|
60
|
+
# case, when the scope is a hash, one_of? calls itself to
|
61
|
+
# evaluate the key's value.
|
62
|
+
#
|
63
|
+
# . A symbol: matches the expression's token or type, depending on
|
64
|
+
# the level of the call. If one_of? is called directly with
|
65
|
+
# a symbol then it will always be checked against the
|
66
|
+
# type of the expression. If it's being called for a value
|
67
|
+
# from a hash, it will be checked against the token of the
|
68
|
+
# expression.
|
69
|
+
#
|
70
|
+
# # any expression
|
71
|
+
# exp.one_of?(:*) # always true
|
72
|
+
#
|
73
|
+
# # like exp.type?(:group)
|
74
|
+
# exp.one_of?(:group)
|
75
|
+
#
|
76
|
+
# # any expression of type meta
|
77
|
+
# exp.one_of?(:meta => :*)
|
78
|
+
#
|
79
|
+
# # meta dots and alternations
|
80
|
+
# exp.one_of?(:meta => [:dot, :alternation])
|
81
|
+
#
|
82
|
+
# # meta dots and any set tokens
|
83
|
+
# exp.one_of?({meta: [:dot], set: :*})
|
84
|
+
#
|
85
|
+
def one_of?(scope, top = true)
  case scope
  when Array
    # A list of tokens; a :* entry in the list matches anything.
    scope.include?(token) || scope.include?(:*)
  when Hash
    # Use the entry keyed by this expression's type when present, otherwise
    # fall back to the :* entry (if any). The value is then tested as a
    # nested, non-top-level spec.
    key = scope.has_key?(type) ? type : :*
    scope.has_key?(key) && one_of?(scope[key], false)
  when Symbol
    # A bare symbol names a type at the top level and a token when it came
    # from a hash value.
    if scope == :*
      true
    elsif top
      type?(scope)
    else
      is?(scope)
    end
  else
    raise "Array, Hash, or Symbol expected, #{scope.class.name} given"
  end
end
|
114
|
+
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Subexpression < Regexp::Expression::Base
|
3
|
+
|
4
|
+
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
|
+
# block for each expression with three arguments; the traversal event,
|
6
|
+
# the expression, and the index of the expression within its parent.
|
7
|
+
#
|
8
|
+
# The event argument is passed as follows:
|
9
|
+
#
|
10
|
+
# - For subexpressions, :enter upon entering the subexpression, and
|
11
|
+
# :exit upon exiting it.
|
12
|
+
#
|
13
|
+
# - For terminal expressions, :visit is called once.
|
14
|
+
#
|
15
|
+
# Returns self.
|
16
|
+
def traverse(include_self = false, &block)
  raise 'traverse requires a block' unless block_given?

  # The receiver itself is only reported when explicitly requested, and is
  # always given index 0.
  yield(:enter, self, 0) if include_self

  each_with_index do |child, position|
    if child.terminal?
      yield(:visit, child, position)
    else
      # Bracket the recursive walk of a nested subexpression with
      # :enter/:exit events for the subexpression itself.
      yield(:enter, child, position)
      child.traverse(&block)
      yield(:exit, child, position)
    end
  end

  yield(:exit, self, 0) if include_self

  self
end
alias :walk :traverse
|
36
|
+
|
37
|
+
# Iterates over the expressions of this expression as an array, passing
|
38
|
+
# the expression and its index within its parent to the given block.
|
39
|
+
def each_expression(include_self = false, &block)
  traverse(include_self) do |event, exp, index|
    # Each expression is reported once, on :enter or :visit; the matching
    # :exit event is dropped.
    next if event == :exit

    yield(exp, index)
  end
end
|
44
|
+
|
45
|
+
# Returns a new array with the results of calling the given block once
|
46
|
+
# for every expression. If a block is not given, returns an array with
|
47
|
+
# each expression and its level index as an array.
|
48
|
+
def map(include_self = false, &block)
  collected = []

  each_expression(include_self) do |exp, index|
    # With a block, collect its result; without one, collect the
    # [expression, index] pair itself.
    collected << (block_given? ? yield(exp, index) : [exp, index])
  end

  collected
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# A sequence of expressions. Differs from a Subexpression in how it handles
|
4
|
+
# quantifiers, as it applies them to its last element instead of itself as
|
5
|
+
# a whole subexpression.
|
6
|
+
#
|
7
|
+
# Used as the base class for the Alternation alternatives and Conditional
|
8
|
+
# branches.
|
9
|
+
class Sequence < Regexp::Expression::Subexpression
  # Builds the sequence around a synthetic :expression/:sequence token with
  # empty text and no ts/te of its own; only the nesting levels are supplied.
  def initialize(level, set_level, conditional_level)
    token = Regexp::Token.new(:expression, :sequence, '', nil, nil,
                              level, set_level, conditional_level)
    super(token)
  end

  # The text of a sequence is its string form.
  def text
    to_s
  end

  # A sequence starts where its first expression starts.
  def starts_at
    @expressions.first.starts_at
  end

  # Applies the quantifier to the last expression that is not a FreeSpace,
  # rather than to the sequence as a whole.
  #
  # Raises ArgumentError when there is no such expression.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = expressions.reverse_each.find { |exp| !exp.is_a?(FreeSpace) }

    unless target
      raise ArgumentError.new("No valid target found for '#{text}' quantifier")
    end

    target.quantify(token, text, min, max, mode)
  end
end
|
44
|
+
|
45
|
+
end
|
@@ -17,7 +17,12 @@ module Regexp::Expression
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def <<(exp)
  # A whitespace expression that directly follows another one is merged into
  # it instead of being appended as a separate entry.
  previous = @expressions.last

  if exp.is_a?(WhiteSpace) && previous && previous.is_a?(WhiteSpace)
    previous.merge(exp)
  else
    @expressions << exp
  end
end
|
22
27
|
|
23
28
|
def insert(exp)
|
@@ -48,6 +53,22 @@ module Regexp::Expression
|
|
48
53
|
@expressions.length
|
49
54
|
end
|
50
55
|
|
56
|
+
# Returns true when this subexpression holds no child expressions
# (delegates to @expressions.empty?).
def empty?
|
57
|
+
@expressions.empty?
|
58
|
+
end
|
59
|
+
|
60
|
+
def all?(&block)
  # Stop at the first child expression for which the block is falsy.
  @expressions.each { |exp| return false unless yield(exp) }
  true
end
|
63
|
+
|
64
|
+
# Start position of this subexpression; simply returns starts_at.
def ts
|
65
|
+
starts_at
|
66
|
+
end
|
67
|
+
|
68
|
+
# End position of this subexpression, computed as ts plus the length of the
# string returned by to_s.
def te
|
69
|
+
ts + to_s.length
|
70
|
+
end
|
71
|
+
|
51
72
|
def to_s(format = :full)
|
52
73
|
s = ''
|
53
74
|
|
@@ -64,6 +85,13 @@ module Regexp::Expression
|
|
64
85
|
|
65
86
|
s
|
66
87
|
end
|
88
|
+
|
89
|
+
def to_h
  # Start from the parent's hash, then replace :text with the :base string
  # form and add the hashes of the child expressions.
  super.tap do |hash|
    hash[:text] = to_s(:base)
    hash[:expressions] = @expressions.map { |exp| exp.to_h }
  end
end
|
67
95
|
end
|
68
96
|
|
69
97
|
end
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -10,11 +10,11 @@ module Regexp::Lexer
|
|
10
10
|
|
11
11
|
CLOSING_TOKENS = [:close].freeze
|
12
12
|
|
13
|
-
def self.
|
13
|
+
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
14
14
|
syntax = Regexp::Syntax.new(syntax)
|
15
15
|
|
16
16
|
@tokens = []
|
17
|
-
@nesting, @set_nesting = 0, 0
|
17
|
+
@nesting, @set_nesting, @conditional_nesting = 0, 0, 0
|
18
18
|
|
19
19
|
last = nil
|
20
20
|
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
@@ -27,11 +27,14 @@ module Regexp::Lexer
|
|
27
27
|
last and last.type == :literal
|
28
28
|
|
29
29
|
current = Regexp::Token.new(type, token, text, ts, te,
|
30
|
-
|
30
|
+
@nesting, @set_nesting, @conditional_nesting)
|
31
31
|
|
32
32
|
current = merge_literal(current) if type == :literal and
|
33
33
|
last and last.type == :literal
|
34
34
|
|
35
|
+
current = merge_condition(current) if type == :conditional and
|
36
|
+
[:condition, :condition_close].include?(token)
|
37
|
+
|
35
38
|
last.next(current) if last
|
36
39
|
current.previous(last) if last
|
37
40
|
|
@@ -42,12 +45,18 @@ module Regexp::Lexer
|
|
42
45
|
end
|
43
46
|
|
44
47
|
if block_given?
|
45
|
-
@tokens.
|
48
|
+
@tokens.map {|t| block.call(t)}
|
46
49
|
else
|
47
50
|
@tokens
|
48
51
|
end
|
49
52
|
end
|
50
53
|
|
54
|
+
# Make the module-level lex method also callable as scan.
class << self
|
55
|
+
alias :scan :lex
|
56
|
+
end
|
57
|
+
|
58
|
+
# NOTE(review): a bare `protected` only changes the default visibility of
# instance methods defined after it; the methods that follow are written as
# `def self.…` singleton methods, which it does not affect — confirm intent.
protected
|
59
|
+
|
51
60
|
def self.ascend(type, token)
|
52
61
|
if type == :group or type == :assertion
|
53
62
|
@nesting -= 1 if CLOSING_TOKENS.include?(token)
|
@@ -56,6 +65,10 @@ module Regexp::Lexer
|
|
56
65
|
if type == :set or type == :subset
|
57
66
|
@set_nesting -= 1 if token == :close
|
58
67
|
end
|
68
|
+
|
69
|
+
if type == :conditional
|
70
|
+
@conditional_nesting -= 1 if token == :close
|
71
|
+
end
|
59
72
|
end
|
60
73
|
|
61
74
|
def self.descend(type, token)
|
@@ -66,6 +79,10 @@ module Regexp::Lexer
|
|
66
79
|
if type == :set or type == :subset
|
67
80
|
@set_nesting += 1 if token == :open
|
68
81
|
end
|
82
|
+
|
83
|
+
if type == :conditional
|
84
|
+
@conditional_nesting += 1 if token == :open
|
85
|
+
end
|
69
86
|
end
|
70
87
|
|
71
88
|
# called by scan to break a literal run that is longer than one character
|
@@ -86,11 +103,11 @@ module Regexp::Lexer
|
|
86
103
|
|
87
104
|
@tokens.pop
|
88
105
|
@tokens << Regexp::Token.new(:literal, :literal, lead, token.ts,
|
89
|
-
|
106
|
+
(token.te - last_length), @nesting, @set_nesting, @conditional_nesting)
|
90
107
|
|
91
108
|
@tokens << Regexp::Token.new(:literal, :literal, last,
|
92
|
-
|
93
|
-
|
109
|
+
(token.ts + lead_length),
|
110
|
+
token.te, @nesting, @set_nesting, @conditional_nesting)
|
94
111
|
end
|
95
112
|
end
|
96
113
|
|
@@ -99,7 +116,13 @@ module Regexp::Lexer
|
|
99
116
|
def self.merge_literal(current)
  # Takes the previous token off @tokens and returns a single :literal token
  # that joins both texts, spanning from the previous token's start to the
  # current token's end, at the lexer's current nesting levels.
  previous = @tokens.pop

  Regexp::Token.new(:literal, :literal, previous.text + current.text,
                    previous.ts, current.te,
                    @nesting, @set_nesting, @conditional_nesting)
end
|
121
|
+
|
122
|
+
def self.merge_condition(current)
  # Takes the previous token off @tokens and returns one :conditional
  # :condition token covering both, from the previous start to the current end.
  previous = @tokens.pop
  combined_text = previous.text + current.text

  Regexp::Token.new(:conditional, :condition, combined_text,
                    previous.ts, current.te,
                    @nesting, @set_nesting, @conditional_nesting)
end
|
104
127
|
|
105
128
|
end # module Regexp::Lexer
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,18 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require 'regexp_parser/expression'
|
2
2
|
|
3
3
|
module Regexp::Parser
|
4
4
|
include Regexp::Expression
|
5
5
|
include Regexp::Syntax
|
6
6
|
|
7
|
-
class ParserError < StandardError
|
8
|
-
def initialize(what)
|
9
|
-
super what
|
10
|
-
end
|
11
|
-
end
|
7
|
+
# Base class for parser errors; adds nothing to StandardError.
class ParserError < StandardError
end
|
12
8
|
|
13
9
|
# Raised with a message naming the token type and the inspected token.
class UnknownTokenTypeError < ParserError
  def initialize(type, token)
    message = "Unknown token type #{type} #{token.inspect}"
    super(message)
  end
end
|
18
14
|
|
@@ -25,8 +21,10 @@ module Regexp::Parser
|
|
25
21
|
def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
26
22
|
@nesting = [@root = @node = Root.new]
|
27
23
|
|
24
|
+
@conditional_nesting = []
|
25
|
+
|
28
26
|
Regexp::Lexer.scan(input, syntax) do |token|
|
29
|
-
|
27
|
+
parse_token token
|
30
28
|
end
|
31
29
|
|
32
30
|
if block_given?
|
@@ -43,23 +41,34 @@ module Regexp::Parser
|
|
43
41
|
@node = exp
|
44
42
|
end
|
45
43
|
|
44
|
+
def self.nest_conditional(exp)
  # Track the conditional on its own stack, attach it to the current node,
  # then make it the current node so following tokens land inside it.
  @conditional_nesting << exp
  @node << exp
  @node = exp
end
|
50
|
+
|
46
51
|
def self.parse_token(token)
|
47
52
|
case token.type
|
48
|
-
when :meta;
|
49
|
-
when :quantifier;
|
50
|
-
when :anchor;
|
51
|
-
when :escape;
|
52
|
-
when :group;
|
53
|
-
when :assertion;
|
54
|
-
when :set, :subset;
|
55
|
-
when :type;
|
56
|
-
when :backref;
|
53
|
+
when :meta; meta(token)
|
54
|
+
when :quantifier; quantifier(token)
|
55
|
+
when :anchor; anchor(token)
|
56
|
+
when :escape; escape(token)
|
57
|
+
when :group; group(token)
|
58
|
+
when :assertion; group(token)
|
59
|
+
when :set, :subset; set(token)
|
60
|
+
when :type; type(token)
|
61
|
+
when :backref; backref(token)
|
62
|
+
when :conditional; conditional(token)
|
63
|
+
when :keep; keep(token)
|
57
64
|
|
58
65
|
when :property, :nonproperty
|
59
|
-
|
66
|
+
property(token)
|
60
67
|
|
61
68
|
when :literal
|
62
69
|
@node << Literal.new(token)
|
70
|
+
when :free_space
|
71
|
+
free_space(token)
|
63
72
|
|
64
73
|
else
|
65
74
|
raise UnknownTokenTypeError.new(token.type, token)
|
@@ -69,19 +78,19 @@ module Regexp::Parser
|
|
69
78
|
def self.set(token)
|
70
79
|
case token.token
|
71
80
|
when :open
|
72
|
-
|
81
|
+
open_set(token)
|
73
82
|
when :close
|
74
|
-
|
83
|
+
close_set(token)
|
75
84
|
when :negate
|
76
|
-
|
85
|
+
negate_set
|
77
86
|
when :member, :range, :escape, :collation, :equivalent
|
78
|
-
|
87
|
+
append_set(token)
|
79
88
|
when *Token::Escape::All
|
80
|
-
|
89
|
+
append_set(token)
|
81
90
|
when *Token::CharacterSet::All
|
82
|
-
|
91
|
+
append_set(token)
|
83
92
|
when *Token::UnicodeProperty::All
|
84
|
-
|
93
|
+
append_set(token)
|
85
94
|
else
|
86
95
|
raise UnknownTokenError.new('CharacterSet', token)
|
87
96
|
end
|
@@ -95,7 +104,7 @@ module Regexp::Parser
|
|
95
104
|
unless @node.token == :alternation
|
96
105
|
unless @node.last.is_a?(Alternation)
|
97
106
|
alt = Alternation.new(token)
|
98
|
-
seq =
|
107
|
+
seq = Alternative.new(alt.level, alt.set_level, alt.conditional_level)
|
99
108
|
|
100
109
|
while @node.expressions.last
|
101
110
|
seq.insert @node.expressions.pop
|
@@ -163,6 +172,30 @@ module Regexp::Parser
|
|
163
172
|
end
|
164
173
|
end
|
165
174
|
|
175
|
+
def self.conditional(token)
  case token.token
  when :open
    nest_conditional(Conditional::Expression.new(token))

  when :condition
    # Record the condition on the innermost open conditional, then call its
    # branch method.
    current = @conditional_nesting.last
    current.condition(Conditional::Condition.new(token))
    current.branch

  when :separator
    # Call branch again and make the last branch the current node.
    current = @conditional_nesting.last
    current.branch
    @node = current.branches.last

  when :close
    @conditional_nesting.pop

    # Continue in the enclosing conditional if one is still open, otherwise
    # in the innermost entry of the regular nesting stack.
    @node = @conditional_nesting.empty? ? @nesting.last : @conditional_nesting.last

  else
    raise UnknownTokenError.new('Conditional', token)
  end
end
|
198
|
+
|
166
199
|
def self.property(token)
|
167
200
|
include Regexp::Expression::UnicodeProperty
|
168
201
|
|
@@ -291,14 +324,50 @@ module Regexp::Parser
|
|
291
324
|
when :control
|
292
325
|
@node << EscapeSequence::Control.new(token)
|
293
326
|
|
327
|
+
when :meta_sequence
|
328
|
+
if token.text =~ /\A\\M-\\C/
|
329
|
+
@node << EscapeSequence::MetaControl.new(token)
|
330
|
+
else
|
331
|
+
@node << EscapeSequence::Meta.new(token)
|
332
|
+
end
|
333
|
+
|
294
334
|
else
|
295
335
|
# treating everything else as a literal
|
296
336
|
@node << EscapeSequence::Literal.new(token)
|
297
337
|
end
|
298
338
|
end
|
299
339
|
|
340
|
+
|
341
|
+
def self.keep(token)
  # Wrap the token in a Keep::Mark and append it to the current node.
  mark = Keep::Mark.new(token)
  @node << mark
end
|
344
|
+
|
345
|
+
def self.free_space(token)
  case token.token
  when :comment
    @node << Comment.new(token)

  when :whitespace
    # Fold whitespace that directly follows whitespace into the existing
    # expression rather than appending a second one.
    previous = @node.last

    if previous && previous.is_a?(WhiteSpace)
      previous.merge(WhiteSpace.new(token))
    else
      @node << WhiteSpace.new(token)
    end

  else
    raise UnknownTokenError.new('FreeSpace', token)
  end
end
|
359
|
+
|
300
360
|
def self.quantifier(token)
|
301
|
-
|
361
|
+
offset = -1
|
362
|
+
target_node = @node.expressions[offset]
|
363
|
+
while target_node and target_node.is_a?(FreeSpace)
|
364
|
+
target_node = @node.expressions[offset -= 1]
|
365
|
+
end
|
366
|
+
|
367
|
+
raise ArgumentError.new("No valid target found for '#{token.text}' "+
|
368
|
+
"quantifier") unless target_node
|
369
|
+
|
370
|
+
unless target_node
|
302
371
|
if token.token == :zero_or_one
|
303
372
|
raise "Quantifier given without a target, or the syntax of the group " +
|
304
373
|
"or its options is incorrect"
|
@@ -309,35 +378,36 @@ module Regexp::Parser
|
|
309
378
|
|
310
379
|
case token.token
|
311
380
|
when :zero_or_one
|
312
|
-
|
381
|
+
target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
|
313
382
|
when :zero_or_one_reluctant
|
314
|
-
|
383
|
+
target_node.quantify(:zero_or_one, token.text, 0, 1, :reluctant)
|
315
384
|
when :zero_or_one_possessive
|
316
|
-
|
385
|
+
target_node.quantify(:zero_or_one, token.text, 0, 1, :possessive)
|
317
386
|
|
318
387
|
when :zero_or_more
|
319
|
-
|
388
|
+
target_node.quantify(:zero_or_more, token.text, 0, -1, :greedy)
|
320
389
|
when :zero_or_more_reluctant
|
321
|
-
|
390
|
+
target_node.quantify(:zero_or_more, token.text, 0, -1, :reluctant)
|
322
391
|
when :zero_or_more_possessive
|
323
|
-
|
392
|
+
target_node.quantify(:zero_or_more, token.text, 0, -1, :possessive)
|
324
393
|
|
325
394
|
when :one_or_more
|
326
|
-
|
395
|
+
target_node.quantify(:one_or_more, token.text, 1, -1, :greedy)
|
327
396
|
when :one_or_more_reluctant
|
328
|
-
|
397
|
+
target_node.quantify(:one_or_more, token.text, 1, -1, :reluctant)
|
329
398
|
when :one_or_more_possessive
|
330
|
-
|
399
|
+
target_node.quantify(:one_or_more, token.text, 1, -1, :possessive)
|
331
400
|
|
332
401
|
when :interval
|
333
|
-
|
402
|
+
interval(target_node, token)
|
334
403
|
|
335
404
|
else
|
336
405
|
raise UnknownTokenError.new('Quantifier', token)
|
337
406
|
end
|
338
407
|
end
|
339
408
|
|
340
|
-
def self.interval(
|
409
|
+
def self.interval(target_node, token)
|
410
|
+
text = token.text
|
341
411
|
mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
|
342
412
|
mode = case mchr
|
343
413
|
when '?'; text.chop!; :reluctant
|
@@ -349,19 +419,19 @@ module Regexp::Parser
|
|
349
419
|
min = range[0].empty? ? 0 : range[0]
|
350
420
|
max = range[1] ? (range[1].empty? ? -1 : range[1]) : min
|
351
421
|
|
352
|
-
|
422
|
+
target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
|
353
423
|
end
|
354
424
|
|
355
425
|
def self.group(token)
  # Route on the group token: options and close have dedicated handlers,
  # comments become Group::Comment nodes, anything else opens a new group.
  kind = token.token

  if kind == :options
    options(token)
  elsif kind == :close
    close_group
  elsif kind == :comment
    @node << Group::Comment.new(token)
  else
    open_group(token)
  end
end
|
367
437
|
|
@@ -372,10 +442,13 @@ module Regexp::Parser
|
|
372
442
|
exp.options = {
|
373
443
|
:m => opt[0].include?('m') ? true : false,
|
374
444
|
:i => opt[0].include?('i') ? true : false,
|
375
|
-
:x => opt[0].include?('x') ? true : false
|
445
|
+
:x => opt[0].include?('x') ? true : false,
|
446
|
+
:d => opt[0].include?('d') ? true : false,
|
447
|
+
:a => opt[0].include?('a') ? true : false,
|
448
|
+
:u => opt[0].include?('u') ? true : false
|
376
449
|
}
|
377
450
|
|
378
|
-
|
451
|
+
nest(exp)
|
379
452
|
end
|
380
453
|
|
381
454
|
def self.open_group(token)
|
@@ -402,7 +475,7 @@ module Regexp::Parser
|
|
402
475
|
raise UnknownTokenError.new('Group type open', token)
|
403
476
|
end
|
404
477
|
|
405
|
-
|
478
|
+
nest(exp)
|
406
479
|
end
|
407
480
|
|
408
481
|
def self.close_group
|