regexp_parser 0.1.1 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ChangeLog +45 -0
- data/Rakefile +12 -44
- data/VERSION.yml +5 -0
- data/lib/regexp_parser.rb +5 -38
- data/lib/regexp_parser/expression.rb +68 -221
- data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
- data/lib/regexp_parser/expression/classes/backref.rb +42 -0
- data/lib/regexp_parser/expression/classes/escape.rb +27 -0
- data/lib/regexp_parser/expression/classes/group.rb +67 -0
- data/lib/regexp_parser/expression/classes/literal.rb +7 -0
- data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
- data/lib/regexp_parser/expression/classes/root.rb +26 -0
- data/lib/regexp_parser/expression/classes/set.rb +100 -0
- data/lib/regexp_parser/expression/classes/type.rb +17 -0
- data/lib/regexp_parser/expression/quantifier.rb +26 -0
- data/lib/regexp_parser/expression/subexpression.rb +69 -0
- data/lib/regexp_parser/lexer.rb +4 -4
- data/lib/regexp_parser/parser.rb +31 -13
- data/lib/regexp_parser/scanner.rb +1849 -1488
- data/lib/regexp_parser/scanner/property.rl +7 -2
- data/lib/regexp_parser/scanner/scanner.rl +377 -191
- data/lib/regexp_parser/syntax.rb +7 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
- data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
- data/lib/regexp_parser/syntax/tokens.rb +21 -320
- data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
- data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
- data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
- data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
- data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
- data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
- data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
- data/lib/regexp_parser/token.rb +37 -0
- data/test/expression/test_all.rb +7 -0
- data/test/expression/test_base.rb +72 -0
- data/test/expression/test_clone.rb +144 -0
- data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
- data/test/helpers.rb +1 -0
- data/test/parser/test_all.rb +1 -1
- data/test/parser/test_alternation.rb +35 -0
- data/test/parser/test_anchors.rb +2 -2
- data/test/parser/test_refcalls.rb +1 -1
- data/test/parser/test_sets.rb +54 -8
- data/test/scanner/test_anchors.rb +2 -2
- data/test/scanner/test_conditionals.rb +31 -0
- data/test/scanner/test_errors.rb +88 -8
- data/test/scanner/test_escapes.rb +4 -4
- data/test/scanner/test_groups.rb +7 -0
- data/test/scanner/test_quoting.rb +29 -0
- data/test/scanner/test_sets.rb +1 -0
- data/test/syntax/ruby/test_1.8.rb +3 -3
- data/test/test_all.rb +1 -1
- metadata +62 -48
- data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,47 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Not a true subexpression, but modelling it as one keeps the API simple when
# working with the individual alternatives. Each entry in @expressions is one
# alternative of the alternation.
class Alternation < Regexp::Expression::Subexpression
  # Start position of the alternation, taken from its first alternative.
  def starts_at
    alternatives.first.starts_at
  end

  # Appends +exp+ to the most recently added alternative (not to the list of
  # alternatives itself).
  def <<(exp)
    alternatives.last << exp
  end

  # Adds a new alternative: +exp+ when given, otherwise a fresh empty Sequence.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # The list of alternatives (the underlying @expressions array).
  def alternatives
    @expressions
  end

  # Applies the quantifier to the last expression of the last alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    current = alternatives.last
    current.last.quantify(token, text, min, max, mode)
  end

  # Renders every alternative with the requested format, separated by '|'.
  def to_s(format = :full)
    rendered = alternatives.map { |alt| alt.to_s(format) }
    rendered.join('|')
  end
end
|
30
|
+
|
31
|
+
# A sequence of expressions, used by alternations as one alternative.
# TODO: perhaps rename this to Alternative?
class Sequence < Regexp::Expression::Subexpression
  # Builds the sequence around a synthetic :expression/:sequence token with
  # empty text.
  def initialize
    super(Regexp::Token.new(:expression, :sequence, ''))
  end

  # Start position of the sequence, taken from its first expression.
  def starts_at
    first_expression = @expressions.first
    first_expression.starts_at
  end

  # Applies the quantifier to the last expression in the sequence.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = last
    target.quantify(token, text, min, max, mode)
  end
end
|
46
|
+
|
47
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for anchors. Every class is an empty subclass of
# Anchor::Base; the short constants at the bottom are aliases.
module Anchor
  class Base < Regexp::Expression::Base; end

  # Line anchors.
  class BeginningOfLine < Base; end
  class EndOfLine       < Base; end

  # String anchors.
  class BeginningOfString < Base; end
  class EndOfString       < Base; end

  class EndOfStringOrBeforeEndOfLine < Base; end

  # Word boundary anchors.
  class WordBoundary    < Base; end
  class NonWordBoundary < Base; end

  class MatchStart < Base; end

  # Abbreviated names for the classes above.
  BOL      = BeginningOfLine
  EOL      = EndOfLine
  BOS      = BeginningOfString
  EOS      = EndOfString
  EOSobEOL = EndOfStringOrBeforeEndOfLine
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for back-references and subexpression calls.
module Backreference
  class Base < Regexp::Expression::Base; end

  # Back-reference by name. The name is the token text without its first
  # three characters and its last character.
  # NOTE(review): presumably these are the opening and closing delimiters of
  # the reference syntax - confirm against the scanner.
  class Name < Base
    attr_reader :name

    def initialize(token)
      @name = token.text[3...-1]
      super
    end
  end

  # Back-reference by number. The number is kept as the string sliced from
  # the token text (first three characters and last character removed).
  class Number < Base
    attr_reader :number

    def initialize(token)
      @number = token.text[3...-1]
      super
    end
  end

  class NumberRelative < Number; end

  class NameNestLevel   < Base; end
  class NumberNestLevel < Base; end

  # Call of a named group. The name is extracted from the token text the
  # same way as for Name.
  class NameCall < Base
    attr_reader :name

    def initialize(token)
      @name = token.text[3...-1]
      super
    end
  end

  class NumberCall         < Base; end
  class NumberCallRelative < Base; end
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for escape sequences. Every class is an empty subclass
# of EscapeSequence::Base.
module EscapeSequence
  class Base < Regexp::Expression::Base; end

  class Literal < Base; end

  # Single-character escapes.
  class AsciiEscape < Base; end
  class Backspace   < Base; end
  class Bell        < Base; end
  class FormFeed    < Base; end
  class Newline     < Base; end
  class Return      < Base; end
  class Space       < Base; end
  class Tab         < Base; end
  class VerticalTab < Base; end

  # Numeric escapes.
  class Octal   < Base; end
  class Hex     < Base; end
  class HexWide < Base; end

  # Control and meta escapes.
  class Control     < Base; end
  class Meta        < Base; end
  class MetaControl < Base; end
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for groups.
module Group
  # Shared behavior for all group expressions.
  # NOTE(review): @type, @token, @text, @quantifier and quantified? are not
  # defined here; they are presumably provided by Regexp::Expression::Base.
  class Base < Regexp::Expression::Subexpression
    # True when the group's token is :capture or :named.
    def capturing?
      [:capture, :named].include? @token
    end

    # NOTE(review): this compares @type, whereas capturing? compares @token.
    # Whether a comment group ever carries the :comment *type* cannot be
    # confirmed from this file; Group::Comment overrides this method so that
    # it does not depend on the answer.
    def comment?; @type == :comment end

    # Renders the group as its opening text, its child expressions and a
    # closing ')'. The quantifier is appended unless +format+ is :base.
    def to_s(format = :full)
      s = ''
      s << @text
      s << @expressions.join
      s << ')'
      s << @quantifier.to_s if format != :base && quantified?
      s
    end
  end

  class Atomic  < Group::Base; end
  class Capture < Group::Base; end
  class Passive < Group::Base; end
  class Options < Group::Base; end

  # A named capturing group. The name is the token text without its first
  # three characters and its last character.
  class Named < Group::Capture
    attr_reader :name

    def initialize(token)
      @name = token.text[3..-2]
      super(token)
    end

    # Clones the group and gives the copy its own name string, so that
    # mutating one name does not affect the other.
    def clone
      copy = super
      copy.instance_variable_set(:@name, @name.dup)
      copy
    end
  end

  # A comment group.
  class Comment < Group::Base
    # A comment group is a comment by definition, whatever token type it
    # was built from.
    def comment?
      true
    end

    # The token text already holds the whole comment, so it is returned
    # as-is (as a copy); +format+ is ignored.
    def to_s(format = :full)
      @text.dup
    end
  end
end
|
56
|
+
|
57
|
+
# Expression classes for look-around assertions. They share the behavior of
# groups through Group::Base.
module Assertion
  class Base < Regexp::Expression::Group::Base; end

  # Look-ahead assertions.
  class Lookahead         < Base; end
  class NegativeLookahead < Base; end

  # Look-behind assertions.
  class Lookbehind         < Base; end
  class NegativeLookbehind < Base; end
end
|
66
|
+
|
67
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# The root node of an expression tree. It is built around a synthetic
# :expression/:root token with empty text and a fourth argument of 0.
# The option predicates below simply ask the first child expression.
class Root < Regexp::Expression::Subexpression
  def initialize
    super(Regexp::Token.new(:expression, :root, '', 0))
  end

  # Delegates to m? on the first child expression.
  def multiline?
    @expressions.first.m?
  end
  alias_method :m?, :multiline?

  # Delegates to i? on the first child expression.
  def case_insensitive?
    @expressions.first.i?
  end
  alias_method :i?, :case_insensitive?
  alias_method :ignore_case?, :case_insensitive?

  # Delegates to x? on the first child expression.
  def free_spacing?
    @expressions.first.x?
  end
  alias_method :x?, :free_spacing?
  alias_method :extended?, :free_spacing?
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# A character set. Its members are kept in +members+; a member may itself be
# a CharacterSubSet, in which case several operations are forwarded to it.
class CharacterSet < Regexp::Expression::Base
  attr_accessor :members

  def initialize(token)
    @members  = []
    @negative = false
    @closed   = false
    super
  end

  # Override base method to clone set members as well.
  def clone
    copy = super
    copy.members = @members.map { |member| member.clone }
    copy
  end

  # Adds +member+ to the last member when that is a subset which has not
  # been closed yet, otherwise to this set's own member list.
  def <<(member)
    tail = @members.last
    return tail << member if tail.is_a?(CharacterSubSet) && !tail.closed?

    @members << member
  end

  # True when +member+ equals the string form of one of the members.
  # Subsets are searched recursively unless +directly+ is true, in which
  # case a subset is compared by its own string form like any other member.
  def include?(member, directly = false)
    @members.any? do |m|
      if !directly && m.is_a?(CharacterSubSet)
        m.include?(member)
      else
        member == m.to_s
      end
    end
  end

  # Yields each member in order.
  def each(&block)
    @members.each { |member| yield member }
  end

  # Yields each member together with its index.
  def each_with_index(&block)
    @members.each_with_index { |member, index| yield member, index }
  end

  # Number of direct members.
  def length
    @members.size
  end

  # Marks the set as negated, or forwards the call to the last member when
  # that member is a subset.
  def negate
    tail = @members.last
    return tail.negate if tail.is_a?(CharacterSubSet)

    @negative = true
  end

  def negative?
    @negative
  end
  alias_method :negated?, :negative?

  # Closes the last member when it is a subset that is still open,
  # otherwise closes this set.
  def close
    tail = @members.last
    return tail.close if tail.is_a?(CharacterSubSet) && !tail.closed?

    @closed = true
  end

  def closed?
    @closed
  end

  # Renders the set: opening text, '^' when negated, the members and a
  # closing ']'. The quantifier is appended unless +format+ is :base.
  def to_s(format = :full)
    out = ''
    out << @text
    out << '^' if negative?
    out << @members.join
    out << ']'
    out << @quantifier.to_s if format != :base && quantified?
    out
  end

  # True when +input+ matches a regular expression built from this set's
  # string form.
  def matches?(input)
    !!(input =~ /#{to_s}/)
  end
end
|
96
|
+
|
97
|
+
# A set used as a member of another set; it adds nothing to CharacterSet
# and exists so that parent sets can recognise subset members by class.
class CharacterSubSet < CharacterSet; end
|
99
|
+
|
100
|
+
end # module Regexp::Expression
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for character types. Every class is an empty subclass
# of CharacterType::Base; most come as a type and its Non- counterpart.
module CharacterType
  class Base < Regexp::Expression::Base; end

  class Any < Base; end

  class Digit    < Base; end
  class NonDigit < Base; end

  class Hex    < Base; end
  class NonHex < Base; end

  class Word    < Base; end
  class NonWord < Base; end

  class Space    < Base; end
  class NonSpace < Base; end
end
|
16
|
+
|
17
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Value holder for a quantifier: the token identifying it, its source text,
# the min/max repetition bounds and the matching mode.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  def initialize(token, text, min, max, mode)
    # Assignment order is kept as token, text, mode, min, max so that
    # instance_variables / inspect output stays the same.
    @token, @text, @mode, @min, @max = token, text, mode, min, max
  end

  # Returns a shallow copy that owns its own text string, so that mutating
  # the text of one quantifier does not affect the other.
  def clone
    duplicate = dup
    duplicate.instance_variable_set(:@text, @text.dup)
    duplicate
  end

  # The quantifier's text, returned as a fresh string each time.
  def to_s
    @text.dup
  end
  # to_str lets a quantifier be used where a String is implicitly expected.
  alias_method :to_str, :to_s
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# An expression that contains other expressions, kept in order in
# +expressions+.
# NOTE(review): @text, @quantifier and quantified? are not defined here; they
# are presumably provided by Regexp::Expression::Base.
class Subexpression < Regexp::Expression::Base
  attr_accessor :expressions

  def initialize(token)
    super(token)

    @expressions = []
  end

  # Override base method to clone the expressions as well.
  def clone
    copy = super
    copy.expressions = @expressions.map { |e| e.clone }
    copy
  end

  # Appends +exp+ as the last child expression.
  def <<(exp)
    @expressions << exp
  end

  # Inserts +exp+ as the first child expression.
  def insert(exp)
    @expressions.insert 0, exp
  end

  # Yields each child expression. The block is forwarded to Array#each, so
  # calling this without a block returns an Enumerator instead of raising
  # LocalJumpError.
  def each(&block)
    @expressions.each(&block)
  end

  # Yields each child expression with its index. Without a block, returns
  # an Enumerator.
  def each_with_index(&block)
    @expressions.each_with_index(&block)
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  def [](index)
    @expressions[index]
  end

  def length
    @expressions.length
  end

  # Renders this expression's text followed by its children. The quantifier
  # is appended unless +format+ is :base.
  #
  # Note: the format does not get passed down to subexpressions.
  def to_s(format = :full)
    s = ''
    s << @text
    s << @expressions.map { |e| e.to_s }.join
    # Explicit to_s, matching the group and character set classes, rather
    # than relying on the quantifier's implicit to_str conversion.
    s << @quantifier.to_s if format != :base && quantified?
    s
  end
end
|
68
|
+
|
69
|
+
end
|