regexp_parser 0.1.1 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog +45 -0
- data/Rakefile +12 -44
- data/VERSION.yml +5 -0
- data/lib/regexp_parser.rb +5 -38
- data/lib/regexp_parser/expression.rb +68 -221
- data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
- data/lib/regexp_parser/expression/classes/backref.rb +42 -0
- data/lib/regexp_parser/expression/classes/escape.rb +27 -0
- data/lib/regexp_parser/expression/classes/group.rb +67 -0
- data/lib/regexp_parser/expression/classes/literal.rb +7 -0
- data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
- data/lib/regexp_parser/expression/classes/root.rb +26 -0
- data/lib/regexp_parser/expression/classes/set.rb +100 -0
- data/lib/regexp_parser/expression/classes/type.rb +17 -0
- data/lib/regexp_parser/expression/quantifier.rb +26 -0
- data/lib/regexp_parser/expression/subexpression.rb +69 -0
- data/lib/regexp_parser/lexer.rb +4 -4
- data/lib/regexp_parser/parser.rb +31 -13
- data/lib/regexp_parser/scanner.rb +1849 -1488
- data/lib/regexp_parser/scanner/property.rl +7 -2
- data/lib/regexp_parser/scanner/scanner.rl +377 -191
- data/lib/regexp_parser/syntax.rb +7 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
- data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
- data/lib/regexp_parser/syntax/tokens.rb +21 -320
- data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
- data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
- data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
- data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
- data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
- data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
- data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
- data/lib/regexp_parser/token.rb +37 -0
- data/test/expression/test_all.rb +7 -0
- data/test/expression/test_base.rb +72 -0
- data/test/expression/test_clone.rb +144 -0
- data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
- data/test/helpers.rb +1 -0
- data/test/parser/test_all.rb +1 -1
- data/test/parser/test_alternation.rb +35 -0
- data/test/parser/test_anchors.rb +2 -2
- data/test/parser/test_refcalls.rb +1 -1
- data/test/parser/test_sets.rb +54 -8
- data/test/scanner/test_anchors.rb +2 -2
- data/test/scanner/test_conditionals.rb +31 -0
- data/test/scanner/test_errors.rb +88 -8
- data/test/scanner/test_escapes.rb +4 -4
- data/test/scanner/test_groups.rb +7 -0
- data/test/scanner/test_quoting.rb +29 -0
- data/test/scanner/test_sets.rb +1 -0
- data/test/syntax/ruby/test_1.8.rb +3 -3
- data/test/test_all.rb +1 -1
- metadata +62 -48
- data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,47 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Strictly speaking an alternation is not a subexpression, but modelling it
# as one keeps the API for working with its alternatives simple.
class Alternation < Regexp::Expression::Subexpression
  # Reports the starts_at value of the first alternative.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Appends +exp+ to the most recently added alternative (not to the
  # alternation's own list of alternatives).
  def <<(exp)
    current = @expressions.last
    current << exp
  end

  # Adds +exp+ as a new alternative. When +exp+ is nil (or false) a fresh,
  # empty Sequence is added instead.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # The alternatives are this expression's child expressions.
  def alternatives
    @expressions
  end

  # Forwards the quantifier to the last expression of the last alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = alternatives.last.last
    target.quantify(token, text, min, max, mode)
  end

  # Renders each alternative with the given format, joined by '|'.
  def to_s(format = :full)
    rendered = alternatives.map { |alt| alt.to_s(format) }
    rendered.join('|')
  end
end
|
30
|
+
|
31
|
+
# A plain sequence of expressions; alternations use one per alternative.
# TODO: perhaps rename this to Alternative?
class Sequence < Regexp::Expression::Subexpression
  # Builds the sequence around a synthetic :expression/:sequence token
  # whose text is empty.
  def initialize
    sequence_token = Regexp::Token.new(:expression, :sequence, '')
    super(sequence_token)
  end

  # Reports the starts_at value of the first expression in the sequence.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Forwards the quantifier to the last expression in the sequence.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = last
    target.quantify(token, text, min, max, mode)
  end
end
|
46
|
+
|
47
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for anchors. Every class here is an empty subclass of
# Anchor::Base and adds no behavior of its own.
module Anchor
  # Common ancestor of all anchor expressions.
  class Base < Regexp::Expression::Base
  end

  class BeginningOfLine < Anchor::Base
  end

  class EndOfLine < Anchor::Base
  end

  class BeginningOfString < Anchor::Base
  end

  class EndOfString < Anchor::Base
  end

  class EndOfStringOrBeforeEndOfLine < Anchor::Base
  end

  class WordBoundary < Anchor::Base
  end

  class NonWordBoundary < Anchor::Base
  end

  class MatchStart < Anchor::Base
  end

  # Short aliases for the line and string anchor classes defined above.
  BOL      = BeginningOfLine
  EOL      = EndOfLine
  BOS      = BeginningOfString
  EOS      = EndOfString
  EOSobEOL = EndOfStringOrBeforeEndOfLine
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for the backref token family.
module Backreference
  # Common ancestor of every class in this module.
  class Base < Regexp::Expression::Base
  end

  # Exposes +name+: the token text without its first three characters and
  # without its last character.
  class Name < Backreference::Base
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super(token)
    end
  end

  # Exposes +number+, extracted from the token text the same way as
  # Name#name. The value is kept as a String, not converted to an Integer.
  class Number < Backreference::Base
    attr_reader :number

    def initialize(token)
      @number = token.text.slice(3..-2)
      super(token)
    end
  end

  # Inherits the +number+ extraction from Number.
  class NumberRelative < Backreference::Number
  end

  class NameNestLevel < Backreference::Base
  end

  class NumberNestLevel < Backreference::Base
  end

  # Exposes +name+, extracted from the token text the same way as Name#name.
  class NameCall < Backreference::Base
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super(token)
    end
  end

  class NumberCall < Backreference::Base
  end

  class NumberCallRelative < Backreference::Base
  end
end
|
41
|
+
|
42
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for escape sequences. Every class here is an empty
# subclass of EscapeSequence::Base and adds no behavior of its own.
module EscapeSequence
  # Common ancestor of all escape sequence expressions.
  class Base < Regexp::Expression::Base
  end

  class Literal < EscapeSequence::Base
  end

  class AsciiEscape < EscapeSequence::Base
  end

  class Backspace < EscapeSequence::Base
  end

  class Bell < EscapeSequence::Base
  end

  class FormFeed < EscapeSequence::Base
  end

  class Newline < EscapeSequence::Base
  end

  class Return < EscapeSequence::Base
  end

  class Space < EscapeSequence::Base
  end

  class Tab < EscapeSequence::Base
  end

  class VerticalTab < EscapeSequence::Base
  end

  class Octal < EscapeSequence::Base
  end

  class Hex < EscapeSequence::Base
  end

  class HexWide < EscapeSequence::Base
  end

  class Control < EscapeSequence::Base
  end

  class Meta < EscapeSequence::Base
  end

  class MetaControl < EscapeSequence::Base
  end
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for groups.
module Group
  # Common ancestor of all group expressions.
  class Base < Regexp::Expression::Subexpression
    # True when this expression's token is :capture or :named.
    def capturing?
      [:capture, :named].include?(@token)
    end

    # True when this expression's type is :comment.
    def comment?
      @type == :comment
    end

    # Renders the group as its opening text, its joined child expressions
    # and a closing ')'. The quantifier (if any) is appended for every
    # format except :base.
    def to_s(format = :full)
      out = ''
      out << @text.dup
      out << @expressions.join
      out << ')'
      return out if format == :base

      out << @quantifier.to_s if quantified?
      out
    end
  end

  class Atomic < Group::Base
  end

  class Capture < Group::Base
  end

  class Passive < Group::Base
  end

  class Options < Group::Base
  end

  # A capture group that exposes +name+: the token text without its first
  # three characters and without its last character.
  class Named < Group::Capture
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super(token)
    end

    # Extends the inherited clone so the copy gets its own name string.
    def clone
      duplicate = super
      duplicate.instance_variable_set(:@name, @name.dup)
      duplicate
    end
  end

  # Rendered as a copy of its text only; the format argument is ignored and
  # no child expressions, ')' or quantifier are appended.
  class Comment < Group::Base
    def to_s(format = :full)
      @text.dup
    end
  end
end
|
56
|
+
|
57
|
+
# Expression classes for assertions. They inherit all behavior from
# Group::Base and add none of their own.
module Assertion
  # Common ancestor of all assertion expressions.
  class Base < Regexp::Expression::Group::Base
  end

  class Lookahead < Assertion::Base
  end

  class NegativeLookahead < Assertion::Base
  end

  class Lookbehind < Assertion::Base
  end

  class NegativeLookbehind < Assertion::Base
  end
end
|
66
|
+
|
67
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Subexpression built around a synthetic :expression/:root token.
class Root < Regexp::Expression::Subexpression
  # The root token has empty text; 0 is passed as the fourth Token argument.
  def initialize
    root_token = Regexp::Token.new(:expression, :root, '', 0)
    super(root_token)
  end

  # Answers with the m? flag of the first child expression.
  def multiline?
    @expressions.first.m?
  end
  alias_method :m?, :multiline?

  # Answers with the i? flag of the first child expression.
  def case_insensitive?
    @expressions.first.i?
  end
  alias_method :i?, :case_insensitive?
  alias_method :ignore_case?, :case_insensitive?

  # Answers with the x? flag of the first child expression.
  def free_spacing?
    @expressions.first.x?
  end
  alias_method :x?, :free_spacing?
  alias_method :extended?, :free_spacing?
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# A character set holding a flat list of members. A member that is a
# CharacterSubSet is treated as a nested set: while it is the last member
# and not yet closed, appends and close calls are routed to it.
class CharacterSet < Regexp::Expression::Base
  attr_accessor :members

  # A new set has no members and is neither negated nor closed.
  def initialize(token)
    @members  = []
    @negative = false
    @closed   = false
    super
  end

  # Extends the inherited clone so the copy gets cloned members as well.
  def clone
    duplicate = super
    duplicate.members = @members.map { |member| member.clone }
    duplicate
  end

  # Appends +member+ to the still-open subset at the end of the member
  # list when there is one, otherwise to this set's own members.
  def <<(member)
    tail = @members.last
    target = tail.is_a?(CharacterSubSet) && !tail.closed? ? tail : @members
    target << member
  end

  # True when +member+ equals the string form of one of the members.
  # Subsets are searched recursively unless +directly+ is truthy, in which
  # case a subset is compared by its own string form like any other member.
  def include?(member, directly = false)
    @members.any? do |candidate|
      if candidate.is_a?(CharacterSubSet) && !directly
        candidate.include?(member)
      else
        member == candidate.to_s
      end
    end
  end

  # Yields each member in order.
  def each(&block)
    @members.each { |member| yield member }
  end

  # Yields each member together with its index.
  def each_with_index(&block)
    @members.each_with_index { |member, index| yield member, index }
  end

  # Number of direct members.
  def length
    @members.size
  end

  # Negates the subset at the end of the member list when there is one
  # (whether or not it is closed), otherwise marks this set as negative.
  def negate
    tail = @members.last
    return tail.negate if tail.is_a?(CharacterSubSet)

    @negative = true
  end

  def negative?
    @negative
  end
  alias_method :negated?, :negative?

  # Closes the still-open subset at the end of the member list when there
  # is one, otherwise marks this set as closed.
  def close
    tail = @members.last
    return tail.close if tail.is_a?(CharacterSubSet) && !tail.closed?

    @closed = true
  end

  def closed?
    @closed
  end

  # Renders the set as its opening text, '^' when negative, the joined
  # members and a closing ']'. The quantifier (if any) is appended for
  # every format except :base.
  def to_s(format = :full)
    out = ''
    out << @text.dup
    out << '^' if negative?
    out << @members.join
    out << ']'
    return out if format == :base

    out << @quantifier.to_s if quantified?
    out
  end

  # True when +input+ matches a regular expression built from the full
  # string form of this set (which includes any quantifier).
  def matches?(input)
    !!(input =~ /#{to_s}/)
  end
end
|
96
|
+
|
97
|
+
# A set nested inside another set. It adds no behavior of its own;
# CharacterSet checks members for this type to decide when to route
# appends, negation and closing to the nested set.
class CharacterSubSet < CharacterSet; end
|
99
|
+
|
100
|
+
end # module Regexp::Expression
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Expression classes for character types. Every class here is an empty
# subclass of CharacterType::Base and adds no behavior of its own.
module CharacterType
  # Common ancestor of all character type expressions.
  class Base < Regexp::Expression::Base
  end

  class Any < CharacterType::Base
  end

  class Digit < CharacterType::Base
  end

  class NonDigit < CharacterType::Base
  end

  class Hex < CharacterType::Base
  end

  class NonHex < CharacterType::Base
  end

  class Word < CharacterType::Base
  end

  class NonWord < CharacterType::Base
  end

  class Space < CharacterType::Base
  end

  class NonSpace < CharacterType::Base
  end
end
|
16
|
+
|
17
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# Value holder for a quantifier: its token, its text, the min/max values
# and the mode it was created with.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  # Stores all five arguments unchanged.
  def initialize(token, text, min, max, mode)
    @token, @text, @mode, @min, @max = token, text, mode, min, max
  end

  # Shallow copy that additionally gets its own duplicate of the text.
  def clone
    duplicate = dup
    duplicate.instance_variable_set(:@text, @text.dup)
    duplicate
  end

  # Returns a fresh copy of the text on every call.
  def to_s
    @text.dup
  end
  # to_str lets a Quantifier be used where Ruby implicitly converts an
  # object to a String (e.g. as the argument of String#<<).
  alias_method :to_str, :to_s
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
|
3
|
+
# An expression that contains an ordered list of child expressions.
class Subexpression < Regexp::Expression::Base
  attr_accessor :expressions

  # Starts out with an empty list of child expressions.
  def initialize(token)
    super(token)
    @expressions = []
  end

  # Extends the inherited clone so the copy gets cloned children as well.
  def clone
    duplicate = super
    duplicate.expressions = @expressions.map { |child| child.clone }
    duplicate
  end

  # Appends +exp+ as the last child.
  def <<(exp)
    @expressions << exp
  end

  # Puts +exp+ in front of all existing children.
  def insert(exp)
    @expressions.unshift(exp)
  end

  # Yields each child expression in order.
  def each(&block)
    @expressions.each { |child| yield child }
  end

  # Yields each child expression together with its index.
  def each_with_index(&block)
    @expressions.each_with_index { |child, index| yield child, index }
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  def [](index)
    @expressions[index]
  end

  def length
    @expressions.size
  end

  # Renders this expression's text followed by its children. The quantifier
  # (if any) is appended for every format except :base.
  def to_s(format = :full)
    out = ''
    out << @text.dup
    # Note: the format does not get passed down to subexpressions.
    out << @expressions.map { |child| child.to_s }.join unless @expressions.empty?
    return out if format == :base

    # The quantifier is appended as-is, without an explicit to_s;
    # presumably it is a Quantifier, whose to_str permits that.
    out << @quantifier if quantified?
    out
  end
end
|
68
|
+
|
69
|
+
end
|