regexp_parser 0.1.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,47 @@
1
+ module Regexp::Expression
2
+
3
+ # This is not a subexpression really, but considering it one simplifies
4
+ # the API when it comes to handling the alternatives.
5
# Holds the branches of an alternation. Every entry in @expressions is one
# alternative; expressions appended with << go into the most recently added
# alternative rather than into the alternation itself.
class Alternation < Regexp::Expression::Subexpression
  # Returns the list of alternatives (the same array as @expressions).
  def alternatives
    @expressions
  end

  # Starts a new alternative. Uses +exp+ when given, otherwise a fresh,
  # empty Sequence.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # Appends +exp+ to the last alternative.
  def <<(exp)
    current = @expressions.last
    current << exp
  end

  # Delegates to the first alternative.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Applies the quantifier to the last expression of the last alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = alternatives.last.last
    target.quantify(token, text, min, max, mode)
  end

  # Renders every alternative with the given format, separated by '|'.
  def to_s(format = :full)
    rendered = alternatives.map { |branch| branch.to_s(format) }
    rendered.join('|')
  end
end
30
+
31
+ # A sequence of expressions, used by alternations as one alternative.
32
+ # TODO: perhaps rename this to Alternative?
33
# One branch of an Alternation: an ordered run of expressions built on an
# :expression/:sequence token with empty text.
class Sequence < Regexp::Expression::Subexpression
  def initialize
    sequence_token = Regexp::Token.new(:expression, :sequence, '')
    super(sequence_token)
  end

  # Delegates to the first expression in the sequence.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Quantifies the last expression in the sequence, not the sequence itself.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    trailing = last
    trailing.quantify(token, text, min, max, mode)
  end
end
46
+
47
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for anchors. None of them add behavior beyond
# Regexp::Expression::Base; they exist so anchors can be told apart by class.
module Anchor
  # Common parent of every anchor expression.
  class Base < Regexp::Expression::Base
  end

  # Line anchors, with their short aliases.
  class BeginningOfLine < Anchor::Base
  end
  BOL = BeginningOfLine

  class EndOfLine < Anchor::Base
  end
  EOL = EndOfLine

  # String anchors, with their short aliases.
  class BeginningOfString < Anchor::Base
  end
  BOS = BeginningOfString

  class EndOfString < Anchor::Base
  end
  EOS = EndOfString

  class EndOfStringOrBeforeEndOfLine < Anchor::Base
  end
  EOSobEOL = EndOfStringOrBeforeEndOfLine

  # Word boundary anchors.
  class WordBoundary < Anchor::Base
  end

  class NonWordBoundary < Anchor::Base
  end

  # Match start anchor.
  class MatchStart < Anchor::Base
  end
end
25
+
26
+ end
@@ -0,0 +1,42 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for backreferences and subexpression calls.
#
# The classes that expose a name or number take it from the token text by
# dropping the first three characters and the last one.
# NOTE(review): presumably those are the escape prefix with its opening
# delimiter and the closing delimiter - confirm against the scanner.
module Backreference
  # Common parent of every backreference expression.
  class Base < Regexp::Expression::Base
  end

  # Backreference by group name.
  class Name < Backreference::Base
    attr_reader :name

    def initialize(token)
      # Set before calling super, as in the original ordering.
      @name = token.text.slice(3..-2)
      super(token)
    end
  end

  # Backreference by group number. The number is kept as a String slice of
  # the token text, not converted to an Integer.
  class Number < Backreference::Base
    attr_reader :number

    def initialize(token)
      @number = token.text.slice(3..-2)
      super(token)
    end
  end

  class NumberRelative < Backreference::Number
  end

  class NameNestLevel < Backreference::Base
  end

  class NumberNestLevel < Backreference::Base
  end

  # Subexpression call by group name.
  class NameCall < Backreference::Base
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super(token)
    end
  end

  class NumberCall < Backreference::Base
  end

  class NumberCallRelative < Backreference::Base
  end
end
41
+
42
+ end
@@ -0,0 +1,27 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for escape sequences. They are plain markers: each one
# inherits everything from EscapeSequence::Base and adds nothing of its own.
module EscapeSequence
  # Common parent of every escape sequence expression.
  class Base < Regexp::Expression::Base
  end

  # An escaped literal character.
  class Literal < EscapeSequence::Base
  end

  # Named single-character escapes.
  class AsciiEscape < EscapeSequence::Base
  end

  class Backspace < EscapeSequence::Base
  end

  class Bell < EscapeSequence::Base
  end

  class FormFeed < EscapeSequence::Base
  end

  class Newline < EscapeSequence::Base
  end

  class Return < EscapeSequence::Base
  end

  class Space < EscapeSequence::Base
  end

  class Tab < EscapeSequence::Base
  end

  class VerticalTab < EscapeSequence::Base
  end

  # Numeric escapes.
  class Octal < EscapeSequence::Base
  end

  class Hex < EscapeSequence::Base
  end

  class HexWide < EscapeSequence::Base
  end

  # Control and meta escapes.
  class Control < EscapeSequence::Base
  end

  class Meta < EscapeSequence::Base
  end

  class MetaControl < EscapeSequence::Base
  end
end
26
+
27
+ end
@@ -0,0 +1,67 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for groups.
module Group
  # Common parent of every group expression. A group renders as its opening
  # text, its child expressions, and a closing parenthesis.
  class Base < Regexp::Expression::Subexpression
    # True when the group's token is :capture or :named.
    def capturing?
      [:capture, :named].include?(@token)
    end

    # True when the expression's type is :comment.
    # NOTE(review): capturing? identifies the group kind through @token while
    # this predicate looks at @type; Group::Comment overrides it so comment
    # groups answer true either way - confirm which field the scanner uses.
    def comment?
      @type == :comment
    end

    # Renders the group. With format :base the quantifier is left out;
    # any other format appends it when the group is quantified.
    def to_s(format = :full)
      s = @text.dup
      s << @expressions.join
      s << ')'
      s << @quantifier.to_s if format != :base && quantified?
      s
    end
  end

  class Atomic < Group::Base
  end

  class Capture < Group::Base
  end

  class Passive < Group::Base
  end

  class Options < Group::Base
  end

  # A named capture group. The name is the token text without its first
  # three characters and its last character.
  class Named < Group::Capture
    attr_reader :name

    def initialize(token)
      # Set before calling super, as in the original ordering.
      @name = token.text[3..-2]
      super(token)
    end

    # Clones the group and gives the copy its own name String.
    def clone
      copy = super
      copy.instance_variable_set(:@name, @name.dup)
      copy
    end
  end

  # A comment group. It renders as its own text only, regardless of format.
  class Comment < Group::Base
    # A comment group is a comment by definition, independent of how its
    # token's type and token fields are populated.
    def comment?
      true
    end

    def to_s(format = :full)
      @text.dup
    end
  end
end
56
+
57
# Expression classes for assertions. They share all behavior with groups
# through Regexp::Expression::Group::Base.
module Assertion
  # Common parent of every assertion expression.
  class Base < Regexp::Expression::Group::Base
  end

  # Lookahead assertions.
  class Lookahead < Assertion::Base
  end

  class NegativeLookahead < Assertion::Base
  end

  # Lookbehind assertions.
  class Lookbehind < Assertion::Base
  end

  class NegativeLookbehind < Assertion::Base
  end
end
66
+
67
+ end
@@ -0,0 +1,7 @@
1
+ module Regexp::Expression
2
+
3
# A literal expression. It currently adds nothing to
# Regexp::Expression::Base and exists as a distinct class only.
class Literal < Regexp::Expression::Base; end
6
+
7
+ end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
 
3
- module UnicodeProperty
3
+ module UnicodeProperty
4
4
  class Base < Regexp::Expression::Base
5
5
  def negative?
6
6
  @type == :nonproperty
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
# The top-level expression, built on an :expression/:root token with empty
# text. Its option predicates are answered by the first child expression.
# NOTE(review): the predicates call m?/i?/x? on the first child, so they
# raise NoMethodError when the root has no children - confirm callers
# guarantee at least one.
class Root < Regexp::Expression::Subexpression
  def initialize
    root_token = Regexp::Token.new(:expression, :root, '', 0)
    super(root_token)
  end

  # Asks the first child expression whether it is multiline.
  def multiline?
    @expressions.first.m?
  end
  alias :m? :multiline?

  # Asks the first child expression whether it is case-insensitive.
  def case_insensitive?
    @expressions.first.i?
  end
  alias :i? :case_insensitive?
  alias :ignore_case? :case_insensitive?

  # Asks the first child expression whether it is in free-spacing mode.
  def free_spacing?
    @expressions.first.x?
  end
  alias :x? :free_spacing?
  alias :extended? :free_spacing?
end
25
+
26
+ end
@@ -0,0 +1,100 @@
1
+ module Regexp::Expression
2
+
3
# A character set. Members are kept in insertion order; a member may itself
# be a CharacterSubSet, in which case additions and closing are routed to it
# for as long as it is the last member and still open.
class CharacterSet < Regexp::Expression::Base
  attr_accessor :members

  def initialize(token)
    @members  = []
    @negative = false
    @closed   = false
    super
  end

  # Override base method to clone set members as well.
  def clone
    copy = super
    copy.members = @members.map { |m| m.clone }
    copy
  end

  # Adds a member. When the last member is an open subset, the new member
  # goes into that subset instead of this set.
  def <<(member)
    if open_subset?
      @members.last << member
    else
      @members << member
    end
  end

  # True when +member+ equals the string form of one of the members.
  # Subsets are searched recursively unless +directly+ is true, in which
  # case a subset is compared by its own string form like any other member.
  def include?(member, directly = false)
    @members.any? do |m|
      if m.is_a?(CharacterSubSet) && !directly
        m.include?(member)
      else
        member == m.to_s
      end
    end
  end

  # Iterates over the members. Returns an Enumerator when called without a
  # block (previously this raised LocalJumpError).
  def each(&block)
    @members.each(&block)
  end

  # Iterates over the members with their index. Returns an Enumerator when
  # called without a block.
  def each_with_index(&block)
    @members.each_with_index(&block)
  end

  def length
    @members.length
  end

  # Marks the set as negated, or forwards to the last member when that is
  # a subset.
  # NOTE(review): unlike << and close, this does not check whether the
  # subset is still open - kept as is; confirm against the parser.
  def negate
    if @members.last.is_a?(CharacterSubSet)
      @members.last.negate
    else
      @negative = true
    end
  end

  def negative?
    @negative
  end
  alias :negated? :negative?

  # Closes the innermost open subset if there is one, otherwise this set.
  def close
    if open_subset?
      @members.last.close
    else
      @closed = true
    end
  end

  def closed?
    @closed
  end

  # Renders the set: opening text, '^' when negated, the members, and ']'.
  # The quantifier is appended unless format is :base.
  def to_s(format = :full)
    s = @text.dup
    s << '^' if negative?
    s << @members.join
    s << ']'
    s << @quantifier.to_s if format != :base && quantified?
    s
  end

  # True when +input+ matches a regular expression built from this set's
  # full string form.
  def matches?(input)
    input =~ /#{to_s}/ ? true : false
  end

  private

  # True when the last member is a subset that has not been closed yet.
  def open_subset?
    last_member = @members.last
    last_member.is_a?(CharacterSubSet) && !last_member.closed?
  end
end
96
+
97
# A set nested inside another set. It inherits all behavior from
# CharacterSet; the class only serves to identify nested sets.
class CharacterSubSet < CharacterSet; end
99
+
100
+ end # module Regexp::Expression
@@ -0,0 +1,17 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for character types. Each class is a plain marker that
# inherits everything from CharacterType::Base.
module CharacterType
  # Common parent of every character type expression.
  class Base < Regexp::Expression::Base
  end

  class Any < CharacterType::Base
  end

  # Each remaining type comes with its negated counterpart.
  class Digit < CharacterType::Base
  end

  class NonDigit < CharacterType::Base
  end

  class Hex < CharacterType::Base
  end

  class NonHex < CharacterType::Base
  end

  class Word < CharacterType::Base
  end

  class NonWord < CharacterType::Base
  end

  class Space < CharacterType::Base
  end

  class NonSpace < CharacterType::Base
  end
end
16
+
17
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
# Value object describing a quantifier attached to an expression: the token,
# the source text, the min/max repetition bounds, and the mode.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  def initialize(token, text, min, max, mode)
    @token = token
    @text  = text
    @mode  = mode
    @min   = min
    @max   = max
  end

  # Gives every copy its own text String, so that mutating the text of a
  # copy never changes the original. Doing this here rather than only in
  # clone makes dup behave the same way as clone.
  def initialize_copy(source)
    super
    @text = @text.dup if @text
  end

  # Returns a copy with an independent text String.
  def clone
    dup
  end

  # Returns a copy of the quantifier's text.
  def to_s
    @text.dup
  end
  # Allows a Quantifier to be used where a String is expected, for example
  # as the argument of String#<<.
  alias :to_str :to_s
end
25
+
26
+ end
@@ -0,0 +1,69 @@
1
+ module Regexp::Expression
2
+
3
# An expression that contains other expressions, kept in order in
# @expressions. Offers array-like access to the children.
class Subexpression < Regexp::Expression::Base
  attr_accessor :expressions

  def initialize(token)
    super(token)

    @expressions = []
  end

  # Override base method to clone the expressions as well.
  def clone
    copy = super
    copy.expressions = @expressions.map { |e| e.clone }
    copy
  end

  # Appends +exp+ as the last child.
  def <<(exp)
    @expressions << exp
  end

  # Inserts +exp+ as the first child.
  def insert(exp)
    @expressions.insert(0, exp)
  end

  # Iterates over the children. Returns an Enumerator when called without a
  # block (previously this raised LocalJumpError).
  def each(&block)
    @expressions.each(&block)
  end

  # Iterates over the children with their index. Returns an Enumerator when
  # called without a block.
  def each_with_index(&block)
    @expressions.each_with_index(&block)
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  def [](index)
    @expressions[index]
  end

  def length
    @expressions.length
  end

  # Renders this expression's text followed by its children. The quantifier
  # is appended unless format is :base.
  #
  # Note: the format does not get passed down to subexpressions.
  def to_s(format = :full)
    s = @text.dup
    s << @expressions.map { |e| e.to_s }.join
    # Explicit to_s, matching how groups and character sets append it.
    s << @quantifier.to_s if format != :base && quantified?
    s
  end
end
68
+
69
+ end