regexp_parser 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,47 @@
1
+ module Regexp::Expression
2
+
3
+ # This is not a subexpression really, but considering it one simplifies
4
+ # the API when it comes to handling the alternatives.
5
class Alternation < Regexp::Expression::Subexpression
  # Start position of the alternation, taken from its first alternative.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Appends +exp+ to the most recently added alternative rather than to
  # the alternation itself.
  def <<(exp)
    current = @expressions.last
    current << exp
  end

  # Opens a new alternative. Uses +exp+ when one is given, otherwise a
  # fresh, empty Sequence.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # The list of alternatives (the underlying expressions array itself,
  # not a copy).
  def alternatives
    @expressions
  end

  # Forwards the quantifier to the last expression of the last
  # alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = alternatives.last.last
    target.quantify(token, text, min, max, mode)
  end

  # Renders every alternative with the given format and joins them
  # with '|'.
  def to_s(format = :full)
    rendered = alternatives.map { |alt| alt.to_s(format) }
    rendered.join('|')
  end
end
30
+
31
+ # A sequence of expressions, used by alternations as one alternative.
32
+ # TODO: perhaps rename this to Alternative?
33
class Sequence < Regexp::Expression::Subexpression
  # Builds the sequence around an :expression/:sequence token with
  # empty text.
  def initialize
    token = Regexp::Token.new(:expression, :sequence, '')
    super(token)
  end

  # Start position of the sequence, taken from its first expression.
  def starts_at
    leading = @expressions.first
    leading.starts_at
  end

  # Forwards the quantifier to the last expression in the sequence.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = last
    target.quantify(token, text, min, max, mode)
  end
end
46
+
47
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
module Anchor
  # Common parent of all anchor expressions; adds nothing to the
  # generic expression base.
  class Base < Regexp::Expression::Base
  end

  # Line anchors.
  class BeginningOfLine < Base
  end

  class EndOfLine < Base
  end

  # String anchors.
  class BeginningOfString < Base
  end

  class EndOfString < Base
  end

  class EndOfStringOrBeforeEndOfLine < Base
  end

  # Word boundary anchors.
  class WordBoundary < Base
  end

  class NonWordBoundary < Base
  end

  class MatchStart < Base
  end

  # Short aliases for the line and string anchors above.
  BOL      = BeginningOfLine
  EOL      = EndOfLine
  BOS      = BeginningOfString
  EOS      = EndOfString
  EOSobEOL = EndOfStringOrBeforeEndOfLine
end
25
+
26
+ end
@@ -0,0 +1,42 @@
1
+ module Regexp::Expression
2
+
3
module Backreference
  # Common parent of all backreference and subexpression-call classes.
  class Base < Regexp::Expression::Base
  end

  # Backreference by name. The name is the token text with its first
  # three characters and its last character removed.
  # NOTE(review): presumably this strips a wrapper such as \k<...> --
  # confirm against the scanner's token text.
  class Name < Base
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super
    end
  end

  # Backreference by number. The number is kept as the sliced token
  # text (a String); it is not converted to an Integer here.
  class Number < Base
    attr_reader :number

    def initialize(token)
      @number = token.text.slice(3..-2)
      super
    end
  end

  class NumberRelative < Number
  end

  class NameNestLevel < Base
  end

  class NumberNestLevel < Base
  end

  # Call by name; extracts the name the same way Name does.
  class NameCall < Base
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super
    end
  end

  class NumberCall < Base
  end

  class NumberCallRelative < Base
  end
end
41
+
42
+ end
@@ -0,0 +1,27 @@
1
+ module Regexp::Expression
2
+
3
module EscapeSequence
  # Common parent of all escape sequence expressions; the subclasses
  # below only name the kind of escape and add no behavior.
  class Base < Regexp::Expression::Base
  end

  class Literal < Base
  end

  # Single-character escapes.
  class AsciiEscape < Base
  end

  class Backspace < Base
  end

  class Bell < Base
  end

  class FormFeed < Base
  end

  class Newline < Base
  end

  class Return < Base
  end

  class Space < Base
  end

  class Tab < Base
  end

  class VerticalTab < Base
  end

  # Numeric escapes.
  class Octal < Base
  end

  class Hex < Base
  end

  class HexWide < Base
  end

  # Control and meta escapes.
  class Control < Base
  end

  class Meta < Base
  end

  class MetaControl < Base
  end
end
26
+
27
+ end
@@ -0,0 +1,67 @@
1
+ module Regexp::Expression
2
+
3
module Group
  # Shared behavior for all group expressions.
  class Base < Regexp::Expression::Subexpression
    # True when the group's token is :capture or :named.
    def capturing?
      @token == :capture || @token == :named
    end

    # True when the group's type is :comment.
    # NOTE(review): this tests @type while capturing? tests @token --
    # confirm which field carries :comment for comment groups.
    def comment?
      @type == :comment
    end

    # Renders the opening text, the joined subexpressions and a closing
    # ')'. The quantifier is appended unless format is :base.
    def to_s(format = :full)
      out = ''
      out << @text.dup
      out << @expressions.join
      out << ')'
      out << @quantifier.to_s if format != :base && quantified?
      out
    end
  end

  class Atomic < Base
  end

  class Capture < Base
  end

  class Passive < Base
  end

  class Options < Base
  end

  # Capturing group with a name. The name is the token text with its
  # first three characters and its last character removed.
  class Named < Capture
    attr_reader :name

    def initialize(token)
      @name = token.text.slice(3..-2)
      super
    end

    # Clones the group and gives the copy its own duplicate of the name.
    def clone
      super.tap { |copy| copy.instance_variable_set(:@name, @name.dup) }
    end
  end

  # Comment group; renders as a copy of its own text only, whatever the
  # requested format.
  class Comment < Base
    def to_s(format = :full)
      @text.dup
    end
  end
end
56
+
57
module Assertion
  # Assertions reuse the group behavior (including its to_s) unchanged.
  class Base < Regexp::Expression::Group::Base
  end

  class Lookahead < Base
  end

  class NegativeLookahead < Base
  end

  class Lookbehind < Base
  end

  class NegativeLookbehind < Base
  end
end
66
+
67
+ end
@@ -0,0 +1,7 @@
1
+ module Regexp::Expression
2
+
3
# Literal text expression. Adds no behavior of its own to the generic
# expression base, yet.
class Literal < Regexp::Expression::Base
end
6
+
7
+ end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
 
3
- module UnicodeProperty
3
+ module UnicodeProperty
4
4
  class Base < Regexp::Expression::Base
5
5
  def negative?
6
6
  @type == :nonproperty
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
class Root < Regexp::Expression::Subexpression
  # Builds the root around an :expression/:root token with empty text
  # and a start position of 0.
  def initialize
    token = Regexp::Token.new(:expression, :root, '', 0)
    super(token)
  end

  # The three option predicates below each ask the first child
  # expression for the corresponding flag.

  def multiline?
    leading_expression.m?
  end
  alias_method :m?, :multiline?

  def case_insensitive?
    leading_expression.i?
  end
  alias_method :i?, :case_insensitive?
  alias_method :ignore_case?, :case_insensitive?

  def free_spacing?
    leading_expression.x?
  end
  alias_method :x?, :free_spacing?
  alias_method :extended?, :free_spacing?

  private

  # First child expression; nil when the root has no children.
  def leading_expression
    @expressions[0]
  end
end
25
+
26
+ end
@@ -0,0 +1,100 @@
1
+ module Regexp::Expression
2
+
3
class CharacterSet < Regexp::Expression::Base
  attr_accessor :members

  # Starts out empty, not negated and not closed.
  def initialize(token)
    @members  = []
    @negative = false
    @closed   = false
    super
  end

  # Clones the set and gives the copy its own clones of the members.
  def clone
    super.tap { |copy| copy.members = @members.map { |member| member.clone } }
  end

  # Adds a member. While the last member is a subset that has not been
  # closed, the new member goes into that subset instead.
  def <<(member)
    tail = @members.last
    target = tail.is_a?(CharacterSubSet) && !tail.closed? ? tail : @members
    target << member
  end

  # True when +member+ equals the string form of one of the members.
  # Subsets are searched recursively unless +directly+ is true, in
  # which case a subset is compared by its own string form.
  def include?(member, directly = false)
    @members.any? do |candidate|
      if candidate.is_a?(CharacterSubSet) && !directly
        candidate.include?(member)
      else
        member == candidate.to_s
      end
    end
  end

  # Yields each member in order.
  def each(&block)
    @members.each { |member| yield member }
  end

  # Yields each member together with its index.
  def each_with_index(&block)
    @members.each_with_index { |member, index| yield member, index }
  end

  # Number of direct members.
  def length
    @members.size
  end

  # Marks the set as negated. When the last member is a subset, the
  # negation is passed on to that subset instead.
  def negate
    tail = @members.last
    return tail.negate if tail.is_a?(CharacterSubSet)

    @negative = true
  end

  def negative?
    @negative
  end
  alias_method :negated?, :negative?

  # Closes the innermost subset that is still open, or this set when
  # there is none.
  def close
    tail = @members.last
    return tail.close if tail.is_a?(CharacterSubSet) && !tail.closed?

    @closed = true
  end

  def closed?
    @closed
  end

  # Renders the opening text, '^' when negated, the joined members and
  # a closing ']'. The quantifier is appended unless format is :base.
  def to_s(format = :full)
    out = ''
    out << @text.dup
    out << '^' if negative?
    out << @members.join
    out << ']'
    out << @quantifier.to_s if format != :base && quantified?
    out
  end

  # True when +input+ matches a regexp built from the full string form
  # of this set.
  def matches?(input)
    pattern = /#{to_s}/
    if input =~ pattern
      true
    else
      false
    end
  end
end
96
+
97
# A set nested inside another set; inherits all behavior from
# CharacterSet unchanged.
class CharacterSubSet < CharacterSet; end
99
+
100
+ end # module Regexp::Expression
@@ -0,0 +1,17 @@
1
+ module Regexp::Expression
2
+
3
module CharacterType
  # Common parent of all character type expressions; the subclasses
  # below only name the type and add no behavior.
  class Base < Regexp::Expression::Base
  end

  class Any < Base
  end

  class Digit < Base
  end

  class NonDigit < Base
  end

  class Hex < Base
  end

  class NonHex < Base
  end

  class Word < Base
  end

  class NonWord < Base
  end

  class Space < Base
  end

  class NonSpace < Base
  end
end
16
+
17
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+
3
# Value holder for a quantifier attached to an expression.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  # Stores the five values as given; nothing is validated or converted.
  def initialize(token, text, min, max, mode)
    @token, @text = token, text
    @mode = mode
    @min, @max = min, max
  end

  # Shallow copy whose text is a separate String from the original's.
  def clone
    dup.tap { |copy| copy.instance_variable_set(:@text, @text.dup) }
  end

  # Returns a copy of the quantifier text. Also exposed as to_str, so a
  # quantifier can be used where a String is implicitly expected.
  def to_s
    @text.dup
  end
  alias_method :to_str, :to_s
end
25
+
26
+ end
@@ -0,0 +1,69 @@
1
+ module Regexp::Expression
2
+
3
# An expression that contains an ordered list of child expressions.
class Subexpression < Regexp::Expression::Base
  attr_accessor :expressions

  # Starts with an empty list of child expressions.
  def initialize(token)
    super(token)

    @expressions = []
  end

  # Override base method to clone the expressions as well.
  def clone
    copy = super
    copy.expressions = @expressions.map { |e| e.clone }
    copy
  end

  # Appends +exp+ as the last child.
  def <<(exp)
    @expressions << exp
  end

  # Inserts +exp+ as the first child.
  def insert(exp)
    @expressions.insert(0, exp)
  end

  # Yields each child expression in order.
  def each(&block)
    @expressions.each { |e| yield e }
  end

  # Yields each child expression together with its index.
  def each_with_index(&block)
    @expressions.each_with_index { |e, i| yield e, i }
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  def [](index)
    @expressions[index]
  end

  def length
    @expressions.length
  end

  # Renders the expression's own text followed by its children. The
  # quantifier is appended unless format is :base.
  #
  # Note: the format does not get passed down to subexpressions.
  def to_s(format = :full)
    s = ''
    s << @text.dup
    s << @expressions.map { |e| e.to_s }.join
    # Convert explicitly, as the group and character set classes do,
    # instead of relying on the quantifier's implicit to_str conversion.
    s << @quantifier.to_s if format != :base && quantified?
    s
  end
end
68
+
69
+ end