regexp_parser 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
data/Rakefile CHANGED
@@ -18,7 +18,9 @@ Bundler::GemHelper.install_tasks
18
18
  task :default => [:test]
19
19
 
20
20
  Rake::TestTask.new('test') do |t|
21
- t.description = "Run all unit tests under the test directory"
21
+ if t.respond_to?(:description)
22
+ t.description = "Run all unit tests under the test directory"
23
+ end
22
24
 
23
25
  t.libs << "test"
24
26
  t.test_files = FileList['test/test_all.rb']
@@ -27,7 +29,9 @@ end
27
29
  namespace :test do
28
30
  %w{scanner lexer parser expression syntax}.each do |component|
29
31
  Rake::TestTask.new(component) do |t|
30
- t.description = "Run all #{component} unit tests under the test/#{component} directory"
32
+ if t.respond_to?(:description)
33
+ t.description = "Run all #{component} unit tests under the test/#{component} directory"
34
+ end
31
35
 
32
36
  t.libs << "test"
33
37
  t.test_files = ["test/#{component}/test_all.rb"]
@@ -35,7 +39,9 @@ namespace :test do
35
39
  end
36
40
 
37
41
  Rake::TestTask.new('full' => 'ragel:rb') do |t|
38
- t.description = "Regenerate the scanner and run all unit tests under the test directory"
42
+ if t.respond_to?(:description)
43
+ t.description = "Regenerate the scanner and run all unit tests under the test directory"
44
+ end
39
45
 
40
46
  t.libs << "test"
41
47
  t.test_files = FileList['test/test_all.rb']
data/lib/regexp_parser.rb CHANGED
@@ -1,12 +1,8 @@
1
- require 'yaml'
1
+ # encoding: utf-8
2
2
 
3
- class Regexp
4
- module Parser
5
- VERFILE = File.expand_path('../../VERSION.yml', __FILE__)
6
- VERSION = YAML.load(File.read(VERFILE)).values.compact.join('.')
7
- end
8
- end
9
-
10
- %w{token ctype scanner syntax lexer parser}.each do |file|
11
- require File.expand_path("../regexp_parser/#{file}", __FILE__)
12
- end
3
+ require 'regexp_parser/version'
4
+ require 'regexp_parser/token'
5
+ require 'regexp_parser/scanner'
6
+ require 'regexp_parser/syntax'
7
+ require 'regexp_parser/lexer'
8
+ require 'regexp_parser/parser'
@@ -2,18 +2,21 @@ module Regexp::Expression
2
2
 
3
3
  class Base
4
4
  attr_accessor :type, :token
5
- attr_accessor :level, :text, :ts
5
+ attr_accessor :text, :ts
6
+ attr_accessor :level, :set_level, :conditional_level
6
7
 
7
8
  attr_accessor :quantifier
8
9
  attr_accessor :options
9
10
 
10
11
  def initialize(token)
11
- @type = token.type
12
- @token = token.token
13
- @text = token.text
14
- @ts = token.ts
15
- @level = token.level
16
- @options = nil
12
+ @type = token.type
13
+ @token = token.token
14
+ @text = token.text
15
+ @ts = token.ts
16
+ @level = token.level
17
+ @set_level = token.set_level
18
+ @conditional_level = token.conditional_level
19
+ @options = nil
17
20
  end
18
21
 
19
22
  def clone
@@ -106,6 +109,47 @@ module Regexp::Expression
106
109
  end
107
110
  alias :x? :free_spacing?
108
111
  alias :extended? :free_spacing?
112
+
113
+ if RUBY_VERSION >= '2.0'
114
+ def default_classes?
115
+ (@options and @options[:d]) ? true : false
116
+ end
117
+ alias :d? :default_classes?
118
+
119
+ def ascii_classes?
120
+ (@options and @options[:a]) ? true : false
121
+ end
122
+ alias :a? :ascii_classes?
123
+
124
+ def unicode_classes?
125
+ (@options and @options[:u]) ? true : false
126
+ end
127
+ alias :u? :unicode_classes?
128
+ end
129
+
130
+ def matches?(string)
131
+ Regexp.new(to_s) =~ string ? true : false
132
+ end
133
+
134
# Matches +string+ against a Regexp built from this expression's text.
#
# string - the String to match against.
# offset - position in +string+ at which the search starts. Defaults to 0
#          so the method can be called with one argument, which is what
#          the =~ alias below requires.
#
# Returns the MatchData of the match, or nil when there is none. Note that
# the =~ alias therefore also returns MatchData (not an index).
def match(string, offset = 0)
  Regexp.new(to_s).match(string, offset)
end
alias :=~ :match
138
+
139
+ def to_h
140
+ {
141
+ :type => @type,
142
+ :token => @token,
143
+ :text => to_s(:base),
144
+ :starts_at => @ts,
145
+ :length => full_length,
146
+ :level => @level,
147
+ :set_level => @set_level,
148
+ :conditional_level => @conditional_level,
149
+ :options => @options,
150
+ :quantifier => quantified? ? @quantifier.to_h : nil
151
+ }
152
+ end
109
153
  end
110
154
 
111
155
  def self.parsed(exp)
@@ -125,10 +169,24 @@ module Regexp::Expression
125
169
 
126
170
  end # module Regexp::Expression
127
171
 
128
-
129
- [ # Order is important
130
- '/expression/*.rb',
131
- '/expression/classes/*.rb',
132
- ].each do |path|
133
- Dir[File.join(File.dirname(__FILE__), path)].each {|f| require f }
134
- end
172
+ require 'regexp_parser/expression/methods/tests'
173
+ require 'regexp_parser/expression/methods/traverse'
174
+ require 'regexp_parser/expression/methods/strfregexp'
175
+
176
+ require 'regexp_parser/expression/quantifier'
177
+ require 'regexp_parser/expression/subexpression'
178
+ require 'regexp_parser/expression/sequence'
179
+
180
+ require 'regexp_parser/expression/classes/alternation'
181
+ require 'regexp_parser/expression/classes/anchor'
182
+ require 'regexp_parser/expression/classes/backref'
183
+ require 'regexp_parser/expression/classes/conditional'
184
+ require 'regexp_parser/expression/classes/escape'
185
+ require 'regexp_parser/expression/classes/free_space'
186
+ require 'regexp_parser/expression/classes/group'
187
+ require 'regexp_parser/expression/classes/keep'
188
+ require 'regexp_parser/expression/classes/literal'
189
+ require 'regexp_parser/expression/classes/property'
190
+ require 'regexp_parser/expression/classes/root'
191
+ require 'regexp_parser/expression/classes/set'
192
+ require 'regexp_parser/expression/classes/type'
@@ -12,7 +12,7 @@ module Regexp::Expression
12
12
  end
13
13
 
14
14
  def alternative(exp = nil)
15
- @expressions << (exp ? exp : Sequence.new)
15
+ @expressions << (exp ? exp : Alternative.new(level, set_level, conditional_level))
16
16
  end
17
17
 
18
18
  def alternatives
@@ -28,20 +28,7 @@ module Regexp::Expression
28
28
  end
29
29
  end
30
30
 
31
- # A sequence of expressions, used by alternations as one alternative.
32
- # TODO: perhaps rename this to Alternative?
33
- class Sequence < Regexp::Expression::Subexpression
34
- def initialize
35
- super Regexp::Token.new(:expression, :sequence, '')
36
- end
37
-
38
- def starts_at
39
- @expressions.first.starts_at
40
- end
41
-
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
43
- last.quantify(token, text, min, max, mode)
44
- end
45
- end
31
+ # A sequence of expressions, used by Alternation as one of its alternatives.
32
+ class Alternative < Regexp::Expression::Sequence; end
46
33
 
47
34
  end
@@ -0,0 +1,57 @@
1
module Regexp::Expression

  # Expression classes for conditional groups: a condition followed by at
  # most two branches.
  module Conditional
    # Raised by Expression#branch when a third branch is requested.
    class TooManyBranches < StandardError
      def initialize
        super('The conditional expression has more than 2 branches')
      end
    end

    # The condition part of a conditional expression.
    class Condition < Regexp::Expression::Base; end

    # One branch of a conditional expression.
    class Branch < Regexp::Expression::Sequence; end

    # The conditional expression itself. Holds the condition and the
    # branches, both of which are also stored in @expressions.
    class Expression < Regexp::Expression::Subexpression
      def initialize(token)
        super(token)

        @condition = nil
        @branches  = []
      end

      # Without an argument, returns the current condition. With an
      # argument, stores it as the condition and appends it to the
      # sub-expressions.
      def condition(exp = nil)
        return @condition unless exp
        @condition = exp
        @expressions << exp
      end

      # Appends exp to the most recently added sub-expression.
      def <<(exp)
        @expressions.last << exp
      end

      # Starts a new, empty branch one conditional level deeper than this
      # expression and registers it as a sub-expression.
      #
      # Raises TooManyBranches if two branches already exist.
      #
      # NOTE(review): the exp argument is accepted but not used; kept for
      # interface compatibility -- confirm whether callers rely on it.
      def branch(exp = nil)
        raise TooManyBranches if @branches.length == 2

        sequence = Branch.new(level, set_level, conditional_level + 1)

        @expressions << sequence
        @branches << @expressions.last
      end

      # Returns the branches added so far.
      def branches
        @branches
      end

      # Forwards the quantifier to the last expression of the last branch.
      def quantify(token, text, min = nil, max = nil, mode = :greedy)
        branches.last.last.quantify(token, text, min, max, mode)
      end

      # Returns the text of the conditional: opening text, condition text,
      # the branches joined by '|', and the closing parenthesis.
      #
      # format - :full (default) appends this expression's own quantifier
      #          when it has one; :base omits it. The parameter is needed
      #          because Base#to_h and Base#strfregexp call to_s(:base) on
      #          every expression.
      def to_s(format = :full)
        s = @text.dup
        s << @condition.text
        s << branches.map {|e| e.to_s }.join('|')
        s << ')'

        unless format == :base
          s << @quantifier.to_s if quantified?
        end

        s
      end
    end
  end

end
@@ -0,0 +1,17 @@
1
+ module Regexp::Expression
2
+
3
+ class FreeSpace < Regexp::Expression::Base
4
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
5
+ raise "Can not quantify a free space object"
6
+ end
7
+ end
8
+
9
+ class Comment < Regexp::Expression::FreeSpace; end
10
+
11
+ class WhiteSpace < Regexp::Expression::FreeSpace
12
+ def merge(exp)
13
+ @text << exp.text
14
+ end
15
+ end
16
+
17
+ end
@@ -0,0 +1,7 @@
1
+ module Regexp::Expression
2
+
3
+ module Keep
4
+ class Mark < Regexp::Expression::Base; end
5
+ end
6
+
7
+ end
@@ -72,6 +72,33 @@ module Regexp::Expression
72
72
  @closed
73
73
  end
74
74
 
75
+ # Returns an array of the members with any shorthand members like \d and \W
76
+ # expanded to either traditional form or unicode properties.
77
+ def expand_members(use_properties = false)
78
+ @members.map do |member|
79
+ case member
80
+ when "\\d"
81
+ use_properties ? '\p{Digit}' : '0-9'
82
+ when "\\D"
83
+ use_properties ? '\P{Digit}' : '^0-9'
84
+ when "\\w"
85
+ use_properties ? '\p{Word}' : 'A-Za-z0-9_'
86
+ when "\\W"
87
+ use_properties ? '\P{Word}' : '^A-Za-z0-9_'
88
+ when "\\s"
89
+ use_properties ? '\p{Space}' : ' \t\f\v\n\r'
90
+ when "\\S"
91
+ use_properties ? '\P{Space}' : '^ \t\f\v\n\r'
92
+ when "\\h"
93
+ use_properties ? '\p{Xdigit}' : '0-9A-Fa-f'
94
+ when "\\H"
95
+ use_properties ? '\P{Xdigit}' : '^0-9A-Fa-f'
96
+ else
97
+ member
98
+ end
99
+ end
100
+ end
101
+
75
102
  def to_s(format = :full)
76
103
  s = ''
77
104
 
@@ -80,18 +107,12 @@ module Regexp::Expression
80
107
  s << @members.join
81
108
  s << ']'
82
109
 
83
- case format
84
- when :base
85
- else
110
+ unless format == :base
86
111
  s << @quantifier.to_s if quantified?
87
112
  end
88
113
 
89
114
  s
90
115
  end
91
-
92
- def matches?(input)
93
- input =~ /#{to_s}/ ? true : false
94
- end
95
116
  end
96
117
 
97
118
  class CharacterSubSet < CharacterSet
@@ -0,0 +1,113 @@
1
module Regexp::Expression

  class Base

    # Returns a copy of +format+ with the %-fields below replaced by
    # information about this expression (in the spirit of strftime).
    #
    #   %l  Level (depth) of the expression. Returns 'root' for the root
    #       expression, returns zero or higher for all others.
    #
    #   %>  Indentation at expression's level.
    #
    #   %x  Index of the expression at its depth. Only available when an
    #       index is passed (as strfregexp_tree does); rendered as '<?x?>'
    #       otherwise.
    #
    #   %s  Start offset within the whole expression.
    #   %e  End offset within the whole expression.
    #   %S  Length of expression.
    #
    #   %o  Coded offset and length (the value of coded_offset).
    #
    #   %y  Type of expression.
    #   %k  Token of expression.
    #   %i  ID, same as '%y:%k'
    #   %c  Class name
    #
    #   %q  Quantifier info, as {m[,M]}
    #   %Q  Quantifier text
    #
    #   %z  Quantifier min
    #   %Z  Quantifier max
    #
    #   %t  Base text of the expression (excludes quantifier, if any)
    #   %~t Full text if the expression is terminal, otherwise %i
    #   %T  Full text of the expression (includes quantifier, if any)
    #
    #   %b  Basic info, same as '%o %i'
    #   %m  Most info, same as '%b %q'
    #   %a  All info, same as '%m %t'
    #
    # format        - the format String, '%a' by default.
    # indent_offset - extra indentation levels added to %>.
    # index         - value for %x, or nil when there is none.
    def strfregexp(format = '%a', indent_offset = 0, index = nil)
      out = format.dup

      # Composite fields are shorthand for other fields. Expand them first,
      # outermost to innermost, so that only leaf fields remain.
      [
        ['a', '%m %t'],
        ['m', '%b %q'],
        ['b', '%o %i'],
        ['i', '%y:%k']
      ].each do |code, expansion|
        out.gsub!("%#{code}") { expansion }
      end

      part = strfregexp_parts(indent_offset, index)

      # Leaf fields are substituted in a single pass, so a '%' sequence that
      # is part of the expression's own text is never expanded again. The
      # block form is used because a replacement *string* would interpret
      # backslashes (e.g. the text '\1' would be treated as a group
      # reference and vanish).
      out.gsub!(/%(?:~t|[lxseSoykcqQzZtT>])/) do |field|
        code = field[1..-1]
        part.key?(code) ? part[code].to_s : "<?#{code}?>"
      end

      out
    end

    alias :strfre :strfregexp

    # Builds the Hash of leaf field values used by strfregexp, keyed by
    # field code. The 'x' key is only present when an index was given.
    def strfregexp_parts(indent_offset, index)
      part = {}

      part['>'] = level ? (' ' * (level + indent_offset)) : ''

      part['l'] = level ? ('%d' % level) : 'root'
      part['x'] = ('%d' % index) if index

      part['s'] = starts_at
      part['S'] = full_length
      part['e'] = starts_at + full_length
      part['o'] = coded_offset

      part['k'] = token
      part['y'] = type
      part['c'] = self.class.name

      if quantified?
        if quantifier.max == -1
          part['q'] = "{#{quantifier.min}, or-more}"
        else
          part['q'] = "{#{quantifier.min}, #{quantifier.max}}"
        end

        part['Q'] = quantifier.text
        part['z'] = quantifier.min
        part['Z'] = quantifier.max
      else
        part['q'] = '{1}'
        part['Q'] = ''
        part['z'] = '1'
        part['Z'] = '1'
      end

      part['t']  = to_s(:base)
      part['~t'] = terminal? ? to_s : "#{type}:#{token}"
      part['T']  = to_s(:full)

      part
    end
    private :strfregexp_parts
  end

  class Subexpression < Regexp::Expression::Base
    # Formats this expression (when include_self is true) and the
    # expressions yielded by #map, one entry each, joined by +separator+.
    # Yielded expressions are indented one extra level when the
    # expression itself is included.
    #
    # NOTE(review): relies on #map yielding (expression, index) pairs; that
    # method is not defined in this file -- confirm against its definition.
    def strfregexp_tree(format = '%a', include_self = true, separator = "\n")
      output = include_self ? [self.strfregexp(format)] : []

      output += map {|exp, index|
        exp.strfregexp(format, (include_self ? 1 : 0), index)
      }

      output.join(separator)
    end

    alias :strfre_tree :strfregexp_tree
  end
end