regexp_parser 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/ChangeLog +57 -0
  3. data/Gemfile +8 -0
  4. data/LICENSE +1 -1
  5. data/README.md +225 -206
  6. data/Rakefile +9 -3
  7. data/lib/regexp_parser.rb +7 -11
  8. data/lib/regexp_parser/expression.rb +72 -14
  9. data/lib/regexp_parser/expression/classes/alternation.rb +3 -16
  10. data/lib/regexp_parser/expression/classes/conditional.rb +57 -0
  11. data/lib/regexp_parser/expression/classes/free_space.rb +17 -0
  12. data/lib/regexp_parser/expression/classes/keep.rb +7 -0
  13. data/lib/regexp_parser/expression/classes/set.rb +28 -7
  14. data/lib/regexp_parser/expression/methods/strfregexp.rb +113 -0
  15. data/lib/regexp_parser/expression/methods/tests.rb +116 -0
  16. data/lib/regexp_parser/expression/methods/traverse.rb +63 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -0
  18. data/lib/regexp_parser/expression/sequence.rb +45 -0
  19. data/lib/regexp_parser/expression/subexpression.rb +29 -1
  20. data/lib/regexp_parser/lexer.rb +31 -8
  21. data/lib/regexp_parser/parser.rb +118 -45
  22. data/lib/regexp_parser/scanner.rb +1745 -1404
  23. data/lib/regexp_parser/scanner/property.rl +57 -3
  24. data/lib/regexp_parser/scanner/scanner.rl +161 -34
  25. data/lib/regexp_parser/syntax.rb +12 -2
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +3 -3
  27. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +2 -7
  28. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -1
  29. data/lib/regexp_parser/syntax/ruby/2.1.4.rb +13 -0
  30. data/lib/regexp_parser/syntax/ruby/2.1.5.rb +13 -0
  31. data/lib/regexp_parser/syntax/ruby/2.1.rb +2 -2
  32. data/lib/regexp_parser/syntax/ruby/2.2.0.rb +16 -0
  33. data/lib/regexp_parser/syntax/ruby/2.2.rb +8 -0
  34. data/lib/regexp_parser/syntax/tokens.rb +19 -2
  35. data/lib/regexp_parser/syntax/tokens/conditional.rb +22 -0
  36. data/lib/regexp_parser/syntax/tokens/keep.rb +14 -0
  37. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +45 -4
  38. data/lib/regexp_parser/token.rb +23 -8
  39. data/lib/regexp_parser/version.rb +5 -0
  40. data/regexp_parser.gemspec +35 -0
  41. data/test/expression/test_all.rb +6 -1
  42. data/test/expression/test_base.rb +19 -0
  43. data/test/expression/test_conditionals.rb +114 -0
  44. data/test/expression/test_free_space.rb +33 -0
  45. data/test/expression/test_set.rb +61 -0
  46. data/test/expression/test_strfregexp.rb +214 -0
  47. data/test/expression/test_subexpression.rb +24 -0
  48. data/test/expression/test_tests.rb +99 -0
  49. data/test/expression/test_to_h.rb +48 -0
  50. data/test/expression/test_to_s.rb +46 -0
  51. data/test/expression/test_traverse.rb +164 -0
  52. data/test/lexer/test_all.rb +16 -3
  53. data/test/lexer/test_conditionals.rb +101 -0
  54. data/test/lexer/test_keep.rb +24 -0
  55. data/test/lexer/test_literals.rb +51 -51
  56. data/test/lexer/test_nesting.rb +62 -62
  57. data/test/lexer/test_refcalls.rb +18 -20
  58. data/test/parser/test_all.rb +18 -3
  59. data/test/parser/test_alternation.rb +11 -14
  60. data/test/parser/test_conditionals.rb +148 -0
  61. data/test/parser/test_escapes.rb +29 -5
  62. data/test/parser/test_free_space.rb +139 -0
  63. data/test/parser/test_groups.rb +40 -0
  64. data/test/parser/test_keep.rb +21 -0
  65. data/test/scanner/test_all.rb +8 -2
  66. data/test/scanner/test_conditionals.rb +166 -0
  67. data/test/scanner/test_escapes.rb +8 -5
  68. data/test/scanner/test_free_space.rb +133 -0
  69. data/test/scanner/test_groups.rb +28 -0
  70. data/test/scanner/test_keep.rb +33 -0
  71. data/test/scanner/test_properties.rb +4 -0
  72. data/test/scanner/test_scripts.rb +71 -1
  73. data/test/syntax/ruby/test_1.9.3.rb +2 -2
  74. data/test/syntax/ruby/test_2.0.0.rb +38 -0
  75. data/test/syntax/ruby/test_2.2.0.rb +38 -0
  76. data/test/syntax/ruby/test_all.rb +1 -8
  77. data/test/syntax/ruby/test_files.rb +104 -0
  78. data/test/test_all.rb +2 -1
  79. data/test/token/test_all.rb +2 -0
  80. data/test/token/test_token.rb +109 -0
  81. metadata +75 -21
  82. data/VERSION.yml +0 -5
  83. data/lib/regexp_parser/ctype.rb +0 -48
  84. data/test/syntax/ruby/test_2.x.rb +0 -46
data/Rakefile CHANGED
@@ -18,7 +18,9 @@ Bundler::GemHelper.install_tasks
18
18
  task :default => [:test]
19
19
 
20
20
  Rake::TestTask.new('test') do |t|
21
- t.description = "Run all unit tests under the test directory"
21
+ if t.respond_to?(:description)
22
+ t.description = "Run all unit tests under the test directory"
23
+ end
22
24
 
23
25
  t.libs << "test"
24
26
  t.test_files = FileList['test/test_all.rb']
@@ -27,7 +29,9 @@ end
27
29
  namespace :test do
28
30
  %w{scanner lexer parser expression syntax}.each do |component|
29
31
  Rake::TestTask.new(component) do |t|
30
- t.description = "Run all #{component} unit tests under the test/#{component} directory"
32
+ if t.respond_to?(:description)
33
+ t.description = "Run all #{component} unit tests under the test/#{component} directory"
34
+ end
31
35
 
32
36
  t.libs << "test"
33
37
  t.test_files = ["test/#{component}/test_all.rb"]
@@ -35,7 +39,9 @@ namespace :test do
35
39
  end
36
40
 
37
41
  Rake::TestTask.new('full' => 'ragel:rb') do |t|
38
- t.description = "Regenerate the scanner and run all unit tests under the test directory"
42
+ if t.respond_to?(:description)
43
+ t.description = "Regenerate the scanner and run all unit tests under the test directory"
44
+ end
39
45
 
40
46
  t.libs << "test"
41
47
  t.test_files = FileList['test/test_all.rb']
data/lib/regexp_parser.rb CHANGED
@@ -1,12 +1,8 @@
1
- require 'yaml'
1
+ # encoding: utf-8
2
2
 
3
- class Regexp
4
- module Parser
5
- VERFILE = File.expand_path('../../VERSION.yml', __FILE__)
6
- VERSION = YAML.load(File.read(VERFILE)).values.compact.join('.')
7
- end
8
- end
9
-
10
- %w{token ctype scanner syntax lexer parser}.each do |file|
11
- require File.expand_path("../regexp_parser/#{file}", __FILE__)
12
- end
3
+ require 'regexp_parser/version'
4
+ require 'regexp_parser/token'
5
+ require 'regexp_parser/scanner'
6
+ require 'regexp_parser/syntax'
7
+ require 'regexp_parser/lexer'
8
+ require 'regexp_parser/parser'
@@ -2,18 +2,21 @@ module Regexp::Expression
2
2
 
3
3
  class Base
4
4
  attr_accessor :type, :token
5
- attr_accessor :level, :text, :ts
5
+ attr_accessor :text, :ts
6
+ attr_accessor :level, :set_level, :conditional_level
6
7
 
7
8
  attr_accessor :quantifier
8
9
  attr_accessor :options
9
10
 
10
11
  def initialize(token)
11
- @type = token.type
12
- @token = token.token
13
- @text = token.text
14
- @ts = token.ts
15
- @level = token.level
16
- @options = nil
12
+ @type = token.type
13
+ @token = token.token
14
+ @text = token.text
15
+ @ts = token.ts
16
+ @level = token.level
17
+ @set_level = token.set_level
18
+ @conditional_level = token.conditional_level
19
+ @options = nil
17
20
  end
18
21
 
19
22
  def clone
@@ -106,6 +109,47 @@ module Regexp::Expression
106
109
  end
107
110
  alias :x? :free_spacing?
108
111
  alias :extended? :free_spacing?
112
+
113
if RUBY_VERSION >= '2.0'
  # Option predicates that are only defined on Ruby 2.0 and later.
  # Each one reports whether the matching key in @options holds a
  # truthy value, and always answers with a strict true/false.
  # NOTE(review): presumably these mirror the (?d), (?a) and (?u)
  # regexp option flags -- confirm against the parser.

  # True when @options is set and its :d entry is truthy.
  def default_classes?
    !!(@options && @options[:d])
  end
  alias :d? :default_classes?

  # True when @options is set and its :a entry is truthy.
  def ascii_classes?
    !!(@options && @options[:a])
  end
  alias :a? :ascii_classes?

  # True when @options is set and its :u entry is truthy.
  def unicode_classes?
    !!(@options && @options[:u])
  end
  alias :u? :unicode_classes?
end
129
+
130
# Compiles this expression's text (as returned by to_s) into a Regexp
# and reports whether it matches anywhere in +string+.
#
# Returns true on a match, false otherwise.
def matches?(string)
  pattern = Regexp.new(to_s)
  (pattern =~ string).nil? ? false : true
end
133
+
134
# Compiles this expression's text (as returned by to_s) into a Regexp
# and matches it against +string+, starting the search at +offset+.
#
# string - the String to search.
# offset - position in +string+ at which the search starts. Defaults to
#          0 so the method can also be invoked through the one-argument
#          =~ operator alias below; with a required offset,
#          `expression =~ string` raised ArgumentError.
#
# Returns the MatchData on success, nil otherwise.
def match(string, offset = 0)
  Regexp.new(to_s).match(string, offset)
end
alias :=~ :match
138
+
139
# Builds a Hash snapshot of this expression.
#
# :text is the base text (to_s(:base)), :starts_at is @ts, :length is
# full_length, and :quantifier is the quantifier's own to_h when the
# expression is quantified, nil otherwise. The remaining entries are
# copied from the instance variables of the same name.
def to_h
  info = {}

  info[:type]              = @type
  info[:token]             = @token
  info[:text]              = to_s(:base)
  info[:starts_at]         = @ts
  info[:length]            = full_length
  info[:level]             = @level
  info[:set_level]         = @set_level
  info[:conditional_level] = @conditional_level
  info[:options]           = @options
  info[:quantifier]        = (@quantifier.to_h if quantified?)

  info
end
109
153
  end
110
154
 
111
155
  def self.parsed(exp)
@@ -125,10 +169,24 @@ module Regexp::Expression
125
169
 
126
170
  end # module Regexp::Expression
127
171
 
128
-
129
- [ # Order is important
130
- '/expression/*.rb',
131
- '/expression/classes/*.rb',
132
- ].each do |path|
133
- Dir[File.join(File.dirname(__FILE__), path)].each {|f| require f }
134
- end
172
+ require 'regexp_parser/expression/methods/tests'
173
+ require 'regexp_parser/expression/methods/traverse'
174
+ require 'regexp_parser/expression/methods/strfregexp'
175
+
176
+ require 'regexp_parser/expression/quantifier'
177
+ require 'regexp_parser/expression/subexpression'
178
+ require 'regexp_parser/expression/sequence'
179
+
180
+ require 'regexp_parser/expression/classes/alternation'
181
+ require 'regexp_parser/expression/classes/anchor'
182
+ require 'regexp_parser/expression/classes/backref'
183
+ require 'regexp_parser/expression/classes/conditional'
184
+ require 'regexp_parser/expression/classes/escape'
185
+ require 'regexp_parser/expression/classes/free_space'
186
+ require 'regexp_parser/expression/classes/group'
187
+ require 'regexp_parser/expression/classes/keep'
188
+ require 'regexp_parser/expression/classes/literal'
189
+ require 'regexp_parser/expression/classes/property'
190
+ require 'regexp_parser/expression/classes/root'
191
+ require 'regexp_parser/expression/classes/set'
192
+ require 'regexp_parser/expression/classes/type'
@@ -12,7 +12,7 @@ module Regexp::Expression
12
12
  end
13
13
 
14
14
  def alternative(exp = nil)
15
- @expressions << (exp ? exp : Sequence.new)
15
+ @expressions << (exp ? exp : Alternative.new(level, set_level, conditional_level))
16
16
  end
17
17
 
18
18
  def alternatives
@@ -28,20 +28,7 @@ module Regexp::Expression
28
28
  end
29
29
  end
30
30
 
31
- # A sequence of expressions, used by alternations as one alternative.
32
- # TODO: perhaps rename this to Alternative?
33
- class Sequence < Regexp::Expression::Subexpression
34
- def initialize
35
- super Regexp::Token.new(:expression, :sequence, '')
36
- end
37
-
38
- def starts_at
39
- @expressions.first.starts_at
40
- end
41
-
42
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
43
- last.quantify(token, text, min, max, mode)
44
- end
45
- end
31
+ # A sequence of expressions, used by Alternation as one of its alternatives.
32
+ class Alternative < Regexp::Expression::Sequence; end
46
33
 
47
34
  end
@@ -0,0 +1,57 @@
1
module Regexp::Expression

  # Expression classes for conditional groups: a condition followed by at
  # most two branches.
  module Conditional
    # Raised by Expression#branch when a third branch is requested.
    class TooManyBranches < StandardError
      def initialize
        super('The conditional expression has more than 2 branches')
      end
    end

    # The condition part of a conditional expression.
    class Condition < Regexp::Expression::Base; end

    # One branch of a conditional expression.
    class Branch < Regexp::Expression::Sequence; end

    # The conditional expression itself. It keeps its condition and its
    # branches both in @expressions (in insertion order) and in the
    # dedicated @condition / @branches variables.
    class Expression < Regexp::Expression::Subexpression
      def initialize(token)
        super(token)

        @condition = nil
        @branches  = []
      end

      # Reader and writer in one: without an argument returns the current
      # condition; with +exp+ stores it as the condition and appends it
      # to @expressions.
      def condition(exp = nil)
        return @condition unless exp
        @condition = exp
        @expressions << exp
      end

      # Appends +exp+ to the most recently added member of @expressions.
      # NOTE(review): this presumably expects a branch to have been opened
      # first, so that the last member is a Branch -- confirm with callers.
      def <<(exp)
        @expressions.last << exp
      end

      # Opens a new, empty Branch one conditional level deeper than this
      # expression and registers it in both @expressions and @branches.
      #
      # exp - accepted but currently ignored; a fresh Branch is always
      #       created.
      #
      # Raises TooManyBranches if two branches already exist.
      def branch(exp = nil)
        raise TooManyBranches.new if @branches.length == 2

        sequence = Branch.new(level, set_level, conditional_level + 1)

        @expressions << sequence
        @branches << @expressions.last
      end

      # The branches added so far (at most two).
      def branches
        @branches
      end

      # Delegates quantification to the last expression of the last branch.
      def quantify(token, text, min = nil, max = nil, mode = :greedy)
        branches.last.last.quantify(token, text, min, max, mode)
      end

      # Renders the conditional as text: the opening text, the condition
      # text, the branches joined by '|', and the closing parenthesis.
      #
      # format - accepted for compatibility with the to_s(format) calls
      #          made by Base#to_h and Base#strfregexp (to_s(:base),
      #          to_s(:full)); a zero-argument to_s made those calls raise
      #          ArgumentError for conditional expressions. The output does
      #          not depend on it, because quantifiers are delegated to the
      #          last branch (see #quantify).
      def to_s(format = :full)
        s = @text.dup
        s << @condition.text
        s << branches.map{|e| e.to_s}.join('|')
        s << ')'
      end
    end
  end

end
@@ -0,0 +1,17 @@
1
module Regexp::Expression

  # Common base for the free-space expression classes below. Objects of
  # this kind refuse to be quantified.
  class FreeSpace < Regexp::Expression::Base
    # Always raises a RuntimeError; the arguments are ignored.
    def quantify(token, text, min = nil, max = nil, mode = :greedy)
      raise RuntimeError, "Can not quantify a free space object"
    end
  end

  # A comment expression.
  class Comment < Regexp::Expression::FreeSpace
  end

  # A whitespace expression that can absorb the text of another one.
  class WhiteSpace < Regexp::Expression::FreeSpace
    # Appends the text of +exp+ to this expression's own text, in place.
    def merge(exp)
      addition = exp.text
      @text << addition
    end
  end

end
@@ -0,0 +1,7 @@
1
module Regexp::Expression

  # Namespace for the keep expression classes.
  module Keep
    # Expression class for a keep mark. It adds nothing to Base.
    # NOTE(review): presumably represents the \K escape -- confirm
    # against the scanner/parser.
    class Mark < Regexp::Expression::Base
    end
  end

end
@@ -72,6 +72,33 @@ module Regexp::Expression
72
72
  @closed
73
73
  end
74
74
 
75
# Maps over @members and returns a new array in which the shorthand
# members (\d, \D, \w, \W, \s, \S, \h, \H) are replaced by an expanded
# form; every other member is returned untouched.
#
# use_properties - when true the shorthands become unicode property
#                  expressions (e.g. \p{Digit}); otherwise they become
#                  traditional ranges (e.g. 0-9).
def expand_members(use_properties = false)
  # Each shorthand maps to a [property form, traditional form] pair.
  slot = use_properties ? 0 : 1

  @members.map do |member|
    forms =
      case member
      when "\\d" then ['\p{Digit}',  '0-9']
      when "\\D" then ['\P{Digit}',  '^0-9']
      when "\\w" then ['\p{Word}',   'A-Za-z0-9_']
      when "\\W" then ['\P{Word}',   '^A-Za-z0-9_']
      when "\\s" then ['\p{Space}',  ' \t\f\v\n\r']
      when "\\S" then ['\P{Space}',  '^ \t\f\v\n\r']
      when "\\h" then ['\p{Xdigit}', '0-9A-Fa-f']
      when "\\H" then ['\P{Xdigit}', '^0-9A-Fa-f']
      end

    forms ? forms[slot] : member
  end
end
101
+
75
102
  def to_s(format = :full)
76
103
  s = ''
77
104
 
@@ -80,18 +107,12 @@ module Regexp::Expression
80
107
  s << @members.join
81
108
  s << ']'
82
109
 
83
- case format
84
- when :base
85
- else
110
+ unless format == :base
86
111
  s << @quantifier.to_s if quantified?
87
112
  end
88
113
 
89
114
  s
90
115
  end
91
-
92
- def matches?(input)
93
- input =~ /#{to_s}/ ? true : false
94
- end
95
116
  end
96
117
 
97
118
  class CharacterSubSet < CharacterSet
@@ -0,0 +1,113 @@
1
module Regexp::Expression

  class Base

    # Formats information about this expression according to +format+,
    # in the spirit of strftime. Supported fields:
    #
    #   %l  Level (depth) of the expression. Returns 'root' for the root
    #       expression, returns zero or higher for all others.
    #
    #   %>  Indentation at expression's level.
    #
    #   %x  Index of the expression at its depth. Available when using
    #       the strfregexp_tree method only (or when an index is passed
    #       explicitly); otherwise rendered as the placeholder '<?x?>'.
    #
    #   %s  Start offset within the whole expression.
    #   %e  End offset within the whole expression.
    #   %S  Length of expression.
    #
    #   %o  Coded offset and length, same as '@%s+%S'
    #
    #   %y  Type of expression.
    #   %k  Token of expression.
    #   %i  ID, same as '%y:%k'
    #   %c  Class name
    #
    #   %q  Quantifier info, as {m[,M]}
    #   %Q  Quantifier text
    #
    #   %z  Quantifier min
    #   %Z  Quantifier max
    #
    #   %t  Base text of the expression (excludes quantifier, if any)
    #   %~t Full text if the expression is terminal, otherwise %i
    #   %T  Full text of the expression (includes quantifier, if any)
    #
    #   %b  Basic info, same as '%o %i'
    #   %m  Most info, same as '%b %q'
    #   %a  All info, same as '%m %t'
    #
    # format        - the format String (default '%a').
    # indent_offset - added to the level when computing the %> indentation.
    # index         - value for %x, or nil when not available.
    #
    # Returns a new String; +format+ itself is not modified.
    def strfregexp(format = '%a', indent_offset = 0, index = nil)
      have_index = index ? true : false

      part = {}

      # Order is important! Fields that use other fields in their
      # definition must appear before the fields they use.
      part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}

      # Seed every field with a visible placeholder so that a field with
      # no value (e.g. %x without an index) shows up as '<?x?>'. This must
      # iterate part_keys: part itself is still empty at this point.
      part_keys.each {|k| part[k] = "<?#{k}?>"}

      part['>'] = level ? (' ' * (level + indent_offset)) : ''

      part['l'] = level ? "#{'%d' % level}" : 'root'
      part['x'] = "#{'%d' % index}" if have_index

      part['s'] = starts_at
      part['S'] = full_length
      part['e'] = starts_at + full_length
      part['o'] = coded_offset

      part['k'] = token
      part['y'] = type
      part['i'] = '%y:%k'
      part['c'] = self.class.name

      if quantified?
        if quantifier.max == -1
          part['q'] = "{#{quantifier.min}, or-more}"
        else
          part['q'] = "{#{quantifier.min}, #{quantifier.max}}"
        end

        part['Q'] = quantifier.text
        part['z'] = quantifier.min
        part['Z'] = quantifier.max
      else
        part['q'] = '{1}'
        part['Q'] = ''
        part['z'] = '1'
        part['Z'] = '1'
      end

      part['t']  = to_s(:base)
      part['~t'] = terminal? ? to_s : "#{type}:#{token}"
      part['T']  = to_s(:full)

      part['b'] = '%o %i'
      part['m'] = '%b %q'
      part['a'] = '%m %t'

      out = format.dup

      part_keys.each do |k|
        # Block form on purpose: a replacement *string* is scanned for
        # backslash sequences (\\, \1, \k<name>, ...), which would corrupt
        # expression text that legitimately contains backslashes.
        out.gsub!(/%#{k}/) { part[k].to_s }
      end

      out
    end

    alias :strfre :strfregexp
  end

  class Subexpression < Regexp::Expression::Base
    # Formats this expression's tree, one strfregexp result per expression.
    #
    # format       - the format String handed to strfregexp for each one.
    # include_self - when true, the first entry describes this expression
    #                and the others are indented one extra level.
    # separator    - String placed between the entries.
    #
    # Returns the joined String.
    def strfregexp_tree(format = '%a', include_self = true, separator = "\n")
      output = include_self ? [self.strfregexp(format)] : []

      # NOTE(review): relies on this class's own #map yielding
      # (expression, index) pairs; it is defined outside this file --
      # confirm it is not the one-argument Enumerable#map.
      output += map {|exp, index|
        exp.strfregexp(format, (include_self ? 1 : 0), index)
      }

      output.join(separator)
    end

    alias :strfre_tree :strfregexp_tree
  end
end