regexp_parser 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5d3544709ae86e53530ef7cbe037dcab48690c2c
4
+ data.tar.gz: fb96ce92b21303ec32e88103d708a508c80588fc
5
+ SHA512:
6
+ metadata.gz: d4191f06120c4e5abe9f0fc6b7eb466aab12a61b37c4cb7c33204f2c2dba9907866d42317320cf3916146f59e12ff3c027db76300321342b1615270d7085ade9
7
+ data.tar.gz: 8d0173c9d02e4e26eae291e0dbedd48fb7d9995629b761f47ab696ad9b2955efa542c6a3360fa711f27c0d7f4cc5df6f293f89685e47aacd4e9a2f8eab6d336f
data/ChangeLog CHANGED
@@ -1,3 +1,48 @@
1
+ Tue Jan 14 13:14:24 2014 Ammar Ali <ammarabuali@gmail.com>
2
+
3
+ * Released version 0.1.5, with a correct ChangeLog.
4
+
5
+ Tue Jan 14 13:14:24 2014 Ammar Ali <ammarabuali@gmail.com>
6
+
7
+ * Released version 0.1.4, after accidental yank of last version.
8
+
9
+ Tue Jan 14 13:05:13 2014 Ammar Ali <ammarabuali@gmail.com>
10
+
11
+ * Released version 0.1.3, adds missing VERSION.yml file.
12
+
13
+ Tue Jan 14 12:41:52 2014 Ammar Ali <ammarabuali@gmail.com>
14
+
15
+ * Released version 0.1.2
16
+
17
+ * Added syntax stubs for ruby versions 2.0 and 2.1
18
+
19
+ Sat Feb 9 12:27:13 2010 Ammar Ali <ammarabuali@gmail.com>
20
+
21
+ * Added clone methods for deep copying expressions.
22
+
23
+ * Added optional format argument for to_s on expressions to return the
24
+ text of the expression with (:full, the default) or without (:base)
25
+ its quantifier.
26
+
27
+ * Renamed the :beginning_of_line and :end_of_line tokens to :bol and
28
+ :eol.
29
+
30
+ * Fixed a bug where alternations with more than two alternatives and
31
+ one of them ending in a group were being incorrectly nested.
32
+
33
+ * Improved EOF handling in general and especially from sequences like
34
+ hex and control escapes.
35
+
36
+ * Fixed a bug where named groups with an empty name would return a
37
+ blank token [].
38
+
39
+ * Fixed a bug where members of a parent set were being added to its
40
+ last subset.
41
+
42
+ * Various code cleanups in scanner.rl
43
+
44
+ * Fixed a few mutable string bugs by calling dup on the originals.
45
+
1
46
  Tue Nov 23 11:35:56 2010 Ammar Ali <ammarabuali@gmail.com>
2
47
 
3
48
  * Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer
data/Rakefile CHANGED
@@ -1,26 +1,15 @@
1
1
  require 'rake'
2
- require 'yaml'
3
2
  require 'rake/testtask'
4
- require 'rake/gempackagetask'
3
+ require 'rubygems/package_task'
4
+ require 'yaml'
5
5
 
6
6
  task :default => [:test]
7
7
 
8
-
9
8
  RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
10
9
  RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
11
10
 
12
11
  RAGEL_SOURCE_FILES = %w{scanner}
13
12
 
14
- RP_ROOT = File.expand_path '../', __FILE__
15
-
16
- def regexp_parser_version
17
- v = YAML.load(File.read("#{RP_ROOT}/VERSION.yml"))
18
- v[:build] ? "#{v[:major]}.#{v[:minor]}.#{v[:patch]}.#{v[:build]}" :
19
- "#{v[:major]}.#{v[:minor]}.#{v[:patch]}"
20
- end
21
-
22
- RP_VERSION = regexp_parser_version
23
-
24
13
  desc "Find and run all unit tests under test/ directory"
25
14
  Rake::TestTask.new("test") do |t|
26
15
  t.libs << "test"
@@ -30,28 +19,11 @@ end
30
19
  task :test
31
20
 
32
21
  namespace :test do
33
- desc "Run all scanner tests"
34
- Rake::TestTask.new("scanner") do |t|
35
- t.libs << "test"
36
- t.test_files = ['test/scanner/test_all.rb']
37
- end
38
-
39
- desc "Run all lexer tests"
40
- Rake::TestTask.new("lexer") do |t|
41
- t.libs << "test"
42
- t.test_files = ['test/lexer/test_all.rb']
43
- end
44
-
45
- desc "Run all parser tests"
46
- Rake::TestTask.new("parser") do |t|
47
- t.libs << "test"
48
- t.test_files = ['test/parser/test_all.rb']
49
- end
50
-
51
- desc "Run all syntax tests"
52
- Rake::TestTask.new("syntax") do |t|
53
- t.libs << "test"
54
- t.test_files = ['test/syntax/test_all.rb']
22
+ %w{scanner lexer parser expression syntax}.each do |component|
23
+ Rake::TestTask.new(component) do |t|
24
+ t.libs << "test"
25
+ t.test_files = ["test/#{component}/test_all.rb"]
26
+ end
55
27
  end
56
28
  end
57
29
 
@@ -74,8 +46,8 @@ end
74
46
 
75
47
  spec = Gem::Specification.new do |gem|
76
48
  gem.name = 'regexp_parser'
77
- gem.version = '0.1.1'
78
- gem.date = '2010-11-23'
49
+ gem.version = YAML.load(File.read('VERSION.yml')).values.compact.join('.')
50
+ gem.date = '2014-01-14'
79
51
 
80
52
  gem.license = 'MIT'
81
53
  gem.summary = %q{Scanner, lexer, parser for ruby's regular expressions}
@@ -91,7 +63,7 @@ spec = Gem::Specification.new do |gem|
91
63
  gem.require_paths = ["lib"]
92
64
 
93
65
  gem.files = Dir.glob("{lib,test}/**/*.rb") + Dir.glob("lib/**/*.rl") +
94
- %w(Rakefile LICENSE README.rdoc ChangeLog)
66
+ %w(VERSION.yml Rakefile LICENSE README.rdoc ChangeLog)
95
67
 
96
68
  gem.test_files = Dir.glob("test/**/*.rb")
97
69
 
@@ -99,7 +71,7 @@ spec = Gem::Specification.new do |gem|
99
71
  gem.respond_to? :required_rubygems_version=
100
72
  end
101
73
 
102
- Rake::GemPackageTask.new(spec) do |pkg|
74
+ Gem::PackageTask.new(spec) do |pkg|
103
75
  pkg.need_zip = true
104
76
  pkg.need_tar = true
105
77
  end
@@ -108,11 +80,7 @@ namespace :gem do
108
80
  desc "Release the gem to rubygems.org"
109
81
  task :release do |t|
110
82
  Rake::Task['ragel:rb'].execute
111
-
112
- Rake::Task['gem'].invoke("#{RP_ROOT}/pkg/regexp_parser-#{RP_VERSION}")
113
-
114
83
  Rake::Task['repackage'].execute
115
-
116
- sh "gem push #{RP_ROOT}/pkg/regexp_parser-#{RP_VERSION}.gem"
84
+ #sh "gem push"
117
85
  end
118
86
  end
data/VERSION.yml ADDED
@@ -0,0 +1,5 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 5
5
+ :build:
data/lib/regexp_parser.rb CHANGED
@@ -1,45 +1,12 @@
1
- class Regexp
1
+ require 'yaml'
2
2
 
3
+ class Regexp
3
4
  module Parser
4
- VERSION = '0.0.1'
5
+ VERFILE = File.expand_path('../../VERSION.yml', __FILE__)
6
+ VERSION = YAML.load(File.read(VERFILE)).values.compact.join('.')
5
7
  end
6
-
7
- TOKEN_KEYS = [:type, :token, :text, :ts, :te, :depth, :set_depth].freeze
8
- Token = Struct.new(*TOKEN_KEYS) do
9
- def offset
10
- [self.ts, self.te]
11
- end
12
-
13
- def length
14
- self.te - self.ts
15
- end
16
-
17
- def to_h
18
- hash = {}
19
- members.each do |member|
20
- hash[member.to_sym] = self.send(member.to_sym)
21
- end; hash
22
- end
23
-
24
- def next(exp = nil)
25
- if exp
26
- @next = exp
27
- else
28
- @next
29
- end
30
- end
31
-
32
- def previous(exp = nil)
33
- if exp
34
- @previous = exp
35
- else
36
- @previous
37
- end
38
- end
39
- end
40
-
41
8
  end
42
9
 
43
- %w{ctype scanner syntax lexer parser}.each do |file|
10
+ %w{token ctype scanner syntax lexer parser}.each do |file|
44
11
  require File.expand_path("../regexp_parser/#{file}", __FILE__)
45
12
  end
@@ -1,36 +1,67 @@
1
1
  module Regexp::Expression
2
+
2
3
  class Base
3
- attr_reader :type, :token, :text
4
- attr_reader :quantifier
5
- attr_reader :expressions
4
+ attr_accessor :type, :token
5
+ attr_accessor :level, :text, :ts
6
6
 
7
+ attr_accessor :quantifier
7
8
  attr_accessor :options
8
9
 
9
10
  def initialize(token)
10
11
  @type = token.type
11
12
  @token = token.token
12
13
  @text = token.text
14
+ @ts = token.ts
15
+ @level = token.level
13
16
  @options = nil
14
- @expressions = []
15
17
  end
16
18
 
17
- def to_s
18
- s = @text
19
- s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
20
- s << @quantifier if quantified?
21
- s
19
+ def clone
20
+ copy = self.dup
21
+
22
+ copy.text = (self.text ? self.text.dup : nil)
23
+ copy.options = (self.options ? self.options.dup : nil)
24
+ copy.quantifier = (self.quantifier ? self.quantifier.clone : nil)
25
+
26
+ copy
22
27
  end
23
28
 
24
- def <<(exp)
25
- @expressions << exp
29
+ def to_re(format = :full)
30
+ ::Regexp.new(to_s(format))
26
31
  end
27
32
 
28
- def each(&block)
29
- @expressions.each {|e| yield e}
33
+ def starts_at
34
+ @ts
30
35
  end
31
36
 
32
- def [](index)
33
- @expressions[index]
37
+ def full_length
38
+ to_s.length
39
+ end
40
+
41
+ def offset
42
+ [starts_at, full_length]
43
+ end
44
+
45
+ def coded_offset
46
+ '@%d+%d' % offset
47
+ end
48
+
49
+ def to_s(format = :full)
50
+ s = ''
51
+
52
+ case format
53
+ when :base
54
+ s << @text.dup
55
+ else
56
+ s << @text.dup
57
+ s << @quantifier if quantified?
58
+ end
59
+
60
+ s
61
+ end
62
+
63
+ def terminal?
64
+ !respond_to?(:expressions)
34
65
  end
35
66
 
36
67
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
@@ -42,20 +73,21 @@ module Regexp::Expression
42
73
  end
43
74
 
44
75
  def quantity
76
+ return [nil,nil] unless quantified?
45
77
  [@quantifier.min, @quantifier.max]
46
78
  end
47
79
 
48
80
  def greedy?
49
- @quantifier.mode == :greedy
81
+ quantified? and @quantifier.mode == :greedy
50
82
  end
51
83
 
52
84
  def reluctant?
53
- @quantifier.mode == :reluctant
85
+ quantified? and @quantifier.mode == :reluctant
54
86
  end
55
87
  alias :lazy? :reluctant?
56
88
 
57
89
  def possessive?
58
- @quantifier.mode == :possessive
90
+ quantified? and @quantifier.mode == :possessive
59
91
  end
60
92
 
61
93
  def multiline?
@@ -76,212 +108,27 @@ module Regexp::Expression
76
108
  alias :extended? :free_spacing?
77
109
  end
78
110
 
79
- class Root < Regexp::Expression::Base
80
- def initialize
81
- super Regexp::Token.new(:expression, :root, '')
82
- end
83
-
84
- def multiline?
85
- @expressions[0].m?
86
- end
87
- alias :m? :multiline?
88
-
89
- def case_insensitive?
90
- @expressions[0].i?
91
- end
92
- alias :i? :case_insensitive?
93
-
94
- def free_spacing?
95
- @expressions[0].x?
96
- end
97
- alias :x? :free_spacing?
98
- end
99
-
100
- class Quantifier
101
- attr_reader :token, :text, :min, :max, :mode
102
-
103
- def initialize(token, text, min, max, mode)
104
- @token = token
105
- @text = text
106
- @mode = mode
107
- @min = min
108
- @max = max
111
+ def self.parsed(exp)
112
+ case exp
113
+ when String
114
+ Regexp::Parser.parse(exp)
115
+ when Regexp
116
+ Regexp::Parser.parse(exp.source)
117
+ when Regexp::Expression
118
+ exp
119
+ else
120
+ raise "Expression.parsed accepts a String, Regexp, or " +
121
+ "a Regexp::Expression as a value for exp, but it " +
122
+ "was given #{exp.class.name}."
109
123
  end
110
-
111
- def to_s
112
- @text
113
- end
114
- alias :to_str :to_s
115
- end
116
-
117
- class Literal < Regexp::Expression::Base; end
118
-
119
- module Backreference
120
- class Base < Regexp::Expression::Base; end
121
-
122
- class Name < Backreference::Base; end
123
- class Number < Backreference::Base; end
124
- class NumberRelative < Backreference::Base; end
125
-
126
- class NameNestLevel < Backreference::Base; end
127
- class NumberNestLevel < Backreference::Base; end
128
-
129
- class NameCall < Backreference::Base; end
130
- class NumberCall < Backreference::Base; end
131
- class NumberCallRelative < Backreference::Base; end
132
- end
133
-
134
- module Anchor
135
- class Base < Regexp::Expression::Base; end
136
-
137
- class BeginningOfLine < Anchor::Base; end
138
- class EndOfLine < Anchor::Base; end
139
-
140
- class BeginningOfString < Anchor::Base; end
141
- class EndOfString < Anchor::Base; end
142
-
143
- class EndOfStringOrBeforeEndOfLine < Anchor::Base; end
144
-
145
- class WordBoundary < Anchor::Base; end
146
- class NonWordBoundary < Anchor::Base; end
147
-
148
- class MatchStart < Anchor::Base; end
149
-
150
- BOL = BeginningOfLine
151
- EOL = EndOfLine
152
- BOS = BeginningOfString
153
- EOS = EndOfString
154
- EOSobEOL = EndOfStringOrBeforeEndOfLine
155
- end
156
-
157
- module CharacterType
158
- class Base < Regexp::Expression::Base; end
159
-
160
- class Any < CharacterType::Base; end
161
- class Digit < CharacterType::Base; end
162
- class NonDigit < CharacterType::Base; end
163
- class Hex < CharacterType::Base; end
164
- class NonHex < CharacterType::Base; end
165
- class Word < CharacterType::Base; end
166
- class NonWord < CharacterType::Base; end
167
- class Space < CharacterType::Base; end
168
- class NonSpace < CharacterType::Base; end
169
- end
170
-
171
- module EscapeSequence
172
- class Base < Regexp::Expression::Base; end
173
-
174
- class Literal < EscapeSequence::Base; end
175
-
176
- class AsciiEscape < EscapeSequence::Base; end
177
- class Backspace < EscapeSequence::Base; end
178
- class Bell < EscapeSequence::Base; end
179
- class FormFeed < EscapeSequence::Base; end
180
- class Newline < EscapeSequence::Base; end
181
- class Return < EscapeSequence::Base; end
182
- class Space < EscapeSequence::Base; end
183
- class Tab < EscapeSequence::Base; end
184
- class VerticalTab < EscapeSequence::Base; end
185
-
186
- class Octal < EscapeSequence::Base; end
187
- class Hex < EscapeSequence::Base; end
188
- class HexWide < EscapeSequence::Base; end
189
-
190
- class Control < EscapeSequence::Base; end
191
- class Meta < EscapeSequence::Base; end
192
- class MetaControl < EscapeSequence::Base; end
193
- end
194
-
195
- class Alternation < Regexp::Expression::Base
196
- def <<(exp)
197
- @expressions.last << exp
198
- end
199
-
200
- def alternative(exp = nil)
201
- @expressions << (exp ? exp : Sequence.new)
202
- end
203
-
204
- def alternatives
205
- @expressions
206
- end
207
-
208
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
209
- @expressions.last.last.quantify(token, text, min, max, mode)
210
- end
211
-
212
- def to_s
213
- @expressions.map{|e| e.to_s}.join('|')
214
- end
215
- end
216
-
217
- # a sequence of expressions, used by alternations
218
- class Sequence < Regexp::Expression::Base
219
- def initialize
220
- super Regexp::Token.new(:expression, :sequence, '')
221
- end
222
-
223
- def <<(exp)
224
- @expressions << exp
225
- end
226
-
227
- def insert(exp)
228
- @expressions.insert 0, exp
229
- end
230
-
231
- def first
232
- @expressions.first
233
- end
234
-
235
- def last
236
- @expressions.last
237
- end
238
-
239
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
240
- last.quantify(token, text, min, max, mode)
241
- end
242
- end
243
-
244
- module Group
245
- class Base < Regexp::Expression::Base
246
- def capturing?
247
- [:capture, :named].include? @token
248
- end
249
-
250
- def comment?; @type == :comment end
251
-
252
- def to_s
253
- s = @text
254
- s << @expressions.join
255
- s << ')'
256
- s << @quantifier.to_s if quantified?
257
- s
258
- end
259
- end
260
-
261
- class Atomic < Group::Base; end
262
- class Capture < Group::Base; end
263
- class Named < Group::Base; end
264
- class Passive < Group::Base; end
265
-
266
- class Options < Group::Base; end
267
-
268
- class Comment < Group::Base
269
- def to_s; @text end
270
- end
271
- end
272
-
273
- class Assertion
274
- class Base < Regexp::Expression::Group::Base; end
275
-
276
- class Lookahead < Assertion::Base; end
277
- class NegativeLookahead < Assertion::Base; end
278
-
279
- class Lookbehind < Assertion::Base; end
280
- class NegativeLookbehind < Assertion::Base; end
281
124
  end
282
125
 
283
126
  end # module Regexp::Expression
284
127
 
285
- %w{property set}.each do|file|
286
- require File.expand_path("../expression/#{file}", __FILE__)
128
+
129
+ [ # Order is important
130
+ '/expression/*.rb',
131
+ '/expression/classes/*.rb',
132
+ ].each do |path|
133
+ Dir[File.join(File.dirname(__FILE__), path)].each {|f| require f }
287
134
  end