regexp_parser 0.1.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5d3544709ae86e53530ef7cbe037dcab48690c2c
4
+ data.tar.gz: fb96ce92b21303ec32e88103d708a508c80588fc
5
+ SHA512:
6
+ metadata.gz: d4191f06120c4e5abe9f0fc6b7eb466aab12a61b37c4cb7c33204f2c2dba9907866d42317320cf3916146f59e12ff3c027db76300321342b1615270d7085ade9
7
+ data.tar.gz: 8d0173c9d02e4e26eae291e0dbedd48fb7d9995629b761f47ab696ad9b2955efa542c6a3360fa711f27c0d7f4cc5df6f293f89685e47aacd4e9a2f8eab6d336f
data/ChangeLog CHANGED
@@ -1,3 +1,48 @@
1
+ Tue Jan 14 13:14:24 2014 Ammar Ali <ammarabuali@gmail.com>
2
+
3
+ * Released version 0.1.5, with a correct ChangeLog.
4
+
5
+ Tue Jan 14 13:14:24 2014 Ammar Ali <ammarabuali@gmail.com>
6
+
7
+ * Released version 0.1.4, after accidental yank of last version.
8
+
9
+ Tue Jan 14 13:05:13 2014 Ammar Ali <ammarabuali@gmail.com>
10
+
11
+ * Released version 0.1.3, adds missing VERSION.yml file.
12
+
13
+ Tue Jan 14 12:41:52 2014 Ammar Ali <ammarabuali@gmail.com>
14
+
15
+ * Released version 0.1.2
16
+
17
+ * Added syntax stubs for ruby versions 2.0 and 2.1
18
+
19
+ Sat Feb 9 12:27:13 2010 Ammar Ali <ammarabuali@gmail.com>
20
+
21
+ * Added clone methods for deep copying expressions.
22
+
23
+ * Added optional format argument for to_s on expressions to return the
24
+ text of the expression with (:full, the default) or without (:base)
25
+ its quantifier.
26
+
27
+ * Renamed the :beginning_of_line and :end_of_line tokens to :bol and
28
+ :eol.
29
+
30
+ * Fixed a bug where alternations with more than two alternatives and
31
+ one of them ending in a group were being incorrectly nested.
32
+
33
+ * Improved EOF handling in general and especially for sequences like
34
+ hex and control escapes.
35
+
36
+ * Fixed a bug where named groups with an empty name would return a
37
+ blank token [].
38
+
39
+ * Fixed a bug where members of a parent set were being added to its
40
+ last subset.
41
+
42
+ * Various code cleanups in scanner.rl
43
+
44
+ * Fixed a few mutable string bugs by calling dup on the originals.
45
+
1
46
  Tue Nov 23 11:35:56 2010 Ammar Ali <ammarabuali@gmail.com>
2
47
 
3
48
  * Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer
data/Rakefile CHANGED
@@ -1,26 +1,15 @@
1
1
  require 'rake'
2
- require 'yaml'
3
2
  require 'rake/testtask'
4
- require 'rake/gempackagetask'
3
+ require 'rubygems/package_task'
4
+ require 'yaml'
5
5
 
6
6
  task :default => [:test]
7
7
 
8
-
9
8
  RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
10
9
  RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
11
10
 
12
11
  RAGEL_SOURCE_FILES = %w{scanner}
13
12
 
14
- RP_ROOT = File.expand_path '../', __FILE__
15
-
16
- def regexp_parser_version
17
- v = YAML.load(File.read("#{RP_ROOT}/VERSION.yml"))
18
- v[:build] ? "#{v[:major]}.#{v[:minor]}.#{v[:patch]}.#{v[:build]}" :
19
- "#{v[:major]}.#{v[:minor]}.#{v[:patch]}"
20
- end
21
-
22
- RP_VERSION = regexp_parser_version
23
-
24
13
  desc "Find and run all unit tests under test/ directory"
25
14
  Rake::TestTask.new("test") do |t|
26
15
  t.libs << "test"
@@ -30,28 +19,11 @@ end
30
19
  task :test
31
20
 
32
21
  namespace :test do
33
- desc "Run all scanner tests"
34
- Rake::TestTask.new("scanner") do |t|
35
- t.libs << "test"
36
- t.test_files = ['test/scanner/test_all.rb']
37
- end
38
-
39
- desc "Run all lexer tests"
40
- Rake::TestTask.new("lexer") do |t|
41
- t.libs << "test"
42
- t.test_files = ['test/lexer/test_all.rb']
43
- end
44
-
45
- desc "Run all parser tests"
46
- Rake::TestTask.new("parser") do |t|
47
- t.libs << "test"
48
- t.test_files = ['test/parser/test_all.rb']
49
- end
50
-
51
- desc "Run all syntax tests"
52
- Rake::TestTask.new("syntax") do |t|
53
- t.libs << "test"
54
- t.test_files = ['test/syntax/test_all.rb']
22
+ %w{scanner lexer parser expression syntax}.each do |component|
23
+ Rake::TestTask.new(component) do |t|
24
+ t.libs << "test"
25
+ t.test_files = ["test/#{component}/test_all.rb"]
26
+ end
55
27
  end
56
28
  end
57
29
 
@@ -74,8 +46,8 @@ end
74
46
 
75
47
  spec = Gem::Specification.new do |gem|
76
48
  gem.name = 'regexp_parser'
77
- gem.version = '0.1.1'
78
- gem.date = '2010-11-23'
49
+ gem.version = YAML.load(File.read('VERSION.yml')).values.compact.join('.')
50
+ gem.date = '2014-01-14'
79
51
 
80
52
  gem.license = 'MIT'
81
53
  gem.summary = %q{Scanner, lexer, parser for ruby's regular expressions}
@@ -91,7 +63,7 @@ spec = Gem::Specification.new do |gem|
91
63
  gem.require_paths = ["lib"]
92
64
 
93
65
  gem.files = Dir.glob("{lib,test}/**/*.rb") + Dir.glob("lib/**/*.rl") +
94
- %w(Rakefile LICENSE README.rdoc ChangeLog)
66
+ %w(VERSION.yml Rakefile LICENSE README.rdoc ChangeLog)
95
67
 
96
68
  gem.test_files = Dir.glob("test/**/*.rb")
97
69
 
@@ -99,7 +71,7 @@ spec = Gem::Specification.new do |gem|
99
71
  gem.respond_to? :required_rubygems_version=
100
72
  end
101
73
 
102
- Rake::GemPackageTask.new(spec) do |pkg|
74
+ Gem::PackageTask.new(spec) do |pkg|
103
75
  pkg.need_zip = true
104
76
  pkg.need_tar = true
105
77
  end
@@ -108,11 +80,7 @@ namespace :gem do
108
80
  desc "Release the gem to rubygems.org"
109
81
  task :release do |t|
110
82
  Rake::Task['ragel:rb'].execute
111
-
112
- Rake::Task['gem'].invoke("#{RP_ROOT}/pkg/regexp_parser-#{RP_VERSION}")
113
-
114
83
  Rake::Task['repackage'].execute
115
-
116
- sh "gem push #{RP_ROOT}/pkg/regexp_parser-#{RP_VERSION}.gem"
84
+ #sh "gem push"
117
85
  end
118
86
  end
data/VERSION.yml ADDED
@@ -0,0 +1,5 @@
1
+ ---
2
+ :major: 0
3
+ :minor: 1
4
+ :patch: 5
5
+ :build:
data/lib/regexp_parser.rb CHANGED
@@ -1,45 +1,12 @@
1
- class Regexp
1
+ require 'yaml'
2
2
 
3
+ class Regexp
3
4
  module Parser
4
- VERSION = '0.0.1'
5
+ VERFILE = File.expand_path('../../VERSION.yml', __FILE__)
6
+ VERSION = YAML.load(File.read(VERFILE)).values.compact.join('.')
5
7
  end
6
-
7
- TOKEN_KEYS = [:type, :token, :text, :ts, :te, :depth, :set_depth].freeze
8
- Token = Struct.new(*TOKEN_KEYS) do
9
- def offset
10
- [self.ts, self.te]
11
- end
12
-
13
- def length
14
- self.te - self.ts
15
- end
16
-
17
- def to_h
18
- hash = {}
19
- members.each do |member|
20
- hash[member.to_sym] = self.send(member.to_sym)
21
- end; hash
22
- end
23
-
24
- def next(exp = nil)
25
- if exp
26
- @next = exp
27
- else
28
- @next
29
- end
30
- end
31
-
32
- def previous(exp = nil)
33
- if exp
34
- @previous = exp
35
- else
36
- @previous
37
- end
38
- end
39
- end
40
-
41
8
  end
42
9
 
43
- %w{ctype scanner syntax lexer parser}.each do |file|
10
+ %w{token ctype scanner syntax lexer parser}.each do |file|
44
11
  require File.expand_path("../regexp_parser/#{file}", __FILE__)
45
12
  end
@@ -1,36 +1,67 @@
1
1
  module Regexp::Expression
2
+
2
3
  class Base
3
- attr_reader :type, :token, :text
4
- attr_reader :quantifier
5
- attr_reader :expressions
4
+ attr_accessor :type, :token
5
+ attr_accessor :level, :text, :ts
6
6
 
7
+ attr_accessor :quantifier
7
8
  attr_accessor :options
8
9
 
9
10
  def initialize(token)
10
11
  @type = token.type
11
12
  @token = token.token
12
13
  @text = token.text
14
+ @ts = token.ts
15
+ @level = token.level
13
16
  @options = nil
14
- @expressions = []
15
17
  end
16
18
 
17
- def to_s
18
- s = @text
19
- s << @expressions.map{|e| e.to_s}.join unless @expressions.empty?
20
- s << @quantifier if quantified?
21
- s
19
+ def clone
20
+ copy = self.dup
21
+
22
+ copy.text = (self.text ? self.text.dup : nil)
23
+ copy.options = (self.options ? self.options.dup : nil)
24
+ copy.quantifier = (self.quantifier ? self.quantifier.clone : nil)
25
+
26
+ copy
22
27
  end
23
28
 
24
- def <<(exp)
25
- @expressions << exp
29
+ def to_re(format = :full)
30
+ ::Regexp.new(to_s(format))
26
31
  end
27
32
 
28
- def each(&block)
29
- @expressions.each {|e| yield e}
33
+ def starts_at
34
+ @ts
30
35
  end
31
36
 
32
- def [](index)
33
- @expressions[index]
37
+ def full_length
38
+ to_s.length
39
+ end
40
+
41
+ def offset
42
+ [starts_at, full_length]
43
+ end
44
+
45
+ def coded_offset
46
+ '@%d+%d' % offset
47
+ end
48
+
49
+ def to_s(format = :full)
50
+ s = ''
51
+
52
+ case format
53
+ when :base
54
+ s << @text.dup
55
+ else
56
+ s << @text.dup
57
+ s << @quantifier if quantified?
58
+ end
59
+
60
+ s
61
+ end
62
+
63
+ def terminal?
64
+ !respond_to?(:expressions)
34
65
  end
35
66
 
36
67
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
@@ -42,20 +73,21 @@ module Regexp::Expression
42
73
  end
43
74
 
44
75
  def quantity
76
+ return [nil,nil] unless quantified?
45
77
  [@quantifier.min, @quantifier.max]
46
78
  end
47
79
 
48
80
  def greedy?
49
- @quantifier.mode == :greedy
81
+ quantified? and @quantifier.mode == :greedy
50
82
  end
51
83
 
52
84
  def reluctant?
53
- @quantifier.mode == :reluctant
85
+ quantified? and @quantifier.mode == :reluctant
54
86
  end
55
87
  alias :lazy? :reluctant?
56
88
 
57
89
  def possessive?
58
- @quantifier.mode == :possessive
90
+ quantified? and @quantifier.mode == :possessive
59
91
  end
60
92
 
61
93
  def multiline?
@@ -76,212 +108,27 @@ module Regexp::Expression
76
108
  alias :extended? :free_spacing?
77
109
  end
78
110
 
79
- class Root < Regexp::Expression::Base
80
- def initialize
81
- super Regexp::Token.new(:expression, :root, '')
82
- end
83
-
84
- def multiline?
85
- @expressions[0].m?
86
- end
87
- alias :m? :multiline?
88
-
89
- def case_insensitive?
90
- @expressions[0].i?
91
- end
92
- alias :i? :case_insensitive?
93
-
94
- def free_spacing?
95
- @expressions[0].x?
96
- end
97
- alias :x? :free_spacing?
98
- end
99
-
100
- class Quantifier
101
- attr_reader :token, :text, :min, :max, :mode
102
-
103
- def initialize(token, text, min, max, mode)
104
- @token = token
105
- @text = text
106
- @mode = mode
107
- @min = min
108
- @max = max
111
+ def self.parsed(exp)
112
+ case exp
113
+ when String
114
+ Regexp::Parser.parse(exp)
115
+ when Regexp
116
+ Regexp::Parser.parse(exp.source)
117
+ when Regexp::Expression
118
+ exp
119
+ else
120
+ raise "Expression.parsed accepts a String, Regexp, or " +
121
+ "a Regexp::Expression as a value for exp, but it " +
122
+ "was given #{exp.class.name}."
109
123
  end
110
-
111
- def to_s
112
- @text
113
- end
114
- alias :to_str :to_s
115
- end
116
-
117
- class Literal < Regexp::Expression::Base; end
118
-
119
- module Backreference
120
- class Base < Regexp::Expression::Base; end
121
-
122
- class Name < Backreference::Base; end
123
- class Number < Backreference::Base; end
124
- class NumberRelative < Backreference::Base; end
125
-
126
- class NameNestLevel < Backreference::Base; end
127
- class NumberNestLevel < Backreference::Base; end
128
-
129
- class NameCall < Backreference::Base; end
130
- class NumberCall < Backreference::Base; end
131
- class NumberCallRelative < Backreference::Base; end
132
- end
133
-
134
- module Anchor
135
- class Base < Regexp::Expression::Base; end
136
-
137
- class BeginningOfLine < Anchor::Base; end
138
- class EndOfLine < Anchor::Base; end
139
-
140
- class BeginningOfString < Anchor::Base; end
141
- class EndOfString < Anchor::Base; end
142
-
143
- class EndOfStringOrBeforeEndOfLine < Anchor::Base; end
144
-
145
- class WordBoundary < Anchor::Base; end
146
- class NonWordBoundary < Anchor::Base; end
147
-
148
- class MatchStart < Anchor::Base; end
149
-
150
- BOL = BeginningOfLine
151
- EOL = EndOfLine
152
- BOS = BeginningOfString
153
- EOS = EndOfString
154
- EOSobEOL = EndOfStringOrBeforeEndOfLine
155
- end
156
-
157
- module CharacterType
158
- class Base < Regexp::Expression::Base; end
159
-
160
- class Any < CharacterType::Base; end
161
- class Digit < CharacterType::Base; end
162
- class NonDigit < CharacterType::Base; end
163
- class Hex < CharacterType::Base; end
164
- class NonHex < CharacterType::Base; end
165
- class Word < CharacterType::Base; end
166
- class NonWord < CharacterType::Base; end
167
- class Space < CharacterType::Base; end
168
- class NonSpace < CharacterType::Base; end
169
- end
170
-
171
- module EscapeSequence
172
- class Base < Regexp::Expression::Base; end
173
-
174
- class Literal < EscapeSequence::Base; end
175
-
176
- class AsciiEscape < EscapeSequence::Base; end
177
- class Backspace < EscapeSequence::Base; end
178
- class Bell < EscapeSequence::Base; end
179
- class FormFeed < EscapeSequence::Base; end
180
- class Newline < EscapeSequence::Base; end
181
- class Return < EscapeSequence::Base; end
182
- class Space < EscapeSequence::Base; end
183
- class Tab < EscapeSequence::Base; end
184
- class VerticalTab < EscapeSequence::Base; end
185
-
186
- class Octal < EscapeSequence::Base; end
187
- class Hex < EscapeSequence::Base; end
188
- class HexWide < EscapeSequence::Base; end
189
-
190
- class Control < EscapeSequence::Base; end
191
- class Meta < EscapeSequence::Base; end
192
- class MetaControl < EscapeSequence::Base; end
193
- end
194
-
195
- class Alternation < Regexp::Expression::Base
196
- def <<(exp)
197
- @expressions.last << exp
198
- end
199
-
200
- def alternative(exp = nil)
201
- @expressions << (exp ? exp : Sequence.new)
202
- end
203
-
204
- def alternatives
205
- @expressions
206
- end
207
-
208
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
209
- @expressions.last.last.quantify(token, text, min, max, mode)
210
- end
211
-
212
- def to_s
213
- @expressions.map{|e| e.to_s}.join('|')
214
- end
215
- end
216
-
217
- # a sequence of expressions, used by alternations
218
- class Sequence < Regexp::Expression::Base
219
- def initialize
220
- super Regexp::Token.new(:expression, :sequence, '')
221
- end
222
-
223
- def <<(exp)
224
- @expressions << exp
225
- end
226
-
227
- def insert(exp)
228
- @expressions.insert 0, exp
229
- end
230
-
231
- def first
232
- @expressions.first
233
- end
234
-
235
- def last
236
- @expressions.last
237
- end
238
-
239
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
240
- last.quantify(token, text, min, max, mode)
241
- end
242
- end
243
-
244
- module Group
245
- class Base < Regexp::Expression::Base
246
- def capturing?
247
- [:capture, :named].include? @token
248
- end
249
-
250
- def comment?; @type == :comment end
251
-
252
- def to_s
253
- s = @text
254
- s << @expressions.join
255
- s << ')'
256
- s << @quantifier.to_s if quantified?
257
- s
258
- end
259
- end
260
-
261
- class Atomic < Group::Base; end
262
- class Capture < Group::Base; end
263
- class Named < Group::Base; end
264
- class Passive < Group::Base; end
265
-
266
- class Options < Group::Base; end
267
-
268
- class Comment < Group::Base
269
- def to_s; @text end
270
- end
271
- end
272
-
273
- class Assertion
274
- class Base < Regexp::Expression::Group::Base; end
275
-
276
- class Lookahead < Assertion::Base; end
277
- class NegativeLookahead < Assertion::Base; end
278
-
279
- class Lookbehind < Assertion::Base; end
280
- class NegativeLookbehind < Assertion::Base; end
281
124
  end
282
125
 
283
126
  end # module Regexp::Expression
284
127
 
285
- %w{property set}.each do|file|
286
- require File.expand_path("../expression/#{file}", __FILE__)
128
+
129
+ [ # Order is important
130
+ '/expression/*.rb',
131
+ '/expression/classes/*.rb',
132
+ ].each do |path|
133
+ Dir[File.join(File.dirname(__FILE__), path)].each {|f| require f }
287
134
  end