regexp_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,108 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for Unicode property tokens, i.e. text shaped like
# \p{...} or \P{...}.
module UnicodeProperty
  # Behaviour shared by every property expression.
  class Base < Regexp::Expression::Base
    # True when this node's type is :nonproperty.
    def negative?
      type == :nonproperty
    end

    # Returns whatever sits between the braces of \p{...} / \P{...},
    # or nil when the node's text does not have that shape.
    def name
      matched = /\A\\[pP]\{([^}]+)\}\z/.match(@text)
      matched && matched[1]
    end
  end

  # One class per named property; none of them adds behaviour.
  class Alnum   < Base; end
  class Alpha   < Base; end
  class Any     < Base; end
  class Ascii   < Base; end
  class Blank   < Base; end
  class Cntrl   < Base; end
  class Digit   < Base; end
  class Graph   < Base; end
  class Lower   < Base; end
  class Newline < Base; end
  class Print   < Base; end
  class Punct   < Base; end
  class Space   < Base; end
  class Upper   < Base; end
  class Word    < Base; end
  class Xdigit  < Base; end

  # Letter properties. Inside each namespace below, `Base` refers to that
  # namespace's own Base class, which is defined first.
  module Letter
    class Base < UnicodeProperty::Base; end

    class Any       < Base; end
    class Uppercase < Base; end
    class Lowercase < Base; end
    class Titlecase < Base; end
    class Modifier  < Base; end
    class Other     < Base; end
  end

  # Mark properties.
  module Mark
    class Base < UnicodeProperty::Base; end

    class Any        < Base; end
    class Nonspacing < Base; end
    class Spacing    < Base; end
    class Enclosing  < Base; end
  end

  # Number properties.
  module Number
    class Base < UnicodeProperty::Base; end

    class Any     < Base; end
    class Decimal < Base; end
    class Letter  < Base; end
    class Other   < Base; end
  end

  # Punctuation properties.
  module Punctuation
    class Base < UnicodeProperty::Base; end

    class Any       < Base; end
    class Connector < Base; end
    class Dash      < Base; end
    class Open      < Base; end
    class Close     < Base; end
    class Initial   < Base; end
    class Final     < Base; end
    class Other     < Base; end
  end

  # Separator properties.
  module Separator
    class Base < UnicodeProperty::Base; end

    class Any       < Base; end
    class Space     < Base; end
    class Line      < Base; end
    class Paragraph < Base; end
  end

  # Symbol properties. Within this namespace the constant `Symbol` names
  # this module, not Ruby's core Symbol class.
  module Symbol
    class Base < UnicodeProperty::Base; end

    class Any      < Base; end
    class Math     < Base; end
    class Currency < Base; end
    class Modifier < Base; end
    class Other    < Base; end
  end

  # Codepoint properties.
  module Codepoint
    class Base < UnicodeProperty::Base; end

    class Any        < Base; end
    class Control    < Base; end
    class Format     < Base; end
    class Surrogate  < Base; end
    class PrivateUse < Base; end
    class Unassigned < Base; end
  end

  # Remaining property families, each represented by a single class.
  class Age     < UnicodeProperty::Base; end
  class Derived < UnicodeProperty::Base; end
  class Script  < UnicodeProperty::Base; end
end
107
+
108
+ end # module Regexp::Expression
@@ -0,0 +1,59 @@
1
+ module Regexp::Expression
2
+
3
# A bracketed character set. Members are collected in #members; a nested
# CharacterSubSet, once it is the last member, receives every subsequent
# member and negation.
class CharacterSet < Regexp::Expression::Base
  attr_accessor :members

  # token - passed through unchanged to Regexp::Expression::Base#initialize.
  def initialize(token)
    @members  = []
    @negative = false
    super
  end

  # Adds a member. When the most recently added member is a
  # CharacterSubSet, the new member is forwarded to that subset instead.
  def <<(member)
    if @members.last.is_a?(CharacterSubSet)
      @members.last << member
    else
      @members << member
    end
  end

  # True when +member+ equals the string form of a direct member, or is
  # included in any nested subset.
  def include?(member)
    @members.any? do |m|
      m.is_a?(CharacterSubSet) ? m.include?(member) : member == m.to_s
    end
  end

  # Marks the set as negated. When the last member is a CharacterSubSet
  # the negation is applied to that subset instead of this set.
  def negate
    if @members.last.is_a?(CharacterSubSet)
      @members.last.negate
    else
      @negative = true
    end
  end

  def negative?
    @negative
  end
  alias :negated? :negative?

  # Rebuilds the source text: opening text, optional '^', the members,
  # the closing ']' and the quantifier, if any.
  #
  # Works on a copy of @text. Appending to @text itself (as this method
  # used to) grew the stored text on every call, so a second #to_s or any
  # #matches? call after it produced a corrupted pattern.
  def to_s
    s = @text.dup
    s << '^' if negative?
    s << @members.join
    s << ']'
    s << @quantifier.to_s if quantified?
    s
  end

  # True when +input+ matches a regular expression built from #to_s.
  def matches?(input)
    input =~ /#{to_s}/ ? true : false
  end
end

# A set nested inside another set; behaves exactly like CharacterSet.
class CharacterSubSet < CharacterSet; end
58
+
59
+ end # module Regexp::Expression
@@ -0,0 +1,287 @@
1
+ module Regexp::Expression
2
# Common behaviour for every node of the expression tree.
#
# A node is built from a token object responding to #type, #token and
# #text. It may hold child expressions, an options hash (keys :m, :i, :x
# are consulted by the predicates below) and at most one quantifier.
class Base
  attr_reader :type, :token, :text
  attr_reader :quantifier
  attr_reader :expressions

  attr_accessor :options

  # token - object responding to #type, #token and #text.
  def initialize(token)
    @type        = token.type
    @token       = token.token
    @text        = token.text
    @options     = nil
    @quantifier  = nil
    @expressions = []
  end

  # Returns this node's text followed by the string form of its children
  # and of its quantifier, if any.
  #
  # The result is built on a copy of @text: appending to @text itself
  # would modify the stored token text, so every further call would
  # repeat the children and the quantifier once more.
  def to_s
    s = @text.dup
    s << @expressions.map { |e| e.to_s }.join unless @expressions.empty?
    s << @quantifier.to_s if quantified?
    s
  end

  # Appends a child expression.
  def <<(exp)
    @expressions << exp
  end

  # Yields each child expression in order. The block is handed straight
  # to Array#each, so calling without a block follows Array#each as well.
  def each(&block)
    @expressions.each(&block)
  end

  # Returns the child expression at +index+.
  def [](index)
    @expressions[index]
  end

  # Attaches a quantifier built from the given token, text, bounds and
  # mode, replacing any previous one.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    @quantifier = Quantifier.new(token, text, min, max, mode)
  end

  def quantified?
    !@quantifier.nil?
  end

  # Returns [min, max] of the quantifier, or [nil, nil] when the node is
  # not quantified (previously this raised NoMethodError on nil).
  def quantity
    return [nil, nil] unless quantified?
    [@quantifier.min, @quantifier.max]
  end

  # The mode predicates return false for an unquantified node instead of
  # raising NoMethodError on the missing quantifier.
  def greedy?
    quantified? && @quantifier.mode == :greedy
  end

  def reluctant?
    quantified? && @quantifier.mode == :reluctant
  end
  alias :lazy? :reluctant?

  def possessive?
    quantified? && @quantifier.mode == :possessive
  end

  # Option predicates: true only when options are set and the key is truthy.
  def multiline?
    (@options && @options[:m]) ? true : false
  end
  alias :m? :multiline?

  def case_insensitive?
    (@options && @options[:i]) ? true : false
  end
  alias :i? :case_insensitive?
  alias :ignore_case? :case_insensitive?

  def free_spacing?
    (@options && @options[:x]) ? true : false
  end
  alias :x? :free_spacing?
  alias :extended? :free_spacing?
end
78
+
79
# The top-most node of an expression tree. It is built without arguments
# from a fixed token of type :expression, token :root and empty text.
#
# The option predicates are answered by the first child expression, so
# they raise NoMethodError while the root has no children.
class Root < Regexp::Expression::Base
  def initialize
    root_token = Regexp::Token.new(:expression, :root, '')
    super(root_token)
  end

  def multiline?
    @expressions.first.m?
  end
  alias :m? :multiline?

  def case_insensitive?
    @expressions.first.i?
  end
  alias :i? :case_insensitive?

  def free_spacing?
    @expressions.first.x?
  end
  alias :x? :free_spacing?
end
99
+
100
# Plain value holder describing how an expression is quantified:
# the quantifier's token, its source text, the min/max bounds and the mode.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  def initialize(token, text, min, max, mode)
    @token, @text = token, text
    @min, @max    = min, max
    @mode         = mode
  end

  # The quantifier's source text. Also exposed as #to_str, which lets a
  # Quantifier be appended directly to a String.
  def to_s
    text
  end
  alias_method :to_str, :to_s
end
116
+
117
# Expression node for literal text; adds nothing to the base class.
class Literal < Base
end
118
+
119
# Expression classes for back-references and calls. They all derive from
# Backreference::Base and add no behaviour of their own.
module Backreference
  class Base < Regexp::Expression::Base; end

  # References by name, by number and by relative number.
  class Name           < Base; end
  class Number         < Base; end
  class NumberRelative < Base; end

  # References carrying a nest level.
  class NameNestLevel   < Base; end
  class NumberNestLevel < Base; end

  # Calls by name, by number and by relative number.
  class NameCall           < Base; end
  class NumberCall         < Base; end
  class NumberCallRelative < Base; end
end
133
+
134
# Expression classes for anchors, plus short constant aliases for the
# most common ones.
module Anchor
  class Base < Regexp::Expression::Base; end

  class BeginningOfLine < Base; end
  class EndOfLine       < Base; end

  class BeginningOfString < Base; end
  class EndOfString       < Base; end

  class EndOfStringOrBeforeEndOfLine < Base; end

  class WordBoundary    < Base; end
  class NonWordBoundary < Base; end

  class MatchStart < Base; end

  # Abbreviated names for the classes above.
  BOL      = BeginningOfLine
  EOL      = EndOfLine
  BOS      = BeginningOfString
  EOS      = EndOfString
  EOSobEOL = EndOfStringOrBeforeEndOfLine
end
156
+
157
# Expression classes for character types, each paired with its negated
# counterpart where one exists. None of them adds behaviour.
module CharacterType
  class Base < Regexp::Expression::Base; end

  class Any      < Base; end
  class Digit    < Base; end
  class NonDigit < Base; end
  class Hex      < Base; end
  class NonHex   < Base; end
  class Word     < Base; end
  class NonWord  < Base; end
  class Space    < Base; end
  class NonSpace < Base; end
end
170
+
171
# Expression classes for escape sequences. They all derive from
# EscapeSequence::Base and add no behaviour of their own.
module EscapeSequence
  class Base < Regexp::Expression::Base; end

  class Literal < Base; end

  class AsciiEscape < Base; end
  class Backspace   < Base; end
  class Bell        < Base; end
  class FormFeed    < Base; end
  class Newline     < Base; end
  class Return      < Base; end
  class Space       < Base; end
  class Tab         < Base; end
  class VerticalTab < Base; end

  class Octal   < Base; end
  class Hex     < Base; end
  class HexWide < Base; end

  class Control     < Base; end
  class Meta        < Base; end
  class MetaControl < Base; end
end
194
+
195
# A list of alternatives. Each alternative is stored as one child
# expression; appended expressions and quantifiers always go to the most
# recently added alternative.
class Alternation < Regexp::Expression::Base
  # Appends +exp+ to the last alternative.
  def <<(exp)
    current = @expressions.last
    current << exp
  end

  # Starts a new alternative: the given expression, or an empty Sequence
  # when none (or a falsy value) is given.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # All alternatives, in the order they were added.
  def alternatives
    @expressions
  end

  # Quantifies the last element of the last alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    target = @expressions.last.last
    target.quantify(token, text, min, max, mode)
  end

  # The alternatives' string forms separated by '|'.
  def to_s
    rendered = @expressions.map { |alt| alt.to_s }
    rendered.join('|')
  end
end
216
+
217
# An ordered run of expressions; Alternation uses one per alternative.
# It is built without arguments from a fixed token of type :expression,
# token :sequence and empty text.
class Sequence < Regexp::Expression::Base
  def initialize
    sequence_token = Regexp::Token.new(:expression, :sequence, '')
    super(sequence_token)
  end

  # Appends +exp+ at the end of the sequence.
  def <<(exp)
    @expressions << exp
  end

  # Puts +exp+ at the front of the sequence.
  def insert(exp)
    @expressions.unshift(exp)
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  # Quantifies the last expression of the sequence, not the sequence itself.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    last.quantify(token, text, min, max, mode)
  end
end
243
+
244
# Expression classes for parenthesised groups.
module Group
  # Behaviour shared by every group expression.
  class Base < Regexp::Expression::Base
    # True when the node's token is :capture or :named.
    def capturing?
      [:capture, :named].include? @token
    end

    # True when the node's type is :comment.
    def comment?; @type == :comment end

    # Rebuilds the source text: opening text, children, the closing ')'
    # and the quantifier, if any.
    #
    # Works on a copy of @text. Appending to @text itself (as this method
    # used to) grew the stored text on every call, so each further call
    # repeated the children and the closing parenthesis once more.
    def to_s
      s = @text.dup
      s << @expressions.join
      s << ')'
      s << @quantifier.to_s if quantified?
      s
    end
  end

  class Atomic  < Group::Base; end
  class Capture < Group::Base; end
  class Named   < Group::Base; end
  class Passive < Group::Base; end

  class Options < Group::Base; end

  # A comment group renders as its own text only: no children and no
  # added ')'.
  class Comment < Group::Base
    # Returns a copy so callers cannot modify the stored text through it.
    def to_s; @text.dup end
  end
end
272
+
273
# Namespace for look-around assertions. Every assertion behaves like a
# group (see Group::Base).
#
# NOTE(review): declared as a class although it only serves as a
# namespace, unlike the sibling namespaces which are modules; kept as a
# class so that any code reopening it with `class` keeps working.
class Assertion
  class Base < Group::Base; end

  class Lookahead         < Base; end
  class NegativeLookahead < Base; end

  class Lookbehind         < Base; end
  class NegativeLookbehind < Base; end
end
282
+
283
+ end # module Regexp::Expression
284
+
285
# Load the expression classes kept in separate files under expression/,
# resolved relative to this file.
expression_files = %w[property set]
expression_files.each { |file| require File.expand_path("../expression/#{file}", __FILE__) }