regexp_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,108 @@
1
+ module Regexp::Expression
2
+
3
# Expression classes for Unicode property tokens written as \p{...} or
# \P{...}.
module UnicodeProperty
  # Shared behaviour for every property expression.
  class Base < Regexp::Expression::Base
    # True when the expression's type is :nonproperty.
    def negative?
      @type == :nonproperty
    end

    # The text found between the braces of a \p{...} / \P{...} token, or
    # nil when @text does not have that shape.
    def name
      found = /\A\\[pP]\{([^}]+)\}\z/.match(@text)
      found && found[1]
    end
  end

  # Properties that derive directly from UnicodeProperty::Base.
  class Alnum   < Base; end
  class Alpha   < Base; end
  class Any     < Base; end
  class Ascii   < Base; end
  class Blank   < Base; end
  class Cntrl   < Base; end
  class Digit   < Base; end
  class Graph   < Base; end
  class Lower   < Base; end
  class Newline < Base; end
  class Print   < Base; end
  class Punct   < Base; end
  class Space   < Base; end
  class Upper   < Base; end
  class Word    < Base; end
  class Xdigit  < Base; end

  # Letter properties; each class derives from Letter::Base.
  module Letter
    class Base < UnicodeProperty::Base; end

    class Any       < Letter::Base; end
    class Uppercase < Letter::Base; end
    class Lowercase < Letter::Base; end
    class Titlecase < Letter::Base; end
    class Modifier  < Letter::Base; end
    class Other     < Letter::Base; end
  end

  # Mark properties; each class derives from Mark::Base.
  module Mark
    class Base < UnicodeProperty::Base; end

    class Any        < Mark::Base; end
    class Nonspacing < Mark::Base; end
    class Spacing    < Mark::Base; end
    class Enclosing  < Mark::Base; end
  end

  # Number properties; each class derives from Number::Base.
  module Number
    class Base < UnicodeProperty::Base; end

    class Any     < Number::Base; end
    class Decimal < Number::Base; end
    class Letter  < Number::Base; end
    class Other   < Number::Base; end
  end

  # Punctuation properties; each class derives from Punctuation::Base.
  module Punctuation
    class Base < UnicodeProperty::Base; end

    class Any       < Punctuation::Base; end
    class Connector < Punctuation::Base; end
    class Dash      < Punctuation::Base; end
    class Open      < Punctuation::Base; end
    class Close     < Punctuation::Base; end
    class Initial   < Punctuation::Base; end
    class Final     < Punctuation::Base; end
    class Other     < Punctuation::Base; end
  end

  # Separator properties; each class derives from Separator::Base.
  module Separator
    class Base < UnicodeProperty::Base; end

    class Any       < Separator::Base; end
    class Space     < Separator::Base; end
    class Line      < Separator::Base; end
    class Paragraph < Separator::Base; end
  end

  # Symbol properties; each class derives from Symbol::Base. Inside this
  # module the constant Symbol refers to this module, not to ::Symbol.
  module Symbol
    class Base < UnicodeProperty::Base; end

    class Any      < Symbol::Base; end
    class Math     < Symbol::Base; end
    class Currency < Symbol::Base; end
    class Modifier < Symbol::Base; end
    class Other    < Symbol::Base; end
  end

  # Codepoint properties; each class derives from Codepoint::Base.
  module Codepoint
    class Base < UnicodeProperty::Base; end

    class Any        < Codepoint::Base; end
    class Control    < Codepoint::Base; end
    class Format     < Codepoint::Base; end
    class Surrogate  < Codepoint::Base; end
    class PrivateUse < Codepoint::Base; end
    class Unassigned < Codepoint::Base; end
  end

  # Remaining property kinds, derived directly from UnicodeProperty::Base.
  class Age     < UnicodeProperty::Base; end
  class Derived < UnicodeProperty::Base; end
  class Script  < UnicodeProperty::Base; end
end
107
+
108
+ end # module Regexp::Expression
@@ -0,0 +1,59 @@
1
+ module Regexp::Expression
2
+
3
+ class CharacterSet < Regexp::Expression::Base
4
+ attr_accessor :members
5
+
6
# Starts with an empty member list and the negation flag cleared, then
# hands the token to the parent initializer.
def initialize(token)
  @negative = false
  @members  = []
  super(token)
end
11
+
12
# Appends member to this set. When the most recently added member is a
# CharacterSubSet, the new member is forwarded to that subset instead.
def <<(member)
  target = @members.last
  target = @members unless target.is_a?(CharacterSubSet)
  target << member
end
19
+
20
# True when member equals the string form of any direct member, or when
# any nested CharacterSubSet reports that it includes member.
def include?(member)
  @members.any? do |candidate|
    if candidate.is_a?(CharacterSubSet)
      candidate.include?(member)
    else
      member == candidate.to_s
    end
  end
end
29
+
30
# Marks the set as negated. When the most recently added member is a
# CharacterSubSet, the negation is applied to that subset instead.
def negate
  innermost = @members.last
  return innermost.negate if innermost.is_a?(CharacterSubSet)

  @negative = true
end
37
+
38
# Reports the negation flag set by #negate (false right after
# construction).
def negative?
  @negative
end
alias negated? negative?
42
+
43
# Rebuilds the source text of the set: the opening text, an optional '^',
# the joined members, the closing ']' and the quantifier text, if any.
#
# The string is assembled on a copy of @text. Appending to @text itself
# (String#<< modifies its receiver) made the stored text grow on every
# call, so repeated calls -- including the one made by #matches? --
# returned different, ever longer strings.
def to_s
  s = @text.dup
  s << '^' if negative?
  s << @members.join
  s << ']'
  s << @quantifier.to_s if quantified?
  s
end
51
+
52
# True when input matches a regular expression built from this set's
# string form, false otherwise.
def matches?(input)
  pattern = /#{to_s}/
  (input =~ pattern) ? true : false
end
55
+ end
56
+
57
# A set nested inside another set. It adds nothing of its own; CharacterSet
# uses the class to recognise nested sets among its members.
class CharacterSubSet < CharacterSet
end
58
+
59
+ end # module Regexp::Expression
@@ -0,0 +1,287 @@
1
+ module Regexp::Expression
2
# Common parent of every node in the tree built from regular expression
# tokens. A node keeps the data of the token it was created from, a list of
# child expressions, an optional quantifier and the option flags.
class Base
  attr_reader :type, :token, :text
  attr_reader :quantifier
  attr_reader :expressions

  attr_accessor :options

  # token - any object that responds to #type, #token and #text.
  def initialize(token)
    @type        = token.type
    @token       = token.token
    @text        = token.text
    @options     = nil
    @expressions = []
    @quantifier  = nil # set by #quantify
  end

  # Returns this node's text followed by the text of its children and of
  # its quantifier, if any.
  #
  # The result is built on a copy of @text. Appending to @text itself
  # (String#<< modifies its receiver) made the stored text grow on every
  # call, so a second call returned the children and quantifier twice.
  def to_s
    s = @text.dup
    s << @expressions.map { |e| e.to_s }.join unless @expressions.empty?
    s << @quantifier.to_s if quantified?
    s
  end

  # Adds exp as the last child expression.
  def <<(exp)
    @expressions << exp
  end

  # Yields every child expression in order. Without a block the
  # enumerator of the underlying array is returned.
  def each(&block)
    @expressions.each(&block)
  end

  # Returns the child expression at index.
  def [](index)
    @expressions[index]
  end

  # Attaches a quantifier built from the given token, text, bounds and
  # mode, replacing any previous one.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    @quantifier = Quantifier.new(token, text, min, max, mode)
  end

  def quantified?
    !@quantifier.nil?
  end

  # Returns [min, max] of the quantifier, or [nil, nil] when the node is
  # not quantified.
  def quantity
    return [nil, nil] unless quantified?

    [@quantifier.min, @quantifier.max]
  end

  # The three mode predicates return false for unquantified nodes instead
  # of failing on the missing quantifier.
  def greedy?
    quantified? && @quantifier.mode == :greedy
  end

  def reluctant?
    quantified? && @quantifier.mode == :reluctant
  end
  alias :lazy? :reluctant?

  def possessive?
    quantified? && @quantifier.mode == :possessive
  end

  # The option predicates read @options with the keys :m, :i and :x and
  # always answer with true or false.
  def multiline?
    (@options && @options[:m]) ? true : false
  end
  alias :m? :multiline?

  def case_insensitive?
    (@options && @options[:i]) ? true : false
  end
  alias :i? :case_insensitive?
  alias :ignore_case? :case_insensitive?

  def free_spacing?
    (@options && @options[:x]) ? true : false
  end
  alias :x? :free_spacing?
  alias :extended? :free_spacing?
end
78
+
79
# Top-level node of an expression tree. It is created from a synthetic
# :expression / :root token with empty text.
class Root < Regexp::Expression::Base
  def initialize
    super Regexp::Token.new(:expression, :root, '')
  end

  # The option predicates below report the flags of the first child
  # expression. A root without children answers false; previously the call
  # failed with NoMethodError on nil.
  def multiline?
    head = @expressions[0]
    head ? head.m? : false
  end
  alias :m? :multiline?

  def case_insensitive?
    head = @expressions[0]
    head ? head.i? : false
  end
  alias :i? :case_insensitive?

  def free_spacing?
    head = @expressions[0]
    head ? head.x? : false
  end
  alias :x? :free_spacing?
end
99
+
100
# Value holder for the quantifier attached to an expression: the token and
# text it was created from, its min/max bounds and its mode.
class Quantifier
  attr_reader :token, :text, :min, :max, :mode

  def initialize(token, text, min, max, mode)
    @token, @text = token, text
    @min, @max    = min, max
    @mode         = mode
  end

  # The quantifier's text. The to_str alias lets a Quantifier be used
  # where a String is expected, for example as the argument of String#<<.
  def to_s
    @text
  end
  alias to_str to_s
end
116
+
117
# Expression class for literal tokens; adds nothing to the base class.
class Literal < Regexp::Expression::Base
end
118
+
119
# Namespace for the back-reference expression classes. All of them are
# empty subclasses of Backreference::Base.
module Backreference
  class Base < Regexp::Expression::Base; end

  class Name           < Backreference::Base; end
  class Number         < Backreference::Base; end
  class NumberRelative < Backreference::Base; end

  class NameNestLevel   < Backreference::Base; end
  class NumberNestLevel < Backreference::Base; end

  class NameCall           < Backreference::Base; end
  class NumberCall         < Backreference::Base; end
  class NumberCallRelative < Backreference::Base; end
end
133
+
134
# Namespace for the anchor expression classes. All of them are empty
# subclasses of Anchor::Base.
module Anchor
  class Base < Regexp::Expression::Base; end

  class BeginningOfLine < Anchor::Base; end
  class EndOfLine       < Anchor::Base; end

  class BeginningOfString < Anchor::Base; end
  class EndOfString       < Anchor::Base; end

  class EndOfStringOrBeforeEndOfLine < Anchor::Base; end

  class WordBoundary    < Anchor::Base; end
  class NonWordBoundary < Anchor::Base; end

  class MatchStart < Anchor::Base; end

  # Short aliases for the longer class names above.
  BOL      = BeginningOfLine
  EOL      = EndOfLine
  BOS      = BeginningOfString
  EOS      = EndOfString
  EOSobEOL = EndOfStringOrBeforeEndOfLine
end
156
+
157
# Namespace for the character type expression classes. All of them are
# empty subclasses of CharacterType::Base.
module CharacterType
  class Base < Regexp::Expression::Base; end

  class Any      < CharacterType::Base; end
  class Digit    < CharacterType::Base; end
  class NonDigit < CharacterType::Base; end
  class Hex      < CharacterType::Base; end
  class NonHex   < CharacterType::Base; end
  class Word     < CharacterType::Base; end
  class NonWord  < CharacterType::Base; end
  class Space    < CharacterType::Base; end
  class NonSpace < CharacterType::Base; end
end
170
+
171
# Namespace for the escape sequence expression classes. All of them are
# empty subclasses of EscapeSequence::Base.
module EscapeSequence
  class Base < Regexp::Expression::Base; end

  class Literal < EscapeSequence::Base; end

  class AsciiEscape < EscapeSequence::Base; end
  class Backspace   < EscapeSequence::Base; end
  class Bell        < EscapeSequence::Base; end
  class FormFeed    < EscapeSequence::Base; end
  class Newline     < EscapeSequence::Base; end
  class Return      < EscapeSequence::Base; end
  class Space       < EscapeSequence::Base; end
  class Tab         < EscapeSequence::Base; end
  class VerticalTab < EscapeSequence::Base; end

  class Octal   < EscapeSequence::Base; end
  class Hex     < EscapeSequence::Base; end
  class HexWide < EscapeSequence::Base; end

  class Control     < EscapeSequence::Base; end
  class Meta        < EscapeSequence::Base; end
  class MetaControl < EscapeSequence::Base; end
end
194
+
195
# An alternation node. Its child expressions are the alternatives; new
# expressions and quantifiers are routed to the most recent alternative.
class Alternation < Regexp::Expression::Base
  # Adds exp to the most recently added alternative.
  def <<(exp)
    current = @expressions.last
    current << exp
  end

  # Starts a new alternative: exp when given, otherwise a fresh Sequence.
  def alternative(exp = nil)
    @expressions << (exp || Sequence.new)
  end

  # The list of alternatives (the child expressions themselves).
  def alternatives
    @expressions
  end

  # Quantifies the last expression of the most recent alternative.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    current = @expressions.last
    current.last.quantify(token, text, min, max, mode)
  end

  # The alternatives' texts joined with '|'.
  def to_s
    @expressions.map(&:to_s).join('|')
  end
end
216
+
217
# A sequence of expressions, used by alternations. It is created from a
# synthetic :expression / :sequence token with empty text.
class Sequence < Regexp::Expression::Base
  def initialize
    super Regexp::Token.new(:expression, :sequence, '')
  end

  # Adds exp at the end of the sequence.
  def <<(exp)
    @expressions << exp
  end

  # Adds exp at the front of the sequence.
  def insert(exp)
    @expressions.unshift(exp)
  end

  def first
    @expressions.first
  end

  def last
    @expressions.last
  end

  # Quantifies the last expression of the sequence rather than the
  # sequence itself.
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
    last.quantify(token, text, min, max, mode)
  end
end
243
+
244
+ module Group
245
+ class Base < Regexp::Expression::Base
246
# True when the group's token is :capture or :named.
def capturing?
  case @token
  when :capture, :named then true
  else false
  end
end
249
+
250
# True when the group's type is :comment.
def comment?
  @type == :comment
end
251
+
252
# Rebuilds the source text of the group: the opening text, the joined
# child expressions, the closing ')' and the quantifier text, if any.
#
# The string is assembled on a copy of @text. Appending to @text itself
# (String#<< modifies its receiver) made the stored text grow on every
# call, so a second call returned the body and ')' twice.
def to_s
  s = @text.dup
  s << @expressions.join
  s << ')'
  s << @quantifier.to_s if quantified?
  s
end
259
+ end
260
+
261
# Group kinds that add nothing to Group::Base.
class Atomic  < Group::Base; end
class Capture < Group::Base; end
class Named   < Group::Base; end
class Passive < Group::Base; end

class Options < Group::Base; end
267
+
268
# A comment group. Its string form is just the stored text; unlike
# Group::Base#to_s, no children, ')' or quantifier are appended.
class Comment < Group::Base
  def to_s
    @text
  end
end
271
+ end
272
+
273
# Namespace for the lookaround assertion classes. All of them are empty
# subclasses of Assertion::Base, which itself derives from Group::Base.
#
# NOTE(review): this namespace is declared with `class` while the sibling
# namespaces in this file (Group, Anchor, ...) use `module`. Switching it
# would break any `class Assertion` reopening in other files -- confirm
# before changing.
class Assertion
  class Base < Regexp::Expression::Group::Base; end

  class Lookahead         < Assertion::Base; end
  class NegativeLookahead < Assertion::Base; end

  class Lookbehind         < Assertion::Base; end
  class NegativeLookbehind < Assertion::Base; end
end
282
+
283
+ end # module Regexp::Expression
284
+
285
+ %w{property set}.each do|file|
286
+ require File.expand_path("../expression/#{file}", __FILE__)
287
+ end