regexp_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,14 @@
require File.expand_path('../1.8', __FILE__)

module Regexp::Syntax
  module Ruby
    # Syntax class for Ruby 1.8.6 (going by the class name).
    #
    # It registers no tokens of its own: everything comes from V18, whose
    # constructor is inherited unchanged. An `initialize` that does nothing
    # but call `super` is therefore not defined here.
    class V186 < Regexp::Syntax::Ruby::V18; end
  end
end
@@ -0,0 +1,14 @@
require File.expand_path('../1.8', __FILE__)

module Regexp::Syntax
  module Ruby
    # Syntax class for Ruby 1.8.7 (going by the class name).
    #
    # It registers no tokens of its own: everything comes from V18, whose
    # constructor is inherited unchanged. An `initialize` that does nothing
    # but call `super` is therefore not defined here.
    class V187 < Regexp::Syntax::Ruby::V18; end
  end
end
@@ -0,0 +1,39 @@
module Regexp::Syntax
  module Ruby
    # Base syntax class for the 1.8 line; V186, V187 and V191 derive from it.
    #
    # The constructor registers, one token type at a time, the token names
    # this syntax accepts. `implements` is not defined in this file; it is
    # presumably inherited from Regexp::Syntax::Base (TODO confirm).
    class V18 < Regexp::Syntax::Base
      # Makes the token-list modules (Anchor, Escape, Group, ...) reachable
      # without the Regexp::Syntax::Token prefix.
      include Regexp::Syntax::Token

      def initialize
        super

        implements(:anchor,    Anchor::All)
        implements(:assertion, Group::Assertion::All)
        implements(:backref,   [:number])

        escapes = Escape::Basic + Escape::Backreference +
                  Escape::ASCII + Escape::Meta
        implements(:escape, escapes)

        implements(:group, Group::All)
        implements(:meta,  Meta::Extended)

        quantifiers = Quantifier::Greedy + Quantifier::Reluctant +
                      Quantifier::Interval + Quantifier::IntervalReluctant
        implements(:quantifier, quantifiers)

        set_tokens = CharacterSet::OpenClose + CharacterSet::Extended +
                     CharacterSet::Types + CharacterSet::POSIX::Standard
        implements(:set, set_tokens)

        implements(:type, CharacterType::Extended)
      end
    end
  end
end
@@ -0,0 +1,39 @@
require File.expand_path('../1.8', __FILE__)

module Regexp::Syntax
  module Ruby
    # Syntax class for Ruby 1.9.1 (going by the class name).
    #
    # Runs the V18 constructor first, then registers further token names on
    # top of what V18 registered.
    class V191 < Regexp::Syntax::Ruby::V18
      include Regexp::Syntax::Token

      def initialize
        super

        implements(:backref,
                   Group::Backreference::All + Group::SubexpressionCall::All)

        # NOTE(review): CharacterType::Hex is a character *type* list, yet it
        # is registered under :escape here. Possibly :type was intended;
        # confirm against the scanner before changing.
        implements(:escape, CharacterType::Hex)

        # Properties and their negated form accept the same names.
        [:property, :nonproperty].each do |token_type|
          implements(token_type, UnicodeProperty::All)
        end

        implements(:quantifier,
                   Quantifier::Possessive + Quantifier::IntervalPossessive)

        posix_additions = CharacterSet::POSIX::StandardNegative +
                          CharacterSet::POSIX::Extensions +
                          CharacterSet::POSIX::ExtensionsNegative
        implements(:set, posix_additions)

        subset_tokens = CharacterSet::OpenClose + CharacterSet::Extended +
                        CharacterSet::Types + CharacterSet::POSIX::Standard
        implements(:subset, subset_tokens)
      end
    end
  end
end
@@ -0,0 +1,10 @@
require File.expand_path('../1.9.1', __FILE__)

module Regexp::Syntax
  module Ruby
    # Registers nothing beyond V191 so far; exists so that this version has
    # a syntax class of its own.
    class V192 < Regexp::Syntax::Ruby::V191
    end
  end
end
@@ -0,0 +1,24 @@
require File.expand_path('../1.9.2', __FILE__)

module Regexp::Syntax
  module Ruby
    # Syntax class for Ruby 1.9.3 (going by the class name).
    #
    # Extends V192 with additional Unicode property names.
    class V193 < Regexp::Syntax::Ruby::V192
      include Regexp::Syntax::Token

      def initialize
        super

        # These names were added when Oniguruma was updated to Unicode 6.0.
        # They are registered identically for properties and for their
        # negated form. The list is rebuilt on every pass so that each
        # `implements` call receives its own array object.
        [:property, :nonproperty].each do |token_type|
          implements(token_type,
                     [:script_mandaic, :script_batak, :script_brahmi] +
                     UnicodeProperty::Age)
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
require File.expand_path('../1.9.3', __FILE__)

module Regexp::Syntax
  module Ruby
    # Generic 1.9 syntax class: tracks the latest 1.9 release, which is
    # currently V193.
    class V19 < Regexp::Syntax::Ruby::V193
    end
  end
end
@@ -0,0 +1,332 @@
module Regexp::Syntax

  # Lists of token names, grouped into one module per token type. The syntax
  # classes under Regexp::Syntax::Ruby include this module and pass these
  # lists to `implements`.
  #
  # The arrays are deliberately left unfrozen: what `implements` does with
  # them is not visible from this file.
  module Token

    # -------------------------------------------------------------------------
    module Anchor
      Basic       = [:beginning_of_line, :end_of_line]
      Extended    = Basic + [:word_boundary, :nonword_boundary]
      # Within Anchor, this constant shadows the core String class.
      String      = [:bos, :eos, :eos_ob_eol]
      MatchStart  = [:match_start]

      All = Extended + String + MatchStart
    end


    # -------------------------------------------------------------------------
    module CharacterSet
      OpenClose = [:open, :close]

      Basic     = [:negate, :member, :range]
      Extended  = Basic + [:escape, :intersection, :range_hex, :backspace]

      Types     = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
                   :type_space, :type_nonspace, :type_word, :type_nonword]

      module POSIX
        Standard = [:class_alnum, :class_alpha, :class_blank, :class_cntrl,
                    :class_digit, :class_graph, :class_lower, :class_print,
                    :class_punct, :class_space, :class_upper, :class_xdigit]

        StandardNegative = [
          :class_nonalnum, :class_nonalpha, :class_nonblank,
          :class_noncntrl, :class_nondigit, :class_nongraph,
          :class_nonlower, :class_nonprint, :class_nonpunct,
          :class_nonspace, :class_nonupper, :class_nonxdigit]

        Extensions = [:class_ascii, :class_word]
        ExtensionsNegative = [:class_nonascii, :class_nonword]

        All = Standard + StandardNegative +
              Extensions + ExtensionsNegative
      end

      # Extended already starts with Basic, so the Basic names occur twice
      # in this list. OpenClose is not part of it.
      All = Basic + Extended + Types + POSIX::All

      module SubSet
        OpenClose = [:open, :close]
        All       = CharacterSet::All
      end
    end


    # -------------------------------------------------------------------------
    module CharacterType
      Basic     = []
      Extended  = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
      Hex       = [:hex, :nonhex]

      All = Basic + Extended + Hex
    end


    # -------------------------------------------------------------------------
    module Escape
      Basic = [:backslash, :literal]

      Backreference = [:digit]

      ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
               :space, :tab, :vertical_tab]

      # The last entry used to be misspelt :baclslash, a name that appears
      # nowhere else in these lists; it is spelt :backslash as in Basic.
      Meta  = [:dot, :alternation, :zero_or_one, :zero_or_more, :one_or_more,
               :beginning_of_line, :end_of_line, :group_open, :group_close,
               :interval_open, :interval_close, :set_open, :set_close, :backslash]

      # :backslash is listed in both Basic and Meta, so it occurs twice here.
      All   = Basic + Backreference + ASCII + Meta
    end


    # -------------------------------------------------------------------------
    module Group
      Basic     = [:capture, :close]
      Extended  = Basic + [:options]

      Named     = [:named]
      Atomic    = [:atomic]
      Passive   = [:passive]
      Comment   = [:comment]

      module Assertion
        Positive = [:lookahead, :lookbehind]
        Negative = [:nlookahead, :nlookbehind]

        All = Positive + Negative
      end

      module Backreference
        Name      = [:name_ref]
        Number    = [:number_ref, :number_rel_ref]

        NestLevel = [:name_nest_ref, :number_nest_ref]

        All = Name + Number + NestLevel
      end

      module SubexpressionCall
        Name      = [:name_call]
        Number    = [:number_call, :number_rel_call]

        All = Name + Number
      end

      # Assertion, Backreference and SubexpressionCall are not part of this
      # list; each of those modules has an All of its own.
      All = Group::Extended + Group::Named + Group::Atomic +
            Group::Passive + Group::Comment
    end


    # -------------------------------------------------------------------------
    module Meta
      Basic     = [:dot]
      Extended  = Basic + [:alternation]
    end


    # -------------------------------------------------------------------------
    module Quantifier
      Greedy      = [:zero_or_one, :zero_or_more, :one_or_more]
      Reluctant   = [:zero_or_one_reluctant, :zero_or_more_reluctant, :one_or_more_reluctant]
      Possessive  = [:zero_or_one_possessive, :zero_or_more_possessive, :one_or_more_possessive]

      Interval            = [:interval]
      IntervalReluctant   = [:interval_reluctant]
      IntervalPossessive  = [:interval_possessive]
    end


    # -------------------------------------------------------------------------
    module UnicodeProperty
      Type = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph, :lower,
              :print, :punct, :space, :upper, :word, :xdigit]

      POSIX = [:any, :assigned, :newline]

      module Category
        Letter      = [:letter_any, :letter_uppercase, :letter_lowercase,
                       :letter_titlecase, :letter_modifier, :letter_other]

        Mark        = [:mark_any, :mark_nonspacing, :mark_spacing,
                       :mark_enclosing]

        Number      = [:number_any, :number_decimal, :number_letter,
                       :number_other]

        Punctuation = [:punct_any, :punct_connector, :punct_dash,
                       :punct_open, :punct_close, :punct_initial,
                       :punct_final, :punct_other]

        # Within Category, this constant shadows the core Symbol class.
        Symbol      = [:symbol_any, :symbol_math, :symbol_currency,
                       :symbol_modifier, :symbol_other]

        Separator   = [:separator_any, :separator_space, :separator_line,
                       :separator_para]

        Codepoint   = [:other, :control, :format,
                       :surrogate, :private_use, :unassigned]

        All = Letter + Mark + Number + Punctuation +
              Symbol + Separator + Codepoint
      end

      Age = [:age_1_1, :age_2_0, :age_2_1, :age_3_0, :age_3_1,
             :age_3_2, :age_4_0, :age_4_1, :age_5_0, :age_5_1,
             :age_5_2, :age_6_0]

      Derived = [
        :ascii_hex,
        :alphabetic,
        :cased,
        :changes_when_casefolded,
        :changes_when_casemapped,
        :changes_when_lowercased,
        :changes_when_titlecased,
        :changes_when_uppercased,
        :case_ignorable,
        :bidi_control,
        :dash,
        :deprecated,
        :default_ignorable_cp,
        :diacritic,
        :extender,
        :grapheme_base,
        :grapheme_extend,
        :grapheme_link,
        :hex_digit,
        :hyphen,
        :id_continue,
        :ideographic,
        :id_start,
        :ids_binary_op,
        :ids_trinary_op,
        :join_control,
        :logical_order_exception,
        :lowercase,
        :math,
        :non_character_cp,
        :other_alphabetic,
        :other_default_ignorable_cp,
        :other_grapheme_extended,
        :other_id_continue,
        :other_id_start,
        :other_lowercase,
        :other_math,
        :other_uppercase,
        :pattern_syntax,
        :pattern_whitespace,
        :quotation_mark,
        :radical,
        :soft_dotted,
        :sentence_terminal,
        :terminal_punctuation,
        :unified_ideograph,
        :uppercase,
        :variation_selector,
        :whitespace,
        :xid_start,
        :xid_continue,
      ]

      Script = [
        :script_arabic,
        :script_imperial_aramaic,
        :script_armenian,
        :script_avestan,
        :script_balinese,
        :script_bamum,
        :script_bengali,
        :script_bopomofo,
        :script_braille,
        :script_buginese,
        :script_buhid,
        :script_canadian_aboriginal,
        :script_carian,
        :script_cham,
        :script_cherokee,
        :script_coptic,
        :script_cypriot,
        :script_cyrillic,
        :script_devanagari,
        :script_deseret,
        :script_egyptian_hieroglyphs,
        :script_ethiopic,
        :script_georgian,
        :script_glagolitic,
        :script_gothic,
        :script_greek,
        :script_gujarati,
        :script_gurmukhi,
        :script_hangul,
        :script_han,
        :script_hanunoo,
        :script_hebrew,
        :script_hiragana,
        :script_katakana_or_hiragana,
        :script_old_italic,
        :script_javanese,
        :script_kayah_li,
        :script_katakana,
        :script_kharoshthi,
        :script_khmer,
        :script_kannada,
        :script_kaithi,
        :script_tai_tham,
        :script_lao,
        :script_latin,
        :script_lepcha,
        :script_limbu,
        :script_linear_b,
        :script_lisu,
        :script_lycian,
        :script_lydian,
        :script_malayalam,
        :script_mongolian,
        :script_meetei_mayek,
        :script_myanmar,
        :script_nko,
        :script_ogham,
        :script_ol_chiki,
        :script_old_turkic,
        :script_oriya,
        :script_osmanya,
        :script_phags_pa,
        :script_inscriptional_pahlavi,
        :script_phoenician,
        :script_inscriptional_parthian,
        :script_rejang,
        :script_runic,
        :script_samaritan,
        :script_old_south_arabian,
        :script_saurashtra,
        :script_shavian,
        :script_sinhala,
        :script_sundanese,
        :script_syloti_nagri,
        :script_syriac,
        :script_tagbanwa,
        :script_tai_le,
        :script_new_tai_lue,
        :script_tamil,
        :script_tai_viet,
        :script_telugu,
        :script_tifinagh,
        :script_tagalog,
        :script_thaana,
        :script_thai,
        :script_tibetan,
        :script_ugaritic,
        :script_vai,
        :script_old_persian,
        :script_cuneiform,
        :script_yi,
        :script_inherited,
        :script_common,
        :script_unknown
      ]

      # Not part of Script or All below.
      Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]

      All = Type + POSIX + Category::All + Age + Derived + Script
    end
  end

end