regexp_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/ChangeLog +4 -0
  2. data/LICENSE +22 -0
  3. data/README.rdoc +307 -0
  4. data/Rakefile +91 -0
  5. data/lib/regexp_parser/ctype.rb +48 -0
  6. data/lib/regexp_parser/expression/property.rb +108 -0
  7. data/lib/regexp_parser/expression/set.rb +59 -0
  8. data/lib/regexp_parser/expression.rb +287 -0
  9. data/lib/regexp_parser/lexer.rb +105 -0
  10. data/lib/regexp_parser/parser.rb +417 -0
  11. data/lib/regexp_parser/scanner/property.rl +534 -0
  12. data/lib/regexp_parser/scanner/scanner.rl +712 -0
  13. data/lib/regexp_parser/scanner.rb +3325 -0
  14. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
  15. data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
  16. data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
  17. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
  18. data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
  19. data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
  20. data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
  21. data/lib/regexp_parser/syntax/tokens.rb +332 -0
  22. data/lib/regexp_parser/syntax.rb +172 -0
  23. data/lib/regexp_parser.rb +45 -0
  24. data/test/helpers.rb +8 -0
  25. data/test/lexer/test_all.rb +26 -0
  26. data/test/lexer/test_literals.rb +120 -0
  27. data/test/lexer/test_nesting.rb +107 -0
  28. data/test/lexer/test_refcalls.rb +45 -0
  29. data/test/parser/test_all.rb +44 -0
  30. data/test/parser/test_alternation.rb +46 -0
  31. data/test/parser/test_anchors.rb +35 -0
  32. data/test/parser/test_errors.rb +59 -0
  33. data/test/parser/test_escapes.rb +48 -0
  34. data/test/parser/test_expression.rb +51 -0
  35. data/test/parser/test_groups.rb +69 -0
  36. data/test/parser/test_properties.rb +346 -0
  37. data/test/parser/test_quantifiers.rb +236 -0
  38. data/test/parser/test_refcalls.rb +101 -0
  39. data/test/parser/test_sets.rb +99 -0
  40. data/test/scanner/test_all.rb +30 -0
  41. data/test/scanner/test_anchors.rb +35 -0
  42. data/test/scanner/test_errors.rb +36 -0
  43. data/test/scanner/test_escapes.rb +49 -0
  44. data/test/scanner/test_groups.rb +41 -0
  45. data/test/scanner/test_literals.rb +85 -0
  46. data/test/scanner/test_meta.rb +36 -0
  47. data/test/scanner/test_properties.rb +315 -0
  48. data/test/scanner/test_quantifiers.rb +38 -0
  49. data/test/scanner/test_refcalls.rb +45 -0
  50. data/test/scanner/test_scripts.rb +314 -0
  51. data/test/scanner/test_sets.rb +80 -0
  52. data/test/scanner/test_types.rb +30 -0
  53. data/test/syntax/ruby/test_1.8.rb +57 -0
  54. data/test/syntax/ruby/test_1.9.1.rb +39 -0
  55. data/test/syntax/ruby/test_1.9.3.rb +38 -0
  56. data/test/syntax/ruby/test_all.rb +12 -0
  57. data/test/syntax/test_all.rb +19 -0
  58. data/test/test_all.rb +4 -0
  59. metadata +160 -0
@@ -0,0 +1,14 @@
1
+ require File.expand_path('../1.8', __FILE__)
2
+
3
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.8.6.
    #
    # Declares nothing of its own: the class body is empty, so the
    # initializer inherited from V18 runs unchanged. The previous explicit
    # +initialize+ only called +super+ and was therefore redundant.
    class V186 < Regexp::Syntax::Ruby::V18
    end
  end

end
@@ -0,0 +1,14 @@
1
+ require File.expand_path('../1.8', __FILE__)
2
+
3
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.8.7.
    #
    # Declares nothing of its own: the class body is empty, so the
    # initializer inherited from V18 runs unchanged. The previous explicit
    # +initialize+ only called +super+ and was therefore redundant.
    class V187 < Regexp::Syntax::Ruby::V18
    end
  end

end
@@ -0,0 +1,39 @@
1
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.8.
    #
    # After running the Base initializer, it passes one list of token names
    # per token type to +implements+ (defined outside this file). The token
    # name lists themselves come from the Regexp::Syntax::Token mixin.
    class V18 < Regexp::Syntax::Base
      include Regexp::Syntax::Token

      def initialize
        super

        implements :anchor,    Anchor::All
        implements :assertion, Group::Assertion::All
        implements :backref,   [:number]

        escapes = Escape::Basic + Escape::Backreference +
                  Escape::ASCII + Escape::Meta
        implements :escape, escapes

        implements :group, Group::All
        implements :meta,  Meta::Extended

        quantifiers = Quantifier::Greedy + Quantifier::Reluctant +
                      Quantifier::Interval + Quantifier::IntervalReluctant
        implements :quantifier, quantifiers

        set_tokens = CharacterSet::OpenClose + CharacterSet::Extended +
                     CharacterSet::Types + CharacterSet::POSIX::Standard
        implements :set, set_tokens

        implements :type, CharacterType::Extended
      end
    end
  end

end
@@ -0,0 +1,39 @@
1
+ require File.expand_path('../1.8', __FILE__)
2
+
3
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.9.1.
    #
    # Runs the V18 initializer first, then makes further +implements+ calls
    # (defined outside this file) with the token lists shown below.
    class V191 < Regexp::Syntax::Ruby::V18
      include Regexp::Syntax::Token

      def initialize
        super

        backrefs = Group::Backreference::All + Group::SubexpressionCall::All
        implements :backref, backrefs

        # NOTE(review): the hex tokens are passed under :escape, whereas V18
        # passes CharacterType tokens under :type -- confirm this is intended.
        implements :escape, CharacterType::Hex

        # Properties and their negated form receive the same token list.
        [:property, :nonproperty].each do |kind|
          implements kind, UnicodeProperty::All
        end

        possessive = Quantifier::Possessive + Quantifier::IntervalPossessive
        implements :quantifier, possessive

        posix_additions = CharacterSet::POSIX::StandardNegative +
                          CharacterSet::POSIX::Extensions +
                          CharacterSet::POSIX::ExtensionsNegative
        implements :set, posix_additions

        subset_tokens = CharacterSet::OpenClose + CharacterSet::Extended +
                        CharacterSet::Types + CharacterSet::POSIX::Standard
        implements :subset, subset_tokens
      end
    end
  end

end
@@ -0,0 +1,10 @@
1
+ require File.expand_path('../1.9.1', __FILE__)
2
+
3
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.9.2: an empty subclass of V191, since no
    # differences from 1.9.1 have been recorded so far.
    class V192 < Regexp::Syntax::Ruby::V191
    end
  end

end
@@ -0,0 +1,24 @@
1
+ require File.expand_path('../1.9.2', __FILE__)
2
+
3
module Regexp::Syntax

  module Ruby
    # Syntax definition for Ruby 1.9.3.
    #
    # Runs the V192 initializer, then passes three extra script tokens plus
    # the UnicodeProperty::Age list to +implements+ for both the :property
    # and :nonproperty token types.
    class V193 < Regexp::Syntax::Ruby::V192
      include Regexp::Syntax::Token

      def initialize
        super

        # these were added with update of Oniguruma to Unicode 6.0
        # (the list is rebuilt on every iteration so that each call gets
        # its own array, exactly as with two separate statements)
        [:property, :nonproperty].each do |kind|
          implements kind,
            [:script_mandaic, :script_batak, :script_brahmi] +
            UnicodeProperty::Age
        end
      end
    end
  end

end
@@ -0,0 +1,8 @@
1
+ require File.expand_path('../1.9.3', __FILE__)
2
+
3
module Regexp::Syntax
  module Ruby
    # Generic Ruby 1.9 syntax: an empty subclass of V193, i.e. it follows
    # the latest 1.9 release defined here.
    class V19 < Regexp::Syntax::Ruby::V193
    end
  end
end
@@ -0,0 +1,332 @@
1
+ module Regexp::Syntax
2
+
3
+ module Token
4
+
5
+ # -------------------------------------------------------------------------
6
# Anchor token names, split into groups that a syntax definition can
# combine as needed.
module Anchor
  Basic      = [:beginning_of_line, :end_of_line]
  Extended   = Basic + [:word_boundary, :nonword_boundary]
  String     = [:bos, :eos, :eos_ob_eol]
  MatchStart = [:match_start]

  # Every anchor token: Extended (which already contains Basic), then the
  # String group, then MatchStart.
  All = [Extended, String, MatchStart].flatten
end
14
+
15
+
16
+ # -------------------------------------------------------------------------
17
# Character set token names, grouped so that syntax definitions can pick
# the groups they need.
module CharacterSet
  OpenClose = [:open, :close]

  Basic    = [:negate, :member, :range]
  Extended = Basic + [:escape, :intersection, :range_hex, :backspace]

  Types = [
    :type_digit, :type_nondigit,
    :type_hex,   :type_nonhex,
    :type_space, :type_nonspace,
    :type_word,  :type_nonword
  ]

  # Tokens for POSIX bracket classes, with a separate list for each
  # "non" variant.
  module POSIX
    Standard = [
      :class_alnum, :class_alpha, :class_blank,
      :class_cntrl, :class_digit, :class_graph,
      :class_lower, :class_print, :class_punct,
      :class_space, :class_upper, :class_xdigit
    ]

    StandardNegative = [
      :class_nonalnum, :class_nonalpha, :class_nonblank,
      :class_noncntrl, :class_nondigit, :class_nongraph,
      :class_nonlower, :class_nonprint, :class_nonpunct,
      :class_nonspace, :class_nonupper, :class_nonxdigit
    ]

    Extensions         = [:class_ascii, :class_word]
    ExtensionsNegative = [:class_nonascii, :class_nonword]

    All = Standard + StandardNegative + Extensions + ExtensionsNegative
  end

  # Basic contributes twice (directly and through Extended), so its three
  # tokens appear twice in this list; OpenClose is not part of it.
  All = Basic + Extended + Types + POSIX::All

  # Token groups for sets nested inside another set.
  module SubSet
    OpenClose = [:open, :close]
    All       = CharacterSet::All
  end
end
51
+
52
+
53
+ # -------------------------------------------------------------------------
54
# Character type token names.
module CharacterType
  Basic    = []
  Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
  Hex      = [:hex, :nonhex]

  # Basic is empty, so this is Extended followed by Hex.
  All = [Basic, Extended, Hex].flatten
end
61
+
62
+
63
+ # -------------------------------------------------------------------------
64
# Escape sequence token names, grouped by kind.
module Escape
  Basic = [:backslash, :literal]

  Backreference = [:digit]

  ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
           :space, :tab, :vertical_tab]

  # Escaped meta characters. The final entry was previously misspelled
  # :baclslash, a name that matches no other token name in this file.
  Meta = [:dot, :alternation, :zero_or_one, :zero_or_more, :one_or_more,
          :beginning_of_line, :end_of_line, :group_open, :group_close,
          :interval_open, :interval_close, :set_open, :set_close, :backslash]

  # :backslash is a member of both Basic and Meta, so it is listed twice.
  All = Basic + Backreference + ASCII + Meta
end
78
+
79
+
80
+ # -------------------------------------------------------------------------
81
# Group token names, plus the assertion, back-reference and
# subexpression call tokens that are kept in nested modules.
module Group
  Basic    = [:capture, :close]
  Extended = Basic + [:options]

  Named   = [:named]
  Atomic  = [:atomic]
  Passive = [:passive]
  Comment = [:comment]

  # Look-ahead / look-behind tokens; the "n" prefixed names form the
  # Negative list.
  module Assertion
    Positive = [:lookahead, :lookbehind]
    Negative = [:nlookahead, :nlookbehind]

    All = Positive + Negative
  end

  # Back-reference tokens, by name, by number, and with a nest level.
  module Backreference
    Name      = [:name_ref]
    Number    = [:number_ref, :number_rel_ref]
    NestLevel = [:name_nest_ref, :number_nest_ref]

    All = Name + Number + NestLevel
  end

  # Subexpression call tokens, by name and by number.
  module SubexpressionCall
    Name   = [:name_call]
    Number = [:number_call, :number_rel_call]

    All = Name + Number
  end

  # Only the plain group tokens; the lists of the nested modules above are
  # not included.
  All = Extended + Named + Atomic + Passive + Comment
end
116
+
117
+
118
+ # -------------------------------------------------------------------------
119
# Meta character token names.
module Meta
  Basic    = [:dot]
  Extended = Basic + [:alternation]
end
123
+
124
+
125
+ # -------------------------------------------------------------------------
126
# Quantifier token names: three lists for the zero_or_one / zero_or_more /
# one_or_more tokens and three single-entry lists for interval tokens.
module Quantifier
  Greedy = [:zero_or_one, :zero_or_more, :one_or_more]

  Reluctant = [
    :zero_or_one_reluctant,
    :zero_or_more_reluctant,
    :one_or_more_reluctant
  ]

  Possessive = [
    :zero_or_one_possessive,
    :zero_or_more_possessive,
    :one_or_more_possessive
  ]

  Interval           = [:interval]
  IntervalReluctant  = [:interval_reluctant]
  IntervalPossessive = [:interval_possessive]
end
135
+
136
+
137
+ # -------------------------------------------------------------------------
138
# Unicode property token names, grouped by kind of property.
module UnicodeProperty
  Type = [
    :alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
    :lower, :print, :punct, :space, :upper, :word, :xdigit
  ]

  POSIX = [:any, :assigned, :newline]

  # General category tokens, one list per major category.
  module Category
    Letter = [
      :letter_any, :letter_uppercase, :letter_lowercase,
      :letter_titlecase, :letter_modifier, :letter_other
    ]

    Mark = [:mark_any, :mark_nonspacing, :mark_spacing, :mark_enclosing]

    Number = [:number_any, :number_decimal, :number_letter, :number_other]

    Punctuation = [
      :punct_any, :punct_connector, :punct_dash, :punct_open,
      :punct_close, :punct_initial, :punct_final, :punct_other
    ]

    Symbol = [
      :symbol_any, :symbol_math, :symbol_currency,
      :symbol_modifier, :symbol_other
    ]

    Separator = [
      :separator_any, :separator_space, :separator_line, :separator_para
    ]

    Codepoint = [
      :other, :control, :format, :surrogate, :private_use, :unassigned
    ]

    All = [Letter, Mark, Number, Punctuation,
           Symbol, Separator, Codepoint].flatten
  end

  Age = [
    :age_1_1, :age_2_0, :age_2_1, :age_3_0, :age_3_1, :age_3_2,
    :age_4_0, :age_4_1, :age_5_0, :age_5_1, :age_5_2, :age_6_0
  ]

  Derived = [
    :ascii_hex, :alphabetic, :cased,
    :changes_when_casefolded, :changes_when_casemapped,
    :changes_when_lowercased, :changes_when_titlecased,
    :changes_when_uppercased,
    :case_ignorable, :bidi_control, :dash, :deprecated,
    :default_ignorable_cp, :diacritic, :extender,
    :grapheme_base, :grapheme_extend, :grapheme_link,
    :hex_digit, :hyphen,
    :id_continue, :ideographic, :id_start,
    :ids_binary_op, :ids_trinary_op,
    :join_control, :logical_order_exception, :lowercase, :math,
    :non_character_cp,
    :other_alphabetic, :other_default_ignorable_cp,
    :other_grapheme_extended, :other_id_continue, :other_id_start,
    :other_lowercase, :other_math, :other_uppercase,
    :pattern_syntax, :pattern_whitespace,
    :quotation_mark, :radical, :soft_dotted,
    :sentence_terminal, :terminal_punctuation,
    :unified_ideograph, :uppercase, :variation_selector, :whitespace,
    :xid_start, :xid_continue
  ]

  Script = [
    :script_arabic, :script_imperial_aramaic, :script_armenian,
    :script_avestan, :script_balinese, :script_bamum,
    :script_bengali, :script_bopomofo, :script_braille,
    :script_buginese, :script_buhid, :script_canadian_aboriginal,
    :script_carian, :script_cham, :script_cherokee,
    :script_coptic, :script_cypriot, :script_cyrillic,
    :script_devanagari, :script_deseret, :script_egyptian_hieroglyphs,
    :script_ethiopic, :script_georgian, :script_glagolitic,
    :script_gothic, :script_greek, :script_gujarati,
    :script_gurmukhi, :script_hangul, :script_han,
    :script_hanunoo, :script_hebrew, :script_hiragana,
    :script_katakana_or_hiragana, :script_old_italic, :script_javanese,
    :script_kayah_li, :script_katakana, :script_kharoshthi,
    :script_khmer, :script_kannada, :script_kaithi,
    :script_tai_tham, :script_lao, :script_latin,
    :script_lepcha, :script_limbu, :script_linear_b,
    :script_lisu, :script_lycian, :script_lydian,
    :script_malayalam, :script_mongolian, :script_meetei_mayek,
    :script_myanmar, :script_nko, :script_ogham,
    :script_ol_chiki, :script_old_turkic, :script_oriya,
    :script_osmanya, :script_phags_pa, :script_inscriptional_pahlavi,
    :script_phoenician, :script_inscriptional_parthian, :script_rejang,
    :script_runic, :script_samaritan, :script_old_south_arabian,
    :script_saurashtra, :script_shavian, :script_sinhala,
    :script_sundanese, :script_syloti_nagri, :script_syriac,
    :script_tagbanwa, :script_tai_le, :script_new_tai_lue,
    :script_tamil, :script_tai_viet, :script_telugu,
    :script_tifinagh, :script_tagalog, :script_thaana,
    :script_thai, :script_tibetan, :script_ugaritic,
    :script_vai, :script_old_persian, :script_cuneiform,
    :script_yi, :script_inherited, :script_common,
    :script_unknown
  ]

  # Kept as a separate list: these three scripts are not part of Script
  # and therefore not part of All either.
  Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]

  All = Type + POSIX + Category::All + Age + Derived + Script
end
330
+ end
331
+
332
+ end