regexp_parser 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +4 -0
- data/LICENSE +22 -0
- data/README.rdoc +307 -0
- data/Rakefile +91 -0
- data/lib/regexp_parser/ctype.rb +48 -0
- data/lib/regexp_parser/expression/property.rb +108 -0
- data/lib/regexp_parser/expression/set.rb +59 -0
- data/lib/regexp_parser/expression.rb +287 -0
- data/lib/regexp_parser/lexer.rb +105 -0
- data/lib/regexp_parser/parser.rb +417 -0
- data/lib/regexp_parser/scanner/property.rl +534 -0
- data/lib/regexp_parser/scanner/scanner.rl +712 -0
- data/lib/regexp_parser/scanner.rb +3325 -0
- data/lib/regexp_parser/syntax/ruby/1.8.6.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.7.rb +14 -0
- data/lib/regexp_parser/syntax/ruby/1.8.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +39 -0
- data/lib/regexp_parser/syntax/ruby/1.9.2.rb +10 -0
- data/lib/regexp_parser/syntax/ruby/1.9.3.rb +24 -0
- data/lib/regexp_parser/syntax/ruby/1.9.rb +8 -0
- data/lib/regexp_parser/syntax/tokens.rb +332 -0
- data/lib/regexp_parser/syntax.rb +172 -0
- data/lib/regexp_parser.rb +45 -0
- data/test/helpers.rb +8 -0
- data/test/lexer/test_all.rb +26 -0
- data/test/lexer/test_literals.rb +120 -0
- data/test/lexer/test_nesting.rb +107 -0
- data/test/lexer/test_refcalls.rb +45 -0
- data/test/parser/test_all.rb +44 -0
- data/test/parser/test_alternation.rb +46 -0
- data/test/parser/test_anchors.rb +35 -0
- data/test/parser/test_errors.rb +59 -0
- data/test/parser/test_escapes.rb +48 -0
- data/test/parser/test_expression.rb +51 -0
- data/test/parser/test_groups.rb +69 -0
- data/test/parser/test_properties.rb +346 -0
- data/test/parser/test_quantifiers.rb +236 -0
- data/test/parser/test_refcalls.rb +101 -0
- data/test/parser/test_sets.rb +99 -0
- data/test/scanner/test_all.rb +30 -0
- data/test/scanner/test_anchors.rb +35 -0
- data/test/scanner/test_errors.rb +36 -0
- data/test/scanner/test_escapes.rb +49 -0
- data/test/scanner/test_groups.rb +41 -0
- data/test/scanner/test_literals.rb +85 -0
- data/test/scanner/test_meta.rb +36 -0
- data/test/scanner/test_properties.rb +315 -0
- data/test/scanner/test_quantifiers.rb +38 -0
- data/test/scanner/test_refcalls.rb +45 -0
- data/test/scanner/test_scripts.rb +314 -0
- data/test/scanner/test_sets.rb +80 -0
- data/test/scanner/test_types.rb +30 -0
- data/test/syntax/ruby/test_1.8.rb +57 -0
- data/test/syntax/ruby/test_1.9.1.rb +39 -0
- data/test/syntax/ruby/test_1.9.3.rb +38 -0
- data/test/syntax/ruby/test_all.rb +12 -0
- data/test/syntax/test_all.rb +19 -0
- data/test/test_all.rb +4 -0
- metadata +160 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
module Regexp::Syntax
  module Ruby
    # Syntax level V18. Registers, per token type, the token names this
    # level accepts by calling +implements+ once for each type.
    class V18 < Regexp::Syntax::Base
      include Regexp::Syntax::Token

      def initialize
        super

        # Token lists that are combined from several Token constants.
        escapes     = Escape::Basic + Escape::Backreference +
                      Escape::ASCII + Escape::Meta
        quantifiers = Quantifier::Greedy + Quantifier::Reluctant +
                      Quantifier::Interval + Quantifier::IntervalReluctant
        set_tokens  = CharacterSet::OpenClose + CharacterSet::Extended +
                      CharacterSet::Types + CharacterSet::POSIX::Standard

        implements(:anchor,     Anchor::All)
        implements(:assertion,  Group::Assertion::All)
        implements(:backref,    [:number])
        implements(:escape,     escapes)
        implements(:group,      Group::All)
        implements(:meta,       Meta::Extended)
        implements(:quantifier, quantifiers)
        implements(:set,        set_tokens)
        implements(:type,       CharacterType::Extended)
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.expand_path('../1.8', __FILE__)
|
2
|
+
|
3
|
+
module Regexp::Syntax
  module Ruby
    # Syntax level V191. Starts from everything V18 registers (via +super+)
    # and then registers the additional tokens listed below.
    class V191 < Regexp::Syntax::Ruby::V18
      include Regexp::Syntax::Token

      def initialize
        super

        implements :backref, Group::Backreference::All +
          Group::SubexpressionCall::All

        # CharacterType::Hex sits next to CharacterType::Extended, which V18
        # registers under :type, so the hex tokens are registered under :type
        # as well. The original :escape registration is kept so that nothing
        # accepted before is rejected now.
        # NOTE(review): confirm which token type the scanner emits for the
        # hex character type, then drop whichever registration is unused.
        implements :escape, CharacterType::Hex
        implements :type,   CharacterType::Hex

        implements :property,
          UnicodeProperty::All

        implements :nonproperty,
          UnicodeProperty::All

        implements :quantifier,
          Quantifier::Possessive + Quantifier::IntervalPossessive

        implements :set,
          CharacterSet::POSIX::StandardNegative +
          CharacterSet::POSIX::Extensions +
          CharacterSet::POSIX::ExtensionsNegative

        implements :subset, CharacterSet::OpenClose +
          CharacterSet::Extended + CharacterSet::Types +
          CharacterSet::POSIX::Standard
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require File.expand_path('../1.9.2', __FILE__)
|
2
|
+
|
3
|
+
module Regexp::Syntax
  module Ruby
    # Syntax level V193. Starts from everything V192 registers (via +super+)
    # and adds the same extra tokens to both :property and :nonproperty.
    class V193 < Regexp::Syntax::Ruby::V192
      include Regexp::Syntax::Token

      def initialize
        super

        # these were added with update of Oniguruma to Unicode 6.0
        # UnicodeProperty::Script_6_0 holds the three script tokens
        # (brahmi, batak, mandaic) that were previously spelled out twice here.
        unicode_6_0 = UnicodeProperty::Script_6_0 + UnicodeProperty::Age

        implements :property,    unicode_6_0
        implements :nonproperty, unicode_6_0
      end
    end
  end
end
|
@@ -0,0 +1,332 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
|
3
|
+
module Token
|
4
|
+
|
5
|
+
# -------------------------------------------------------------------------
|
6
|
+
# Token names for anchors, grouped into lists that syntax classes hand to
# +implements+ under the :anchor type.
module Anchor
  Basic = [
    :beginning_of_line,
    :end_of_line
  ]

  # Basic plus the word-boundary tokens.
  Extended = Basic + [
    :word_boundary,
    :nonword_boundary
  ]

  String = [
    :bos,
    :eos,
    :eos_ob_eol
  ]

  MatchStart = [
    :match_start
  ]

  # Every anchor token defined in this module.
  All = Extended +
        String +
        MatchStart
end
|
14
|
+
|
15
|
+
|
16
|
+
# -------------------------------------------------------------------------
|
17
|
+
# Token names for character sets, grouped into lists that syntax classes
# hand to +implements+ under the :set and :subset types.
module CharacterSet
  OpenClose = [:open, :close]

  Basic     = [:negate, :member, :range]
  Extended  = Basic + [:escape, :intersection, :range_hex, :backspace]

  Types     = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
               :type_space, :type_nonspace, :type_word, :type_nonword]

  # Token names for the POSIX bracket classes and their negated forms.
  module POSIX
    Standard = [:class_alnum, :class_alpha, :class_blank, :class_cntrl,
                :class_digit, :class_graph, :class_lower, :class_print,
                :class_punct, :class_space, :class_upper, :class_xdigit]

    StandardNegative = [
      :class_nonalnum, :class_nonalpha, :class_nonblank,
      :class_noncntrl, :class_nondigit, :class_nongraph,
      :class_nonlower, :class_nonprint, :class_nonpunct,
      :class_nonspace, :class_nonupper, :class_nonxdigit]

    Extensions         = [:class_ascii, :class_word]
    ExtensionsNegative = [:class_nonascii, :class_nonword]

    All = Standard + StandardNegative +
          Extensions + ExtensionsNegative
  end

  # Extended already begins with the Basic tokens, so Basic is not added a
  # second time (doing so listed :negate, :member and :range twice).
  # OpenClose is not part of All, as before.
  All = Extended + Types + POSIX::All

  # Token lists for nested sets; All is the very same list as
  # CharacterSet::All.
  module SubSet
    OpenClose = [:open, :close]
    All       = CharacterSet::All
  end
end
|
51
|
+
|
52
|
+
|
53
|
+
# -------------------------------------------------------------------------
|
54
|
+
# Token names for character types, grouped into lists that syntax classes
# hand to +implements+.
module CharacterType
  # Intentionally empty.
  Basic = []

  Extended = [
    :digit, :nondigit,
    :space, :nonspace,
    :word,  :nonword
  ]

  Hex = [
    :hex, :nonhex
  ]

  # Every character type token defined in this module.
  All = Basic +
        Extended +
        Hex
end
|
61
|
+
|
62
|
+
|
63
|
+
# -------------------------------------------------------------------------
|
64
|
+
# Token names for escape sequences, grouped into lists that syntax classes
# hand to +implements+ under the :escape type.
module Escape
  Basic = [:backslash, :literal]

  Backreference = [:digit]

  ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
           :space, :tab, :vertical_tab]

  # The last entry used to be misspelled :baclslash, a name no other list in
  # this file uses; it is now :backslash, matching Basic.
  Meta = [:dot, :alternation, :zero_or_one, :zero_or_more, :one_or_more,
          :beginning_of_line, :end_of_line, :group_open, :group_close,
          :interval_open, :interval_close, :set_open, :set_close, :backslash]

  # :backslash appears in both Basic and Meta; uniq keeps a single copy.
  All = (Basic + Backreference + ASCII + Meta).uniq
end
|
78
|
+
|
79
|
+
|
80
|
+
# -------------------------------------------------------------------------
|
81
|
+
# Token names for groups, assertions, back-references and subexpression
# calls, grouped into lists that syntax classes hand to +implements+.
module Group
  # Assertion tokens, split by polarity.
  module Assertion
    Positive = [:lookahead, :lookbehind]
    Negative = [:nlookahead, :nlookbehind]

    All = Positive + Negative
  end

  # Back-reference tokens, split by how the target group is addressed.
  module Backreference
    Name      = [:name_ref]
    Number    = [:number_ref, :number_rel_ref]
    NestLevel = [:name_nest_ref, :number_nest_ref]

    All = Name + Number + NestLevel
  end

  # Subexpression call tokens, split by how the target group is addressed.
  module SubexpressionCall
    Name   = [:name_call]
    Number = [:number_call, :number_rel_call]

    All = Name + Number
  end

  Basic    = [:capture, :close]
  Extended = Basic + [:options]

  Named   = [:named]
  Atomic  = [:atomic]
  Passive = [:passive]
  Comment = [:comment]

  # The plain group tokens only; the nested modules above are not included.
  All = Extended + Named + Atomic + Passive + Comment
end
|
116
|
+
|
117
|
+
|
118
|
+
# -------------------------------------------------------------------------
|
119
|
+
# Token names for meta characters, grouped into lists that syntax classes
# hand to +implements+ under the :meta type.
module Meta
  Basic = [
    :dot
  ]

  # Basic plus alternation.
  Extended = Basic + [
    :alternation
  ]
end
|
123
|
+
|
124
|
+
|
125
|
+
# -------------------------------------------------------------------------
|
126
|
+
# Token names for quantifiers, grouped into lists that syntax classes hand
# to +implements+ under the :quantifier type.
module Quantifier
  Greedy = [
    :zero_or_one,
    :zero_or_more,
    :one_or_more
  ]

  Reluctant = [
    :zero_or_one_reluctant,
    :zero_or_more_reluctant,
    :one_or_more_reluctant
  ]

  Possessive = [
    :zero_or_one_possessive,
    :zero_or_more_possessive,
    :one_or_more_possessive
  ]

  # Interval quantifiers, one list per mode.
  Interval           = [:interval]
  IntervalReluctant  = [:interval_reluctant]
  IntervalPossessive = [:interval_possessive]
end
|
135
|
+
|
136
|
+
|
137
|
+
# -------------------------------------------------------------------------
|
138
|
+
# Token names for Unicode properties, grouped into lists that syntax classes
# hand to +implements+ under the :property and :nonproperty types.
module UnicodeProperty
  Type = [
    :alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
    :lower, :print, :punct, :space, :upper, :word, :xdigit
  ]

  POSIX = [:any, :assigned, :newline]

  # General category tokens, one list per major category.
  module Category
    Letter = [
      :letter_any, :letter_uppercase, :letter_lowercase,
      :letter_titlecase, :letter_modifier, :letter_other
    ]

    Mark = [
      :mark_any, :mark_nonspacing, :mark_spacing, :mark_enclosing
    ]

    Number = [
      :number_any, :number_decimal, :number_letter, :number_other
    ]

    Punctuation = [
      :punct_any, :punct_connector, :punct_dash, :punct_open,
      :punct_close, :punct_initial, :punct_final, :punct_other
    ]

    Symbol = [
      :symbol_any, :symbol_math, :symbol_currency,
      :symbol_modifier, :symbol_other
    ]

    Separator = [
      :separator_any, :separator_space, :separator_line, :separator_para
    ]

    Codepoint = [
      :other, :control, :format, :surrogate, :private_use, :unassigned
    ]

    All = Letter + Mark + Number + Punctuation +
          Symbol + Separator + Codepoint
  end

  Age = [
    :age_1_1, :age_2_0, :age_2_1, :age_3_0,
    :age_3_1, :age_3_2, :age_4_0, :age_4_1,
    :age_5_0, :age_5_1, :age_5_2, :age_6_0
  ]

  Derived = [
    :ascii_hex, :alphabetic, :cased,
    :changes_when_casefolded, :changes_when_casemapped, :changes_when_lowercased,
    :changes_when_titlecased, :changes_when_uppercased, :case_ignorable,
    :bidi_control, :dash, :deprecated,
    :default_ignorable_cp, :diacritic, :extender,
    :grapheme_base, :grapheme_extend, :grapheme_link,
    :hex_digit, :hyphen, :id_continue,
    :ideographic, :id_start, :ids_binary_op,
    :ids_trinary_op, :join_control, :logical_order_exception,
    :lowercase, :math, :non_character_cp,
    :other_alphabetic, :other_default_ignorable_cp, :other_grapheme_extended,
    :other_id_continue, :other_id_start, :other_lowercase,
    :other_math, :other_uppercase, :pattern_syntax,
    :pattern_whitespace, :quotation_mark, :radical,
    :soft_dotted, :sentence_terminal, :terminal_punctuation,
    :unified_ideograph, :uppercase, :variation_selector,
    :whitespace, :xid_start, :xid_continue
  ]

  Script = [
    :script_arabic, :script_imperial_aramaic, :script_armenian, :script_avestan,
    :script_balinese, :script_bamum, :script_bengali, :script_bopomofo,
    :script_braille, :script_buginese, :script_buhid, :script_canadian_aboriginal,
    :script_carian, :script_cham, :script_cherokee, :script_coptic,
    :script_cypriot, :script_cyrillic, :script_devanagari, :script_deseret,
    :script_egyptian_hieroglyphs, :script_ethiopic, :script_georgian, :script_glagolitic,
    :script_gothic, :script_greek, :script_gujarati, :script_gurmukhi,
    :script_hangul, :script_han, :script_hanunoo, :script_hebrew,
    :script_hiragana, :script_katakana_or_hiragana, :script_old_italic, :script_javanese,
    :script_kayah_li, :script_katakana, :script_kharoshthi, :script_khmer,
    :script_kannada, :script_kaithi, :script_tai_tham, :script_lao,
    :script_latin, :script_lepcha, :script_limbu, :script_linear_b,
    :script_lisu, :script_lycian, :script_lydian, :script_malayalam,
    :script_mongolian, :script_meetei_mayek, :script_myanmar, :script_nko,
    :script_ogham, :script_ol_chiki, :script_old_turkic, :script_oriya,
    :script_osmanya, :script_phags_pa, :script_inscriptional_pahlavi, :script_phoenician,
    :script_inscriptional_parthian, :script_rejang, :script_runic, :script_samaritan,
    :script_old_south_arabian, :script_saurashtra, :script_shavian, :script_sinhala,
    :script_sundanese, :script_syloti_nagri, :script_syriac, :script_tagbanwa,
    :script_tai_le, :script_new_tai_lue, :script_tamil, :script_tai_viet,
    :script_telugu, :script_tifinagh, :script_tagalog, :script_thaana,
    :script_thai, :script_tibetan, :script_ugaritic, :script_vai,
    :script_old_persian, :script_cuneiform, :script_yi, :script_inherited,
    :script_common, :script_unknown
  ]

  # Kept separate from Script and therefore not part of All below.
  Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]

  All = Type + POSIX + Category::All + Age + Derived + Script
end
|
330
|
+
end
|
331
|
+
|
332
|
+
end
|