regexp_parser 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +7 -0
  2. data/ChangeLog +45 -0
  3. data/Rakefile +12 -44
  4. data/VERSION.yml +5 -0
  5. data/lib/regexp_parser.rb +5 -38
  6. data/lib/regexp_parser/expression.rb +68 -221
  7. data/lib/regexp_parser/expression/classes/alternation.rb +47 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +26 -0
  9. data/lib/regexp_parser/expression/classes/backref.rb +42 -0
  10. data/lib/regexp_parser/expression/classes/escape.rb +27 -0
  11. data/lib/regexp_parser/expression/classes/group.rb +67 -0
  12. data/lib/regexp_parser/expression/classes/literal.rb +7 -0
  13. data/lib/regexp_parser/expression/{property.rb → classes/property.rb} +1 -1
  14. data/lib/regexp_parser/expression/classes/root.rb +26 -0
  15. data/lib/regexp_parser/expression/classes/set.rb +100 -0
  16. data/lib/regexp_parser/expression/classes/type.rb +17 -0
  17. data/lib/regexp_parser/expression/quantifier.rb +26 -0
  18. data/lib/regexp_parser/expression/subexpression.rb +69 -0
  19. data/lib/regexp_parser/lexer.rb +4 -4
  20. data/lib/regexp_parser/parser.rb +31 -13
  21. data/lib/regexp_parser/scanner.rb +1849 -1488
  22. data/lib/regexp_parser/scanner/property.rl +7 -2
  23. data/lib/regexp_parser/scanner/scanner.rl +377 -191
  24. data/lib/regexp_parser/syntax.rb +7 -0
  25. data/lib/regexp_parser/syntax/ruby/1.8.6.rb +4 -4
  26. data/lib/regexp_parser/syntax/ruby/1.9.1.rb +9 -9
  27. data/lib/regexp_parser/syntax/ruby/2.0.0.rb +16 -0
  28. data/lib/regexp_parser/syntax/ruby/2.1.0.rb +13 -0
  29. data/lib/regexp_parser/syntax/tokens.rb +21 -320
  30. data/lib/regexp_parser/syntax/tokens/anchor.rb +17 -0
  31. data/lib/regexp_parser/syntax/tokens/assertion.rb +15 -0
  32. data/lib/regexp_parser/syntax/tokens/backref.rb +26 -0
  33. data/lib/regexp_parser/syntax/tokens/character_set.rb +48 -0
  34. data/lib/regexp_parser/syntax/tokens/character_type.rb +16 -0
  35. data/lib/regexp_parser/syntax/tokens/escape.rb +29 -0
  36. data/lib/regexp_parser/syntax/tokens/group.rb +22 -0
  37. data/lib/regexp_parser/syntax/tokens/meta.rb +15 -0
  38. data/lib/regexp_parser/syntax/tokens/quantifier.rb +37 -0
  39. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +204 -0
  40. data/lib/regexp_parser/token.rb +37 -0
  41. data/test/expression/test_all.rb +7 -0
  42. data/test/expression/test_base.rb +72 -0
  43. data/test/expression/test_clone.rb +144 -0
  44. data/test/{parser/test_expression.rb → expression/test_to_s.rb} +10 -10
  45. data/test/helpers.rb +1 -0
  46. data/test/parser/test_all.rb +1 -1
  47. data/test/parser/test_alternation.rb +35 -0
  48. data/test/parser/test_anchors.rb +2 -2
  49. data/test/parser/test_refcalls.rb +1 -1
  50. data/test/parser/test_sets.rb +54 -8
  51. data/test/scanner/test_anchors.rb +2 -2
  52. data/test/scanner/test_conditionals.rb +31 -0
  53. data/test/scanner/test_errors.rb +88 -8
  54. data/test/scanner/test_escapes.rb +4 -4
  55. data/test/scanner/test_groups.rb +7 -0
  56. data/test/scanner/test_quoting.rb +29 -0
  57. data/test/scanner/test_sets.rb +1 -0
  58. data/test/syntax/ruby/test_1.8.rb +3 -3
  59. data/test/test_all.rb +1 -1
  60. metadata +62 -48
  61. data/lib/regexp_parser/expression/set.rb +0 -59
@@ -0,0 +1,17 @@
module Regexp::Syntax
  module Token

    # Token names of type :anchor, split into named groups.
    module Anchor
      Type       = :anchor

      Basic      = [:bol, :eol]
      Extended   = Basic + [:word_boundary, :nonword_boundary]
      String     = [:bos, :eos, :eos_ob_eol]
      MatchStart = [:match_start]

      # Every anchor token name defined above (Basic is included via Extended).
      All = Extended + String + MatchStart
    end

    # Register the anchor token names under the :anchor type key.
    Map[Anchor::Type] = Anchor::All

  end
end
@@ -0,0 +1,15 @@
module Regexp::Syntax
  module Token

    # Token names of type :assertion: the look-ahead and look-behind groups.
    module Assertion
      Type       = :assertion

      Lookahead  = [:lookahead, :nlookahead]
      Lookbehind = [:lookbehind, :nlookbehind]

      # Look-ahead names followed by look-behind names.
      All = Lookahead + Lookbehind
    end

    # Register the assertion token names under the :assertion type key.
    Map[Assertion::Type] = Assertion::All

  end
end
@@ -0,0 +1,26 @@
module Regexp::Syntax
  module Token

    # Token names of type :backref, grouped by how the reference is written.
    module Backreference
      Type      = :backref

      Name      = [:name_ref]
      Number    = [:number_ref, :number_rel_ref]
      NestLevel = [:name_nest_ref, :number_nest_ref]

      All = Name + Number + NestLevel
    end

    # Type is the same as Backreference so keeping it here, for now.
    # (This module defines no Type of its own; its names are registered
    # under Backreference::Type below.)
    module SubexpressionCall
      Name   = [:name_call]
      Number = [:number_call, :number_rel_call]

      All = Name + Number
    end

    # Back-reference names first, then subexpression-call names, all under
    # the single :backref type key.
    Map[Backreference::Type] = Backreference::All + SubexpressionCall::All

  end
end
@@ -0,0 +1,48 @@
module Regexp::Syntax
  module Token

    # Token names of type :set (and :subset), split into named groups.
    module CharacterSet
      Type      = :set

      # Defined here but not included in All below.
      OpenClose = [:open, :close]

      Basic     = [:negate, :member, :range]
      Extended  = Basic + [:escape, :intersection, :range_hex, :backspace]

      Types = [
        :type_digit, :type_nondigit,
        :type_hex,   :type_nonhex,
        :type_space, :type_nonspace,
        :type_word,  :type_nonword
      ]

      # POSIX bracket class names, with separate lists for the "non" variants.
      module POSIX
        Standard = [
          :class_alnum, :class_alpha, :class_blank,
          :class_cntrl, :class_digit, :class_graph,
          :class_lower, :class_print, :class_punct,
          :class_space, :class_upper, :class_xdigit
        ]

        StandardNegative = [
          :class_nonalnum, :class_nonalpha, :class_nonblank,
          :class_noncntrl, :class_nondigit, :class_nongraph,
          :class_nonlower, :class_nonprint, :class_nonpunct,
          :class_nonspace, :class_nonupper, :class_nonxdigit
        ]

        Extensions         = [:class_ascii, :class_word]
        ExtensionsNegative = [:class_nonascii, :class_nonword]

        All = Standard + StandardNegative + Extensions + ExtensionsNegative
      end

      # NOTE(review): Extended already starts with Basic, so the Basic names
      # appear twice in this list. Kept as-is; confirm whether consumers
      # de-duplicate before changing it.
      All = Basic + Extended + Types + POSIX::All

      # Nested sets reuse the parent's token list under their own type key.
      module SubSet
        Type      = :subset

        OpenClose = [:open, :close]

        All = CharacterSet::All
      end
    end

    # Both :set and :subset are registered with the same token list.
    Map[CharacterSet::Type]         = CharacterSet::All
    Map[CharacterSet::SubSet::Type] = CharacterSet::All

  end
end
@@ -0,0 +1,16 @@
module Regexp::Syntax
  module Token

    # Token names of type :type, split into named groups.
    module CharacterType
      Type     = :type

      Basic    = []  # intentionally empty in the source
      Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
      Hex      = [:hex, :nonhex]

      All = Basic + Extended + Hex
    end

    # Register the character-type token names under the :type key.
    Map[CharacterType::Type] = CharacterType::All

  end
end
@@ -0,0 +1,29 @@
module Regexp::Syntax
  module Token

    # Token names of type :escape, split into named groups.
    module Escape
      Basic         = [:backslash, :literal]

      Backreference = [:digit]

      # NOTE(review): Control is defined here but is not part of All below;
      # presumably it is opted into separately -- confirm before changing.
      Control       = [:control, :meta_sequence]

      ASCII = [
        :bell, :backspace, :escape, :form_feed, :newline, :carriage,
        :space, :tab, :vertical_tab
      ]

      # The last entry was previously misspelled as :baclslash, a name that
      # appears nowhere else in these token lists.
      Meta = [
        :dot, :alternation,
        :zero_or_one, :zero_or_more, :one_or_more,
        :bol, :eol,
        :group_open, :group_close,
        :interval_open, :interval_close,
        :set_open, :set_close,
        :backslash
      ]

      # :backslash is listed in both Basic and Meta; uniq keeps the first
      # occurrence so All carries each name exactly once, in the same order
      # as before.
      All  = (Basic + Backreference + ASCII + Meta).uniq
      Type = :escape
    end

    # Register the escape token names under the :escape type key.
    Map[Escape::Type] = Escape::All

  end
end
@@ -0,0 +1,22 @@
module Regexp::Syntax
  module Token

    # Token names of type :group, split into named groups.
    module Group
      Type     = :group

      Basic    = [:capture, :close]
      Extended = Basic + [:options]

      Named    = [:named]
      Atomic   = [:atomic]
      Passive  = [:passive]
      Comment  = [:comment]

      # Extended (which includes Basic) followed by the single-name lists.
      All = Extended + Named + Atomic + Passive + Comment
    end

    # Register the group token names under the :group type key.
    Map[Group::Type] = Group::All

  end
end
@@ -0,0 +1,15 @@
module Regexp::Syntax
  module Token

    # Token names of type :meta.
    module Meta
      Type     = :meta

      Basic    = [:dot]
      Extended = Basic + [:alternation]

      # All is the very same array object as Extended.
      All = Extended
    end

    # Register the meta token names under the :meta type key.
    Map[Meta::Type] = Meta::All

  end
end
@@ -0,0 +1,37 @@
module Regexp::Syntax
  module Token

    # Token names of type :quantifier. The three base quantifiers and the
    # interval quantifier each come in plain, reluctant and possessive lists.
    module Quantifier
      Type = :quantifier

      Greedy     = [:zero_or_one, :zero_or_more, :one_or_more]

      Reluctant  = [:zero_or_one_reluctant,
                    :zero_or_more_reluctant,
                    :one_or_more_reluctant]

      Possessive = [:zero_or_one_possessive,
                    :zero_or_more_possessive,
                    :one_or_more_possessive]

      Interval           = [:interval]
      IntervalReluctant  = [:interval_reluctant]
      IntervalPossessive = [:interval_possessive]

      IntervalAll = Interval + IntervalReluctant + IntervalPossessive

      All = Greedy + Reluctant + Possessive + IntervalAll
    end

    # Register the quantifier token names under the :quantifier type key.
    Map[Quantifier::Type] = Quantifier::All

  end
end
@@ -0,0 +1,204 @@
module Regexp::Syntax
  module Token

    # Token names of types :property and :nonproperty, split into named
    # groups. Both type keys are registered with the same list.
    module UnicodeProperty
      Type    = :property
      NonType = :nonproperty

      CharType = [
        :alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
        :lower, :print, :punct, :space, :upper, :word,  :xdigit
      ]

      POSIX = [:any, :assigned, :newline]

      # One list per category family; the *_any entry comes first in each
      # family that has one.
      module Category
        Letter = [
          :letter_any, :letter_uppercase, :letter_lowercase,
          :letter_titlecase, :letter_modifier, :letter_other
        ]

        Mark = [
          :mark_any, :mark_nonspacing, :mark_spacing, :mark_enclosing
        ]

        Number = [
          :number_any, :number_decimal, :number_letter, :number_other
        ]

        Punctuation = [
          :punct_any, :punct_connector, :punct_dash,
          :punct_open, :punct_close, :punct_initial,
          :punct_final, :punct_other
        ]

        Symbol = [
          :symbol_any, :symbol_math, :symbol_currency,
          :symbol_modifier, :symbol_other
        ]

        Separator = [
          :separator_any, :separator_space, :separator_line,
          :separator_para
        ]

        Codepoint = [
          :other, :control, :format,
          :surrogate, :private_use, :unassigned
        ]

        All = Letter + Mark + Number + Punctuation +
              Symbol + Separator + Codepoint
      end

      Age = [
        :age_1_1, :age_2_0, :age_2_1, :age_3_0, :age_3_1, :age_3_2,
        :age_4_0, :age_4_1, :age_5_0, :age_5_1, :age_5_2, :age_6_0
      ]

      Derived = [
        :ascii_hex, :alphabetic, :cased,
        :changes_when_casefolded, :changes_when_casemapped,
        :changes_when_lowercased, :changes_when_titlecased,
        :changes_when_uppercased,
        :case_ignorable, :bidi_control, :dash,
        :deprecated, :default_ignorable_cp, :diacritic,
        :extender, :grapheme_base, :grapheme_extend,
        :grapheme_link, :hex_digit, :hyphen,
        :id_continue, :ideographic, :id_start,
        :ids_binary_op, :ids_trinary_op, :join_control,
        :logical_order_exception, :lowercase, :math,
        :non_character_cp, :other_alphabetic,
        :other_default_ignorable_cp, :other_grapheme_extended,
        :other_id_continue, :other_id_start, :other_lowercase,
        :other_math, :other_uppercase, :pattern_syntax,
        :pattern_whitespace, :quotation_mark, :radical,
        :soft_dotted, :sentence_terminal, :terminal_punctuation,
        :unified_ideograph, :uppercase, :variation_selector,
        :whitespace, :xid_start, :xid_continue
      ]

      Script = [
        :script_arabic, :script_imperial_aramaic, :script_armenian,
        :script_avestan, :script_balinese, :script_bamum,
        :script_bengali, :script_bopomofo, :script_braille,
        :script_buginese, :script_buhid, :script_canadian_aboriginal,
        :script_carian, :script_cham, :script_cherokee,
        :script_coptic, :script_cypriot, :script_cyrillic,
        :script_devanagari, :script_deseret, :script_egyptian_hieroglyphs,
        :script_ethiopic, :script_georgian, :script_glagolitic,
        :script_gothic, :script_greek, :script_gujarati,
        :script_gurmukhi, :script_hangul, :script_han,
        :script_hanunoo, :script_hebrew, :script_hiragana,
        :script_katakana_or_hiragana, :script_old_italic, :script_javanese,
        :script_kayah_li, :script_katakana, :script_kharoshthi,
        :script_khmer, :script_kannada, :script_kaithi,
        :script_tai_tham, :script_lao, :script_latin,
        :script_lepcha, :script_limbu, :script_linear_b,
        :script_lisu, :script_lycian, :script_lydian,
        :script_malayalam, :script_mongolian, :script_meetei_mayek,
        :script_myanmar, :script_nko, :script_ogham,
        :script_ol_chiki, :script_old_turkic, :script_oriya,
        :script_osmanya, :script_phags_pa, :script_inscriptional_pahlavi,
        :script_phoenician, :script_inscriptional_parthian, :script_rejang,
        :script_runic, :script_samaritan, :script_old_south_arabian,
        :script_saurashtra, :script_shavian, :script_sinhala,
        :script_sundanese, :script_syloti_nagri, :script_syriac,
        :script_tagbanwa, :script_tai_le, :script_new_tai_lue,
        :script_tamil, :script_tai_viet, :script_telugu,
        :script_tifinagh, :script_tagalog, :script_thaana,
        :script_thai, :script_tibetan, :script_ugaritic,
        :script_vai, :script_old_persian, :script_cuneiform,
        :script_yi, :script_inherited, :script_common,
        :script_unknown
      ]

      # NOTE(review): Script_6_0 is not part of All below; presumably it is
      # added only where the newer scripts are supported -- confirm.
      Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]

      All = CharType + POSIX + Category::All + Age + Derived + Script
    end

    # The positive and negated property types share one token list.
    Map[UnicodeProperty::Type]    = UnicodeProperty::All
    Map[UnicodeProperty::NonType] = UnicodeProperty::All

  end
end
@@ -0,0 +1,37 @@
class Regexp

  # Member names of Regexp::Token, in positional order.
  TOKEN_KEYS = [:type, :token, :text, :ts, :te, :level, :set_level].freeze

  # Struct with one member per TOKEN_KEYS entry, plus a few helpers and
  # optional links to neighbouring objects.
  Token = Struct.new(*TOKEN_KEYS) do
    # Returns the two-element array [ts, te].
    def offset
      [ts, te]
    end

    # Returns te - ts.
    def length
      te - ts
    end

    # Returns a Hash mapping every member name (as a Symbol) to its value.
    # Struct#[] accepts member names as Strings or Symbols, so no dynamic
    # dispatch through send is needed. inject (rather than each_with_object)
    # is used to stay usable on older Ruby versions.
    def to_h
      members.inject({}) do |hash, member|
        hash[member.to_sym] = self[member]
        hash
      end
    end

    # Combined reader/writer. With a truthy argument, stores it as the next
    # link and returns it; without one, returns the stored link (nil when
    # none has been set). A stored link cannot be reset to nil through this
    # method, because a nil argument means "read".
    def next(exp = nil)
      return @next unless exp

      @next = exp
    end

    # Combined reader/writer for the previous link; same rules as #next.
    def previous(exp = nil)
      return @previous unless exp

      @previous = exp
    end
  end

end