regexp_parser 1.7.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -3
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  26. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  27. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  28. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  29. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  30. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  31. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  32. data/lib/regexp_parser/expression/sequence.rb +11 -47
  33. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  34. data/lib/regexp_parser/expression/shared.rb +111 -0
  35. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  36. data/lib/regexp_parser/expression.rb +15 -141
  37. data/lib/regexp_parser/lexer.rb +83 -41
  38. data/lib/regexp_parser/parser.rb +372 -429
  39. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  40. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  41. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  42. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  43. data/lib/regexp_parser/scanner/properties/long.csv +651 -0
  44. data/lib/regexp_parser/scanner/properties/short.csv +249 -0
  45. data/lib/regexp_parser/scanner/property.rl +4 -4
  46. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  47. data/lib/regexp_parser/scanner.rb +1423 -1674
  48. data/lib/regexp_parser/syntax/any.rb +2 -7
  49. data/lib/regexp_parser/syntax/base.rb +92 -67
  50. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  51. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  52. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  53. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  54. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  55. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  56. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  57. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  58. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  59. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  60. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  61. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  62. data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
  63. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  64. data/lib/regexp_parser/syntax/token.rb +45 -0
  65. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  66. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  67. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  68. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  69. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  70. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  74. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions.rb +3 -1
  81. data/lib/regexp_parser/syntax.rb +8 -6
  82. data/lib/regexp_parser/token.rb +9 -20
  83. data/lib/regexp_parser/version.rb +1 -1
  84. data/lib/regexp_parser.rb +0 -2
  85. data/regexp_parser.gemspec +19 -23
  86. metadata +53 -171
  87. data/CHANGELOG.md +0 -349
  88. data/README.md +0 -470
  89. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  90. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  91. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  92. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  93. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  94. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  95. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  96. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  97. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  98. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  99. data/spec/expression/base_spec.rb +0 -94
  100. data/spec/expression/clone_spec.rb +0 -120
  101. data/spec/expression/conditional_spec.rb +0 -89
  102. data/spec/expression/free_space_spec.rb +0 -27
  103. data/spec/expression/methods/match_length_spec.rb +0 -161
  104. data/spec/expression/methods/match_spec.rb +0 -25
  105. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  106. data/spec/expression/methods/tests_spec.rb +0 -99
  107. data/spec/expression/methods/traverse_spec.rb +0 -161
  108. data/spec/expression/options_spec.rb +0 -128
  109. data/spec/expression/root_spec.rb +0 -9
  110. data/spec/expression/sequence_spec.rb +0 -9
  111. data/spec/expression/subexpression_spec.rb +0 -50
  112. data/spec/expression/to_h_spec.rb +0 -26
  113. data/spec/expression/to_s_spec.rb +0 -100
  114. data/spec/lexer/all_spec.rb +0 -22
  115. data/spec/lexer/conditionals_spec.rb +0 -53
  116. data/spec/lexer/escapes_spec.rb +0 -14
  117. data/spec/lexer/keep_spec.rb +0 -10
  118. data/spec/lexer/literals_spec.rb +0 -89
  119. data/spec/lexer/nesting_spec.rb +0 -99
  120. data/spec/lexer/refcalls_spec.rb +0 -55
  121. data/spec/parser/all_spec.rb +0 -43
  122. data/spec/parser/alternation_spec.rb +0 -88
  123. data/spec/parser/anchors_spec.rb +0 -17
  124. data/spec/parser/conditionals_spec.rb +0 -179
  125. data/spec/parser/errors_spec.rb +0 -30
  126. data/spec/parser/escapes_spec.rb +0 -121
  127. data/spec/parser/free_space_spec.rb +0 -130
  128. data/spec/parser/groups_spec.rb +0 -108
  129. data/spec/parser/keep_spec.rb +0 -6
  130. data/spec/parser/posix_classes_spec.rb +0 -8
  131. data/spec/parser/properties_spec.rb +0 -115
  132. data/spec/parser/quantifiers_spec.rb +0 -51
  133. data/spec/parser/refcalls_spec.rb +0 -112
  134. data/spec/parser/set/intersections_spec.rb +0 -127
  135. data/spec/parser/set/ranges_spec.rb +0 -111
  136. data/spec/parser/sets_spec.rb +0 -178
  137. data/spec/parser/types_spec.rb +0 -18
  138. data/spec/scanner/all_spec.rb +0 -18
  139. data/spec/scanner/anchors_spec.rb +0 -21
  140. data/spec/scanner/conditionals_spec.rb +0 -128
  141. data/spec/scanner/errors_spec.rb +0 -68
  142. data/spec/scanner/escapes_spec.rb +0 -53
  143. data/spec/scanner/free_space_spec.rb +0 -133
  144. data/spec/scanner/groups_spec.rb +0 -52
  145. data/spec/scanner/keep_spec.rb +0 -10
  146. data/spec/scanner/literals_spec.rb +0 -49
  147. data/spec/scanner/meta_spec.rb +0 -18
  148. data/spec/scanner/properties_spec.rb +0 -64
  149. data/spec/scanner/quantifiers_spec.rb +0 -20
  150. data/spec/scanner/refcalls_spec.rb +0 -36
  151. data/spec/scanner/sets_spec.rb +0 -102
  152. data/spec/scanner/types_spec.rb +0 -14
  153. data/spec/spec_helper.rb +0 -15
  154. data/spec/support/runner.rb +0 -42
  155. data/spec/support/shared_examples.rb +0 -77
  156. data/spec/support/warning_extractor.rb +0 -60
  157. data/spec/syntax/syntax_spec.rb +0 -48
  158. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  159. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  160. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  161. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  162. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  163. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  164. data/spec/syntax/versions/aliases_spec.rb +0 -37
  165. data/spec/token/token_spec.rb +0 -85
  166. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,237 +0,0 @@
1
- #
2
- # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
- #
4
- ---
5
- adlm: adlam
6
- aghb: caucasian_albanian
7
- ahex: ascii_hex_digit
8
- arab: arabic
9
- armi: imperial_aramaic
10
- armn: armenian
11
- avst: avestan
12
- bali: balinese
13
- bamu: bamum
14
- bass: bassa_vah
15
- batk: batak
16
- beng: bengali
17
- bhks: bhaiksuki
18
- bidic: bidi_control
19
- bopo: bopomofo
20
- brah: brahmi
21
- brai: braille
22
- bugi: buginese
23
- buhd: buhid
24
- c: other
25
- cakm: chakma
26
- cans: canadian_aboriginal
27
- cari: carian
28
- cc: control
29
- cf: format
30
- cher: cherokee
31
- ci: case_ignorable
32
- cn: unassigned
33
- co: private_use
34
- combiningmark: mark
35
- copt: coptic
36
- cprt: cypriot
37
- cs: surrogate
38
- cwcf: changes_when_casefolded
39
- cwcm: changes_when_casemapped
40
- cwl: changes_when_lowercased
41
- cwt: changes_when_titlecased
42
- cwu: changes_when_uppercased
43
- cyrl: cyrillic
44
- dep: deprecated
45
- deva: devanagari
46
- di: default_ignorable_code_point
47
- dia: diacritic
48
- dogr: dogra
49
- dsrt: deseret
50
- dupl: duployan
51
- egyp: egyptian_hieroglyphs
52
- elba: elbasan
53
- elym: elymaic
54
- ethi: ethiopic
55
- ext: extender
56
- geor: georgian
57
- glag: glagolitic
58
- gong: gunjala_gondi
59
- gonm: masaram_gondi
60
- goth: gothic
61
- gran: grantha
62
- grbase: grapheme_base
63
- grek: greek
64
- grext: grapheme_extend
65
- grlink: grapheme_link
66
- gujr: gujarati
67
- guru: gurmukhi
68
- hang: hangul
69
- hani: han
70
- hano: hanunoo
71
- hatr: hatran
72
- hebr: hebrew
73
- hex: hex_digit
74
- hira: hiragana
75
- hluw: anatolian_hieroglyphs
76
- hmng: pahawh_hmong
77
- hmnp: nyiakeng_puachue_hmong
78
- hung: old_hungarian
79
- idc: id_continue
80
- ideo: ideographic
81
- ids: id_start
82
- idsb: ids_binary_operator
83
- idst: ids_trinary_operator
84
- ital: old_italic
85
- java: javanese
86
- joinc: join_control
87
- kali: kayah_li
88
- kana: katakana
89
- khar: kharoshthi
90
- khmr: khmer
91
- khoj: khojki
92
- knda: kannada
93
- kthi: kaithi
94
- l: letter
95
- lana: tai_tham
96
- laoo: lao
97
- latn: latin
98
- lc: cased_letter
99
- lepc: lepcha
100
- limb: limbu
101
- lina: linear_a
102
- linb: linear_b
103
- ll: lowercase_letter
104
- lm: modifier_letter
105
- lo: other_letter
106
- loe: logical_order_exception
107
- lt: titlecase_letter
108
- lu: uppercase_letter
109
- lyci: lycian
110
- lydi: lydian
111
- m: mark
112
- mahj: mahajani
113
- maka: makasar
114
- mand: mandaic
115
- mani: manichaean
116
- marc: marchen
117
- mc: spacing_mark
118
- me: enclosing_mark
119
- medf: medefaidrin
120
- mend: mende_kikakui
121
- merc: meroitic_cursive
122
- mero: meroitic_hieroglyphs
123
- mlym: malayalam
124
- mn: nonspacing_mark
125
- mong: mongolian
126
- mroo: mro
127
- mtei: meetei_mayek
128
- mult: multani
129
- mymr: myanmar
130
- n: number
131
- nand: nandinagari
132
- narb: old_north_arabian
133
- nbat: nabataean
134
- nchar: noncharacter_code_point
135
- nd: decimal_number
136
- nkoo: nko
137
- nl: letter_number
138
- 'no': other_number
139
- nshu: nushu
140
- oalpha: other_alphabetic
141
- odi: other_default_ignorable_code_point
142
- ogam: ogham
143
- ogrext: other_grapheme_extend
144
- oidc: other_id_continue
145
- oids: other_id_start
146
- olck: ol_chiki
147
- olower: other_lowercase
148
- omath: other_math
149
- orkh: old_turkic
150
- orya: oriya
151
- osge: osage
152
- osma: osmanya
153
- oupper: other_uppercase
154
- p: punctuation
155
- palm: palmyrene
156
- patsyn: pattern_syntax
157
- patws: pattern_white_space
158
- pauc: pau_cin_hau
159
- pc: connector_punctuation
160
- pcm: prepended_concatenation_mark
161
- pd: dash_punctuation
162
- pe: close_punctuation
163
- perm: old_permic
164
- pf: final_punctuation
165
- phag: phags_pa
166
- phli: inscriptional_pahlavi
167
- phlp: psalter_pahlavi
168
- phnx: phoenician
169
- pi: initial_punctuation
170
- plrd: miao
171
- po: other_punctuation
172
- prti: inscriptional_parthian
173
- ps: open_punctuation
174
- qaac: coptic
175
- qaai: inherited
176
- qmark: quotation_mark
177
- ri: regional_indicator
178
- rjng: rejang
179
- rohg: hanifi_rohingya
180
- runr: runic
181
- s: symbol
182
- samr: samaritan
183
- sarb: old_south_arabian
184
- saur: saurashtra
185
- sc: currency_symbol
186
- sd: soft_dotted
187
- sgnw: signwriting
188
- shaw: shavian
189
- shrd: sharada
190
- sidd: siddham
191
- sind: khudawadi
192
- sinh: sinhala
193
- sk: modifier_symbol
194
- sm: math_symbol
195
- so: other_symbol
196
- sogd: sogdian
197
- sogo: old_sogdian
198
- sora: sora_sompeng
199
- soyo: soyombo
200
- sterm: sentence_terminal
201
- sund: sundanese
202
- sylo: syloti_nagri
203
- syrc: syriac
204
- tagb: tagbanwa
205
- takr: takri
206
- tale: tai_le
207
- talu: new_tai_lue
208
- taml: tamil
209
- tang: tangut
210
- tavt: tai_viet
211
- telu: telugu
212
- term: terminal_punctuation
213
- tfng: tifinagh
214
- tglg: tagalog
215
- thaa: thaana
216
- tibt: tibetan
217
- tirh: tirhuta
218
- ugar: ugaritic
219
- uideo: unified_ideograph
220
- vaii: vai
221
- vs: variation_selector
222
- wara: warang_citi
223
- wcho: wancho
224
- wspace: white_space
225
- xidc: xid_continue
226
- xids: xid_start
227
- xpeo: old_persian
228
- xsux: cuneiform
229
- yiii: yi
230
- z: separator
231
- zanb: zanabazar_square
232
- zinh: inherited
233
- zl: line_separator
234
- zp: paragraph_separator
235
- zs: space_separator
236
- zyyy: common
237
- zzzz: unknown
@@ -1,15 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Anchor
4
- Basic = [:bol, :eol]
5
- Extended = Basic + [:word_boundary, :nonword_boundary]
6
- String = [:bos, :eos, :eos_ob_eol]
7
- MatchStart = [:match_start]
8
-
9
- All = Extended + String + MatchStart
10
- Type = :anchor
11
- end
12
-
13
- Map[Anchor::Type] = Anchor::All
14
- end
15
- end
@@ -1,24 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Backreference
4
- Name = [:name_ref]
5
- Number = [:number, :number_ref, :number_rel_ref]
6
-
7
- RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
8
-
9
- All = Name + Number + RecursionLevel
10
- Type = :backref
11
- end
12
-
13
- # Type is the same as Backreference so keeping it here, for now.
14
- module SubexpressionCall
15
- Name = [:name_call]
16
- Number = [:number_call, :number_rel_call]
17
-
18
- All = Name + Number
19
- end
20
-
21
- Map[Backreference::Type] = Backreference::All +
22
- SubexpressionCall::All
23
- end
24
- end
@@ -1,13 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
6
-
7
- All = Extended
8
- Type = :set
9
- end
10
-
11
- Map[CharacterSet::Type] = CharacterSet::All
12
- end
13
- end
@@ -1,30 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Escape
4
- Basic = [:backslash, :literal]
5
-
6
- Control = [:control, :meta_sequence]
7
-
8
- ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
9
- :tab, :vertical_tab]
10
-
11
- Unicode = [:codepoint, :codepoint_list]
12
-
13
- Meta = [:dot, :alternation,
14
- :zero_or_one, :zero_or_more, :one_or_more,
15
- :bol, :eol,
16
- :group_open, :group_close,
17
- :interval_open, :interval_close,
18
- :set_open, :set_close]
19
-
20
- Hex = [:hex]
21
-
22
- Octal = [:octal]
23
-
24
- All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
25
- Type = :escape
26
- end
27
-
28
- Map[Escape::Type] = Escape::All
29
- end
30
- end
@@ -1,13 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
6
-
7
- All = Extended
8
- Type = :meta
9
- end
10
-
11
- Map[Meta::Type] = Meta::All
12
- end
13
- end
@@ -1,35 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Quantifier
4
- Greedy = [
5
- :zero_or_one,
6
- :zero_or_more,
7
- :one_or_more
8
- ]
9
-
10
- Reluctant = [
11
- :zero_or_one_reluctant,
12
- :zero_or_more_reluctant,
13
- :one_or_more_reluctant
14
- ]
15
-
16
- Possessive = [
17
- :zero_or_one_possessive,
18
- :zero_or_more_possessive,
19
- :one_or_more_possessive
20
- ]
21
-
22
- Interval = [:interval]
23
- IntervalReluctant = [:interval_reluctant]
24
- IntervalPossessive = [:interval_possessive]
25
-
26
- IntervalAll = Interval + IntervalReluctant +
27
- IntervalPossessive
28
-
29
- All = Greedy + Reluctant + Possessive + IntervalAll
30
- Type = :quantifier
31
- end
32
-
33
- Map[Quantifier::Type] = Quantifier::All
34
- end
35
- end