regexp_parser 1.7.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -3
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  26. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  27. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  28. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  29. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  30. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  31. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  32. data/lib/regexp_parser/expression/sequence.rb +11 -47
  33. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  34. data/lib/regexp_parser/expression/shared.rb +111 -0
  35. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  36. data/lib/regexp_parser/expression.rb +15 -141
  37. data/lib/regexp_parser/lexer.rb +83 -41
  38. data/lib/regexp_parser/parser.rb +372 -429
  39. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  40. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  41. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  42. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  43. data/lib/regexp_parser/scanner/properties/long.csv +651 -0
  44. data/lib/regexp_parser/scanner/properties/short.csv +249 -0
  45. data/lib/regexp_parser/scanner/property.rl +4 -4
  46. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  47. data/lib/regexp_parser/scanner.rb +1423 -1674
  48. data/lib/regexp_parser/syntax/any.rb +2 -7
  49. data/lib/regexp_parser/syntax/base.rb +92 -67
  50. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  51. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  52. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  53. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  54. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  55. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  56. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  57. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  58. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  59. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  60. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  61. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  62. data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
  63. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  64. data/lib/regexp_parser/syntax/token.rb +45 -0
  65. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  66. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  67. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  68. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  69. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  70. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  74. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions.rb +3 -1
  81. data/lib/regexp_parser/syntax.rb +8 -6
  82. data/lib/regexp_parser/token.rb +9 -20
  83. data/lib/regexp_parser/version.rb +1 -1
  84. data/lib/regexp_parser.rb +0 -2
  85. data/regexp_parser.gemspec +19 -23
  86. metadata +53 -171
  87. data/CHANGELOG.md +0 -349
  88. data/README.md +0 -470
  89. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  90. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  91. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  92. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  93. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  94. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  95. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  96. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  97. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  98. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  99. data/spec/expression/base_spec.rb +0 -94
  100. data/spec/expression/clone_spec.rb +0 -120
  101. data/spec/expression/conditional_spec.rb +0 -89
  102. data/spec/expression/free_space_spec.rb +0 -27
  103. data/spec/expression/methods/match_length_spec.rb +0 -161
  104. data/spec/expression/methods/match_spec.rb +0 -25
  105. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  106. data/spec/expression/methods/tests_spec.rb +0 -99
  107. data/spec/expression/methods/traverse_spec.rb +0 -161
  108. data/spec/expression/options_spec.rb +0 -128
  109. data/spec/expression/root_spec.rb +0 -9
  110. data/spec/expression/sequence_spec.rb +0 -9
  111. data/spec/expression/subexpression_spec.rb +0 -50
  112. data/spec/expression/to_h_spec.rb +0 -26
  113. data/spec/expression/to_s_spec.rb +0 -100
  114. data/spec/lexer/all_spec.rb +0 -22
  115. data/spec/lexer/conditionals_spec.rb +0 -53
  116. data/spec/lexer/escapes_spec.rb +0 -14
  117. data/spec/lexer/keep_spec.rb +0 -10
  118. data/spec/lexer/literals_spec.rb +0 -89
  119. data/spec/lexer/nesting_spec.rb +0 -99
  120. data/spec/lexer/refcalls_spec.rb +0 -55
  121. data/spec/parser/all_spec.rb +0 -43
  122. data/spec/parser/alternation_spec.rb +0 -88
  123. data/spec/parser/anchors_spec.rb +0 -17
  124. data/spec/parser/conditionals_spec.rb +0 -179
  125. data/spec/parser/errors_spec.rb +0 -30
  126. data/spec/parser/escapes_spec.rb +0 -121
  127. data/spec/parser/free_space_spec.rb +0 -130
  128. data/spec/parser/groups_spec.rb +0 -108
  129. data/spec/parser/keep_spec.rb +0 -6
  130. data/spec/parser/posix_classes_spec.rb +0 -8
  131. data/spec/parser/properties_spec.rb +0 -115
  132. data/spec/parser/quantifiers_spec.rb +0 -51
  133. data/spec/parser/refcalls_spec.rb +0 -112
  134. data/spec/parser/set/intersections_spec.rb +0 -127
  135. data/spec/parser/set/ranges_spec.rb +0 -111
  136. data/spec/parser/sets_spec.rb +0 -178
  137. data/spec/parser/types_spec.rb +0 -18
  138. data/spec/scanner/all_spec.rb +0 -18
  139. data/spec/scanner/anchors_spec.rb +0 -21
  140. data/spec/scanner/conditionals_spec.rb +0 -128
  141. data/spec/scanner/errors_spec.rb +0 -68
  142. data/spec/scanner/escapes_spec.rb +0 -53
  143. data/spec/scanner/free_space_spec.rb +0 -133
  144. data/spec/scanner/groups_spec.rb +0 -52
  145. data/spec/scanner/keep_spec.rb +0 -10
  146. data/spec/scanner/literals_spec.rb +0 -49
  147. data/spec/scanner/meta_spec.rb +0 -18
  148. data/spec/scanner/properties_spec.rb +0 -64
  149. data/spec/scanner/quantifiers_spec.rb +0 -20
  150. data/spec/scanner/refcalls_spec.rb +0 -36
  151. data/spec/scanner/sets_spec.rb +0 -102
  152. data/spec/scanner/types_spec.rb +0 -14
  153. data/spec/spec_helper.rb +0 -15
  154. data/spec/support/runner.rb +0 -42
  155. data/spec/support/shared_examples.rb +0 -77
  156. data/spec/support/warning_extractor.rb +0 -60
  157. data/spec/syntax/syntax_spec.rb +0 -48
  158. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  159. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  160. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  161. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  162. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  163. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  164. data/spec/syntax/versions/aliases_spec.rb +0 -37
  165. data/spec/token/token_spec.rb +0 -85
  166. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,237 +0,0 @@
1
- #
2
- # THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT
3
- #
4
- ---
5
- adlm: adlam
6
- aghb: caucasian_albanian
7
- ahex: ascii_hex_digit
8
- arab: arabic
9
- armi: imperial_aramaic
10
- armn: armenian
11
- avst: avestan
12
- bali: balinese
13
- bamu: bamum
14
- bass: bassa_vah
15
- batk: batak
16
- beng: bengali
17
- bhks: bhaiksuki
18
- bidic: bidi_control
19
- bopo: bopomofo
20
- brah: brahmi
21
- brai: braille
22
- bugi: buginese
23
- buhd: buhid
24
- c: other
25
- cakm: chakma
26
- cans: canadian_aboriginal
27
- cari: carian
28
- cc: control
29
- cf: format
30
- cher: cherokee
31
- ci: case_ignorable
32
- cn: unassigned
33
- co: private_use
34
- combiningmark: mark
35
- copt: coptic
36
- cprt: cypriot
37
- cs: surrogate
38
- cwcf: changes_when_casefolded
39
- cwcm: changes_when_casemapped
40
- cwl: changes_when_lowercased
41
- cwt: changes_when_titlecased
42
- cwu: changes_when_uppercased
43
- cyrl: cyrillic
44
- dep: deprecated
45
- deva: devanagari
46
- di: default_ignorable_code_point
47
- dia: diacritic
48
- dogr: dogra
49
- dsrt: deseret
50
- dupl: duployan
51
- egyp: egyptian_hieroglyphs
52
- elba: elbasan
53
- elym: elymaic
54
- ethi: ethiopic
55
- ext: extender
56
- geor: georgian
57
- glag: glagolitic
58
- gong: gunjala_gondi
59
- gonm: masaram_gondi
60
- goth: gothic
61
- gran: grantha
62
- grbase: grapheme_base
63
- grek: greek
64
- grext: grapheme_extend
65
- grlink: grapheme_link
66
- gujr: gujarati
67
- guru: gurmukhi
68
- hang: hangul
69
- hani: han
70
- hano: hanunoo
71
- hatr: hatran
72
- hebr: hebrew
73
- hex: hex_digit
74
- hira: hiragana
75
- hluw: anatolian_hieroglyphs
76
- hmng: pahawh_hmong
77
- hmnp: nyiakeng_puachue_hmong
78
- hung: old_hungarian
79
- idc: id_continue
80
- ideo: ideographic
81
- ids: id_start
82
- idsb: ids_binary_operator
83
- idst: ids_trinary_operator
84
- ital: old_italic
85
- java: javanese
86
- joinc: join_control
87
- kali: kayah_li
88
- kana: katakana
89
- khar: kharoshthi
90
- khmr: khmer
91
- khoj: khojki
92
- knda: kannada
93
- kthi: kaithi
94
- l: letter
95
- lana: tai_tham
96
- laoo: lao
97
- latn: latin
98
- lc: cased_letter
99
- lepc: lepcha
100
- limb: limbu
101
- lina: linear_a
102
- linb: linear_b
103
- ll: lowercase_letter
104
- lm: modifier_letter
105
- lo: other_letter
106
- loe: logical_order_exception
107
- lt: titlecase_letter
108
- lu: uppercase_letter
109
- lyci: lycian
110
- lydi: lydian
111
- m: mark
112
- mahj: mahajani
113
- maka: makasar
114
- mand: mandaic
115
- mani: manichaean
116
- marc: marchen
117
- mc: spacing_mark
118
- me: enclosing_mark
119
- medf: medefaidrin
120
- mend: mende_kikakui
121
- merc: meroitic_cursive
122
- mero: meroitic_hieroglyphs
123
- mlym: malayalam
124
- mn: nonspacing_mark
125
- mong: mongolian
126
- mroo: mro
127
- mtei: meetei_mayek
128
- mult: multani
129
- mymr: myanmar
130
- n: number
131
- nand: nandinagari
132
- narb: old_north_arabian
133
- nbat: nabataean
134
- nchar: noncharacter_code_point
135
- nd: decimal_number
136
- nkoo: nko
137
- nl: letter_number
138
- 'no': other_number
139
- nshu: nushu
140
- oalpha: other_alphabetic
141
- odi: other_default_ignorable_code_point
142
- ogam: ogham
143
- ogrext: other_grapheme_extend
144
- oidc: other_id_continue
145
- oids: other_id_start
146
- olck: ol_chiki
147
- olower: other_lowercase
148
- omath: other_math
149
- orkh: old_turkic
150
- orya: oriya
151
- osge: osage
152
- osma: osmanya
153
- oupper: other_uppercase
154
- p: punctuation
155
- palm: palmyrene
156
- patsyn: pattern_syntax
157
- patws: pattern_white_space
158
- pauc: pau_cin_hau
159
- pc: connector_punctuation
160
- pcm: prepended_concatenation_mark
161
- pd: dash_punctuation
162
- pe: close_punctuation
163
- perm: old_permic
164
- pf: final_punctuation
165
- phag: phags_pa
166
- phli: inscriptional_pahlavi
167
- phlp: psalter_pahlavi
168
- phnx: phoenician
169
- pi: initial_punctuation
170
- plrd: miao
171
- po: other_punctuation
172
- prti: inscriptional_parthian
173
- ps: open_punctuation
174
- qaac: coptic
175
- qaai: inherited
176
- qmark: quotation_mark
177
- ri: regional_indicator
178
- rjng: rejang
179
- rohg: hanifi_rohingya
180
- runr: runic
181
- s: symbol
182
- samr: samaritan
183
- sarb: old_south_arabian
184
- saur: saurashtra
185
- sc: currency_symbol
186
- sd: soft_dotted
187
- sgnw: signwriting
188
- shaw: shavian
189
- shrd: sharada
190
- sidd: siddham
191
- sind: khudawadi
192
- sinh: sinhala
193
- sk: modifier_symbol
194
- sm: math_symbol
195
- so: other_symbol
196
- sogd: sogdian
197
- sogo: old_sogdian
198
- sora: sora_sompeng
199
- soyo: soyombo
200
- sterm: sentence_terminal
201
- sund: sundanese
202
- sylo: syloti_nagri
203
- syrc: syriac
204
- tagb: tagbanwa
205
- takr: takri
206
- tale: tai_le
207
- talu: new_tai_lue
208
- taml: tamil
209
- tang: tangut
210
- tavt: tai_viet
211
- telu: telugu
212
- term: terminal_punctuation
213
- tfng: tifinagh
214
- tglg: tagalog
215
- thaa: thaana
216
- tibt: tibetan
217
- tirh: tirhuta
218
- ugar: ugaritic
219
- uideo: unified_ideograph
220
- vaii: vai
221
- vs: variation_selector
222
- wara: warang_citi
223
- wcho: wancho
224
- wspace: white_space
225
- xidc: xid_continue
226
- xids: xid_start
227
- xpeo: old_persian
228
- xsux: cuneiform
229
- yiii: yi
230
- z: separator
231
- zanb: zanabazar_square
232
- zinh: inherited
233
- zl: line_separator
234
- zp: paragraph_separator
235
- zs: space_separator
236
- zyyy: common
237
- zzzz: unknown
@@ -1,15 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Anchor
4
- Basic = [:bol, :eol]
5
- Extended = Basic + [:word_boundary, :nonword_boundary]
6
- String = [:bos, :eos, :eos_ob_eol]
7
- MatchStart = [:match_start]
8
-
9
- All = Extended + String + MatchStart
10
- Type = :anchor
11
- end
12
-
13
- Map[Anchor::Type] = Anchor::All
14
- end
15
- end
@@ -1,24 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Backreference
4
- Name = [:name_ref]
5
- Number = [:number, :number_ref, :number_rel_ref]
6
-
7
- RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
8
-
9
- All = Name + Number + RecursionLevel
10
- Type = :backref
11
- end
12
-
13
- # Type is the same as Backreference so keeping it here, for now.
14
- module SubexpressionCall
15
- Name = [:name_call]
16
- Number = [:number_call, :number_rel_call]
17
-
18
- All = Name + Number
19
- end
20
-
21
- Map[Backreference::Type] = Backreference::All +
22
- SubexpressionCall::All
23
- end
24
- end
@@ -1,13 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
6
-
7
- All = Extended
8
- Type = :set
9
- end
10
-
11
- Map[CharacterSet::Type] = CharacterSet::All
12
- end
13
- end
@@ -1,30 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Escape
4
- Basic = [:backslash, :literal]
5
-
6
- Control = [:control, :meta_sequence]
7
-
8
- ASCII = [:bell, :backspace, :escape, :form_feed, :newline, :carriage,
9
- :tab, :vertical_tab]
10
-
11
- Unicode = [:codepoint, :codepoint_list]
12
-
13
- Meta = [:dot, :alternation,
14
- :zero_or_one, :zero_or_more, :one_or_more,
15
- :bol, :eol,
16
- :group_open, :group_close,
17
- :interval_open, :interval_close,
18
- :set_open, :set_close]
19
-
20
- Hex = [:hex]
21
-
22
- Octal = [:octal]
23
-
24
- All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
25
- Type = :escape
26
- end
27
-
28
- Map[Escape::Type] = Escape::All
29
- end
30
- end
@@ -1,13 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
6
-
7
- All = Extended
8
- Type = :meta
9
- end
10
-
11
- Map[Meta::Type] = Meta::All
12
- end
13
- end
@@ -1,35 +0,0 @@
1
- module Regexp::Syntax
2
- module Token
3
- module Quantifier
4
- Greedy = [
5
- :zero_or_one,
6
- :zero_or_more,
7
- :one_or_more
8
- ]
9
-
10
- Reluctant = [
11
- :zero_or_one_reluctant,
12
- :zero_or_more_reluctant,
13
- :one_or_more_reluctant
14
- ]
15
-
16
- Possessive = [
17
- :zero_or_one_possessive,
18
- :zero_or_more_possessive,
19
- :one_or_more_possessive
20
- ]
21
-
22
- Interval = [:interval]
23
- IntervalReluctant = [:interval_reluctant]
24
- IntervalPossessive = [:interval_possessive]
25
-
26
- IntervalAll = Interval + IntervalReluctant +
27
- IntervalPossessive
28
-
29
- All = Greedy + Reluctant + Possessive + IntervalAll
30
- Type = :quantifier
31
- end
32
-
33
- Map[Quantifier::Type] = Quantifier::All
34
- end
35
- end