regexp_parser 1.7.0 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -2
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  26. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  27. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  28. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  29. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  30. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  31. data/lib/regexp_parser/expression/sequence.rb +11 -47
  32. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  33. data/lib/regexp_parser/expression/shared.rb +111 -0
  34. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  35. data/lib/regexp_parser/expression.rb +14 -141
  36. data/lib/regexp_parser/lexer.rb +83 -41
  37. data/lib/regexp_parser/parser.rb +371 -429
  38. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  39. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  40. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  41. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  42. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  43. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  44. data/lib/regexp_parser/scanner/property.rl +4 -4
  45. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  46. data/lib/regexp_parser/scanner.rb +1423 -1674
  47. data/lib/regexp_parser/syntax/any.rb +2 -7
  48. data/lib/regexp_parser/syntax/base.rb +92 -67
  49. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  50. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  51. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  52. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  53. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  54. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  55. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  56. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  57. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  58. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  59. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  60. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  61. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  62. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  63. data/lib/regexp_parser/syntax/token.rb +45 -0
  64. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  65. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  66. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  67. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  68. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  69. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  70. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  73. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  78. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions.rb +3 -1
  80. data/lib/regexp_parser/syntax.rb +8 -6
  81. data/lib/regexp_parser/token.rb +9 -20
  82. data/lib/regexp_parser/version.rb +1 -1
  83. data/lib/regexp_parser.rb +0 -2
  84. data/regexp_parser.gemspec +19 -23
  85. metadata +52 -171
  86. data/CHANGELOG.md +0 -349
  87. data/README.md +0 -470
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -0,0 +1,248 @@
1
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
2
+ adlm,adlam
3
+ aghb,caucasian_albanian
4
+ ahex,ascii_hex_digit
5
+ arab,arabic
6
+ armi,imperial_aramaic
7
+ armn,armenian
8
+ avst,avestan
9
+ bali,balinese
10
+ bamu,bamum
11
+ bass,bassa_vah
12
+ batk,batak
13
+ beng,bengali
14
+ bhks,bhaiksuki
15
+ bidic,bidi_control
16
+ bopo,bopomofo
17
+ brah,brahmi
18
+ brai,braille
19
+ bugi,buginese
20
+ buhd,buhid
21
+ c,other
22
+ cakm,chakma
23
+ cans,canadian_aboriginal
24
+ cari,carian
25
+ cc,control
26
+ cf,format
27
+ cher,cherokee
28
+ chrs,chorasmian
29
+ ci,case_ignorable
30
+ cn,unassigned
31
+ co,private_use
32
+ combiningmark,mark
33
+ copt,coptic
34
+ cpmn,cypro_minoan
35
+ cprt,cypriot
36
+ cs,surrogate
37
+ cwcf,changes_when_casefolded
38
+ cwcm,changes_when_casemapped
39
+ cwl,changes_when_lowercased
40
+ cwt,changes_when_titlecased
41
+ cwu,changes_when_uppercased
42
+ cyrl,cyrillic
43
+ dep,deprecated
44
+ deva,devanagari
45
+ di,default_ignorable_code_point
46
+ dia,diacritic
47
+ diak,dives_akuru
48
+ dogr,dogra
49
+ dsrt,deseret
50
+ dupl,duployan
51
+ ebase,emoji_modifier_base
52
+ ecomp,emoji_component
53
+ egyp,egyptian_hieroglyphs
54
+ elba,elbasan
55
+ elym,elymaic
56
+ emod,emoji_modifier
57
+ epres,emoji_presentation
58
+ ethi,ethiopic
59
+ ext,extender
60
+ extpict,extended_pictographic
61
+ geor,georgian
62
+ glag,glagolitic
63
+ gong,gunjala_gondi
64
+ gonm,masaram_gondi
65
+ goth,gothic
66
+ gran,grantha
67
+ grbase,grapheme_base
68
+ grek,greek
69
+ grext,grapheme_extend
70
+ grlink,grapheme_link
71
+ gujr,gujarati
72
+ guru,gurmukhi
73
+ hang,hangul
74
+ hani,han
75
+ hano,hanunoo
76
+ hatr,hatran
77
+ hebr,hebrew
78
+ hex,hex_digit
79
+ hira,hiragana
80
+ hluw,anatolian_hieroglyphs
81
+ hmng,pahawh_hmong
82
+ hmnp,nyiakeng_puachue_hmong
83
+ hung,old_hungarian
84
+ idc,id_continue
85
+ ideo,ideographic
86
+ ids,id_start
87
+ idsb,ids_binary_operator
88
+ idst,ids_trinary_operator
89
+ ital,old_italic
90
+ java,javanese
91
+ joinc,join_control
92
+ kali,kayah_li
93
+ kana,katakana
94
+ khar,kharoshthi
95
+ khmr,khmer
96
+ khoj,khojki
97
+ kits,khitan_small_script
98
+ knda,kannada
99
+ kthi,kaithi
100
+ l,letter
101
+ lana,tai_tham
102
+ laoo,lao
103
+ latn,latin
104
+ lc,cased_letter
105
+ lepc,lepcha
106
+ limb,limbu
107
+ lina,linear_a
108
+ linb,linear_b
109
+ ll,lowercase_letter
110
+ lm,modifier_letter
111
+ lo,other_letter
112
+ loe,logical_order_exception
113
+ lt,titlecase_letter
114
+ lu,uppercase_letter
115
+ lyci,lycian
116
+ lydi,lydian
117
+ m,mark
118
+ mahj,mahajani
119
+ maka,makasar
120
+ mand,mandaic
121
+ mani,manichaean
122
+ marc,marchen
123
+ mc,spacing_mark
124
+ me,enclosing_mark
125
+ medf,medefaidrin
126
+ mend,mende_kikakui
127
+ merc,meroitic_cursive
128
+ mero,meroitic_hieroglyphs
129
+ mlym,malayalam
130
+ mn,nonspacing_mark
131
+ mong,mongolian
132
+ mroo,mro
133
+ mtei,meetei_mayek
134
+ mult,multani
135
+ mymr,myanmar
136
+ n,number
137
+ nagm,nag_mundari
138
+ nand,nandinagari
139
+ narb,old_north_arabian
140
+ nbat,nabataean
141
+ nchar,noncharacter_code_point
142
+ nd,decimal_number
143
+ nkoo,nko
144
+ nl,letter_number
145
+ no,other_number
146
+ nshu,nushu
147
+ oalpha,other_alphabetic
148
+ odi,other_default_ignorable_code_point
149
+ ogam,ogham
150
+ ogrext,other_grapheme_extend
151
+ oidc,other_id_continue
152
+ oids,other_id_start
153
+ olck,ol_chiki
154
+ olower,other_lowercase
155
+ omath,other_math
156
+ orkh,old_turkic
157
+ orya,oriya
158
+ osge,osage
159
+ osma,osmanya
160
+ ougr,old_uyghur
161
+ oupper,other_uppercase
162
+ p,punctuation
163
+ palm,palmyrene
164
+ patsyn,pattern_syntax
165
+ patws,pattern_white_space
166
+ pauc,pau_cin_hau
167
+ pc,connector_punctuation
168
+ pcm,prepended_concatenation_mark
169
+ pd,dash_punctuation
170
+ pe,close_punctuation
171
+ perm,old_permic
172
+ pf,final_punctuation
173
+ phag,phags_pa
174
+ phli,inscriptional_pahlavi
175
+ phlp,psalter_pahlavi
176
+ phnx,phoenician
177
+ pi,initial_punctuation
178
+ plrd,miao
179
+ po,other_punctuation
180
+ prti,inscriptional_parthian
181
+ ps,open_punctuation
182
+ qaac,coptic
183
+ qaai,inherited
184
+ qmark,quotation_mark
185
+ ri,regional_indicator
186
+ rjng,rejang
187
+ rohg,hanifi_rohingya
188
+ runr,runic
189
+ s,symbol
190
+ samr,samaritan
191
+ sarb,old_south_arabian
192
+ saur,saurashtra
193
+ sc,currency_symbol
194
+ sd,soft_dotted
195
+ sgnw,signwriting
196
+ shaw,shavian
197
+ shrd,sharada
198
+ sidd,siddham
199
+ sind,khudawadi
200
+ sinh,sinhala
201
+ sk,modifier_symbol
202
+ sm,math_symbol
203
+ so,other_symbol
204
+ sogd,sogdian
205
+ sogo,old_sogdian
206
+ sora,sora_sompeng
207
+ soyo,soyombo
208
+ sterm,sentence_terminal
209
+ sund,sundanese
210
+ sylo,syloti_nagri
211
+ syrc,syriac
212
+ tagb,tagbanwa
213
+ takr,takri
214
+ tale,tai_le
215
+ talu,new_tai_lue
216
+ taml,tamil
217
+ tang,tangut
218
+ tavt,tai_viet
219
+ telu,telugu
220
+ term,terminal_punctuation
221
+ tfng,tifinagh
222
+ tglg,tagalog
223
+ thaa,thaana
224
+ tibt,tibetan
225
+ tirh,tirhuta
226
+ tnsa,tangsa
227
+ ugar,ugaritic
228
+ uideo,unified_ideograph
229
+ vaii,vai
230
+ vith,vithkuqi
231
+ vs,variation_selector
232
+ wara,warang_citi
233
+ wcho,wancho
234
+ wspace,white_space
235
+ xidc,xid_continue
236
+ xids,xid_start
237
+ xpeo,old_persian
238
+ xsux,cuneiform
239
+ yezi,yezidi
240
+ yiii,yi
241
+ z,separator
242
+ zanb,zanabazar_square
243
+ zinh,inherited
244
+ zl,line_separator
245
+ zp,paragraph_separator
246
+ zs,space_separator
247
+ zyyy,common
248
+ zzzz,unknown
@@ -14,15 +14,15 @@
14
14
  unicode_property := |*
15
15
 
16
16
  property_sequence < eof(premature_property_end) {
17
- text = text(data, ts, te, 1).first
17
+ text = copy(data, ts-1, te)
18
18
  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
19
19
 
20
- name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
20
+ name = text[3..-2].gsub(/[\^\s_\-]/, '').downcase
21
21
 
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
- raise UnknownUnicodePropertyError.new(name) unless token
23
+ raise ValidationError.for(:property, name) unless token
24
24
 
25
- self.emit(type, token.to_sym, text, ts-1, te)
25
+ self.emit(type, token.to_sym, text)
26
26
 
27
27
  fret;
28
28
  };