regexp_parser 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/Gemfile +1 -1
  4. data/LICENSE +1 -1
  5. data/README.md +31 -27
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  14. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  16. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  17. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  18. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  19. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  20. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  21. data/lib/regexp_parser/expression/sequence.rb +0 -1
  22. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  23. data/lib/regexp_parser/expression.rb +6 -130
  24. data/lib/regexp_parser/lexer.rb +8 -6
  25. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  26. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  27. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  28. data/lib/regexp_parser/scanner.rb +126 -124
  29. data/lib/regexp_parser/syntax/any.rb +2 -7
  30. data/lib/regexp_parser/syntax/base.rb +91 -66
  31. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  32. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  33. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  36. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  38. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  39. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  40. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  41. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  42. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +722 -0
  44. data/lib/regexp_parser/syntax/token.rb +45 -0
  45. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  46. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  47. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  48. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  49. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  50. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  51. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  52. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  53. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  54. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  55. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  56. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  57. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  58. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  59. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  60. data/lib/regexp_parser/syntax/versions.rb +1 -1
  61. data/lib/regexp_parser/syntax.rb +1 -1
  62. data/lib/regexp_parser/token.rb +9 -20
  63. data/lib/regexp_parser/version.rb +1 -1
  64. data/lib/regexp_parser.rb +0 -2
  65. data/regexp_parser.gemspec +20 -22
  66. metadata +36 -167
  67. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  68. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  69. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  70. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  71. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  72. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  73. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  74. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  75. data/spec/expression/base_spec.rb +0 -104
  76. data/spec/expression/clone_spec.rb +0 -152
  77. data/spec/expression/conditional_spec.rb +0 -89
  78. data/spec/expression/free_space_spec.rb +0 -27
  79. data/spec/expression/methods/match_length_spec.rb +0 -161
  80. data/spec/expression/methods/match_spec.rb +0 -25
  81. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  82. data/spec/expression/methods/tests_spec.rb +0 -99
  83. data/spec/expression/methods/traverse_spec.rb +0 -161
  84. data/spec/expression/options_spec.rb +0 -128
  85. data/spec/expression/subexpression_spec.rb +0 -50
  86. data/spec/expression/to_h_spec.rb +0 -26
  87. data/spec/expression/to_s_spec.rb +0 -108
  88. data/spec/lexer/all_spec.rb +0 -22
  89. data/spec/lexer/conditionals_spec.rb +0 -53
  90. data/spec/lexer/delimiters_spec.rb +0 -68
  91. data/spec/lexer/escapes_spec.rb +0 -14
  92. data/spec/lexer/keep_spec.rb +0 -10
  93. data/spec/lexer/literals_spec.rb +0 -64
  94. data/spec/lexer/nesting_spec.rb +0 -99
  95. data/spec/lexer/refcalls_spec.rb +0 -60
  96. data/spec/parser/all_spec.rb +0 -43
  97. data/spec/parser/alternation_spec.rb +0 -88
  98. data/spec/parser/anchors_spec.rb +0 -17
  99. data/spec/parser/conditionals_spec.rb +0 -179
  100. data/spec/parser/errors_spec.rb +0 -30
  101. data/spec/parser/escapes_spec.rb +0 -121
  102. data/spec/parser/free_space_spec.rb +0 -130
  103. data/spec/parser/groups_spec.rb +0 -108
  104. data/spec/parser/keep_spec.rb +0 -6
  105. data/spec/parser/options_spec.rb +0 -28
  106. data/spec/parser/posix_classes_spec.rb +0 -8
  107. data/spec/parser/properties_spec.rb +0 -115
  108. data/spec/parser/quantifiers_spec.rb +0 -68
  109. data/spec/parser/refcalls_spec.rb +0 -117
  110. data/spec/parser/set/intersections_spec.rb +0 -127
  111. data/spec/parser/set/ranges_spec.rb +0 -111
  112. data/spec/parser/sets_spec.rb +0 -178
  113. data/spec/parser/types_spec.rb +0 -18
  114. data/spec/scanner/all_spec.rb +0 -18
  115. data/spec/scanner/anchors_spec.rb +0 -21
  116. data/spec/scanner/conditionals_spec.rb +0 -128
  117. data/spec/scanner/delimiters_spec.rb +0 -52
  118. data/spec/scanner/errors_spec.rb +0 -67
  119. data/spec/scanner/escapes_spec.rb +0 -64
  120. data/spec/scanner/free_space_spec.rb +0 -165
  121. data/spec/scanner/groups_spec.rb +0 -61
  122. data/spec/scanner/keep_spec.rb +0 -10
  123. data/spec/scanner/literals_spec.rb +0 -39
  124. data/spec/scanner/meta_spec.rb +0 -18
  125. data/spec/scanner/options_spec.rb +0 -36
  126. data/spec/scanner/properties_spec.rb +0 -64
  127. data/spec/scanner/quantifiers_spec.rb +0 -25
  128. data/spec/scanner/refcalls_spec.rb +0 -55
  129. data/spec/scanner/sets_spec.rb +0 -151
  130. data/spec/scanner/types_spec.rb +0 -14
  131. data/spec/spec_helper.rb +0 -16
  132. data/spec/support/runner.rb +0 -42
  133. data/spec/support/shared_examples.rb +0 -77
  134. data/spec/support/warning_extractor.rb +0 -60
  135. data/spec/syntax/syntax_spec.rb +0 -48
  136. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  137. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  138. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  139. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  140. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  141. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  142. data/spec/syntax/versions/aliases_spec.rb +0 -37
  143. data/spec/token/token_spec.rb +0 -85
@@ -0,0 +1,246 @@
1
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
2
+ adlm,adlam
3
+ aghb,caucasian_albanian
4
+ ahex,ascii_hex_digit
5
+ arab,arabic
6
+ armi,imperial_aramaic
7
+ armn,armenian
8
+ avst,avestan
9
+ bali,balinese
10
+ bamu,bamum
11
+ bass,bassa_vah
12
+ batk,batak
13
+ beng,bengali
14
+ bhks,bhaiksuki
15
+ bidic,bidi_control
16
+ bopo,bopomofo
17
+ brah,brahmi
18
+ brai,braille
19
+ bugi,buginese
20
+ buhd,buhid
21
+ c,other
22
+ cakm,chakma
23
+ cans,canadian_aboriginal
24
+ cari,carian
25
+ cc,control
26
+ cf,format
27
+ cher,cherokee
28
+ chrs,chorasmian
29
+ ci,case_ignorable
30
+ cn,unassigned
31
+ co,private_use
32
+ combiningmark,mark
33
+ copt,coptic
34
+ cpmn,cypro_minoan
35
+ cprt,cypriot
36
+ cs,surrogate
37
+ cwcf,changes_when_casefolded
38
+ cwcm,changes_when_casemapped
39
+ cwl,changes_when_lowercased
40
+ cwt,changes_when_titlecased
41
+ cwu,changes_when_uppercased
42
+ cyrl,cyrillic
43
+ dep,deprecated
44
+ deva,devanagari
45
+ di,default_ignorable_code_point
46
+ dia,diacritic
47
+ diak,dives_akuru
48
+ dogr,dogra
49
+ dsrt,deseret
50
+ dupl,duployan
51
+ ebase,emoji_modifier_base
52
+ ecomp,emoji_component
53
+ egyp,egyptian_hieroglyphs
54
+ elba,elbasan
55
+ elym,elymaic
56
+ emod,emoji_modifier
57
+ epres,emoji_presentation
58
+ ethi,ethiopic
59
+ ext,extender
60
+ geor,georgian
61
+ glag,glagolitic
62
+ gong,gunjala_gondi
63
+ gonm,masaram_gondi
64
+ goth,gothic
65
+ gran,grantha
66
+ grbase,grapheme_base
67
+ grek,greek
68
+ grext,grapheme_extend
69
+ grlink,grapheme_link
70
+ gujr,gujarati
71
+ guru,gurmukhi
72
+ hang,hangul
73
+ hani,han
74
+ hano,hanunoo
75
+ hatr,hatran
76
+ hebr,hebrew
77
+ hex,hex_digit
78
+ hira,hiragana
79
+ hluw,anatolian_hieroglyphs
80
+ hmng,pahawh_hmong
81
+ hmnp,nyiakeng_puachue_hmong
82
+ hung,old_hungarian
83
+ idc,id_continue
84
+ ideo,ideographic
85
+ ids,id_start
86
+ idsb,ids_binary_operator
87
+ idst,ids_trinary_operator
88
+ ital,old_italic
89
+ java,javanese
90
+ joinc,join_control
91
+ kali,kayah_li
92
+ kana,katakana
93
+ khar,kharoshthi
94
+ khmr,khmer
95
+ khoj,khojki
96
+ kits,khitan_small_script
97
+ knda,kannada
98
+ kthi,kaithi
99
+ l,letter
100
+ lana,tai_tham
101
+ laoo,lao
102
+ latn,latin
103
+ lc,cased_letter
104
+ lepc,lepcha
105
+ limb,limbu
106
+ lina,linear_a
107
+ linb,linear_b
108
+ ll,lowercase_letter
109
+ lm,modifier_letter
110
+ lo,other_letter
111
+ loe,logical_order_exception
112
+ lt,titlecase_letter
113
+ lu,uppercase_letter
114
+ lyci,lycian
115
+ lydi,lydian
116
+ m,mark
117
+ mahj,mahajani
118
+ maka,makasar
119
+ mand,mandaic
120
+ mani,manichaean
121
+ marc,marchen
122
+ mc,spacing_mark
123
+ me,enclosing_mark
124
+ medf,medefaidrin
125
+ mend,mende_kikakui
126
+ merc,meroitic_cursive
127
+ mero,meroitic_hieroglyphs
128
+ mlym,malayalam
129
+ mn,nonspacing_mark
130
+ mong,mongolian
131
+ mroo,mro
132
+ mtei,meetei_mayek
133
+ mult,multani
134
+ mymr,myanmar
135
+ n,number
136
+ nand,nandinagari
137
+ narb,old_north_arabian
138
+ nbat,nabataean
139
+ nchar,noncharacter_code_point
140
+ nd,decimal_number
141
+ nkoo,nko
142
+ nl,letter_number
143
+ no,other_number
144
+ nshu,nushu
145
+ oalpha,other_alphabetic
146
+ odi,other_default_ignorable_code_point
147
+ ogam,ogham
148
+ ogrext,other_grapheme_extend
149
+ oidc,other_id_continue
150
+ oids,other_id_start
151
+ olck,ol_chiki
152
+ olower,other_lowercase
153
+ omath,other_math
154
+ orkh,old_turkic
155
+ orya,oriya
156
+ osge,osage
157
+ osma,osmanya
158
+ ougr,old_uyghur
159
+ oupper,other_uppercase
160
+ p,punctuation
161
+ palm,palmyrene
162
+ patsyn,pattern_syntax
163
+ patws,pattern_white_space
164
+ pauc,pau_cin_hau
165
+ pc,connector_punctuation
166
+ pcm,prepended_concatenation_mark
167
+ pd,dash_punctuation
168
+ pe,close_punctuation
169
+ perm,old_permic
170
+ pf,final_punctuation
171
+ phag,phags_pa
172
+ phli,inscriptional_pahlavi
173
+ phlp,psalter_pahlavi
174
+ phnx,phoenician
175
+ pi,initial_punctuation
176
+ plrd,miao
177
+ po,other_punctuation
178
+ prti,inscriptional_parthian
179
+ ps,open_punctuation
180
+ qaac,coptic
181
+ qaai,inherited
182
+ qmark,quotation_mark
183
+ ri,regional_indicator
184
+ rjng,rejang
185
+ rohg,hanifi_rohingya
186
+ runr,runic
187
+ s,symbol
188
+ samr,samaritan
189
+ sarb,old_south_arabian
190
+ saur,saurashtra
191
+ sc,currency_symbol
192
+ sd,soft_dotted
193
+ sgnw,signwriting
194
+ shaw,shavian
195
+ shrd,sharada
196
+ sidd,siddham
197
+ sind,khudawadi
198
+ sinh,sinhala
199
+ sk,modifier_symbol
200
+ sm,math_symbol
201
+ so,other_symbol
202
+ sogd,sogdian
203
+ sogo,old_sogdian
204
+ sora,sora_sompeng
205
+ soyo,soyombo
206
+ sterm,sentence_terminal
207
+ sund,sundanese
208
+ sylo,syloti_nagri
209
+ syrc,syriac
210
+ tagb,tagbanwa
211
+ takr,takri
212
+ tale,tai_le
213
+ talu,new_tai_lue
214
+ taml,tamil
215
+ tang,tangut
216
+ tavt,tai_viet
217
+ telu,telugu
218
+ term,terminal_punctuation
219
+ tfng,tifinagh
220
+ tglg,tagalog
221
+ thaa,thaana
222
+ tibt,tibetan
223
+ tirh,tirhuta
224
+ tnsa,tangsa
225
+ ugar,ugaritic
226
+ uideo,unified_ideograph
227
+ vaii,vai
228
+ vith,vithkuqi
229
+ vs,variation_selector
230
+ wara,warang_citi
231
+ wcho,wancho
232
+ wspace,white_space
233
+ xidc,xid_continue
234
+ xids,xid_start
235
+ xpeo,old_persian
236
+ xsux,cuneiform
237
+ yezi,yezidi
238
+ yiii,yi
239
+ z,separator
240
+ zanb,zanabazar_square
241
+ zinh,inherited
242
+ zl,line_separator
243
+ zp,paragraph_separator
244
+ zs,space_separator
245
+ zyyy,common
246
+ zzzz,unknown
@@ -759,14 +759,16 @@ class Regexp::Scanner
759
759
  end
760
760
 
761
761
  # lazy-load property maps when first needed
762
- require 'yaml'
763
-
764
762
  def self.short_prop_map
765
- @short_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/short.yml")
763
+ @short_prop_map ||= parse_prop_map('short')
766
764
  end
767
765
 
768
766
  def self.long_prop_map
769
- @long_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/long.yml")
767
+ @long_prop_map ||= parse_prop_map('long')
768
+ end
769
+
770
+ def self.parse_prop_map(name)
771
+ File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
770
772
  end
771
773
 
772
774
  # Emits an array with the details of the scanned pattern