regexp_parser 2.1.1 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/Gemfile +1 -1
  4. data/LICENSE +1 -1
  5. data/README.md +31 -27
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  14. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  16. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  17. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  18. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  19. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  20. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  21. data/lib/regexp_parser/expression/sequence.rb +0 -1
  22. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  23. data/lib/regexp_parser/expression.rb +6 -130
  24. data/lib/regexp_parser/lexer.rb +8 -6
  25. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  26. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  27. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  28. data/lib/regexp_parser/scanner.rb +126 -124
  29. data/lib/regexp_parser/syntax/any.rb +2 -7
  30. data/lib/regexp_parser/syntax/base.rb +91 -66
  31. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  32. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  33. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  36. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  38. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  39. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  40. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  41. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  42. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +722 -0
  44. data/lib/regexp_parser/syntax/token.rb +45 -0
  45. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  46. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  47. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  48. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  49. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  50. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  51. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  52. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  53. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  54. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  55. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  56. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  57. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  58. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  59. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  60. data/lib/regexp_parser/syntax/versions.rb +1 -1
  61. data/lib/regexp_parser/syntax.rb +1 -1
  62. data/lib/regexp_parser/token.rb +9 -20
  63. data/lib/regexp_parser/version.rb +1 -1
  64. data/lib/regexp_parser.rb +0 -2
  65. data/regexp_parser.gemspec +20 -22
  66. metadata +36 -167
  67. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  68. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  69. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  70. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  71. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  72. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  73. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  74. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  75. data/spec/expression/base_spec.rb +0 -104
  76. data/spec/expression/clone_spec.rb +0 -152
  77. data/spec/expression/conditional_spec.rb +0 -89
  78. data/spec/expression/free_space_spec.rb +0 -27
  79. data/spec/expression/methods/match_length_spec.rb +0 -161
  80. data/spec/expression/methods/match_spec.rb +0 -25
  81. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  82. data/spec/expression/methods/tests_spec.rb +0 -99
  83. data/spec/expression/methods/traverse_spec.rb +0 -161
  84. data/spec/expression/options_spec.rb +0 -128
  85. data/spec/expression/subexpression_spec.rb +0 -50
  86. data/spec/expression/to_h_spec.rb +0 -26
  87. data/spec/expression/to_s_spec.rb +0 -108
  88. data/spec/lexer/all_spec.rb +0 -22
  89. data/spec/lexer/conditionals_spec.rb +0 -53
  90. data/spec/lexer/delimiters_spec.rb +0 -68
  91. data/spec/lexer/escapes_spec.rb +0 -14
  92. data/spec/lexer/keep_spec.rb +0 -10
  93. data/spec/lexer/literals_spec.rb +0 -64
  94. data/spec/lexer/nesting_spec.rb +0 -99
  95. data/spec/lexer/refcalls_spec.rb +0 -60
  96. data/spec/parser/all_spec.rb +0 -43
  97. data/spec/parser/alternation_spec.rb +0 -88
  98. data/spec/parser/anchors_spec.rb +0 -17
  99. data/spec/parser/conditionals_spec.rb +0 -179
  100. data/spec/parser/errors_spec.rb +0 -30
  101. data/spec/parser/escapes_spec.rb +0 -121
  102. data/spec/parser/free_space_spec.rb +0 -130
  103. data/spec/parser/groups_spec.rb +0 -108
  104. data/spec/parser/keep_spec.rb +0 -6
  105. data/spec/parser/options_spec.rb +0 -28
  106. data/spec/parser/posix_classes_spec.rb +0 -8
  107. data/spec/parser/properties_spec.rb +0 -115
  108. data/spec/parser/quantifiers_spec.rb +0 -68
  109. data/spec/parser/refcalls_spec.rb +0 -117
  110. data/spec/parser/set/intersections_spec.rb +0 -127
  111. data/spec/parser/set/ranges_spec.rb +0 -111
  112. data/spec/parser/sets_spec.rb +0 -178
  113. data/spec/parser/types_spec.rb +0 -18
  114. data/spec/scanner/all_spec.rb +0 -18
  115. data/spec/scanner/anchors_spec.rb +0 -21
  116. data/spec/scanner/conditionals_spec.rb +0 -128
  117. data/spec/scanner/delimiters_spec.rb +0 -52
  118. data/spec/scanner/errors_spec.rb +0 -67
  119. data/spec/scanner/escapes_spec.rb +0 -64
  120. data/spec/scanner/free_space_spec.rb +0 -165
  121. data/spec/scanner/groups_spec.rb +0 -61
  122. data/spec/scanner/keep_spec.rb +0 -10
  123. data/spec/scanner/literals_spec.rb +0 -39
  124. data/spec/scanner/meta_spec.rb +0 -18
  125. data/spec/scanner/options_spec.rb +0 -36
  126. data/spec/scanner/properties_spec.rb +0 -64
  127. data/spec/scanner/quantifiers_spec.rb +0 -25
  128. data/spec/scanner/refcalls_spec.rb +0 -55
  129. data/spec/scanner/sets_spec.rb +0 -151
  130. data/spec/scanner/types_spec.rb +0 -14
  131. data/spec/spec_helper.rb +0 -16
  132. data/spec/support/runner.rb +0 -42
  133. data/spec/support/shared_examples.rb +0 -77
  134. data/spec/support/warning_extractor.rb +0 -60
  135. data/spec/syntax/syntax_spec.rb +0 -48
  136. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  137. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  138. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  139. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  140. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  141. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  142. data/spec/syntax/versions/aliases_spec.rb +0 -37
  143. data/spec/token/token_spec.rb +0 -85
@@ -0,0 +1,246 @@
1
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
2
+ adlm,adlam
3
+ aghb,caucasian_albanian
4
+ ahex,ascii_hex_digit
5
+ arab,arabic
6
+ armi,imperial_aramaic
7
+ armn,armenian
8
+ avst,avestan
9
+ bali,balinese
10
+ bamu,bamum
11
+ bass,bassa_vah
12
+ batk,batak
13
+ beng,bengali
14
+ bhks,bhaiksuki
15
+ bidic,bidi_control
16
+ bopo,bopomofo
17
+ brah,brahmi
18
+ brai,braille
19
+ bugi,buginese
20
+ buhd,buhid
21
+ c,other
22
+ cakm,chakma
23
+ cans,canadian_aboriginal
24
+ cari,carian
25
+ cc,control
26
+ cf,format
27
+ cher,cherokee
28
+ chrs,chorasmian
29
+ ci,case_ignorable
30
+ cn,unassigned
31
+ co,private_use
32
+ combiningmark,mark
33
+ copt,coptic
34
+ cpmn,cypro_minoan
35
+ cprt,cypriot
36
+ cs,surrogate
37
+ cwcf,changes_when_casefolded
38
+ cwcm,changes_when_casemapped
39
+ cwl,changes_when_lowercased
40
+ cwt,changes_when_titlecased
41
+ cwu,changes_when_uppercased
42
+ cyrl,cyrillic
43
+ dep,deprecated
44
+ deva,devanagari
45
+ di,default_ignorable_code_point
46
+ dia,diacritic
47
+ diak,dives_akuru
48
+ dogr,dogra
49
+ dsrt,deseret
50
+ dupl,duployan
51
+ ebase,emoji_modifier_base
52
+ ecomp,emoji_component
53
+ egyp,egyptian_hieroglyphs
54
+ elba,elbasan
55
+ elym,elymaic
56
+ emod,emoji_modifier
57
+ epres,emoji_presentation
58
+ ethi,ethiopic
59
+ ext,extender
60
+ geor,georgian
61
+ glag,glagolitic
62
+ gong,gunjala_gondi
63
+ gonm,masaram_gondi
64
+ goth,gothic
65
+ gran,grantha
66
+ grbase,grapheme_base
67
+ grek,greek
68
+ grext,grapheme_extend
69
+ grlink,grapheme_link
70
+ gujr,gujarati
71
+ guru,gurmukhi
72
+ hang,hangul
73
+ hani,han
74
+ hano,hanunoo
75
+ hatr,hatran
76
+ hebr,hebrew
77
+ hex,hex_digit
78
+ hira,hiragana
79
+ hluw,anatolian_hieroglyphs
80
+ hmng,pahawh_hmong
81
+ hmnp,nyiakeng_puachue_hmong
82
+ hung,old_hungarian
83
+ idc,id_continue
84
+ ideo,ideographic
85
+ ids,id_start
86
+ idsb,ids_binary_operator
87
+ idst,ids_trinary_operator
88
+ ital,old_italic
89
+ java,javanese
90
+ joinc,join_control
91
+ kali,kayah_li
92
+ kana,katakana
93
+ khar,kharoshthi
94
+ khmr,khmer
95
+ khoj,khojki
96
+ kits,khitan_small_script
97
+ knda,kannada
98
+ kthi,kaithi
99
+ l,letter
100
+ lana,tai_tham
101
+ laoo,lao
102
+ latn,latin
103
+ lc,cased_letter
104
+ lepc,lepcha
105
+ limb,limbu
106
+ lina,linear_a
107
+ linb,linear_b
108
+ ll,lowercase_letter
109
+ lm,modifier_letter
110
+ lo,other_letter
111
+ loe,logical_order_exception
112
+ lt,titlecase_letter
113
+ lu,uppercase_letter
114
+ lyci,lycian
115
+ lydi,lydian
116
+ m,mark
117
+ mahj,mahajani
118
+ maka,makasar
119
+ mand,mandaic
120
+ mani,manichaean
121
+ marc,marchen
122
+ mc,spacing_mark
123
+ me,enclosing_mark
124
+ medf,medefaidrin
125
+ mend,mende_kikakui
126
+ merc,meroitic_cursive
127
+ mero,meroitic_hieroglyphs
128
+ mlym,malayalam
129
+ mn,nonspacing_mark
130
+ mong,mongolian
131
+ mroo,mro
132
+ mtei,meetei_mayek
133
+ mult,multani
134
+ mymr,myanmar
135
+ n,number
136
+ nand,nandinagari
137
+ narb,old_north_arabian
138
+ nbat,nabataean
139
+ nchar,noncharacter_code_point
140
+ nd,decimal_number
141
+ nkoo,nko
142
+ nl,letter_number
143
+ no,other_number
144
+ nshu,nushu
145
+ oalpha,other_alphabetic
146
+ odi,other_default_ignorable_code_point
147
+ ogam,ogham
148
+ ogrext,other_grapheme_extend
149
+ oidc,other_id_continue
150
+ oids,other_id_start
151
+ olck,ol_chiki
152
+ olower,other_lowercase
153
+ omath,other_math
154
+ orkh,old_turkic
155
+ orya,oriya
156
+ osge,osage
157
+ osma,osmanya
158
+ ougr,old_uyghur
159
+ oupper,other_uppercase
160
+ p,punctuation
161
+ palm,palmyrene
162
+ patsyn,pattern_syntax
163
+ patws,pattern_white_space
164
+ pauc,pau_cin_hau
165
+ pc,connector_punctuation
166
+ pcm,prepended_concatenation_mark
167
+ pd,dash_punctuation
168
+ pe,close_punctuation
169
+ perm,old_permic
170
+ pf,final_punctuation
171
+ phag,phags_pa
172
+ phli,inscriptional_pahlavi
173
+ phlp,psalter_pahlavi
174
+ phnx,phoenician
175
+ pi,initial_punctuation
176
+ plrd,miao
177
+ po,other_punctuation
178
+ prti,inscriptional_parthian
179
+ ps,open_punctuation
180
+ qaac,coptic
181
+ qaai,inherited
182
+ qmark,quotation_mark
183
+ ri,regional_indicator
184
+ rjng,rejang
185
+ rohg,hanifi_rohingya
186
+ runr,runic
187
+ s,symbol
188
+ samr,samaritan
189
+ sarb,old_south_arabian
190
+ saur,saurashtra
191
+ sc,currency_symbol
192
+ sd,soft_dotted
193
+ sgnw,signwriting
194
+ shaw,shavian
195
+ shrd,sharada
196
+ sidd,siddham
197
+ sind,khudawadi
198
+ sinh,sinhala
199
+ sk,modifier_symbol
200
+ sm,math_symbol
201
+ so,other_symbol
202
+ sogd,sogdian
203
+ sogo,old_sogdian
204
+ sora,sora_sompeng
205
+ soyo,soyombo
206
+ sterm,sentence_terminal
207
+ sund,sundanese
208
+ sylo,syloti_nagri
209
+ syrc,syriac
210
+ tagb,tagbanwa
211
+ takr,takri
212
+ tale,tai_le
213
+ talu,new_tai_lue
214
+ taml,tamil
215
+ tang,tangut
216
+ tavt,tai_viet
217
+ telu,telugu
218
+ term,terminal_punctuation
219
+ tfng,tifinagh
220
+ tglg,tagalog
221
+ thaa,thaana
222
+ tibt,tibetan
223
+ tirh,tirhuta
224
+ tnsa,tangsa
225
+ ugar,ugaritic
226
+ uideo,unified_ideograph
227
+ vaii,vai
228
+ vith,vithkuqi
229
+ vs,variation_selector
230
+ wara,warang_citi
231
+ wcho,wancho
232
+ wspace,white_space
233
+ xidc,xid_continue
234
+ xids,xid_start
235
+ xpeo,old_persian
236
+ xsux,cuneiform
237
+ yezi,yezidi
238
+ yiii,yi
239
+ z,separator
240
+ zanb,zanabazar_square
241
+ zinh,inherited
242
+ zl,line_separator
243
+ zp,paragraph_separator
244
+ zs,space_separator
245
+ zyyy,common
246
+ zzzz,unknown
@@ -759,14 +759,16 @@ class Regexp::Scanner
759
759
  end
760
760
 
761
761
  # lazy-load property maps when first needed
762
- require 'yaml'
763
-
764
762
  def self.short_prop_map
765
- @short_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/short.yml")
763
+ @short_prop_map ||= parse_prop_map('short')
766
764
  end
767
765
 
768
766
  def self.long_prop_map
769
- @long_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/long.yml")
767
+ @long_prop_map ||= parse_prop_map('long')
768
+ end
769
+
770
+ def self.parse_prop_map(name)
771
+ File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
770
772
  end
771
773
 
772
774
  # Emits an array with the details of the scanned pattern