regexp_parser 2.1.1 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -1
  3. data/LICENSE +1 -1
  4. data/README.md +17 -23
  5. data/Rakefile +10 -19
  6. data/lib/regexp_parser/expression/base.rb +123 -0
  7. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  8. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  12. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  13. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  14. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  15. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  16. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  17. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  18. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  20. data/lib/regexp_parser/expression/sequence.rb +0 -1
  21. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  22. data/lib/regexp_parser/expression.rb +6 -130
  23. data/lib/regexp_parser/lexer.rb +7 -5
  24. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  25. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  26. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  27. data/lib/regexp_parser/scanner.rb +126 -124
  28. data/lib/regexp_parser/syntax/any.rb +1 -3
  29. data/lib/regexp_parser/syntax/base.rb +12 -14
  30. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  31. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  32. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  35. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  36. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  37. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  38. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  39. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  40. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  41. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  43. data/lib/regexp_parser/syntax/token.rb +45 -0
  44. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  45. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  46. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  47. data/lib/regexp_parser/syntax.rb +1 -1
  48. data/lib/regexp_parser/token.rb +9 -20
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +0 -2
  51. data/regexp_parser.gemspec +20 -22
  52. metadata +32 -164
  53. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  54. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  55. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  56. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  57. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  58. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  59. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  60. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  61. data/spec/expression/base_spec.rb +0 -104
  62. data/spec/expression/clone_spec.rb +0 -152
  63. data/spec/expression/conditional_spec.rb +0 -89
  64. data/spec/expression/free_space_spec.rb +0 -27
  65. data/spec/expression/methods/match_length_spec.rb +0 -161
  66. data/spec/expression/methods/match_spec.rb +0 -25
  67. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  68. data/spec/expression/methods/tests_spec.rb +0 -99
  69. data/spec/expression/methods/traverse_spec.rb +0 -161
  70. data/spec/expression/options_spec.rb +0 -128
  71. data/spec/expression/subexpression_spec.rb +0 -50
  72. data/spec/expression/to_h_spec.rb +0 -26
  73. data/spec/expression/to_s_spec.rb +0 -108
  74. data/spec/lexer/all_spec.rb +0 -22
  75. data/spec/lexer/conditionals_spec.rb +0 -53
  76. data/spec/lexer/delimiters_spec.rb +0 -68
  77. data/spec/lexer/escapes_spec.rb +0 -14
  78. data/spec/lexer/keep_spec.rb +0 -10
  79. data/spec/lexer/literals_spec.rb +0 -64
  80. data/spec/lexer/nesting_spec.rb +0 -99
  81. data/spec/lexer/refcalls_spec.rb +0 -60
  82. data/spec/parser/all_spec.rb +0 -43
  83. data/spec/parser/alternation_spec.rb +0 -88
  84. data/spec/parser/anchors_spec.rb +0 -17
  85. data/spec/parser/conditionals_spec.rb +0 -179
  86. data/spec/parser/errors_spec.rb +0 -30
  87. data/spec/parser/escapes_spec.rb +0 -121
  88. data/spec/parser/free_space_spec.rb +0 -130
  89. data/spec/parser/groups_spec.rb +0 -108
  90. data/spec/parser/keep_spec.rb +0 -6
  91. data/spec/parser/options_spec.rb +0 -28
  92. data/spec/parser/posix_classes_spec.rb +0 -8
  93. data/spec/parser/properties_spec.rb +0 -115
  94. data/spec/parser/quantifiers_spec.rb +0 -68
  95. data/spec/parser/refcalls_spec.rb +0 -117
  96. data/spec/parser/set/intersections_spec.rb +0 -127
  97. data/spec/parser/set/ranges_spec.rb +0 -111
  98. data/spec/parser/sets_spec.rb +0 -178
  99. data/spec/parser/types_spec.rb +0 -18
  100. data/spec/scanner/all_spec.rb +0 -18
  101. data/spec/scanner/anchors_spec.rb +0 -21
  102. data/spec/scanner/conditionals_spec.rb +0 -128
  103. data/spec/scanner/delimiters_spec.rb +0 -52
  104. data/spec/scanner/errors_spec.rb +0 -67
  105. data/spec/scanner/escapes_spec.rb +0 -64
  106. data/spec/scanner/free_space_spec.rb +0 -165
  107. data/spec/scanner/groups_spec.rb +0 -61
  108. data/spec/scanner/keep_spec.rb +0 -10
  109. data/spec/scanner/literals_spec.rb +0 -39
  110. data/spec/scanner/meta_spec.rb +0 -18
  111. data/spec/scanner/options_spec.rb +0 -36
  112. data/spec/scanner/properties_spec.rb +0 -64
  113. data/spec/scanner/quantifiers_spec.rb +0 -25
  114. data/spec/scanner/refcalls_spec.rb +0 -55
  115. data/spec/scanner/sets_spec.rb +0 -151
  116. data/spec/scanner/types_spec.rb +0 -14
  117. data/spec/spec_helper.rb +0 -16
  118. data/spec/support/runner.rb +0 -42
  119. data/spec/support/shared_examples.rb +0 -77
  120. data/spec/support/warning_extractor.rb +0 -60
  121. data/spec/syntax/syntax_spec.rb +0 -48
  122. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  123. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  124. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  125. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  126. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  127. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  128. data/spec/syntax/versions/aliases_spec.rb +0 -37
  129. data/spec/token/token_spec.rb +0 -85
@@ -0,0 +1,242 @@
1
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
2
+ adlm,adlam
3
+ aghb,caucasian_albanian
4
+ ahex,ascii_hex_digit
5
+ arab,arabic
6
+ armi,imperial_aramaic
7
+ armn,armenian
8
+ avst,avestan
9
+ bali,balinese
10
+ bamu,bamum
11
+ bass,bassa_vah
12
+ batk,batak
13
+ beng,bengali
14
+ bhks,bhaiksuki
15
+ bidic,bidi_control
16
+ bopo,bopomofo
17
+ brah,brahmi
18
+ brai,braille
19
+ bugi,buginese
20
+ buhd,buhid
21
+ c,other
22
+ cakm,chakma
23
+ cans,canadian_aboriginal
24
+ cari,carian
25
+ cc,control
26
+ cf,format
27
+ cher,cherokee
28
+ chrs,chorasmian
29
+ ci,case_ignorable
30
+ cn,unassigned
31
+ co,private_use
32
+ combiningmark,mark
33
+ copt,coptic
34
+ cprt,cypriot
35
+ cs,surrogate
36
+ cwcf,changes_when_casefolded
37
+ cwcm,changes_when_casemapped
38
+ cwl,changes_when_lowercased
39
+ cwt,changes_when_titlecased
40
+ cwu,changes_when_uppercased
41
+ cyrl,cyrillic
42
+ dep,deprecated
43
+ deva,devanagari
44
+ di,default_ignorable_code_point
45
+ dia,diacritic
46
+ diak,dives_akuru
47
+ dogr,dogra
48
+ dsrt,deseret
49
+ dupl,duployan
50
+ ebase,emoji_modifier_base
51
+ ecomp,emoji_component
52
+ egyp,egyptian_hieroglyphs
53
+ elba,elbasan
54
+ elym,elymaic
55
+ emod,emoji_modifier
56
+ epres,emoji_presentation
57
+ ethi,ethiopic
58
+ ext,extender
59
+ geor,georgian
60
+ glag,glagolitic
61
+ gong,gunjala_gondi
62
+ gonm,masaram_gondi
63
+ goth,gothic
64
+ gran,grantha
65
+ grbase,grapheme_base
66
+ grek,greek
67
+ grext,grapheme_extend
68
+ grlink,grapheme_link
69
+ gujr,gujarati
70
+ guru,gurmukhi
71
+ hang,hangul
72
+ hani,han
73
+ hano,hanunoo
74
+ hatr,hatran
75
+ hebr,hebrew
76
+ hex,hex_digit
77
+ hira,hiragana
78
+ hluw,anatolian_hieroglyphs
79
+ hmng,pahawh_hmong
80
+ hmnp,nyiakeng_puachue_hmong
81
+ hung,old_hungarian
82
+ idc,id_continue
83
+ ideo,ideographic
84
+ ids,id_start
85
+ idsb,ids_binary_operator
86
+ idst,ids_trinary_operator
87
+ ital,old_italic
88
+ java,javanese
89
+ joinc,join_control
90
+ kali,kayah_li
91
+ kana,katakana
92
+ khar,kharoshthi
93
+ khmr,khmer
94
+ khoj,khojki
95
+ kits,khitan_small_script
96
+ knda,kannada
97
+ kthi,kaithi
98
+ l,letter
99
+ lana,tai_tham
100
+ laoo,lao
101
+ latn,latin
102
+ lc,cased_letter
103
+ lepc,lepcha
104
+ limb,limbu
105
+ lina,linear_a
106
+ linb,linear_b
107
+ ll,lowercase_letter
108
+ lm,modifier_letter
109
+ lo,other_letter
110
+ loe,logical_order_exception
111
+ lt,titlecase_letter
112
+ lu,uppercase_letter
113
+ lyci,lycian
114
+ lydi,lydian
115
+ m,mark
116
+ mahj,mahajani
117
+ maka,makasar
118
+ mand,mandaic
119
+ mani,manichaean
120
+ marc,marchen
121
+ mc,spacing_mark
122
+ me,enclosing_mark
123
+ medf,medefaidrin
124
+ mend,mende_kikakui
125
+ merc,meroitic_cursive
126
+ mero,meroitic_hieroglyphs
127
+ mlym,malayalam
128
+ mn,nonspacing_mark
129
+ mong,mongolian
130
+ mroo,mro
131
+ mtei,meetei_mayek
132
+ mult,multani
133
+ mymr,myanmar
134
+ n,number
135
+ nand,nandinagari
136
+ narb,old_north_arabian
137
+ nbat,nabataean
138
+ nchar,noncharacter_code_point
139
+ nd,decimal_number
140
+ nkoo,nko
141
+ nl,letter_number
142
+ no,other_number
143
+ nshu,nushu
144
+ oalpha,other_alphabetic
145
+ odi,other_default_ignorable_code_point
146
+ ogam,ogham
147
+ ogrext,other_grapheme_extend
148
+ oidc,other_id_continue
149
+ oids,other_id_start
150
+ olck,ol_chiki
151
+ olower,other_lowercase
152
+ omath,other_math
153
+ orkh,old_turkic
154
+ orya,oriya
155
+ osge,osage
156
+ osma,osmanya
157
+ oupper,other_uppercase
158
+ p,punctuation
159
+ palm,palmyrene
160
+ patsyn,pattern_syntax
161
+ patws,pattern_white_space
162
+ pauc,pau_cin_hau
163
+ pc,connector_punctuation
164
+ pcm,prepended_concatenation_mark
165
+ pd,dash_punctuation
166
+ pe,close_punctuation
167
+ perm,old_permic
168
+ pf,final_punctuation
169
+ phag,phags_pa
170
+ phli,inscriptional_pahlavi
171
+ phlp,psalter_pahlavi
172
+ phnx,phoenician
173
+ pi,initial_punctuation
174
+ plrd,miao
175
+ po,other_punctuation
176
+ prti,inscriptional_parthian
177
+ ps,open_punctuation
178
+ qaac,coptic
179
+ qaai,inherited
180
+ qmark,quotation_mark
181
+ ri,regional_indicator
182
+ rjng,rejang
183
+ rohg,hanifi_rohingya
184
+ runr,runic
185
+ s,symbol
186
+ samr,samaritan
187
+ sarb,old_south_arabian
188
+ saur,saurashtra
189
+ sc,currency_symbol
190
+ sd,soft_dotted
191
+ sgnw,signwriting
192
+ shaw,shavian
193
+ shrd,sharada
194
+ sidd,siddham
195
+ sind,khudawadi
196
+ sinh,sinhala
197
+ sk,modifier_symbol
198
+ sm,math_symbol
199
+ so,other_symbol
200
+ sogd,sogdian
201
+ sogo,old_sogdian
202
+ sora,sora_sompeng
203
+ soyo,soyombo
204
+ sterm,sentence_terminal
205
+ sund,sundanese
206
+ sylo,syloti_nagri
207
+ syrc,syriac
208
+ tagb,tagbanwa
209
+ takr,takri
210
+ tale,tai_le
211
+ talu,new_tai_lue
212
+ taml,tamil
213
+ tang,tangut
214
+ tavt,tai_viet
215
+ telu,telugu
216
+ term,terminal_punctuation
217
+ tfng,tifinagh
218
+ tglg,tagalog
219
+ thaa,thaana
220
+ tibt,tibetan
221
+ tirh,tirhuta
222
+ ugar,ugaritic
223
+ uideo,unified_ideograph
224
+ vaii,vai
225
+ vs,variation_selector
226
+ wara,warang_citi
227
+ wcho,wancho
228
+ wspace,white_space
229
+ xidc,xid_continue
230
+ xids,xid_start
231
+ xpeo,old_persian
232
+ xsux,cuneiform
233
+ yezi,yezidi
234
+ yiii,yi
235
+ z,separator
236
+ zanb,zanabazar_square
237
+ zinh,inherited
238
+ zl,line_separator
239
+ zp,paragraph_separator
240
+ zs,space_separator
241
+ zyyy,common
242
+ zzzz,unknown
@@ -759,14 +759,16 @@ class Regexp::Scanner
759
759
  end
760
760
 
761
761
  # lazy-load property maps when first needed
762
- require 'yaml'
763
-
764
762
  def self.short_prop_map
765
- @short_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/short.yml")
763
+ @short_prop_map ||= parse_prop_map('short')
766
764
  end
767
765
 
768
766
  def self.long_prop_map
769
- @long_prop_map ||= YAML.load_file("#{__dir__}/scanner/properties/long.yml")
767
+ @long_prop_map ||= parse_prop_map('long')
768
+ end
769
+
770
+ def self.parse_prop_map(name)
771
+ File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
770
772
  end
771
773
 
772
774
  # Emits an array with the details of the scanned pattern