regexp_parser 1.7.1 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +157 -1
  3. data/Gemfile +6 -1
  4. data/LICENSE +1 -1
  5. data/README.md +38 -32
  6. data/Rakefile +18 -27
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +123 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  14. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  15. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  16. data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
  17. data/lib/regexp_parser/expression/classes/group.rb +28 -3
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -17
  21. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  22. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  23. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  24. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  25. data/lib/regexp_parser/expression/quantifier.rb +11 -2
  26. data/lib/regexp_parser/expression/sequence.rb +3 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  28. data/lib/regexp_parser/expression.rb +7 -139
  29. data/lib/regexp_parser/lexer.rb +13 -11
  30. data/lib/regexp_parser/parser.rb +325 -344
  31. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  32. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  33. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  34. data/lib/regexp_parser/scanner/property.rl +2 -2
  35. data/lib/regexp_parser/scanner/scanner.rl +235 -255
  36. data/lib/regexp_parser/scanner.rb +1324 -1387
  37. data/lib/regexp_parser/syntax/any.rb +4 -6
  38. data/lib/regexp_parser/syntax/base.rb +13 -15
  39. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  41. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  42. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  44. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  45. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  46. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  47. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  48. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  49. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  50. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  51. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  52. data/lib/regexp_parser/syntax/token.rb +45 -0
  53. data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
  54. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  55. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  56. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  57. data/lib/regexp_parser/syntax.rb +8 -6
  58. data/lib/regexp_parser/token.rb +9 -20
  59. data/lib/regexp_parser/version.rb +1 -1
  60. data/lib/regexp_parser.rb +0 -2
  61. data/regexp_parser.gemspec +20 -22
  62. metadata +34 -165
  63. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  64. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  65. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  66. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  67. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  68. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  69. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  70. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  71. data/spec/expression/base_spec.rb +0 -94
  72. data/spec/expression/clone_spec.rb +0 -120
  73. data/spec/expression/conditional_spec.rb +0 -89
  74. data/spec/expression/free_space_spec.rb +0 -27
  75. data/spec/expression/methods/match_length_spec.rb +0 -161
  76. data/spec/expression/methods/match_spec.rb +0 -25
  77. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  78. data/spec/expression/methods/tests_spec.rb +0 -99
  79. data/spec/expression/methods/traverse_spec.rb +0 -161
  80. data/spec/expression/options_spec.rb +0 -128
  81. data/spec/expression/root_spec.rb +0 -9
  82. data/spec/expression/sequence_spec.rb +0 -9
  83. data/spec/expression/subexpression_spec.rb +0 -50
  84. data/spec/expression/to_h_spec.rb +0 -26
  85. data/spec/expression/to_s_spec.rb +0 -100
  86. data/spec/lexer/all_spec.rb +0 -22
  87. data/spec/lexer/conditionals_spec.rb +0 -53
  88. data/spec/lexer/delimiters_spec.rb +0 -68
  89. data/spec/lexer/escapes_spec.rb +0 -14
  90. data/spec/lexer/keep_spec.rb +0 -10
  91. data/spec/lexer/literals_spec.rb +0 -89
  92. data/spec/lexer/nesting_spec.rb +0 -99
  93. data/spec/lexer/refcalls_spec.rb +0 -55
  94. data/spec/parser/all_spec.rb +0 -43
  95. data/spec/parser/alternation_spec.rb +0 -88
  96. data/spec/parser/anchors_spec.rb +0 -17
  97. data/spec/parser/conditionals_spec.rb +0 -179
  98. data/spec/parser/errors_spec.rb +0 -30
  99. data/spec/parser/escapes_spec.rb +0 -121
  100. data/spec/parser/free_space_spec.rb +0 -130
  101. data/spec/parser/groups_spec.rb +0 -108
  102. data/spec/parser/keep_spec.rb +0 -6
  103. data/spec/parser/posix_classes_spec.rb +0 -8
  104. data/spec/parser/properties_spec.rb +0 -115
  105. data/spec/parser/quantifiers_spec.rb +0 -52
  106. data/spec/parser/refcalls_spec.rb +0 -112
  107. data/spec/parser/set/intersections_spec.rb +0 -127
  108. data/spec/parser/set/ranges_spec.rb +0 -111
  109. data/spec/parser/sets_spec.rb +0 -178
  110. data/spec/parser/types_spec.rb +0 -18
  111. data/spec/scanner/all_spec.rb +0 -18
  112. data/spec/scanner/anchors_spec.rb +0 -21
  113. data/spec/scanner/conditionals_spec.rb +0 -128
  114. data/spec/scanner/delimiters_spec.rb +0 -52
  115. data/spec/scanner/errors_spec.rb +0 -67
  116. data/spec/scanner/escapes_spec.rb +0 -53
  117. data/spec/scanner/free_space_spec.rb +0 -133
  118. data/spec/scanner/groups_spec.rb +0 -52
  119. data/spec/scanner/keep_spec.rb +0 -10
  120. data/spec/scanner/literals_spec.rb +0 -49
  121. data/spec/scanner/meta_spec.rb +0 -18
  122. data/spec/scanner/properties_spec.rb +0 -64
  123. data/spec/scanner/quantifiers_spec.rb +0 -20
  124. data/spec/scanner/refcalls_spec.rb +0 -36
  125. data/spec/scanner/sets_spec.rb +0 -102
  126. data/spec/scanner/types_spec.rb +0 -14
  127. data/spec/spec_helper.rb +0 -15
  128. data/spec/support/runner.rb +0 -42
  129. data/spec/support/shared_examples.rb +0 -77
  130. data/spec/support/warning_extractor.rb +0 -60
  131. data/spec/syntax/syntax_spec.rb +0 -48
  132. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  133. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  134. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  135. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  136. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  137. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  138. data/spec/syntax/versions/aliases_spec.rb +0 -37
  139. data/spec/token/token_spec.rb +0 -85
@@ -0,0 +1,242 @@
1
+ # THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
2
+ adlm,adlam
3
+ aghb,caucasian_albanian
4
+ ahex,ascii_hex_digit
5
+ arab,arabic
6
+ armi,imperial_aramaic
7
+ armn,armenian
8
+ avst,avestan
9
+ bali,balinese
10
+ bamu,bamum
11
+ bass,bassa_vah
12
+ batk,batak
13
+ beng,bengali
14
+ bhks,bhaiksuki
15
+ bidic,bidi_control
16
+ bopo,bopomofo
17
+ brah,brahmi
18
+ brai,braille
19
+ bugi,buginese
20
+ buhd,buhid
21
+ c,other
22
+ cakm,chakma
23
+ cans,canadian_aboriginal
24
+ cari,carian
25
+ cc,control
26
+ cf,format
27
+ cher,cherokee
28
+ chrs,chorasmian
29
+ ci,case_ignorable
30
+ cn,unassigned
31
+ co,private_use
32
+ combiningmark,mark
33
+ copt,coptic
34
+ cprt,cypriot
35
+ cs,surrogate
36
+ cwcf,changes_when_casefolded
37
+ cwcm,changes_when_casemapped
38
+ cwl,changes_when_lowercased
39
+ cwt,changes_when_titlecased
40
+ cwu,changes_when_uppercased
41
+ cyrl,cyrillic
42
+ dep,deprecated
43
+ deva,devanagari
44
+ di,default_ignorable_code_point
45
+ dia,diacritic
46
+ diak,dives_akuru
47
+ dogr,dogra
48
+ dsrt,deseret
49
+ dupl,duployan
50
+ ebase,emoji_modifier_base
51
+ ecomp,emoji_component
52
+ egyp,egyptian_hieroglyphs
53
+ elba,elbasan
54
+ elym,elymaic
55
+ emod,emoji_modifier
56
+ epres,emoji_presentation
57
+ ethi,ethiopic
58
+ ext,extender
59
+ geor,georgian
60
+ glag,glagolitic
61
+ gong,gunjala_gondi
62
+ gonm,masaram_gondi
63
+ goth,gothic
64
+ gran,grantha
65
+ grbase,grapheme_base
66
+ grek,greek
67
+ grext,grapheme_extend
68
+ grlink,grapheme_link
69
+ gujr,gujarati
70
+ guru,gurmukhi
71
+ hang,hangul
72
+ hani,han
73
+ hano,hanunoo
74
+ hatr,hatran
75
+ hebr,hebrew
76
+ hex,hex_digit
77
+ hira,hiragana
78
+ hluw,anatolian_hieroglyphs
79
+ hmng,pahawh_hmong
80
+ hmnp,nyiakeng_puachue_hmong
81
+ hung,old_hungarian
82
+ idc,id_continue
83
+ ideo,ideographic
84
+ ids,id_start
85
+ idsb,ids_binary_operator
86
+ idst,ids_trinary_operator
87
+ ital,old_italic
88
+ java,javanese
89
+ joinc,join_control
90
+ kali,kayah_li
91
+ kana,katakana
92
+ khar,kharoshthi
93
+ khmr,khmer
94
+ khoj,khojki
95
+ kits,khitan_small_script
96
+ knda,kannada
97
+ kthi,kaithi
98
+ l,letter
99
+ lana,tai_tham
100
+ laoo,lao
101
+ latn,latin
102
+ lc,cased_letter
103
+ lepc,lepcha
104
+ limb,limbu
105
+ lina,linear_a
106
+ linb,linear_b
107
+ ll,lowercase_letter
108
+ lm,modifier_letter
109
+ lo,other_letter
110
+ loe,logical_order_exception
111
+ lt,titlecase_letter
112
+ lu,uppercase_letter
113
+ lyci,lycian
114
+ lydi,lydian
115
+ m,mark
116
+ mahj,mahajani
117
+ maka,makasar
118
+ mand,mandaic
119
+ mani,manichaean
120
+ marc,marchen
121
+ mc,spacing_mark
122
+ me,enclosing_mark
123
+ medf,medefaidrin
124
+ mend,mende_kikakui
125
+ merc,meroitic_cursive
126
+ mero,meroitic_hieroglyphs
127
+ mlym,malayalam
128
+ mn,nonspacing_mark
129
+ mong,mongolian
130
+ mroo,mro
131
+ mtei,meetei_mayek
132
+ mult,multani
133
+ mymr,myanmar
134
+ n,number
135
+ nand,nandinagari
136
+ narb,old_north_arabian
137
+ nbat,nabataean
138
+ nchar,noncharacter_code_point
139
+ nd,decimal_number
140
+ nkoo,nko
141
+ nl,letter_number
142
+ no,other_number
143
+ nshu,nushu
144
+ oalpha,other_alphabetic
145
+ odi,other_default_ignorable_code_point
146
+ ogam,ogham
147
+ ogrext,other_grapheme_extend
148
+ oidc,other_id_continue
149
+ oids,other_id_start
150
+ olck,ol_chiki
151
+ olower,other_lowercase
152
+ omath,other_math
153
+ orkh,old_turkic
154
+ orya,oriya
155
+ osge,osage
156
+ osma,osmanya
157
+ oupper,other_uppercase
158
+ p,punctuation
159
+ palm,palmyrene
160
+ patsyn,pattern_syntax
161
+ patws,pattern_white_space
162
+ pauc,pau_cin_hau
163
+ pc,connector_punctuation
164
+ pcm,prepended_concatenation_mark
165
+ pd,dash_punctuation
166
+ pe,close_punctuation
167
+ perm,old_permic
168
+ pf,final_punctuation
169
+ phag,phags_pa
170
+ phli,inscriptional_pahlavi
171
+ phlp,psalter_pahlavi
172
+ phnx,phoenician
173
+ pi,initial_punctuation
174
+ plrd,miao
175
+ po,other_punctuation
176
+ prti,inscriptional_parthian
177
+ ps,open_punctuation
178
+ qaac,coptic
179
+ qaai,inherited
180
+ qmark,quotation_mark
181
+ ri,regional_indicator
182
+ rjng,rejang
183
+ rohg,hanifi_rohingya
184
+ runr,runic
185
+ s,symbol
186
+ samr,samaritan
187
+ sarb,old_south_arabian
188
+ saur,saurashtra
189
+ sc,currency_symbol
190
+ sd,soft_dotted
191
+ sgnw,signwriting
192
+ shaw,shavian
193
+ shrd,sharada
194
+ sidd,siddham
195
+ sind,khudawadi
196
+ sinh,sinhala
197
+ sk,modifier_symbol
198
+ sm,math_symbol
199
+ so,other_symbol
200
+ sogd,sogdian
201
+ sogo,old_sogdian
202
+ sora,sora_sompeng
203
+ soyo,soyombo
204
+ sterm,sentence_terminal
205
+ sund,sundanese
206
+ sylo,syloti_nagri
207
+ syrc,syriac
208
+ tagb,tagbanwa
209
+ takr,takri
210
+ tale,tai_le
211
+ talu,new_tai_lue
212
+ taml,tamil
213
+ tang,tangut
214
+ tavt,tai_viet
215
+ telu,telugu
216
+ term,terminal_punctuation
217
+ tfng,tifinagh
218
+ tglg,tagalog
219
+ thaa,thaana
220
+ tibt,tibetan
221
+ tirh,tirhuta
222
+ ugar,ugaritic
223
+ uideo,unified_ideograph
224
+ vaii,vai
225
+ vs,variation_selector
226
+ wara,warang_citi
227
+ wcho,wancho
228
+ wspace,white_space
229
+ xidc,xid_continue
230
+ xids,xid_start
231
+ xpeo,old_persian
232
+ xsux,cuneiform
233
+ yezi,yezidi
234
+ yiii,yi
235
+ z,separator
236
+ zanb,zanabazar_square
237
+ zinh,inherited
238
+ zl,line_separator
239
+ zp,paragraph_separator
240
+ zs,space_separator
241
+ zyyy,common
242
+ zzzz,unknown
@@ -14,7 +14,7 @@
14
14
  unicode_property := |*
15
15
 
16
16
  property_sequence < eof(premature_property_end) {
17
- text = text(data, ts, te, 1).first
17
+ text = copy(data, ts-1, te)
18
18
  type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
19
19
 
20
20
  name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
@@ -22,7 +22,7 @@
22
22
  token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
23
23
  raise UnknownUnicodePropertyError.new(name) unless token
24
24
 
25
- self.emit(type, token.to_sym, text, ts-1, te)
25
+ self.emit(type, token.to_sym, text)
26
26
 
27
27
  fret;
28
28
  };