regexp_parser 1.7.1 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +157 -1
- data/Gemfile +6 -1
- data/LICENSE +1 -1
- data/README.md +38 -32
- data/Rakefile +18 -27
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
- data/lib/regexp_parser/expression/classes/group.rb +28 -3
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +4 -17
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -20
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -139
- data/lib/regexp_parser/lexer.rb +13 -11
- data/lib/regexp_parser/parser.rb +325 -344
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +235 -255
- data/lib/regexp_parser/scanner.rb +1324 -1387
- data/lib/regexp_parser/syntax/any.rb +4 -6
- data/lib/regexp_parser/syntax/base.rb +13 -15
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +34 -165
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -52
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
@@ -0,0 +1,242 @@
|
|
1
|
+
# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT
|
2
|
+
adlm,adlam
|
3
|
+
aghb,caucasian_albanian
|
4
|
+
ahex,ascii_hex_digit
|
5
|
+
arab,arabic
|
6
|
+
armi,imperial_aramaic
|
7
|
+
armn,armenian
|
8
|
+
avst,avestan
|
9
|
+
bali,balinese
|
10
|
+
bamu,bamum
|
11
|
+
bass,bassa_vah
|
12
|
+
batk,batak
|
13
|
+
beng,bengali
|
14
|
+
bhks,bhaiksuki
|
15
|
+
bidic,bidi_control
|
16
|
+
bopo,bopomofo
|
17
|
+
brah,brahmi
|
18
|
+
brai,braille
|
19
|
+
bugi,buginese
|
20
|
+
buhd,buhid
|
21
|
+
c,other
|
22
|
+
cakm,chakma
|
23
|
+
cans,canadian_aboriginal
|
24
|
+
cari,carian
|
25
|
+
cc,control
|
26
|
+
cf,format
|
27
|
+
cher,cherokee
|
28
|
+
chrs,chorasmian
|
29
|
+
ci,case_ignorable
|
30
|
+
cn,unassigned
|
31
|
+
co,private_use
|
32
|
+
combiningmark,mark
|
33
|
+
copt,coptic
|
34
|
+
cprt,cypriot
|
35
|
+
cs,surrogate
|
36
|
+
cwcf,changes_when_casefolded
|
37
|
+
cwcm,changes_when_casemapped
|
38
|
+
cwl,changes_when_lowercased
|
39
|
+
cwt,changes_when_titlecased
|
40
|
+
cwu,changes_when_uppercased
|
41
|
+
cyrl,cyrillic
|
42
|
+
dep,deprecated
|
43
|
+
deva,devanagari
|
44
|
+
di,default_ignorable_code_point
|
45
|
+
dia,diacritic
|
46
|
+
diak,dives_akuru
|
47
|
+
dogr,dogra
|
48
|
+
dsrt,deseret
|
49
|
+
dupl,duployan
|
50
|
+
ebase,emoji_modifier_base
|
51
|
+
ecomp,emoji_component
|
52
|
+
egyp,egyptian_hieroglyphs
|
53
|
+
elba,elbasan
|
54
|
+
elym,elymaic
|
55
|
+
emod,emoji_modifier
|
56
|
+
epres,emoji_presentation
|
57
|
+
ethi,ethiopic
|
58
|
+
ext,extender
|
59
|
+
geor,georgian
|
60
|
+
glag,glagolitic
|
61
|
+
gong,gunjala_gondi
|
62
|
+
gonm,masaram_gondi
|
63
|
+
goth,gothic
|
64
|
+
gran,grantha
|
65
|
+
grbase,grapheme_base
|
66
|
+
grek,greek
|
67
|
+
grext,grapheme_extend
|
68
|
+
grlink,grapheme_link
|
69
|
+
gujr,gujarati
|
70
|
+
guru,gurmukhi
|
71
|
+
hang,hangul
|
72
|
+
hani,han
|
73
|
+
hano,hanunoo
|
74
|
+
hatr,hatran
|
75
|
+
hebr,hebrew
|
76
|
+
hex,hex_digit
|
77
|
+
hira,hiragana
|
78
|
+
hluw,anatolian_hieroglyphs
|
79
|
+
hmng,pahawh_hmong
|
80
|
+
hmnp,nyiakeng_puachue_hmong
|
81
|
+
hung,old_hungarian
|
82
|
+
idc,id_continue
|
83
|
+
ideo,ideographic
|
84
|
+
ids,id_start
|
85
|
+
idsb,ids_binary_operator
|
86
|
+
idst,ids_trinary_operator
|
87
|
+
ital,old_italic
|
88
|
+
java,javanese
|
89
|
+
joinc,join_control
|
90
|
+
kali,kayah_li
|
91
|
+
kana,katakana
|
92
|
+
khar,kharoshthi
|
93
|
+
khmr,khmer
|
94
|
+
khoj,khojki
|
95
|
+
kits,khitan_small_script
|
96
|
+
knda,kannada
|
97
|
+
kthi,kaithi
|
98
|
+
l,letter
|
99
|
+
lana,tai_tham
|
100
|
+
laoo,lao
|
101
|
+
latn,latin
|
102
|
+
lc,cased_letter
|
103
|
+
lepc,lepcha
|
104
|
+
limb,limbu
|
105
|
+
lina,linear_a
|
106
|
+
linb,linear_b
|
107
|
+
ll,lowercase_letter
|
108
|
+
lm,modifier_letter
|
109
|
+
lo,other_letter
|
110
|
+
loe,logical_order_exception
|
111
|
+
lt,titlecase_letter
|
112
|
+
lu,uppercase_letter
|
113
|
+
lyci,lycian
|
114
|
+
lydi,lydian
|
115
|
+
m,mark
|
116
|
+
mahj,mahajani
|
117
|
+
maka,makasar
|
118
|
+
mand,mandaic
|
119
|
+
mani,manichaean
|
120
|
+
marc,marchen
|
121
|
+
mc,spacing_mark
|
122
|
+
me,enclosing_mark
|
123
|
+
medf,medefaidrin
|
124
|
+
mend,mende_kikakui
|
125
|
+
merc,meroitic_cursive
|
126
|
+
mero,meroitic_hieroglyphs
|
127
|
+
mlym,malayalam
|
128
|
+
mn,nonspacing_mark
|
129
|
+
mong,mongolian
|
130
|
+
mroo,mro
|
131
|
+
mtei,meetei_mayek
|
132
|
+
mult,multani
|
133
|
+
mymr,myanmar
|
134
|
+
n,number
|
135
|
+
nand,nandinagari
|
136
|
+
narb,old_north_arabian
|
137
|
+
nbat,nabataean
|
138
|
+
nchar,noncharacter_code_point
|
139
|
+
nd,decimal_number
|
140
|
+
nkoo,nko
|
141
|
+
nl,letter_number
|
142
|
+
no,other_number
|
143
|
+
nshu,nushu
|
144
|
+
oalpha,other_alphabetic
|
145
|
+
odi,other_default_ignorable_code_point
|
146
|
+
ogam,ogham
|
147
|
+
ogrext,other_grapheme_extend
|
148
|
+
oidc,other_id_continue
|
149
|
+
oids,other_id_start
|
150
|
+
olck,ol_chiki
|
151
|
+
olower,other_lowercase
|
152
|
+
omath,other_math
|
153
|
+
orkh,old_turkic
|
154
|
+
orya,oriya
|
155
|
+
osge,osage
|
156
|
+
osma,osmanya
|
157
|
+
oupper,other_uppercase
|
158
|
+
p,punctuation
|
159
|
+
palm,palmyrene
|
160
|
+
patsyn,pattern_syntax
|
161
|
+
patws,pattern_white_space
|
162
|
+
pauc,pau_cin_hau
|
163
|
+
pc,connector_punctuation
|
164
|
+
pcm,prepended_concatenation_mark
|
165
|
+
pd,dash_punctuation
|
166
|
+
pe,close_punctuation
|
167
|
+
perm,old_permic
|
168
|
+
pf,final_punctuation
|
169
|
+
phag,phags_pa
|
170
|
+
phli,inscriptional_pahlavi
|
171
|
+
phlp,psalter_pahlavi
|
172
|
+
phnx,phoenician
|
173
|
+
pi,initial_punctuation
|
174
|
+
plrd,miao
|
175
|
+
po,other_punctuation
|
176
|
+
prti,inscriptional_parthian
|
177
|
+
ps,open_punctuation
|
178
|
+
qaac,coptic
|
179
|
+
qaai,inherited
|
180
|
+
qmark,quotation_mark
|
181
|
+
ri,regional_indicator
|
182
|
+
rjng,rejang
|
183
|
+
rohg,hanifi_rohingya
|
184
|
+
runr,runic
|
185
|
+
s,symbol
|
186
|
+
samr,samaritan
|
187
|
+
sarb,old_south_arabian
|
188
|
+
saur,saurashtra
|
189
|
+
sc,currency_symbol
|
190
|
+
sd,soft_dotted
|
191
|
+
sgnw,signwriting
|
192
|
+
shaw,shavian
|
193
|
+
shrd,sharada
|
194
|
+
sidd,siddham
|
195
|
+
sind,khudawadi
|
196
|
+
sinh,sinhala
|
197
|
+
sk,modifier_symbol
|
198
|
+
sm,math_symbol
|
199
|
+
so,other_symbol
|
200
|
+
sogd,sogdian
|
201
|
+
sogo,old_sogdian
|
202
|
+
sora,sora_sompeng
|
203
|
+
soyo,soyombo
|
204
|
+
sterm,sentence_terminal
|
205
|
+
sund,sundanese
|
206
|
+
sylo,syloti_nagri
|
207
|
+
syrc,syriac
|
208
|
+
tagb,tagbanwa
|
209
|
+
takr,takri
|
210
|
+
tale,tai_le
|
211
|
+
talu,new_tai_lue
|
212
|
+
taml,tamil
|
213
|
+
tang,tangut
|
214
|
+
tavt,tai_viet
|
215
|
+
telu,telugu
|
216
|
+
term,terminal_punctuation
|
217
|
+
tfng,tifinagh
|
218
|
+
tglg,tagalog
|
219
|
+
thaa,thaana
|
220
|
+
tibt,tibetan
|
221
|
+
tirh,tirhuta
|
222
|
+
ugar,ugaritic
|
223
|
+
uideo,unified_ideograph
|
224
|
+
vaii,vai
|
225
|
+
vs,variation_selector
|
226
|
+
wara,warang_citi
|
227
|
+
wcho,wancho
|
228
|
+
wspace,white_space
|
229
|
+
xidc,xid_continue
|
230
|
+
xids,xid_start
|
231
|
+
xpeo,old_persian
|
232
|
+
xsux,cuneiform
|
233
|
+
yezi,yezidi
|
234
|
+
yiii,yi
|
235
|
+
z,separator
|
236
|
+
zanb,zanabazar_square
|
237
|
+
zinh,inherited
|
238
|
+
zl,line_separator
|
239
|
+
zp,paragraph_separator
|
240
|
+
zs,space_separator
|
241
|
+
zyyy,common
|
242
|
+
zzzz,unknown
|
@@ -14,7 +14,7 @@
|
|
14
14
|
unicode_property := |*
|
15
15
|
|
16
16
|
property_sequence < eof(premature_property_end) {
|
17
|
-
text = text(data, ts, te, 1).first
|
17
|
+
text = copy(data, ts-1, te)
|
18
18
|
type = (text[1] == 'P') ^ (text[3] == '^') ? :nonproperty : :property
|
19
19
|
|
20
20
|
name = data[ts+2..te-2].pack('c*').gsub(/[\^\s_\-]/, '').downcase
|
@@ -22,7 +22,7 @@
|
|
22
22
|
token = self.class.short_prop_map[name] || self.class.long_prop_map[name]
|
23
23
|
raise UnknownUnicodePropertyError.new(name) unless token
|
24
24
|
|
25
|
-
self.emit(type, token.to_sym, text, ts-1, te)
|
25
|
+
self.emit(type, token.to_sym, text)
|
26
26
|
|
27
27
|
fret;
|
28
28
|
};
|