regexp_parser 0.4.9 → 0.4.10
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +11 -1
- data/lib/regexp_parser/expression/classes/property.rb +7 -2
- data/lib/regexp_parser/parser.rb +11 -3
- data/lib/regexp_parser/scanner/property.rl +37 -12
- data/lib/regexp_parser/scanner.rb +598 -574
- data/lib/regexp_parser/syntax/ruby/2.3.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.4.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.5.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.6.0.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.6.rb +8 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +42 -12
- data/lib/regexp_parser/syntax/versions.rb +8 -86
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/test/parser/test_properties.rb +34 -3
- data/test/scanner/test_all.rb +6 -0
- data/test/scanner/test_emojis.rb +31 -0
- data/test/scanner/test_properties.rb +12 -2
- data/test/scanner/test_unicode_blocks.rb +1 -1
- data/test/syntax/ruby/test_1.9.3.rb +2 -2
- data/test/syntax/ruby/test_2.2.0.rb +2 -2
- data/test/syntax/ruby/test_files.rb +14 -0
- data/test/syntax/test_syntax.rb +2 -0
- metadata +195 -187
- checksums.yaml +0 -7
@@ -2,8 +2,10 @@ module Regexp::Syntax
|
|
2
2
|
module Token
|
3
3
|
|
4
4
|
module UnicodeProperty
|
5
|
-
|
6
|
-
|
5
|
+
CharType_V190 = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
|
6
|
+
:lower, :print, :punct, :space, :upper, :word, :xdigit]
|
7
|
+
|
8
|
+
CharType_V250 = [:xposixpunct]
|
7
9
|
|
8
10
|
POSIX = [:any, :assigned, :newline]
|
9
11
|
|
@@ -44,9 +46,15 @@ module Regexp::Syntax
|
|
44
46
|
# These were merged (from Onigmo) in the branch for 2.2.0
|
45
47
|
Age_V220 = [:age_6_2, :age_6_3, :age_7_0]
|
46
48
|
|
47
|
-
|
49
|
+
Age_V230 = [:age_8_0]
|
50
|
+
|
51
|
+
Age_V240 = [:age_9_0]
|
52
|
+
|
53
|
+
Age_V250 = [:age_10_0]
|
54
|
+
|
55
|
+
Age = Age_V193 + Age_V200 + Age_V220 + Age_V230 + Age_V240 + Age_V250
|
48
56
|
|
49
|
-
|
57
|
+
Derived_V190 = [
|
50
58
|
:ascii_hex,
|
51
59
|
:alphabetic,
|
52
60
|
:cased,
|
@@ -100,7 +108,13 @@ module Regexp::Syntax
|
|
100
108
|
:xid_continue,
|
101
109
|
]
|
102
110
|
|
103
|
-
|
111
|
+
Derived_V250 = [
|
112
|
+
:regional_indicator
|
113
|
+
]
|
114
|
+
|
115
|
+
Derived = Derived_V190 + Derived_V250
|
116
|
+
|
117
|
+
Script_V190 = [
|
104
118
|
:script_arabic,
|
105
119
|
:script_imperial_aramaic,
|
106
120
|
:script_armenian,
|
@@ -197,9 +211,9 @@ module Regexp::Syntax
|
|
197
211
|
:script_unknown
|
198
212
|
]
|
199
213
|
|
200
|
-
|
214
|
+
Script_V193 = [:script_brahmi, :script_batak, :script_mandaic]
|
201
215
|
|
202
|
-
|
216
|
+
Script_V220 = [
|
203
217
|
:script_caucasian_albanian,
|
204
218
|
:script_bassa_vah,
|
205
219
|
:script_duployan,
|
@@ -225,6 +239,8 @@ module Regexp::Syntax
|
|
225
239
|
:script_warang_citi
|
226
240
|
]
|
227
241
|
|
242
|
+
Script = Script_V190 + Script_V193 + Script_V220
|
243
|
+
|
228
244
|
UnicodeBlock = [
|
229
245
|
:block_inalphabetic_presentation_forms,
|
230
246
|
:block_inarabic_presentation_forms_a,
|
@@ -253,7 +269,7 @@ module Regexp::Syntax
|
|
253
269
|
:block_incombining_half_marks,
|
254
270
|
:block_incontrol_pictures,
|
255
271
|
:block_incurrency_symbols,
|
256
|
-
:
|
272
|
+
:block_incyrillic_supplement,
|
257
273
|
:block_incyrillic,
|
258
274
|
:block_indevanagari,
|
259
275
|
:block_indingbats,
|
@@ -333,14 +349,28 @@ module Regexp::Syntax
|
|
333
349
|
:block_inyijing_hexagram_symbols,
|
334
350
|
]
|
335
351
|
|
336
|
-
|
337
|
-
|
352
|
+
Emoji = [
|
353
|
+
:emoji_any,
|
354
|
+
:emoji_component,
|
355
|
+
:emoji_modifier,
|
356
|
+
:emoji_modifier_base,
|
357
|
+
:emoji_presentation,
|
358
|
+
]
|
359
|
+
|
360
|
+
V190 = CharType_V190 + POSIX + Category::All + Derived_V190 + Script_V190 + UnicodeBlock
|
361
|
+
V193 = Age_V193 + Script_V193
|
338
362
|
|
339
363
|
V200 = Age_V200
|
340
364
|
|
341
|
-
V220 = Age_V220 +
|
365
|
+
V220 = Age_V220 + Script_V220
|
366
|
+
|
367
|
+
V230 = Age_V230
|
368
|
+
|
369
|
+
V240 = Age_V240
|
370
|
+
|
371
|
+
V250 = Age_V250 + CharType_V250 + Derived_V250 + Emoji
|
342
372
|
|
343
|
-
All = V190 + V193 + V200 + V220
|
373
|
+
All = V190 + V193 + V200 + V220 + V230 + V240 + V250
|
344
374
|
|
345
375
|
Type = :property
|
346
376
|
NonType = :nonproperty
|
@@ -1,88 +1,10 @@
|
|
1
|
+
# Ruby 1.8.x is no longer a supported runtime,
|
2
|
+
# but its regex features are still recognized.
|
3
|
+
#
|
4
|
+
# Aliases for the latest patch version are provided as 'ruby/n.n',
|
5
|
+
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
|
1
6
|
module Regexp::Syntax
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# but its regex features are still recognized.)
|
6
|
-
'ruby/1.8.6',
|
7
|
-
'ruby/1.8.7',
|
8
|
-
|
9
|
-
# alias for the latest 1.8 implementation
|
10
|
-
'ruby/1.8',
|
11
|
-
|
12
|
-
# Ruby 1.9.x
|
13
|
-
'ruby/1.9.1',
|
14
|
-
'ruby/1.9.2',
|
15
|
-
'ruby/1.9.3',
|
16
|
-
|
17
|
-
# alias for the latest 1.9 implementation
|
18
|
-
'ruby/1.9',
|
19
|
-
|
20
|
-
# Ruby 2.0.x
|
21
|
-
'ruby/2.0.0',
|
22
|
-
|
23
|
-
# alias for the latest 2.0 implementations
|
24
|
-
'ruby/2.0',
|
25
|
-
|
26
|
-
# Ruby 2.1.x
|
27
|
-
'ruby/2.1.0',
|
28
|
-
'ruby/2.1.2',
|
29
|
-
'ruby/2.1.3',
|
30
|
-
'ruby/2.1.4',
|
31
|
-
'ruby/2.1.5',
|
32
|
-
'ruby/2.1.6',
|
33
|
-
'ruby/2.1.7',
|
34
|
-
'ruby/2.1.8',
|
35
|
-
'ruby/2.1.9',
|
36
|
-
'ruby/2.1.10',
|
37
|
-
|
38
|
-
# alias for the latest 2.1 implementations
|
39
|
-
'ruby/2.1',
|
40
|
-
|
41
|
-
# Ruby 2.2.x
|
42
|
-
'ruby/2.2.0',
|
43
|
-
'ruby/2.2.1',
|
44
|
-
'ruby/2.2.2',
|
45
|
-
'ruby/2.2.3',
|
46
|
-
'ruby/2.2.4',
|
47
|
-
'ruby/2.2.5',
|
48
|
-
'ruby/2.2.6',
|
49
|
-
'ruby/2.2.7',
|
50
|
-
'ruby/2.2.8',
|
51
|
-
'ruby/2.2.9',
|
52
|
-
|
53
|
-
# alias for the latest 2.2 implementations
|
54
|
-
'ruby/2.2',
|
55
|
-
|
56
|
-
# Ruby 2.3.x
|
57
|
-
'ruby/2.3.0',
|
58
|
-
'ruby/2.3.1',
|
59
|
-
'ruby/2.3.2',
|
60
|
-
'ruby/2.3.3',
|
61
|
-
'ruby/2.3.4',
|
62
|
-
'ruby/2.3.5',
|
63
|
-
'ruby/2.3.6',
|
64
|
-
|
65
|
-
# alias for the latest 2.3 implementation
|
66
|
-
'ruby/2.3',
|
67
|
-
|
68
|
-
# Ruby 2.4.x
|
69
|
-
'ruby/2.4.0',
|
70
|
-
'ruby/2.4.1',
|
71
|
-
'ruby/2.4.2',
|
72
|
-
'ruby/2.4.3',
|
73
|
-
|
74
|
-
# alias for the latest 2.4 implementation
|
75
|
-
'ruby/2.4',
|
76
|
-
|
77
|
-
# Ruby 2.5.x
|
78
|
-
'ruby/2.5.0',
|
79
|
-
|
80
|
-
# alias for the latest 2.5 implementation
|
81
|
-
'ruby/2.5',
|
82
|
-
]
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
Regexp::Syntax::VERSIONS.each do |version|
|
87
|
-
require File.expand_path("../#{version}", __FILE__)
|
7
|
+
version_file_paths = Dir[File.expand_path('../ruby/*.rb', __FILE__)]
|
8
|
+
version_file_paths.each { |path| require path }
|
9
|
+
VERSIONS = version_file_paths.map { |path| path[%r{(ruby/.*)\.rb}, 1] }
|
88
10
|
end
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -6,20 +6,24 @@ class ParserProperties < Test::Unit::TestCase
|
|
6
6
|
props = [
|
7
7
|
'Alnum',
|
8
8
|
'Alpha',
|
9
|
-
'Any',
|
10
9
|
'Ascii',
|
11
10
|
'Blank',
|
12
11
|
'Cntrl',
|
13
12
|
'Digit',
|
14
13
|
'Graph',
|
15
14
|
'Lower',
|
16
|
-
'Newline',
|
17
15
|
'Print',
|
18
16
|
'Punct',
|
19
17
|
'Space',
|
20
18
|
'Upper',
|
21
19
|
'Word',
|
22
20
|
'Xdigit',
|
21
|
+
'XPosixPunct',
|
22
|
+
|
23
|
+
'Newline',
|
24
|
+
|
25
|
+
'Any',
|
26
|
+
'Assigned',
|
23
27
|
|
24
28
|
'L',
|
25
29
|
'Letter',
|
@@ -144,6 +148,10 @@ class ParserProperties < Test::Unit::TestCase
|
|
144
148
|
'Age=5.1',
|
145
149
|
'Age=5.2',
|
146
150
|
'Age=6.0',
|
151
|
+
'Age=7.0',
|
152
|
+
'Age=8.0',
|
153
|
+
'Age=9.0',
|
154
|
+
'Age=10.0',
|
147
155
|
|
148
156
|
'ahex',
|
149
157
|
'ASCII_Hex_Digit',
|
@@ -264,6 +272,9 @@ class ParserProperties < Test::Unit::TestCase
|
|
264
272
|
|
265
273
|
'radical',
|
266
274
|
|
275
|
+
'ri',
|
276
|
+
'Regional_Indicator',
|
277
|
+
|
267
278
|
'sd',
|
268
279
|
'Soft_Dotted',
|
269
280
|
|
@@ -288,6 +299,12 @@ class ParserProperties < Test::Unit::TestCase
|
|
288
299
|
|
289
300
|
'xidc',
|
290
301
|
'XID_Continue',
|
302
|
+
|
303
|
+
'Emoji',
|
304
|
+
'Emoji_Component',
|
305
|
+
'Emoji_Modifier',
|
306
|
+
'Emoji_Modifier_Base',
|
307
|
+
'Emoji_Presentation',
|
291
308
|
]
|
292
309
|
|
293
310
|
modes.each do |mode|
|
@@ -295,7 +312,7 @@ class ParserProperties < Test::Unit::TestCase
|
|
295
312
|
|
296
313
|
props.each do |property|
|
297
314
|
define_method "test_parse_#{token_type}_#{property}" do
|
298
|
-
t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/
|
315
|
+
t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/2.5'
|
299
316
|
|
300
317
|
assert t.expressions.last.is_a?(UnicodeProperty::Base),
|
301
318
|
"Expected property, but got #{t.expressions.last.class.name}"
|
@@ -351,6 +368,20 @@ class ParserProperties < Test::Unit::TestCase
|
|
351
368
|
"Expected Script property, but got #{t.expressions[1].class.name}"
|
352
369
|
end
|
353
370
|
|
371
|
+
def test_parse_property_script_v193
|
372
|
+
t = RP.parse 'ab\p{Brahmi}cd', 'ruby/1.9.3'
|
373
|
+
|
374
|
+
assert t.expressions[1].is_a?(UnicodeProperty::Script),
|
375
|
+
"Expected Script property, but got #{t.expressions[1].class.name}"
|
376
|
+
end
|
377
|
+
|
378
|
+
def test_parse_property_script_v220
|
379
|
+
t = RP.parse 'ab\p{Caucasian_Albanian}cd', 'ruby/2.2'
|
380
|
+
|
381
|
+
assert t.expressions[1].is_a?(UnicodeProperty::Script),
|
382
|
+
"Expected Script property, but got #{t.expressions[1].class.name}"
|
383
|
+
end
|
384
|
+
|
354
385
|
def test_parse_property_block
|
355
386
|
t = RP.parse 'ab\p{InArmenian}cd', 'ruby/1.9'
|
356
387
|
|
data/test/scanner/test_all.rb
CHANGED
@@ -13,6 +13,12 @@ if RUBY_VERSION >= '2.0.0'
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
+
if RUBY_VERSION >= '2.5.0'
|
17
|
+
%w{emojis}.each do|tc|
|
18
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
16
22
|
class TestRegexpScanner < Test::Unit::TestCase
|
17
23
|
|
18
24
|
def test_scanner_returns_an_array
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
class ScannerUnicodeEmojis < Test::Unit::TestCase
|
4
|
+
|
5
|
+
tests = {
|
6
|
+
'Emoji' => :emoji_any,
|
7
|
+
'Emoji_Component' => :emoji_component,
|
8
|
+
'Emoji_Modifier' => :emoji_modifier,
|
9
|
+
'Emoji_Modifier_Base' => :emoji_modifier_base,
|
10
|
+
'Emoji_Presentation' => :emoji_presentation,
|
11
|
+
}
|
12
|
+
|
13
|
+
tests.each_with_index do |(property, token), count|
|
14
|
+
define_method "test_scanner_property_#{token}_#{count}" do
|
15
|
+
tokens = RS.scan("a\\p{#{property}}c")
|
16
|
+
result = tokens.at(1)
|
17
|
+
|
18
|
+
assert_equal :property, result[0]
|
19
|
+
assert_equal token, result[1]
|
20
|
+
end
|
21
|
+
|
22
|
+
define_method "test_scanner_nonproperty_#{token}_#{count}" do
|
23
|
+
tokens = RS.scan("a\\P{#{property}}c")
|
24
|
+
result = tokens.at(1)
|
25
|
+
|
26
|
+
assert_equal :nonproperty, result[0]
|
27
|
+
assert_equal token, result[1]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -5,20 +5,24 @@ class ScannerProperties < Test::Unit::TestCase
|
|
5
5
|
tests = {
|
6
6
|
'Alnum' => :alnum,
|
7
7
|
'Alpha' => :alpha,
|
8
|
-
'Any' => :any,
|
9
8
|
'Ascii' => :ascii,
|
10
9
|
'Blank' => :blank,
|
11
10
|
'Cntrl' => :cntrl,
|
12
11
|
'Digit' => :digit,
|
13
12
|
'Graph' => :graph,
|
14
13
|
'Lower' => :lower,
|
15
|
-
'Newline' => :newline,
|
16
14
|
'Print' => :print,
|
17
15
|
'Punct' => :punct,
|
18
16
|
'Space' => :space,
|
19
17
|
'Upper' => :upper,
|
20
18
|
'Word' => :word,
|
21
19
|
'Xdigit' => :xdigit,
|
20
|
+
'XPosixPunct' => :xposixpunct,
|
21
|
+
|
22
|
+
'Newline' => :newline,
|
23
|
+
|
24
|
+
'Any' => :any,
|
25
|
+
'Assigned' => :assigned,
|
22
26
|
|
23
27
|
'L' => :letter_any,
|
24
28
|
'Letter' => :letter_any,
|
@@ -147,6 +151,9 @@ class ScannerProperties < Test::Unit::TestCase
|
|
147
151
|
'Age=6.2' => :age_6_2,
|
148
152
|
'Age=6.3' => :age_6_3,
|
149
153
|
'Age=7.0' => :age_7_0,
|
154
|
+
'Age=8.0' => :age_8_0,
|
155
|
+
'Age=9.0' => :age_9_0,
|
156
|
+
'Age=10.0' => :age_10_0,
|
150
157
|
|
151
158
|
'ahex' => :ascii_hex,
|
152
159
|
'ASCII_Hex_Digit' => :ascii_hex,
|
@@ -267,6 +274,9 @@ class ScannerProperties < Test::Unit::TestCase
|
|
267
274
|
|
268
275
|
'radical' => :radical,
|
269
276
|
|
277
|
+
'ri' => :regional_indicator,
|
278
|
+
'Regional_Indicator' => :regional_indicator,
|
279
|
+
|
270
280
|
'sd' => :soft_dotted,
|
271
281
|
'Soft_Dotted' => :soft_dotted,
|
272
282
|
|