regexp_parser 0.4.9 → 0.4.10
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/ChangeLog +11 -1
- data/lib/regexp_parser/expression/classes/property.rb +7 -2
- data/lib/regexp_parser/parser.rb +11 -3
- data/lib/regexp_parser/scanner/property.rl +37 -12
- data/lib/regexp_parser/scanner.rb +598 -574
- data/lib/regexp_parser/syntax/ruby/2.3.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.4.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.5.0.rb +3 -0
- data/lib/regexp_parser/syntax/ruby/2.6.0.rb +13 -0
- data/lib/regexp_parser/syntax/ruby/2.6.rb +8 -0
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +42 -12
- data/lib/regexp_parser/syntax/versions.rb +8 -86
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/test/parser/test_properties.rb +34 -3
- data/test/scanner/test_all.rb +6 -0
- data/test/scanner/test_emojis.rb +31 -0
- data/test/scanner/test_properties.rb +12 -2
- data/test/scanner/test_unicode_blocks.rb +1 -1
- data/test/syntax/ruby/test_1.9.3.rb +2 -2
- data/test/syntax/ruby/test_2.2.0.rb +2 -2
- data/test/syntax/ruby/test_files.rb +14 -0
- data/test/syntax/test_syntax.rb +2 -0
- metadata +195 -187
- checksums.yaml +0 -7
@@ -2,8 +2,10 @@ module Regexp::Syntax
|
|
2
2
|
module Token
|
3
3
|
|
4
4
|
module UnicodeProperty
|
5
|
-
|
6
|
-
|
5
|
+
CharType_V190 = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
|
6
|
+
:lower, :print, :punct, :space, :upper, :word, :xdigit]
|
7
|
+
|
8
|
+
CharType_V250 = [:xposixpunct]
|
7
9
|
|
8
10
|
POSIX = [:any, :assigned, :newline]
|
9
11
|
|
@@ -44,9 +46,15 @@ module Regexp::Syntax
|
|
44
46
|
# These were merged (from Onigmo) in the branch for 2.2.0
|
45
47
|
Age_V220 = [:age_6_2, :age_6_3, :age_7_0]
|
46
48
|
|
47
|
-
|
49
|
+
Age_V230 = [:age_8_0]
|
50
|
+
|
51
|
+
Age_V240 = [:age_9_0]
|
52
|
+
|
53
|
+
Age_V250 = [:age_10_0]
|
54
|
+
|
55
|
+
Age = Age_V193 + Age_V200 + Age_V220 + Age_V230 + Age_V240 + Age_V250
|
48
56
|
|
49
|
-
|
57
|
+
Derived_V190 = [
|
50
58
|
:ascii_hex,
|
51
59
|
:alphabetic,
|
52
60
|
:cased,
|
@@ -100,7 +108,13 @@ module Regexp::Syntax
|
|
100
108
|
:xid_continue,
|
101
109
|
]
|
102
110
|
|
103
|
-
|
111
|
+
Derived_V250 = [
|
112
|
+
:regional_indicator
|
113
|
+
]
|
114
|
+
|
115
|
+
Derived = Derived_V190 + Derived_V250
|
116
|
+
|
117
|
+
Script_V190 = [
|
104
118
|
:script_arabic,
|
105
119
|
:script_imperial_aramaic,
|
106
120
|
:script_armenian,
|
@@ -197,9 +211,9 @@ module Regexp::Syntax
|
|
197
211
|
:script_unknown
|
198
212
|
]
|
199
213
|
|
200
|
-
|
214
|
+
Script_V193 = [:script_brahmi, :script_batak, :script_mandaic]
|
201
215
|
|
202
|
-
|
216
|
+
Script_V220 = [
|
203
217
|
:script_caucasian_albanian,
|
204
218
|
:script_bassa_vah,
|
205
219
|
:script_duployan,
|
@@ -225,6 +239,8 @@ module Regexp::Syntax
|
|
225
239
|
:script_warang_citi
|
226
240
|
]
|
227
241
|
|
242
|
+
Script = Script_V190 + Script_V193 + Script_V220
|
243
|
+
|
228
244
|
UnicodeBlock = [
|
229
245
|
:block_inalphabetic_presentation_forms,
|
230
246
|
:block_inarabic_presentation_forms_a,
|
@@ -253,7 +269,7 @@ module Regexp::Syntax
|
|
253
269
|
:block_incombining_half_marks,
|
254
270
|
:block_incontrol_pictures,
|
255
271
|
:block_incurrency_symbols,
|
256
|
-
:
|
272
|
+
:block_incyrillic_supplement,
|
257
273
|
:block_incyrillic,
|
258
274
|
:block_indevanagari,
|
259
275
|
:block_indingbats,
|
@@ -333,14 +349,28 @@ module Regexp::Syntax
|
|
333
349
|
:block_inyijing_hexagram_symbols,
|
334
350
|
]
|
335
351
|
|
336
|
-
|
337
|
-
|
352
|
+
Emoji = [
|
353
|
+
:emoji_any,
|
354
|
+
:emoji_component,
|
355
|
+
:emoji_modifier,
|
356
|
+
:emoji_modifier_base,
|
357
|
+
:emoji_presentation,
|
358
|
+
]
|
359
|
+
|
360
|
+
V190 = CharType_V190 + POSIX + Category::All + Derived_V190 + Script_V190 + UnicodeBlock
|
361
|
+
V193 = Age_V193 + Script_V193
|
338
362
|
|
339
363
|
V200 = Age_V200
|
340
364
|
|
341
|
-
V220 = Age_V220 +
|
365
|
+
V220 = Age_V220 + Script_V220
|
366
|
+
|
367
|
+
V230 = Age_V230
|
368
|
+
|
369
|
+
V240 = Age_V240
|
370
|
+
|
371
|
+
V250 = Age_V250 + CharType_V250 + Derived_V250 + Emoji
|
342
372
|
|
343
|
-
All = V190 + V193 + V200 + V220
|
373
|
+
All = V190 + V193 + V200 + V220 + V230 + V240 + V250
|
344
374
|
|
345
375
|
Type = :property
|
346
376
|
NonType = :nonproperty
|
@@ -1,88 +1,10 @@
|
|
1
|
+
# Ruby 1.8.x is no longer a supported runtime,
|
2
|
+
# but its regex features are still recognized.
|
3
|
+
#
|
4
|
+
# Aliases for the latest patch version are provided as 'ruby/n.n',
|
5
|
+
# e.g. 'ruby/1.9' refers to Ruby v1.9.3.
|
1
6
|
module Regexp::Syntax
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
# but its regex features are still recognized.)
|
6
|
-
'ruby/1.8.6',
|
7
|
-
'ruby/1.8.7',
|
8
|
-
|
9
|
-
# alias for the latest 1.8 implementation
|
10
|
-
'ruby/1.8',
|
11
|
-
|
12
|
-
# Ruby 1.9.x
|
13
|
-
'ruby/1.9.1',
|
14
|
-
'ruby/1.9.2',
|
15
|
-
'ruby/1.9.3',
|
16
|
-
|
17
|
-
# alias for the latest 1.9 implementation
|
18
|
-
'ruby/1.9',
|
19
|
-
|
20
|
-
# Ruby 2.0.x
|
21
|
-
'ruby/2.0.0',
|
22
|
-
|
23
|
-
# alias for the latest 2.0 implementations
|
24
|
-
'ruby/2.0',
|
25
|
-
|
26
|
-
# Ruby 2.1.x
|
27
|
-
'ruby/2.1.0',
|
28
|
-
'ruby/2.1.2',
|
29
|
-
'ruby/2.1.3',
|
30
|
-
'ruby/2.1.4',
|
31
|
-
'ruby/2.1.5',
|
32
|
-
'ruby/2.1.6',
|
33
|
-
'ruby/2.1.7',
|
34
|
-
'ruby/2.1.8',
|
35
|
-
'ruby/2.1.9',
|
36
|
-
'ruby/2.1.10',
|
37
|
-
|
38
|
-
# alias for the latest 2.1 implementations
|
39
|
-
'ruby/2.1',
|
40
|
-
|
41
|
-
# Ruby 2.2.x
|
42
|
-
'ruby/2.2.0',
|
43
|
-
'ruby/2.2.1',
|
44
|
-
'ruby/2.2.2',
|
45
|
-
'ruby/2.2.3',
|
46
|
-
'ruby/2.2.4',
|
47
|
-
'ruby/2.2.5',
|
48
|
-
'ruby/2.2.6',
|
49
|
-
'ruby/2.2.7',
|
50
|
-
'ruby/2.2.8',
|
51
|
-
'ruby/2.2.9',
|
52
|
-
|
53
|
-
# alias for the latest 2.2 implementations
|
54
|
-
'ruby/2.2',
|
55
|
-
|
56
|
-
# Ruby 2.3.x
|
57
|
-
'ruby/2.3.0',
|
58
|
-
'ruby/2.3.1',
|
59
|
-
'ruby/2.3.2',
|
60
|
-
'ruby/2.3.3',
|
61
|
-
'ruby/2.3.4',
|
62
|
-
'ruby/2.3.5',
|
63
|
-
'ruby/2.3.6',
|
64
|
-
|
65
|
-
# alias for the latest 2.3 implementation
|
66
|
-
'ruby/2.3',
|
67
|
-
|
68
|
-
# Ruby 2.4.x
|
69
|
-
'ruby/2.4.0',
|
70
|
-
'ruby/2.4.1',
|
71
|
-
'ruby/2.4.2',
|
72
|
-
'ruby/2.4.3',
|
73
|
-
|
74
|
-
# alias for the latest 2.4 implementation
|
75
|
-
'ruby/2.4',
|
76
|
-
|
77
|
-
# Ruby 2.5.x
|
78
|
-
'ruby/2.5.0',
|
79
|
-
|
80
|
-
# alias for the latest 2.5 implementation
|
81
|
-
'ruby/2.5',
|
82
|
-
]
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
Regexp::Syntax::VERSIONS.each do |version|
|
87
|
-
require File.expand_path("../#{version}", __FILE__)
|
7
|
+
version_file_paths = Dir[File.expand_path('../ruby/*.rb', __FILE__)]
|
8
|
+
version_file_paths.each { |path| require path }
|
9
|
+
VERSIONS = version_file_paths.map { |path| path[%r{(ruby/.*)\.rb}, 1] }
|
88
10
|
end
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -6,20 +6,24 @@ class ParserProperties < Test::Unit::TestCase
|
|
6
6
|
props = [
|
7
7
|
'Alnum',
|
8
8
|
'Alpha',
|
9
|
-
'Any',
|
10
9
|
'Ascii',
|
11
10
|
'Blank',
|
12
11
|
'Cntrl',
|
13
12
|
'Digit',
|
14
13
|
'Graph',
|
15
14
|
'Lower',
|
16
|
-
'Newline',
|
17
15
|
'Print',
|
18
16
|
'Punct',
|
19
17
|
'Space',
|
20
18
|
'Upper',
|
21
19
|
'Word',
|
22
20
|
'Xdigit',
|
21
|
+
'XPosixPunct',
|
22
|
+
|
23
|
+
'Newline',
|
24
|
+
|
25
|
+
'Any',
|
26
|
+
'Assigned',
|
23
27
|
|
24
28
|
'L',
|
25
29
|
'Letter',
|
@@ -144,6 +148,10 @@ class ParserProperties < Test::Unit::TestCase
|
|
144
148
|
'Age=5.1',
|
145
149
|
'Age=5.2',
|
146
150
|
'Age=6.0',
|
151
|
+
'Age=7.0',
|
152
|
+
'Age=8.0',
|
153
|
+
'Age=9.0',
|
154
|
+
'Age=10.0',
|
147
155
|
|
148
156
|
'ahex',
|
149
157
|
'ASCII_Hex_Digit',
|
@@ -264,6 +272,9 @@ class ParserProperties < Test::Unit::TestCase
|
|
264
272
|
|
265
273
|
'radical',
|
266
274
|
|
275
|
+
'ri',
|
276
|
+
'Regional_Indicator',
|
277
|
+
|
267
278
|
'sd',
|
268
279
|
'Soft_Dotted',
|
269
280
|
|
@@ -288,6 +299,12 @@ class ParserProperties < Test::Unit::TestCase
|
|
288
299
|
|
289
300
|
'xidc',
|
290
301
|
'XID_Continue',
|
302
|
+
|
303
|
+
'Emoji',
|
304
|
+
'Emoji_Component',
|
305
|
+
'Emoji_Modifier',
|
306
|
+
'Emoji_Modifier_Base',
|
307
|
+
'Emoji_Presentation',
|
291
308
|
]
|
292
309
|
|
293
310
|
modes.each do |mode|
|
@@ -295,7 +312,7 @@ class ParserProperties < Test::Unit::TestCase
|
|
295
312
|
|
296
313
|
props.each do |property|
|
297
314
|
define_method "test_parse_#{token_type}_#{property}" do
|
298
|
-
t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/
|
315
|
+
t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/2.5'
|
299
316
|
|
300
317
|
assert t.expressions.last.is_a?(UnicodeProperty::Base),
|
301
318
|
"Expected property, but got #{t.expressions.last.class.name}"
|
@@ -351,6 +368,20 @@ class ParserProperties < Test::Unit::TestCase
|
|
351
368
|
"Expected Script property, but got #{t.expressions[1].class.name}"
|
352
369
|
end
|
353
370
|
|
371
|
+
def test_parse_property_script_v193
|
372
|
+
t = RP.parse 'ab\p{Brahmi}cd', 'ruby/1.9.3'
|
373
|
+
|
374
|
+
assert t.expressions[1].is_a?(UnicodeProperty::Script),
|
375
|
+
"Expected Script property, but got #{t.expressions[1].class.name}"
|
376
|
+
end
|
377
|
+
|
378
|
+
def test_parse_property_script_v220
|
379
|
+
t = RP.parse 'ab\p{Caucasian_Albanian}cd', 'ruby/2.2'
|
380
|
+
|
381
|
+
assert t.expressions[1].is_a?(UnicodeProperty::Script),
|
382
|
+
"Expected Script property, but got #{t.expressions[1].class.name}"
|
383
|
+
end
|
384
|
+
|
354
385
|
def test_parse_property_block
|
355
386
|
t = RP.parse 'ab\p{InArmenian}cd', 'ruby/1.9'
|
356
387
|
|
data/test/scanner/test_all.rb
CHANGED
@@ -13,6 +13,12 @@ if RUBY_VERSION >= '2.0.0'
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
+
if RUBY_VERSION >= '2.5.0'
|
17
|
+
%w{emojis}.each do|tc|
|
18
|
+
require File.expand_path("../test_#{tc}", __FILE__)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
16
22
|
class TestRegexpScanner < Test::Unit::TestCase
|
17
23
|
|
18
24
|
def test_scanner_returns_an_array
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require File.expand_path("../../helpers", __FILE__)
|
2
|
+
|
3
|
+
class ScannerUnicodeEmojis < Test::Unit::TestCase
|
4
|
+
|
5
|
+
tests = {
|
6
|
+
'Emoji' => :emoji_any,
|
7
|
+
'Emoji_Component' => :emoji_component,
|
8
|
+
'Emoji_Modifier' => :emoji_modifier,
|
9
|
+
'Emoji_Modifier_Base' => :emoji_modifier_base,
|
10
|
+
'Emoji_Presentation' => :emoji_presentation,
|
11
|
+
}
|
12
|
+
|
13
|
+
tests.each_with_index do |(property, token), count|
|
14
|
+
define_method "test_scanner_property_#{token}_#{count}" do
|
15
|
+
tokens = RS.scan("a\\p{#{property}}c")
|
16
|
+
result = tokens.at(1)
|
17
|
+
|
18
|
+
assert_equal :property, result[0]
|
19
|
+
assert_equal token, result[1]
|
20
|
+
end
|
21
|
+
|
22
|
+
define_method "test_scanner_nonproperty_#{token}_#{count}" do
|
23
|
+
tokens = RS.scan("a\\P{#{property}}c")
|
24
|
+
result = tokens.at(1)
|
25
|
+
|
26
|
+
assert_equal :nonproperty, result[0]
|
27
|
+
assert_equal token, result[1]
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -5,20 +5,24 @@ class ScannerProperties < Test::Unit::TestCase
|
|
5
5
|
tests = {
|
6
6
|
'Alnum' => :alnum,
|
7
7
|
'Alpha' => :alpha,
|
8
|
-
'Any' => :any,
|
9
8
|
'Ascii' => :ascii,
|
10
9
|
'Blank' => :blank,
|
11
10
|
'Cntrl' => :cntrl,
|
12
11
|
'Digit' => :digit,
|
13
12
|
'Graph' => :graph,
|
14
13
|
'Lower' => :lower,
|
15
|
-
'Newline' => :newline,
|
16
14
|
'Print' => :print,
|
17
15
|
'Punct' => :punct,
|
18
16
|
'Space' => :space,
|
19
17
|
'Upper' => :upper,
|
20
18
|
'Word' => :word,
|
21
19
|
'Xdigit' => :xdigit,
|
20
|
+
'XPosixPunct' => :xposixpunct,
|
21
|
+
|
22
|
+
'Newline' => :newline,
|
23
|
+
|
24
|
+
'Any' => :any,
|
25
|
+
'Assigned' => :assigned,
|
22
26
|
|
23
27
|
'L' => :letter_any,
|
24
28
|
'Letter' => :letter_any,
|
@@ -147,6 +151,9 @@ class ScannerProperties < Test::Unit::TestCase
|
|
147
151
|
'Age=6.2' => :age_6_2,
|
148
152
|
'Age=6.3' => :age_6_3,
|
149
153
|
'Age=7.0' => :age_7_0,
|
154
|
+
'Age=8.0' => :age_8_0,
|
155
|
+
'Age=9.0' => :age_9_0,
|
156
|
+
'Age=10.0' => :age_10_0,
|
150
157
|
|
151
158
|
'ahex' => :ascii_hex,
|
152
159
|
'ASCII_Hex_Digit' => :ascii_hex,
|
@@ -267,6 +274,9 @@ class ScannerProperties < Test::Unit::TestCase
|
|
267
274
|
|
268
275
|
'radical' => :radical,
|
269
276
|
|
277
|
+
'ri' => :regional_indicator,
|
278
|
+
'Regional_Indicator' => :regional_indicator,
|
279
|
+
|
270
280
|
'sd' => :soft_dotted,
|
271
281
|
'Soft_Dotted' => :soft_dotted,
|
272
282
|
|