regexp_parser 0.4.9 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V230 < Regexp::Syntax::Ruby::V22
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V230
11
+ implements :nonproperty, UnicodeProperty::V230
9
12
  end
10
13
  end
11
14
 
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V240 < Regexp::Syntax::Ruby::V23
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V240
11
+ implements :nonproperty, UnicodeProperty::V240
9
12
  end
10
13
  end
11
14
 
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V250 < Regexp::Syntax::Ruby::V24
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V250
11
+ implements :nonproperty, UnicodeProperty::V250
9
12
  end
10
13
  end
11
14
 
@@ -0,0 +1,13 @@
1
+ require File.expand_path('../2.5', __FILE__)
2
+
3
+ module Regexp::Syntax
4
+ module Ruby
5
+
6
+ class V260 < Regexp::Syntax::Ruby::V25
7
+ def initialize
8
+ super
9
+ end
10
+ end
11
+
12
+ end
13
+ end
@@ -0,0 +1,8 @@
1
+ require File.expand_path('../2.6.0', __FILE__)
2
+
3
+ module Regexp::Syntax
4
+ module Ruby
5
+ # uses the latest 2.6 release
6
+ class V26 < Regexp::Syntax::Ruby::V260; end
7
+ end
8
+ end
@@ -2,8 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
 
4
4
  module UnicodeProperty
5
- CharType = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph, :lower,
6
- :print, :punct, :space, :upper, :word, :xdigit]
5
+ CharType_V190 = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
6
+ :lower, :print, :punct, :space, :upper, :word, :xdigit]
7
+
8
+ CharType_V250 = [:xposixpunct]
7
9
 
8
10
  POSIX = [:any, :assigned, :newline]
9
11
 
@@ -44,9 +46,15 @@ module Regexp::Syntax
44
46
  # These were merged (from Onigmo) in the branch for 2.2.0
45
47
  Age_V220 = [:age_6_2, :age_6_3, :age_7_0]
46
48
 
47
- Age = Age_V193 + Age_V200 + Age_V220
49
+ Age_V230 = [:age_8_0]
50
+
51
+ Age_V240 = [:age_9_0]
52
+
53
+ Age_V250 = [:age_10_0]
54
+
55
+ Age = Age_V193 + Age_V200 + Age_V220 + Age_V230 + Age_V240 + Age_V250
48
56
 
49
- Derived = [
57
+ Derived_V190 = [
50
58
  :ascii_hex,
51
59
  :alphabetic,
52
60
  :cased,
@@ -100,7 +108,13 @@ module Regexp::Syntax
100
108
  :xid_continue,
101
109
  ]
102
110
 
103
- Script =[
111
+ Derived_V250 = [
112
+ :regional_indicator
113
+ ]
114
+
115
+ Derived = Derived_V190 + Derived_V250
116
+
117
+ Script_V190 = [
104
118
  :script_arabic,
105
119
  :script_imperial_aramaic,
106
120
  :script_armenian,
@@ -197,9 +211,9 @@ module Regexp::Syntax
197
211
  :script_unknown
198
212
  ]
199
213
 
200
- Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]
214
+ Script_V193 = [:script_brahmi, :script_batak, :script_mandaic]
201
215
 
202
- Script_7_0 = [
216
+ Script_V220 = [
203
217
  :script_caucasian_albanian,
204
218
  :script_bassa_vah,
205
219
  :script_duployan,
@@ -225,6 +239,8 @@ module Regexp::Syntax
225
239
  :script_warang_citi
226
240
  ]
227
241
 
242
+ Script = Script_V190 + Script_V193 + Script_V220
243
+
228
244
  UnicodeBlock = [
229
245
  :block_inalphabetic_presentation_forms,
230
246
  :block_inarabic_presentation_forms_a,
@@ -253,7 +269,7 @@ module Regexp::Syntax
253
269
  :block_incombining_half_marks,
254
270
  :block_incontrol_pictures,
255
271
  :block_incurrency_symbols,
256
- :block_incyrillic_supplementary,
272
+ :block_incyrillic_supplement,
257
273
  :block_incyrillic,
258
274
  :block_indevanagari,
259
275
  :block_indingbats,
@@ -333,14 +349,28 @@ module Regexp::Syntax
333
349
  :block_inyijing_hexagram_symbols,
334
350
  ]
335
351
 
336
- V190 = CharType + POSIX + Category::All + Derived + Script + UnicodeBlock
337
- V193 = Age_V193 + Script_6_0
352
+ Emoji = [
353
+ :emoji_any,
354
+ :emoji_component,
355
+ :emoji_modifier,
356
+ :emoji_modifier_base,
357
+ :emoji_presentation,
358
+ ]
359
+
360
+ V190 = CharType_V190 + POSIX + Category::All + Derived_V190 + Script_V190 + UnicodeBlock
361
+ V193 = Age_V193 + Script_V193
338
362
 
339
363
  V200 = Age_V200
340
364
 
341
- V220 = Age_V220 + Script_7_0
365
+ V220 = Age_V220 + Script_V220
366
+
367
+ V230 = Age_V230
368
+
369
+ V240 = Age_V240
370
+
371
+ V250 = Age_V250 + CharType_V250 + Derived_V250 + Emoji
342
372
 
343
- All = V190 + V193 + V200 + V220
373
+ All = V190 + V193 + V200 + V220 + V230 + V240 + V250
344
374
 
345
375
  Type = :property
346
376
  NonType = :nonproperty
@@ -1,88 +1,10 @@
1
+ # Ruby 1.8.x is no longer a supported runtime,
2
+ # but its regex features are still recognized.
3
+ #
4
+ # Aliases for the latest patch version are provided as 'ruby/n.n',
5
+ # e.g. 'ruby/1.9' refers to Ruby v1.9.3.
1
6
  module Regexp::Syntax
2
-
3
- VERSIONS = [
4
- # Ruby 1.8.x (NOTE: 1.8.6 is no longer a supported runtime,
5
- # but its regex features are still recognized.)
6
- 'ruby/1.8.6',
7
- 'ruby/1.8.7',
8
-
9
- # alias for the latest 1.8 implementation
10
- 'ruby/1.8',
11
-
12
- # Ruby 1.9.x
13
- 'ruby/1.9.1',
14
- 'ruby/1.9.2',
15
- 'ruby/1.9.3',
16
-
17
- # alias for the latest 1.9 implementation
18
- 'ruby/1.9',
19
-
20
- # Ruby 2.0.x
21
- 'ruby/2.0.0',
22
-
23
- # alias for the latest 2.0 implementations
24
- 'ruby/2.0',
25
-
26
- # Ruby 2.1.x
27
- 'ruby/2.1.0',
28
- 'ruby/2.1.2',
29
- 'ruby/2.1.3',
30
- 'ruby/2.1.4',
31
- 'ruby/2.1.5',
32
- 'ruby/2.1.6',
33
- 'ruby/2.1.7',
34
- 'ruby/2.1.8',
35
- 'ruby/2.1.9',
36
- 'ruby/2.1.10',
37
-
38
- # alias for the latest 2.1 implementations
39
- 'ruby/2.1',
40
-
41
- # Ruby 2.2.x
42
- 'ruby/2.2.0',
43
- 'ruby/2.2.1',
44
- 'ruby/2.2.2',
45
- 'ruby/2.2.3',
46
- 'ruby/2.2.4',
47
- 'ruby/2.2.5',
48
- 'ruby/2.2.6',
49
- 'ruby/2.2.7',
50
- 'ruby/2.2.8',
51
- 'ruby/2.2.9',
52
-
53
- # alias for the latest 2.2 implementations
54
- 'ruby/2.2',
55
-
56
- # Ruby 2.3.x
57
- 'ruby/2.3.0',
58
- 'ruby/2.3.1',
59
- 'ruby/2.3.2',
60
- 'ruby/2.3.3',
61
- 'ruby/2.3.4',
62
- 'ruby/2.3.5',
63
- 'ruby/2.3.6',
64
-
65
- # alias for the latest 2.3 implementation
66
- 'ruby/2.3',
67
-
68
- # Ruby 2.4.x
69
- 'ruby/2.4.0',
70
- 'ruby/2.4.1',
71
- 'ruby/2.4.2',
72
- 'ruby/2.4.3',
73
-
74
- # alias for the latest 2.4 implementation
75
- 'ruby/2.4',
76
-
77
- # Ruby 2.5.x
78
- 'ruby/2.5.0',
79
-
80
- # alias for the latest 2.5 implementation
81
- 'ruby/2.5',
82
- ]
83
-
84
- end
85
-
86
- Regexp::Syntax::VERSIONS.each do |version|
87
- require File.expand_path("../#{version}", __FILE__)
7
+ version_file_paths = Dir[File.expand_path('../ruby/*.rb', __FILE__)]
8
+ version_file_paths.each { |path| require path }
9
+ VERSIONS = version_file_paths.map { |path| path[%r{(ruby/.*)\.rb}, 1] }
88
10
  end
@@ -16,7 +16,7 @@ module Regexp::Syntax
16
16
 
17
17
  class UnknownSyntaxNameError < SyntaxError
18
18
  def initialize(name)
19
- super "Unknown syntax name '#{name}'. Forgot to add it to Regexp::Syntax::VERSIONS?"
19
+ super "Unknown syntax name '#{name}'."
20
20
  end
21
21
  end
22
22
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '0.4.9'
3
+ VERSION = '0.4.10'
4
4
  end
5
5
  end
@@ -6,20 +6,24 @@ class ParserProperties < Test::Unit::TestCase
6
6
  props = [
7
7
  'Alnum',
8
8
  'Alpha',
9
- 'Any',
10
9
  'Ascii',
11
10
  'Blank',
12
11
  'Cntrl',
13
12
  'Digit',
14
13
  'Graph',
15
14
  'Lower',
16
- 'Newline',
17
15
  'Print',
18
16
  'Punct',
19
17
  'Space',
20
18
  'Upper',
21
19
  'Word',
22
20
  'Xdigit',
21
+ 'XPosixPunct',
22
+
23
+ 'Newline',
24
+
25
+ 'Any',
26
+ 'Assigned',
23
27
 
24
28
  'L',
25
29
  'Letter',
@@ -144,6 +148,10 @@ class ParserProperties < Test::Unit::TestCase
144
148
  'Age=5.1',
145
149
  'Age=5.2',
146
150
  'Age=6.0',
151
+ 'Age=7.0',
152
+ 'Age=8.0',
153
+ 'Age=9.0',
154
+ 'Age=10.0',
147
155
 
148
156
  'ahex',
149
157
  'ASCII_Hex_Digit',
@@ -264,6 +272,9 @@ class ParserProperties < Test::Unit::TestCase
264
272
 
265
273
  'radical',
266
274
 
275
+ 'ri',
276
+ 'Regional_Indicator',
277
+
267
278
  'sd',
268
279
  'Soft_Dotted',
269
280
 
@@ -288,6 +299,12 @@ class ParserProperties < Test::Unit::TestCase
288
299
 
289
300
  'xidc',
290
301
  'XID_Continue',
302
+
303
+ 'Emoji',
304
+ 'Emoji_Component',
305
+ 'Emoji_Modifier',
306
+ 'Emoji_Modifier_Base',
307
+ 'Emoji_Presentation',
291
308
  ]
292
309
 
293
310
  modes.each do |mode|
@@ -295,7 +312,7 @@ class ParserProperties < Test::Unit::TestCase
295
312
 
296
313
  props.each do |property|
297
314
  define_method "test_parse_#{token_type}_#{property}" do
298
- t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/1.9'
315
+ t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/2.5'
299
316
 
300
317
  assert t.expressions.last.is_a?(UnicodeProperty::Base),
301
318
  "Expected property, but got #{t.expressions.last.class.name}"
@@ -351,6 +368,20 @@ class ParserProperties < Test::Unit::TestCase
351
368
  "Expected Script property, but got #{t.expressions[1].class.name}"
352
369
  end
353
370
 
371
+ def test_parse_property_script_v193
372
+ t = RP.parse 'ab\p{Brahmi}cd', 'ruby/1.9.3'
373
+
374
+ assert t.expressions[1].is_a?(UnicodeProperty::Script),
375
+ "Expected Script property, but got #{t.expressions[1].class.name}"
376
+ end
377
+
378
+ def test_parse_property_script_v220
379
+ t = RP.parse 'ab\p{Caucasian_Albanian}cd', 'ruby/2.2'
380
+
381
+ assert t.expressions[1].is_a?(UnicodeProperty::Script),
382
+ "Expected Script property, but got #{t.expressions[1].class.name}"
383
+ end
384
+
354
385
  def test_parse_property_block
355
386
  t = RP.parse 'ab\p{InArmenian}cd', 'ruby/1.9'
356
387
 
@@ -13,6 +13,12 @@ if RUBY_VERSION >= '2.0.0'
13
13
  end
14
14
  end
15
15
 
16
+ if RUBY_VERSION >= '2.5.0'
17
+ %w{emojis}.each do|tc|
18
+ require File.expand_path("../test_#{tc}", __FILE__)
19
+ end
20
+ end
21
+
16
22
  class TestRegexpScanner < Test::Unit::TestCase
17
23
 
18
24
  def test_scanner_returns_an_array
@@ -0,0 +1,31 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ScannerUnicodeEmojis < Test::Unit::TestCase
4
+
5
+ tests = {
6
+ 'Emoji' => :emoji_any,
7
+ 'Emoji_Component' => :emoji_component,
8
+ 'Emoji_Modifier' => :emoji_modifier,
9
+ 'Emoji_Modifier_Base' => :emoji_modifier_base,
10
+ 'Emoji_Presentation' => :emoji_presentation,
11
+ }
12
+
13
+ tests.each_with_index do |(property, token), count|
14
+ define_method "test_scanner_property_#{token}_#{count}" do
15
+ tokens = RS.scan("a\\p{#{property}}c")
16
+ result = tokens.at(1)
17
+
18
+ assert_equal :property, result[0]
19
+ assert_equal token, result[1]
20
+ end
21
+
22
+ define_method "test_scanner_nonproperty_#{token}_#{count}" do
23
+ tokens = RS.scan("a\\P{#{property}}c")
24
+ result = tokens.at(1)
25
+
26
+ assert_equal :nonproperty, result[0]
27
+ assert_equal token, result[1]
28
+ end
29
+ end
30
+
31
+ end
@@ -5,20 +5,24 @@ class ScannerProperties < Test::Unit::TestCase
5
5
  tests = {
6
6
  'Alnum' => :alnum,
7
7
  'Alpha' => :alpha,
8
- 'Any' => :any,
9
8
  'Ascii' => :ascii,
10
9
  'Blank' => :blank,
11
10
  'Cntrl' => :cntrl,
12
11
  'Digit' => :digit,
13
12
  'Graph' => :graph,
14
13
  'Lower' => :lower,
15
- 'Newline' => :newline,
16
14
  'Print' => :print,
17
15
  'Punct' => :punct,
18
16
  'Space' => :space,
19
17
  'Upper' => :upper,
20
18
  'Word' => :word,
21
19
  'Xdigit' => :xdigit,
20
+ 'XPosixPunct' => :xposixpunct,
21
+
22
+ 'Newline' => :newline,
23
+
24
+ 'Any' => :any,
25
+ 'Assigned' => :assigned,
22
26
 
23
27
  'L' => :letter_any,
24
28
  'Letter' => :letter_any,
@@ -147,6 +151,9 @@ class ScannerProperties < Test::Unit::TestCase
147
151
  'Age=6.2' => :age_6_2,
148
152
  'Age=6.3' => :age_6_3,
149
153
  'Age=7.0' => :age_7_0,
154
+ 'Age=8.0' => :age_8_0,
155
+ 'Age=9.0' => :age_9_0,
156
+ 'Age=10.0' => :age_10_0,
150
157
 
151
158
  'ahex' => :ascii_hex,
152
159
  'ASCII_Hex_Digit' => :ascii_hex,
@@ -267,6 +274,9 @@ class ScannerProperties < Test::Unit::TestCase
267
274
 
268
275
  'radical' => :radical,
269
276
 
277
+ 'ri' => :regional_indicator,
278
+ 'Regional_Indicator' => :regional_indicator,
279
+
270
280
  'sd' => :soft_dotted,
271
281
  'Soft_Dotted' => :soft_dotted,
272
282