regexp_parser 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V230 < Regexp::Syntax::Ruby::V22
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V230
11
+ implements :nonproperty, UnicodeProperty::V230
9
12
  end
10
13
  end
11
14
 
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V240 < Regexp::Syntax::Ruby::V23
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V240
11
+ implements :nonproperty, UnicodeProperty::V240
9
12
  end
10
13
  end
11
14
 
@@ -6,6 +6,9 @@ module Regexp::Syntax
6
6
  class V250 < Regexp::Syntax::Ruby::V24
7
7
  def initialize
8
8
  super
9
+
10
+ implements :property, UnicodeProperty::V250
11
+ implements :nonproperty, UnicodeProperty::V250
9
12
  end
10
13
  end
11
14
 
@@ -0,0 +1,13 @@
1
+ require File.expand_path('../2.5', __FILE__)
2
+
3
+ module Regexp::Syntax
4
+ module Ruby
5
+
6
+ class V260 < Regexp::Syntax::Ruby::V25
7
+ def initialize
8
+ super
9
+ end
10
+ end
11
+
12
+ end
13
+ end
@@ -0,0 +1,8 @@
1
+ require File.expand_path('../2.6.0', __FILE__)
2
+
3
+ module Regexp::Syntax
4
+ module Ruby
5
+ # uses the latest 2.6 release
6
+ class V26 < Regexp::Syntax::Ruby::V260; end
7
+ end
8
+ end
@@ -2,8 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
 
4
4
  module UnicodeProperty
5
- CharType = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph, :lower,
6
- :print, :punct, :space, :upper, :word, :xdigit]
5
+ CharType_V190 = [:alnum, :alpha, :ascii, :blank, :cntrl, :digit, :graph,
6
+ :lower, :print, :punct, :space, :upper, :word, :xdigit]
7
+
8
+ CharType_V250 = [:xposixpunct]
7
9
 
8
10
  POSIX = [:any, :assigned, :newline]
9
11
 
@@ -44,9 +46,15 @@ module Regexp::Syntax
44
46
  # These were merged (from Onigmo) in the branch for 2.2.0
45
47
  Age_V220 = [:age_6_2, :age_6_3, :age_7_0]
46
48
 
47
- Age = Age_V193 + Age_V200 + Age_V220
49
+ Age_V230 = [:age_8_0]
50
+
51
+ Age_V240 = [:age_9_0]
52
+
53
+ Age_V250 = [:age_10_0]
54
+
55
+ Age = Age_V193 + Age_V200 + Age_V220 + Age_V230 + Age_V240 + Age_V250
48
56
 
49
- Derived = [
57
+ Derived_V190 = [
50
58
  :ascii_hex,
51
59
  :alphabetic,
52
60
  :cased,
@@ -100,7 +108,13 @@ module Regexp::Syntax
100
108
  :xid_continue,
101
109
  ]
102
110
 
103
- Script =[
111
+ Derived_V250 = [
112
+ :regional_indicator
113
+ ]
114
+
115
+ Derived = Derived_V190 + Derived_V250
116
+
117
+ Script_V190 = [
104
118
  :script_arabic,
105
119
  :script_imperial_aramaic,
106
120
  :script_armenian,
@@ -197,9 +211,9 @@ module Regexp::Syntax
197
211
  :script_unknown
198
212
  ]
199
213
 
200
- Script_6_0 = [:script_brahmi, :script_batak, :script_mandaic]
214
+ Script_V193 = [:script_brahmi, :script_batak, :script_mandaic]
201
215
 
202
- Script_7_0 = [
216
+ Script_V220 = [
203
217
  :script_caucasian_albanian,
204
218
  :script_bassa_vah,
205
219
  :script_duployan,
@@ -225,6 +239,8 @@ module Regexp::Syntax
225
239
  :script_warang_citi
226
240
  ]
227
241
 
242
+ Script = Script_V190 + Script_V193 + Script_V220
243
+
228
244
  UnicodeBlock = [
229
245
  :block_inalphabetic_presentation_forms,
230
246
  :block_inarabic_presentation_forms_a,
@@ -253,7 +269,7 @@ module Regexp::Syntax
253
269
  :block_incombining_half_marks,
254
270
  :block_incontrol_pictures,
255
271
  :block_incurrency_symbols,
256
- :block_incyrillic_supplementary,
272
+ :block_incyrillic_supplement,
257
273
  :block_incyrillic,
258
274
  :block_indevanagari,
259
275
  :block_indingbats,
@@ -333,14 +349,28 @@ module Regexp::Syntax
333
349
  :block_inyijing_hexagram_symbols,
334
350
  ]
335
351
 
336
- V190 = CharType + POSIX + Category::All + Derived + Script + UnicodeBlock
337
- V193 = Age_V193 + Script_6_0
352
+ Emoji = [
353
+ :emoji_any,
354
+ :emoji_component,
355
+ :emoji_modifier,
356
+ :emoji_modifier_base,
357
+ :emoji_presentation,
358
+ ]
359
+
360
+ V190 = CharType_V190 + POSIX + Category::All + Derived_V190 + Script_V190 + UnicodeBlock
361
+ V193 = Age_V193 + Script_V193
338
362
 
339
363
  V200 = Age_V200
340
364
 
341
- V220 = Age_V220 + Script_7_0
365
+ V220 = Age_V220 + Script_V220
366
+
367
+ V230 = Age_V230
368
+
369
+ V240 = Age_V240
370
+
371
+ V250 = Age_V250 + CharType_V250 + Derived_V250 + Emoji
342
372
 
343
- All = V190 + V193 + V200 + V220
373
+ All = V190 + V193 + V200 + V220 + V230 + V240 + V250
344
374
 
345
375
  Type = :property
346
376
  NonType = :nonproperty
@@ -1,88 +1,10 @@
1
+ # Ruby 1.8.x is no longer a supported runtime,
2
+ # but its regex features are still recognized.
3
+ #
4
+ # Aliases for the latest patch version are provided as 'ruby/n.n',
5
+ # e.g. 'ruby/1.9' refers to Ruby v1.9.3.
1
6
  module Regexp::Syntax
2
-
3
- VERSIONS = [
4
- # Ruby 1.8.x (NOTE: 1.8.6 is no longer a supported runtime,
5
- # but its regex features are still recognized.)
6
- 'ruby/1.8.6',
7
- 'ruby/1.8.7',
8
-
9
- # alias for the latest 1.8 implementation
10
- 'ruby/1.8',
11
-
12
- # Ruby 1.9.x
13
- 'ruby/1.9.1',
14
- 'ruby/1.9.2',
15
- 'ruby/1.9.3',
16
-
17
- # alias for the latest 1.9 implementation
18
- 'ruby/1.9',
19
-
20
- # Ruby 2.0.x
21
- 'ruby/2.0.0',
22
-
23
- # alias for the latest 2.0 implementations
24
- 'ruby/2.0',
25
-
26
- # Ruby 2.1.x
27
- 'ruby/2.1.0',
28
- 'ruby/2.1.2',
29
- 'ruby/2.1.3',
30
- 'ruby/2.1.4',
31
- 'ruby/2.1.5',
32
- 'ruby/2.1.6',
33
- 'ruby/2.1.7',
34
- 'ruby/2.1.8',
35
- 'ruby/2.1.9',
36
- 'ruby/2.1.10',
37
-
38
- # alias for the latest 2.1 implementations
39
- 'ruby/2.1',
40
-
41
- # Ruby 2.2.x
42
- 'ruby/2.2.0',
43
- 'ruby/2.2.1',
44
- 'ruby/2.2.2',
45
- 'ruby/2.2.3',
46
- 'ruby/2.2.4',
47
- 'ruby/2.2.5',
48
- 'ruby/2.2.6',
49
- 'ruby/2.2.7',
50
- 'ruby/2.2.8',
51
- 'ruby/2.2.9',
52
-
53
- # alias for the latest 2.2 implementations
54
- 'ruby/2.2',
55
-
56
- # Ruby 2.3.x
57
- 'ruby/2.3.0',
58
- 'ruby/2.3.1',
59
- 'ruby/2.3.2',
60
- 'ruby/2.3.3',
61
- 'ruby/2.3.4',
62
- 'ruby/2.3.5',
63
- 'ruby/2.3.6',
64
-
65
- # alias for the latest 2.3 implementation
66
- 'ruby/2.3',
67
-
68
- # Ruby 2.4.x
69
- 'ruby/2.4.0',
70
- 'ruby/2.4.1',
71
- 'ruby/2.4.2',
72
- 'ruby/2.4.3',
73
-
74
- # alias for the latest 2.4 implementation
75
- 'ruby/2.4',
76
-
77
- # Ruby 2.5.x
78
- 'ruby/2.5.0',
79
-
80
- # alias for the latest 2.5 implementation
81
- 'ruby/2.5',
82
- ]
83
-
84
- end
85
-
86
- Regexp::Syntax::VERSIONS.each do |version|
87
- require File.expand_path("../#{version}", __FILE__)
7
+ version_file_paths = Dir[File.expand_path('../ruby/*.rb', __FILE__)]
8
+ version_file_paths.each { |path| require path }
9
+ VERSIONS = version_file_paths.map { |path| path[%r{(ruby/.*)\.rb}, 1] }
88
10
  end
@@ -16,7 +16,7 @@ module Regexp::Syntax
16
16
 
17
17
  class UnknownSyntaxNameError < SyntaxError
18
18
  def initialize(name)
19
- super "Unknown syntax name '#{name}'. Forgot to add it to Regexp::Syntax::VERSIONS?"
19
+ super "Unknown syntax name '#{name}'."
20
20
  end
21
21
  end
22
22
 
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '0.4.9'
3
+ VERSION = '0.4.10'
4
4
  end
5
5
  end
@@ -6,20 +6,24 @@ class ParserProperties < Test::Unit::TestCase
6
6
  props = [
7
7
  'Alnum',
8
8
  'Alpha',
9
- 'Any',
10
9
  'Ascii',
11
10
  'Blank',
12
11
  'Cntrl',
13
12
  'Digit',
14
13
  'Graph',
15
14
  'Lower',
16
- 'Newline',
17
15
  'Print',
18
16
  'Punct',
19
17
  'Space',
20
18
  'Upper',
21
19
  'Word',
22
20
  'Xdigit',
21
+ 'XPosixPunct',
22
+
23
+ 'Newline',
24
+
25
+ 'Any',
26
+ 'Assigned',
23
27
 
24
28
  'L',
25
29
  'Letter',
@@ -144,6 +148,10 @@ class ParserProperties < Test::Unit::TestCase
144
148
  'Age=5.1',
145
149
  'Age=5.2',
146
150
  'Age=6.0',
151
+ 'Age=7.0',
152
+ 'Age=8.0',
153
+ 'Age=9.0',
154
+ 'Age=10.0',
147
155
 
148
156
  'ahex',
149
157
  'ASCII_Hex_Digit',
@@ -264,6 +272,9 @@ class ParserProperties < Test::Unit::TestCase
264
272
 
265
273
  'radical',
266
274
 
275
+ 'ri',
276
+ 'Regional_Indicator',
277
+
267
278
  'sd',
268
279
  'Soft_Dotted',
269
280
 
@@ -288,6 +299,12 @@ class ParserProperties < Test::Unit::TestCase
288
299
 
289
300
  'xidc',
290
301
  'XID_Continue',
302
+
303
+ 'Emoji',
304
+ 'Emoji_Component',
305
+ 'Emoji_Modifier',
306
+ 'Emoji_Modifier_Base',
307
+ 'Emoji_Presentation',
291
308
  ]
292
309
 
293
310
  modes.each do |mode|
@@ -295,7 +312,7 @@ class ParserProperties < Test::Unit::TestCase
295
312
 
296
313
  props.each do |property|
297
314
  define_method "test_parse_#{token_type}_#{property}" do
298
- t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/1.9'
315
+ t = RP.parse "ab\\#{mode}{#{property}}", 'ruby/2.5'
299
316
 
300
317
  assert t.expressions.last.is_a?(UnicodeProperty::Base),
301
318
  "Expected property, but got #{t.expressions.last.class.name}"
@@ -351,6 +368,20 @@ class ParserProperties < Test::Unit::TestCase
351
368
  "Expected Script property, but got #{t.expressions[1].class.name}"
352
369
  end
353
370
 
371
+ def test_parse_property_script_v193
372
+ t = RP.parse 'ab\p{Brahmi}cd', 'ruby/1.9.3'
373
+
374
+ assert t.expressions[1].is_a?(UnicodeProperty::Script),
375
+ "Expected Script property, but got #{t.expressions[1].class.name}"
376
+ end
377
+
378
+ def test_parse_property_script_v220
379
+ t = RP.parse 'ab\p{Caucasian_Albanian}cd', 'ruby/2.2'
380
+
381
+ assert t.expressions[1].is_a?(UnicodeProperty::Script),
382
+ "Expected Script property, but got #{t.expressions[1].class.name}"
383
+ end
384
+
354
385
  def test_parse_property_block
355
386
  t = RP.parse 'ab\p{InArmenian}cd', 'ruby/1.9'
356
387
 
@@ -13,6 +13,12 @@ if RUBY_VERSION >= '2.0.0'
13
13
  end
14
14
  end
15
15
 
16
+ if RUBY_VERSION >= '2.5.0'
17
+ %w{emojis}.each do|tc|
18
+ require File.expand_path("../test_#{tc}", __FILE__)
19
+ end
20
+ end
21
+
16
22
  class TestRegexpScanner < Test::Unit::TestCase
17
23
 
18
24
  def test_scanner_returns_an_array
@@ -0,0 +1,31 @@
1
+ require File.expand_path("../../helpers", __FILE__)
2
+
3
+ class ScannerUnicodeEmojis < Test::Unit::TestCase
4
+
5
+ tests = {
6
+ 'Emoji' => :emoji_any,
7
+ 'Emoji_Component' => :emoji_component,
8
+ 'Emoji_Modifier' => :emoji_modifier,
9
+ 'Emoji_Modifier_Base' => :emoji_modifier_base,
10
+ 'Emoji_Presentation' => :emoji_presentation,
11
+ }
12
+
13
+ tests.each_with_index do |(property, token), count|
14
+ define_method "test_scanner_property_#{token}_#{count}" do
15
+ tokens = RS.scan("a\\p{#{property}}c")
16
+ result = tokens.at(1)
17
+
18
+ assert_equal :property, result[0]
19
+ assert_equal token, result[1]
20
+ end
21
+
22
+ define_method "test_scanner_nonproperty_#{token}_#{count}" do
23
+ tokens = RS.scan("a\\P{#{property}}c")
24
+ result = tokens.at(1)
25
+
26
+ assert_equal :nonproperty, result[0]
27
+ assert_equal token, result[1]
28
+ end
29
+ end
30
+
31
+ end
@@ -5,20 +5,24 @@ class ScannerProperties < Test::Unit::TestCase
5
5
  tests = {
6
6
  'Alnum' => :alnum,
7
7
  'Alpha' => :alpha,
8
- 'Any' => :any,
9
8
  'Ascii' => :ascii,
10
9
  'Blank' => :blank,
11
10
  'Cntrl' => :cntrl,
12
11
  'Digit' => :digit,
13
12
  'Graph' => :graph,
14
13
  'Lower' => :lower,
15
- 'Newline' => :newline,
16
14
  'Print' => :print,
17
15
  'Punct' => :punct,
18
16
  'Space' => :space,
19
17
  'Upper' => :upper,
20
18
  'Word' => :word,
21
19
  'Xdigit' => :xdigit,
20
+ 'XPosixPunct' => :xposixpunct,
21
+
22
+ 'Newline' => :newline,
23
+
24
+ 'Any' => :any,
25
+ 'Assigned' => :assigned,
22
26
 
23
27
  'L' => :letter_any,
24
28
  'Letter' => :letter_any,
@@ -147,6 +151,9 @@ class ScannerProperties < Test::Unit::TestCase
147
151
  'Age=6.2' => :age_6_2,
148
152
  'Age=6.3' => :age_6_3,
149
153
  'Age=7.0' => :age_7_0,
154
+ 'Age=8.0' => :age_8_0,
155
+ 'Age=9.0' => :age_9_0,
156
+ 'Age=10.0' => :age_10_0,
150
157
 
151
158
  'ahex' => :ascii_hex,
152
159
  'ASCII_Hex_Digit' => :ascii_hex,
@@ -267,6 +274,9 @@ class ScannerProperties < Test::Unit::TestCase
267
274
 
268
275
  'radical' => :radical,
269
276
 
277
+ 'ri' => :regional_indicator,
278
+ 'Regional_Indicator' => :regional_indicator,
279
+
270
280
  'sd' => :soft_dotted,
271
281
  'Soft_Dotted' => :soft_dotted,
272
282