character_set 1.3.0-java → 1.6.0-java

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@
21
21
  85E,85E
22
22
  860,86A
23
23
  8A0,8B4
24
- 8B6,8BD
24
+ 8B6,8C7
25
25
  8D3,983
26
26
  985,98C
27
27
  98F,990
@@ -76,7 +76,7 @@ B35,B39
76
76
  B3C,B44
77
77
  B47,B48
78
78
  B4B,B4D
79
- B56,B57
79
+ B55,B57
80
80
  B5C,B5D
81
81
  B5F,B63
82
82
  B66,B77
@@ -120,15 +120,14 @@ CDE,CDE
120
120
  CE0,CE3
121
121
  CE6,CEF
122
122
  CF1,CF2
123
- D00,D03
124
- D05,D0C
123
+ D00,D0C
125
124
  D0E,D10
126
125
  D12,D44
127
126
  D46,D48
128
127
  D4A,D4F
129
128
  D54,D63
130
129
  D66,D7F
131
- D82,D83
130
+ D81,D83
132
131
  D85,D96
133
132
  D9A,DB1
134
133
  DB3,DBB
@@ -214,7 +213,7 @@ FCE,FDA
214
213
  1A7F,1A89
215
214
  1A90,1A99
216
215
  1AA0,1AAD
217
- 1AB0,1ABE
216
+ 1AB0,1AC0
218
217
  1B00,1B4B
219
218
  1B50,1B7C
220
219
  1B80,1BF3
@@ -252,7 +251,7 @@ FCE,FDA
252
251
  2440,244A
253
252
  2460,2B73
254
253
  2B76,2B95
255
- 2B98,2C2E
254
+ 2B97,2C2E
256
255
  2C30,2C5E
257
256
  2C60,2CF3
258
257
  2CF9,2D25
@@ -269,7 +268,7 @@ FCE,FDA
269
268
  2DC8,2DCE
270
269
  2DD0,2DD6
271
270
  2DD8,2DDE
272
- 2DE0,2E4F
271
+ 2DE0,2E52
273
272
  2E80,2E99
274
273
  2E9B,2EF3
275
274
  2F00,2FD5
@@ -279,18 +278,16 @@ FCE,FDA
279
278
  3099,30FF
280
279
  3105,312F
281
280
  3131,318E
282
- 3190,31BA
283
- 31C0,31E3
281
+ 3190,31E3
284
282
  31F0,321E
285
- 3220,4DB5
286
- 4DC0,9FEF
283
+ 3220,9FFC
287
284
  A000,A48C
288
285
  A490,A4C6
289
286
  A4D0,A62B
290
287
  A640,A6F7
291
288
  A700,A7BF
292
- A7C2,A7C6
293
- A7F7,A82B
289
+ A7C2,A7CA
290
+ A7F5,A82C
294
291
  A830,A839
295
292
  A840,A877
296
293
  A880,A8C5
@@ -310,7 +307,7 @@ AB09,AB0E
310
307
  AB11,AB16
311
308
  AB20,AB26
312
309
  AB28,AB2E
313
- AB30,AB67
310
+ AB30,AB6B
314
311
  AB70,ABED
315
312
  ABF0,ABF9
316
313
  AC00,D7A3
@@ -355,7 +352,7 @@ FFF9,FFFD
355
352
  10100,10102
356
353
  10107,10133
357
354
  10137,1018E
358
- 10190,1019B
355
+ 10190,1019C
359
356
  101A0,101A0
360
357
  101D0,101FD
361
358
  10280,1029C
@@ -415,8 +412,12 @@ FFF9,FFFD
415
412
  10CFA,10D27
416
413
  10D30,10D39
417
414
  10E60,10E7E
415
+ 10E80,10EA9
416
+ 10EAB,10EAD
417
+ 10EB0,10EB1
418
418
  10F00,10F27
419
419
  10F30,10F59
420
+ 10FB0,10FCB
420
421
  10FE0,10FF6
421
422
  11000,1104D
422
423
  11052,1106F
@@ -425,10 +426,9 @@ FFF9,FFFD
425
426
  110D0,110E8
426
427
  110F0,110F9
427
428
  11100,11134
428
- 11136,11146
429
+ 11136,11147
429
430
  11150,11176
430
- 11180,111CD
431
- 111D0,111DF
431
+ 11180,111DF
432
432
  111E1,111F4
433
433
  11200,11211
434
434
  11213,1123E
@@ -454,9 +454,8 @@ FFF9,FFFD
454
454
  1135D,11363
455
455
  11366,1136C
456
456
  11370,11374
457
- 11400,11459
458
- 1145B,1145B
459
- 1145D,1145F
457
+ 11400,1145B
458
+ 1145D,11461
460
459
  11480,114C7
461
460
  114D0,114D9
462
461
  11580,115B5
@@ -471,7 +470,14 @@ FFF9,FFFD
471
470
  11730,1173F
472
471
  11800,1183B
473
472
  118A0,118F2
474
- 118FF,118FF
473
+ 118FF,11906
474
+ 11909,11909
475
+ 1190C,11913
476
+ 11915,11916
477
+ 11918,11935
478
+ 11937,11938
479
+ 1193B,11946
480
+ 11950,11959
475
481
  119A0,119A7
476
482
  119AA,119D7
477
483
  119DA,119E4
@@ -499,6 +505,7 @@ FFF9,FFFD
499
505
  11D93,11D98
500
506
  11DA0,11DA9
501
507
  11EE0,11EF8
508
+ 11FB0,11FB0
502
509
  11FC0,11FF1
503
510
  11FFF,12399
504
511
  12400,1246E
@@ -522,9 +529,11 @@ FFF9,FFFD
522
529
  16F00,16F4A
523
530
  16F4F,16F87
524
531
  16F8F,16F9F
525
- 16FE0,16FE3
532
+ 16FE0,16FE4
533
+ 16FF0,16FF1
526
534
  17000,187F7
527
- 18800,18AF2
535
+ 18800,18CD5
536
+ 18D00,18D08
528
537
  1B000,1B11E
529
538
  1B150,1B152
530
539
  1B164,1B167
@@ -622,17 +631,15 @@ FFF9,FFFD
622
631
  1F0B1,1F0BF
623
632
  1F0C1,1F0CF
624
633
  1F0D1,1F0F5
625
- 1F100,1F10C
626
- 1F110,1F16C
627
- 1F170,1F1AC
634
+ 1F100,1F1AD
628
635
  1F1E6,1F202
629
636
  1F210,1F23B
630
637
  1F240,1F248
631
638
  1F250,1F251
632
639
  1F260,1F265
633
- 1F300,1F6D5
640
+ 1F300,1F6D7
634
641
  1F6E0,1F6EC
635
- 1F6F0,1F6FA
642
+ 1F6F0,1F6FC
636
643
  1F700,1F773
637
644
  1F780,1F7D8
638
645
  1F7E0,1F7EB
@@ -641,24 +648,28 @@ FFF9,FFFD
641
648
  1F850,1F859
642
649
  1F860,1F887
643
650
  1F890,1F8AD
644
- 1F900,1F90B
645
- 1F90D,1F971
646
- 1F973,1F976
647
- 1F97A,1F9A2
648
- 1F9A5,1F9AA
649
- 1F9AE,1F9CA
651
+ 1F8B0,1F8B1
652
+ 1F900,1F978
653
+ 1F97A,1F9CB
650
654
  1F9CD,1FA53
651
655
  1FA60,1FA6D
652
- 1FA70,1FA73
656
+ 1FA70,1FA74
653
657
  1FA78,1FA7A
654
- 1FA80,1FA82
655
- 1FA90,1FA95
656
- 20000,2A6D6
658
+ 1FA80,1FA86
659
+ 1FA90,1FAA8
660
+ 1FAB0,1FAB6
661
+ 1FAC0,1FAC2
662
+ 1FAD0,1FAD6
663
+ 1FB00,1FB92
664
+ 1FB94,1FBCA
665
+ 1FBF0,1FBF9
666
+ 20000,2A6DD
657
667
  2A700,2B734
658
668
  2B740,2B81D
659
669
  2B820,2CEA1
660
670
  2CEB0,2EBE0
661
671
  2F800,2FA1D
672
+ 30000,3134A
662
673
  E0001,E0001
663
674
  E0020,E007F
664
675
  E0100,E01EF
@@ -44,6 +44,7 @@ AE,AE
44
44
  2699,2699
45
45
  269B,269C
46
46
  26A0,26A1
47
+ 26A7,26A7
47
48
  26AA,26AB
48
49
  26B0,26B1
49
50
  26BD,26BE
@@ -130,22 +131,22 @@ AE,AE
130
131
  1F5FA,1F64F
131
132
  1F680,1F6C5
132
133
  1F6CB,1F6D2
133
- 1F6D5,1F6D5
134
+ 1F6D5,1F6D7
134
135
  1F6E0,1F6E5
135
136
  1F6E9,1F6E9
136
137
  1F6EB,1F6EC
137
138
  1F6F0,1F6F0
138
- 1F6F3,1F6FA
139
+ 1F6F3,1F6FC
139
140
  1F7E0,1F7EB
140
- 1F90D,1F93A
141
+ 1F90C,1F93A
141
142
  1F93C,1F945
142
- 1F947,1F971
143
- 1F973,1F976
144
- 1F97A,1F9A2
145
- 1F9A5,1F9AA
146
- 1F9AE,1F9CA
143
+ 1F947,1F978
144
+ 1F97A,1F9CB
147
145
  1F9CD,1F9FF
148
- 1FA70,1FA73
146
+ 1FA70,1FA74
149
147
  1FA78,1FA7A
150
- 1FA80,1FA82
151
- 1FA90,1FA95
148
+ 1FA80,1FA86
149
+ 1FA90,1FAA8
150
+ 1FAB0,1FAB6
151
+ 1FAC0,1FAC2
152
+ 1FAD0,1FAD6
@@ -22,6 +22,17 @@ class CharacterSet
22
22
  alias valid unicode
23
23
 
24
24
  def build_from_cps_file(path)
25
+ if defined?(Ractor) && Ractor.current != Ractor.main
26
+ raise <<-EOS.gsub(/^ */, '')
27
+ CharacterSet's predefined sets are lazy-loaded.
28
+ Pre-load them to use them in Ractors. E.g.:
29
+
30
+ CharacterSet.ascii # pre-load
31
+ Ractor.new { CharacterSet.ascii.size }.take # => 128
32
+ Ractor.new { 'abc'.keep_character_set(:ascii) }.take # => 'abc'
33
+ EOS
34
+ end
35
+
25
36
  File.readlines(path).inject(new) do |set, line|
26
37
  range_start, range_end = line.split(',')
27
38
  set.merge((range_start.to_i(16))..(range_end.to_i(16)))
@@ -6,9 +6,9 @@ class CharacterSet
6
6
  new(Array(ranges).flat_map(&:to_a))
7
7
  end
8
8
 
9
- def of(string)
10
- raise ArgumentError, 'pass a String' unless string.is_a?(String)
11
- new(string.codepoints)
9
+ def of_string(str)
10
+ raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
11
+ str.codepoints.each_with_object(new) { |cp, set| set << cp }
12
12
  end
13
13
  end
14
14
 
@@ -31,7 +31,7 @@ class CharacterSet
31
31
  end
32
32
 
33
33
  def ranges
34
- CharacterSet.require_optional_dependency('range_compressor')
34
+ CharacterSet.require_optional_dependency('range_compressor', __method__)
35
35
  RangeCompressor.compress(self)
36
36
  end
37
37
 
@@ -121,10 +121,9 @@ class CharacterSet
121
121
  end
122
122
 
123
123
  def make_new_str(original, &block)
124
- new_string = str!(original)
124
+ str!(original)
125
125
  .each_codepoint
126
126
  .each_with_object(''.encode(original.encoding), &block)
127
- original.tainted? ? new_string.taint : new_string
128
127
  end
129
128
  end
130
129
  end
@@ -1,7 +1,9 @@
1
1
  class CharacterSet
2
2
  module RubyFallback
3
3
  module SetMethods
4
- Enumerable.instance_methods.concat(%w[empty? length size]).each do |mthd|
4
+ (Enumerable.instance_methods -
5
+ %i[include? member? to_a] +
6
+ %i[empty? length size]).each do |mthd|
5
7
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
6
8
  def #{mthd}(*args, &block)
7
9
  @__set.#{mthd}(*args, &block)
@@ -9,7 +11,7 @@ class CharacterSet
9
11
  RUBY
10
12
  end
11
13
 
12
- %w[< <= > >= disjoint? intersect? proper_subset? proper_superset?
14
+ %i[< <= > >= disjoint? intersect? proper_subset? proper_superset?
13
15
  subset? superset?].each do |mthd|
14
16
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
15
17
  def #{mthd}(enum, &block)
@@ -21,8 +23,8 @@ class CharacterSet
21
23
  RUBY
22
24
  end
23
25
 
24
- %w[<< === add add? clear collect! delete delete? delete_if
25
- each filter! hash include? map! member? keep_if reject!
26
+ %i[<< add add? clear collect! delete delete? delete_if
27
+ each filter! map! keep_if reject!
26
28
  select! subtract].each do |mthd|
27
29
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
28
30
  def #{mthd}(*args, &block)
@@ -32,22 +34,22 @@ class CharacterSet
32
34
  RUBY
33
35
  end
34
36
 
35
- %w[& + - ^ | difference intersection union].each do |mthd|
37
+ # revert if https://github.com/knu/sorted_set/issues/2 is resolved
38
+ %i[=== include? member?].each do |mthd|
36
39
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
37
- def #{mthd}(enum, &block)
38
- if enum.respond_to?(:map)
39
- enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
40
- end
41
- self.class.new(@__set.#{mthd}(enum, &block).to_a)
40
+ def #{mthd}(*args, &block)
41
+ !!@__set.#{mthd}(*args, &block)
42
42
  end
43
43
  RUBY
44
44
  end
45
45
 
46
- %w[taint untaint].each do |mthd|
46
+ %i[& + - ^ | difference intersection union].each do |mthd|
47
47
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
48
- def #{mthd}
49
- @__set.#{mthd}
50
- super
48
+ def #{mthd}(enum, &block)
49
+ if enum.respond_to?(:map)
50
+ enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
51
+ end
52
+ self.class.new(@__set.#{mthd}(enum, &block).to_a)
51
53
  end
52
54
  RUBY
53
55
  end
@@ -81,7 +83,13 @@ class CharacterSet
81
83
 
82
84
  def eql?(other)
83
85
  return false unless other.is_a?(self.class)
84
- @__set.eql?(other.instance_variable_get(:@__set))
86
+ # revert if https://github.com/knu/sorted_set/issues/3 is resolved
87
+ hash == other.hash
88
+ end
89
+
90
+ # revert if https://github.com/knu/sorted_set/issues/3 is resolved
91
+ def hash
92
+ @__set.to_a.hash
85
93
  end
86
94
 
87
95
  def initialize_dup(orig)
@@ -1,4 +1,8 @@
1
- require 'set'
1
+ if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
2
+ require 'sorted_set'
3
+ else
4
+ require 'set'
5
+ end
2
6
  require 'character_set/ruby_fallback/set_methods'
3
7
  require 'character_set/ruby_fallback/character_set_methods'
4
8
 
@@ -22,13 +22,14 @@ class CharacterSet
22
22
 
23
23
  # Allow some methods to take an Enum just as well as another CharacterSet.
24
24
  # Tested by ruby-spec.
25
- %w[& + - ^ | difference intersection subtract union].each do |method|
25
+ %w[& + - ^ | difference disjoint? intersect? intersection
26
+ subtract union].each do |method|
26
27
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
27
28
  def #{method}(arg)
28
29
  if arg.is_a?(CharacterSet)
29
- super
30
+ super(arg)
30
31
  elsif arg.respond_to?(:each)
31
- super(CharacterSet.new(arg.to_a))
32
+ super(self.class.new(arg.to_a))
32
33
  else
33
34
  raise ArgumentError, 'pass an enumerable'
34
35
  end
@@ -15,6 +15,12 @@ class CharacterSet
15
15
  new(Array(args))
16
16
  end
17
17
 
18
+ def of(*args)
19
+ args.map do |arg|
20
+ arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
21
+ end.reduce(:merge) || new
22
+ end
23
+
18
24
  def parse(string)
19
25
  codepoints = Parser.codepoints_from_bracket_expression(string)
20
26
  result = new(codepoints)
@@ -22,33 +28,29 @@ class CharacterSet
22
28
  end
23
29
 
24
30
  def of_property(property_name)
25
- require_optional_dependency('regexp_property_values')
31
+ require_optional_dependency('regexp_property_values', __method__)
26
32
 
27
33
  property = RegexpPropertyValues[property_name.to_s]
28
34
  from_ranges(*property.matched_ranges)
29
35
  end
30
36
 
31
37
  def of_regexp(regexp)
32
- require_optional_dependency('regexp_parser')
38
+ require_optional_dependency('regexp_parser', __method__)
33
39
 
34
40
  root = ::Regexp::Parser.parse(regexp)
35
41
  of_expression(root)
36
42
  end
37
43
 
38
44
  def of_expression(expression)
39
- ExpressionConverter.convert(expression)
45
+ ExpressionConverter.convert(expression, self)
40
46
  end
41
47
 
42
- def require_optional_dependency(name)
48
+ def require_optional_dependency(name, method)
43
49
  required_optional_dependencies[name] ||= begin
44
50
  require name
45
51
  true
46
52
  rescue ::LoadError
47
- entry_point = caller_locations.reverse.find do |loc|
48
- loc.absolute_path.to_s.include?('/lib/character_set')
49
- end
50
- method = entry_point && entry_point.label
51
- raise LoadError, 'You must the install the optional dependency '\
53
+ raise LoadError, 'You must install the optional dependency '\
52
54
  "'\#{name}' to use the method `\#{method}'."
53
55
  end
54
56
  end
@@ -86,10 +88,22 @@ class CharacterSet
86
88
  Writer.write(ranges, opts, &block)
87
89
  end
88
90
 
91
+ def to_s_with_surrogate_ranges
92
+ Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
93
+ end
94
+
89
95
  def to_s_with_surrogate_alternation
90
96
  Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
91
97
  end
92
98
 
99
+ def secure_token(length = 32)
100
+ CharacterSet.require_optional_dependency('securerandom', __method__)
101
+ cps = to_a
102
+ len = cps.count
103
+ 1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
104
+ end
105
+ alias random_token secure_token
106
+
93
107
  def inspect
94
108
  len = length
95
109
  "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
@@ -151,7 +165,7 @@ class CharacterSet
151
165
  end
152
166
 
153
167
  def divide(&func)
154
- require 'set'
168
+ CharacterSet.require_optional_dependency('set', __method__)
155
169
  Set.new(to_a).divide(&func)
156
170
  end
157
171
  RUBY
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.3.0'
2
+ VERSION = '1.6.0'
3
3
  end
@@ -1,37 +1,108 @@
1
1
  class CharacterSet
2
2
  module Writer
3
- module_function
4
-
5
- def write(codepoint_ranges, opts = {}, &block)
6
- content = codepoint_ranges.map do |range|
7
- if range.size > 2 && opts[:abbreviate] != false
8
- range.minmax.map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
9
- else
10
- range.map { |cp| Character.new(cp).escape(opts, &block) }.join
3
+ class << self
4
+ def write(codepoint_ranges, opts = {}, &block)
5
+ content = codepoint_ranges.map do |range|
6
+ if range.size > 2 && opts[:abbreviate] != false
7
+ bounds = [range.min, range.max]
8
+ bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
9
+ else
10
+ range.map { |cp| write_codepoint(cp, opts, &block) }.join
11
+ end
12
+ end.join
13
+ opts[:in_brackets] ? "[#{content}]" : content
14
+ end
15
+
16
+ def write_codepoint(codepoint, opts = {}, &block)
17
+ Character.new(codepoint).escape(opts, &block)
18
+ end
19
+
20
+ def write_surrogate_ranges(bmp_ranges, astral_ranges)
21
+ astral_branches = surrogate_range_expressions(astral_ranges)
22
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
23
+ end
24
+
25
+ def write_surrogate_alternation(bmp_ranges, astral_ranges)
26
+ astral_branches = surrogate_pairs(astral_ranges)
27
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
28
+ end
29
+
30
+ private
31
+
32
+ def surrogate_range_expressions(astral_ranges)
33
+ compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
34
+ [hi_ranges, lo_ranges].map do |ranges|
35
+ use_brackets = ranges.size > 1 || ranges.first.size > 1
36
+ write(ranges, format: :js, in_brackets: use_brackets)
37
+ end.join
11
38
  end
12
- end.join
13
- opts[:in_brackets] ? "[#{content}]" : content
14
- end
39
+ end
40
+
41
+ def compressed_surrogate_range_pairs(astral_ranges)
42
+ halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
15
43
 
16
- def write_surrogate_alternation(bmp_ranges, astral_ranges)
17
- bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
18
- if astral_ranges.empty?
19
- bmp_set
20
- else
21
- surrogate_pairs = surrogate_pairs(astral_ranges)
22
- "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + surrogate_pairs) * '|'})"
44
+ # compress high surrogate codepoint ranges with common low range half
45
+ with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
46
+ hi_ranges = pairs.map(&:first)
47
+ compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
48
+ prev = arr.last
49
+ if prev.nil? || prev.max + 1 < range.min # first or gap
50
+ arr << range
51
+ else # continuous codepoints, expand previous range
52
+ arr[-1] = (prev.min)..(range.max)
53
+ end
54
+ end
55
+ [compressed_hi_ranges, lo_range]
56
+ end
57
+
58
+ # compress low surrogate codepoint ranges with common high ranges
59
+ with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
60
+ (hash[hi_ranges] ||= []) << lo_range
61
+ end
23
62
  end
24
- end
25
63
 
26
- def surrogate_pairs(astral_ranges)
27
- astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
28
- end
64
+ def surrogate_half_ranges(astral_range)
65
+ hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
66
+ hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
67
+ hi_count = 1 + hi_max - hi_min
68
+ return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
69
+
70
+ ranges = []
71
+
72
+ # first high surrogate might be partially covered (if lo_min > 0xDC00)
73
+ ranges << [hi_min..hi_min, lo_min..0xDFFF]
74
+
75
+ # any high surrogates in between are fully covered
76
+ ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
29
77
 
30
- def surrogate_pair(astral_codepoint)
31
- base = astral_codepoint - 0x10000
32
- high = ((base / 1024).floor + 0xD800).to_s(16)
33
- low = (base % 1024 + 0xDC00).to_s(16)
34
- "\\u#{high}\\u#{low}"
78
+ # last high surrogate might be partially covered (if lo_max < 0xDFFF)
79
+ ranges << [hi_max..hi_max, 0xDC00..lo_max]
80
+
81
+ ranges
82
+ end
83
+
84
+ def surrogate_pair_codepoints(astral_codepoint)
85
+ base = astral_codepoint - 0x10000
86
+ high = base / 1024 + 0xD800
87
+ low = base % 1024 + 0xDC00
88
+ [high, low]
89
+ end
90
+
91
+ def bmp_set_with_alternatives(bmp_ranges, alternatives)
92
+ bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
93
+ return bmp_set if alternatives.empty? && bmp_ranges.any?
94
+
95
+ "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
96
+ end
97
+
98
+ def surrogate_pairs(astral_ranges)
99
+ astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
100
+ end
101
+
102
+ def surrogate_pair(astral_codepoint)
103
+ surrogate_pair_codepoints(astral_codepoint)
104
+ .map { |half| write_codepoint(half, format: :js) }.join
105
+ end
35
106
  end
36
107
  end
37
108
  end