character_set 1.2.0-java → 1.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/gouteur.yml +20 -0
  4. data/.github/workflows/lint.yml +29 -0
  5. data/.github/workflows/tests.yml +22 -0
  6. data/.gitignore +1 -0
  7. data/.gouteur.yml +2 -0
  8. data/.rubocop.yml +17 -0
  9. data/BENCHMARK.md +53 -17
  10. data/CHANGELOG.md +54 -0
  11. data/README.md +51 -12
  12. data/Rakefile +20 -18
  13. data/benchmarks/count_in.rb +13 -0
  14. data/benchmarks/delete_in.rb +1 -1
  15. data/benchmarks/scan.rb +13 -0
  16. data/benchmarks/shared.rb +5 -0
  17. data/benchmarks/z_add.rb +12 -0
  18. data/benchmarks/z_delete.rb +12 -0
  19. data/benchmarks/z_merge.rb +15 -0
  20. data/benchmarks/z_minmax.rb +12 -0
  21. data/bin/console +2 -0
  22. data/character_set.gemspec +17 -4
  23. data/ext/character_set/character_set.c +969 -415
  24. data/ext/character_set/unicode_casefold_table.h +44 -1
  25. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  26. data/lib/character_set/character.rb +1 -1
  27. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  28. data/lib/character_set/core_ext/string_ext.rb +3 -1
  29. data/lib/character_set/expression_converter.rb +41 -43
  30. data/lib/character_set/parser.rb +1 -1
  31. data/lib/character_set/predefined_sets/any.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  33. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  34. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  35. data/lib/character_set/predefined_sets/assigned.cps +677 -0
  36. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  37. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  38. data/lib/character_set/predefined_sets/emoji.cps +152 -0
  39. data/lib/character_set/predefined_sets/newline.cps +3 -0
  40. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  41. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  42. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  43. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  44. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  45. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  46. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  47. data/lib/character_set/predefined_sets.rb +25 -260
  48. data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
  49. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  50. data/lib/character_set/ruby_fallback.rb +5 -3
  51. data/lib/character_set/set_method_adapters.rb +4 -3
  52. data/lib/character_set/shared_methods.rb +69 -50
  53. data/lib/character_set/version.rb +1 -1
  54. data/lib/character_set/writer.rb +98 -27
  55. metadata +114 -17
  56. data/.travis.yml +0 -8
  57. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -0,0 +1,152 @@
1
+ 23,23
2
+ 2A,2A
3
+ 30,39
4
+ A9,A9
5
+ AE,AE
6
+ 203C,203C
7
+ 2049,2049
8
+ 2122,2122
9
+ 2139,2139
10
+ 2194,2199
11
+ 21A9,21AA
12
+ 231A,231B
13
+ 2328,2328
14
+ 23CF,23CF
15
+ 23E9,23F3
16
+ 23F8,23FA
17
+ 24C2,24C2
18
+ 25AA,25AB
19
+ 25B6,25B6
20
+ 25C0,25C0
21
+ 25FB,25FE
22
+ 2600,2604
23
+ 260E,260E
24
+ 2611,2611
25
+ 2614,2615
26
+ 2618,2618
27
+ 261D,261D
28
+ 2620,2620
29
+ 2622,2623
30
+ 2626,2626
31
+ 262A,262A
32
+ 262E,262F
33
+ 2638,263A
34
+ 2640,2640
35
+ 2642,2642
36
+ 2648,2653
37
+ 265F,2660
38
+ 2663,2663
39
+ 2665,2666
40
+ 2668,2668
41
+ 267B,267B
42
+ 267E,267F
43
+ 2692,2697
44
+ 2699,2699
45
+ 269B,269C
46
+ 26A0,26A1
47
+ 26A7,26A7
48
+ 26AA,26AB
49
+ 26B0,26B1
50
+ 26BD,26BE
51
+ 26C4,26C5
52
+ 26C8,26C8
53
+ 26CE,26CF
54
+ 26D1,26D1
55
+ 26D3,26D4
56
+ 26E9,26EA
57
+ 26F0,26F5
58
+ 26F7,26FA
59
+ 26FD,26FD
60
+ 2702,2702
61
+ 2705,2705
62
+ 2708,270D
63
+ 270F,270F
64
+ 2712,2712
65
+ 2714,2714
66
+ 2716,2716
67
+ 271D,271D
68
+ 2721,2721
69
+ 2728,2728
70
+ 2733,2734
71
+ 2744,2744
72
+ 2747,2747
73
+ 274C,274C
74
+ 274E,274E
75
+ 2753,2755
76
+ 2757,2757
77
+ 2763,2764
78
+ 2795,2797
79
+ 27A1,27A1
80
+ 27B0,27B0
81
+ 27BF,27BF
82
+ 2934,2935
83
+ 2B05,2B07
84
+ 2B1B,2B1C
85
+ 2B50,2B50
86
+ 2B55,2B55
87
+ 3030,3030
88
+ 303D,303D
89
+ 3297,3297
90
+ 3299,3299
91
+ 1F004,1F004
92
+ 1F0CF,1F0CF
93
+ 1F170,1F171
94
+ 1F17E,1F17F
95
+ 1F18E,1F18E
96
+ 1F191,1F19A
97
+ 1F1E6,1F1FF
98
+ 1F201,1F202
99
+ 1F21A,1F21A
100
+ 1F22F,1F22F
101
+ 1F232,1F23A
102
+ 1F250,1F251
103
+ 1F300,1F321
104
+ 1F324,1F393
105
+ 1F396,1F397
106
+ 1F399,1F39B
107
+ 1F39E,1F3F0
108
+ 1F3F3,1F3F5
109
+ 1F3F7,1F4FD
110
+ 1F4FF,1F53D
111
+ 1F549,1F54E
112
+ 1F550,1F567
113
+ 1F56F,1F570
114
+ 1F573,1F57A
115
+ 1F587,1F587
116
+ 1F58A,1F58D
117
+ 1F590,1F590
118
+ 1F595,1F596
119
+ 1F5A4,1F5A5
120
+ 1F5A8,1F5A8
121
+ 1F5B1,1F5B2
122
+ 1F5BC,1F5BC
123
+ 1F5C2,1F5C4
124
+ 1F5D1,1F5D3
125
+ 1F5DC,1F5DE
126
+ 1F5E1,1F5E1
127
+ 1F5E3,1F5E3
128
+ 1F5E8,1F5E8
129
+ 1F5EF,1F5EF
130
+ 1F5F3,1F5F3
131
+ 1F5FA,1F64F
132
+ 1F680,1F6C5
133
+ 1F6CB,1F6D2
134
+ 1F6D5,1F6D7
135
+ 1F6E0,1F6E5
136
+ 1F6E9,1F6E9
137
+ 1F6EB,1F6EC
138
+ 1F6F0,1F6F0
139
+ 1F6F3,1F6FC
140
+ 1F7E0,1F7EB
141
+ 1F90C,1F93A
142
+ 1F93C,1F945
143
+ 1F947,1F978
144
+ 1F97A,1F9CB
145
+ 1F9CD,1F9FF
146
+ 1FA70,1FA74
147
+ 1FA78,1FA7A
148
+ 1FA80,1FA86
149
+ 1FA90,1FAA8
150
+ 1FAB0,1FAB6
151
+ 1FAC0,1FAC2
152
+ 1FAD0,1FAD6
@@ -0,0 +1,3 @@
1
+ A,D
2
+ 85,85
3
+ 2028,2029
@@ -0,0 +1 @@
1
+ D800,DFFF
@@ -0,0 +1,2 @@
1
+ 0,D7FF
2
+ E000,10FFFF
@@ -0,0 +1,8 @@
1
+ 21,21
2
+ 24,24
3
+ 26,3B
4
+ 3D,3D
5
+ 3F,5A
6
+ 5F,5F
7
+ 61,7A
8
+ 7E,7E
@@ -0,0 +1,10 @@
1
+ 21,21
2
+ 24,24
3
+ 26,2E
4
+ 30,3B
5
+ 3D,3D
6
+ 41,5B
7
+ 5D,5D
8
+ 5F,5F
9
+ 61,7A
10
+ 7E,7E
@@ -0,0 +1,7 @@
1
+ 21,21
2
+ 24,3A
3
+ 3D,3D
4
+ 40,5A
5
+ 5F,5F
6
+ 61,7A
7
+ 7E,7E
@@ -0,0 +1,8 @@
1
+ 21,21
2
+ 24,24
3
+ 26,3B
4
+ 3D,3D
5
+ 3F,5A
6
+ 5F,5F
7
+ 61,7A
8
+ 7E,7E
@@ -0,0 +1,10 @@
1
+ 9,D
2
+ 20,20
3
+ 85,85
4
+ A0,A0
5
+ 1680,1680
6
+ 2000,200A
7
+ 2028,2029
8
+ 202F,202F
9
+ 205F,205F
10
+ 3000,3000
@@ -1,266 +1,31 @@
1
1
  class CharacterSet
2
2
  module PredefinedSets
3
- def ascii
4
- @ascii ||= from_ranges(0..0x7F).freeze
5
- end
6
-
7
- def ascii_alnum
8
- @ascii_alnum ||= from_ranges(0x30..0x39, 0x41..0x5A, 0x61..0x7A).freeze
9
- end
10
-
11
- def ascii_letters
12
- @ascii_letters ||= from_ranges(0x41..0x5A, 0x61..0x7A).freeze
13
- end
14
-
15
- # basic multilingual plane
16
- def bmp
17
- @bmp ||= from_ranges(0..0xD7FF, 0xE000..0xFFFF).freeze
18
- end
19
-
20
- # ./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
21
- def crypt
22
- @crypt ||= from_ranges(0x2E..0x5A, 0x61..0x7A).freeze
23
- end
24
-
25
- def newline
26
- @newline ||= from_ranges(0xA..0xD, 0x85..0x85, 0x2028..0x2029).freeze
27
- end
28
-
29
- def unicode
30
- @unicode ||= from_ranges(0..0xD7FF, 0xE000..0x10FFFF).freeze
31
- end
32
-
33
- def url_fragment
34
- @url_fragment ||= from_ranges(
35
- 0x21..0x21,
36
- 0x24..0x24,
37
- 0x26..0x3B,
38
- 0x3D..0x3D,
39
- 0x3F..0x5A,
40
- 0x5F..0x5F,
41
- 0x61..0x7A,
42
- 0x7E..0x7E
43
- ).freeze
44
- end
45
-
46
- def url_host
47
- @url_host ||= from_ranges(
48
- 0x21..0x21,
49
- 0x24..0x24,
50
- 0x26..0x2E,
51
- 0x30..0x3B,
52
- 0x3D..0x3D,
53
- 0x41..0x5B,
54
- 0x5D..0x5D,
55
- 0x5F..0x5F,
56
- 0x61..0x7A,
57
- 0x7E..0x7E
58
- ).freeze
59
- end
60
-
61
- def url_path
62
- @url_path ||= from_ranges(
63
- 0x21..0x21,
64
- 0x24..0x3A,
65
- 0x3D..0x3D,
66
- 0x40..0x5A,
67
- 0x5F..0x5F,
68
- 0x61..0x7A,
69
- 0x7E..0x7E
70
- ).freeze
71
- end
72
-
73
- def url_query
74
- @url_query ||= from_ranges(
75
- 0x21..0x21,
76
- 0x24..0x24,
77
- 0x26..0x3B,
78
- 0x3D..0x3D,
79
- 0x3F..0x5A,
80
- 0x5F..0x5F,
81
- 0x61..0x7A,
82
- 0x7E..0x7E
83
- ).freeze
84
- end
85
-
86
- def whitespace
87
- @whitespace ||= from_ranges(
88
- 0x9..0x9,
89
- 0xA..0xD,
90
- 0x20..0x20,
91
- 0x85..0x85,
92
- 0xA0..0xA0,
93
- 0x1680..0x1680,
94
- 0x180E..0x180E,
95
- 0x2000..0x200A,
96
- 0x2028..0x2029,
97
- 0x202F..0x202F,
98
- 0x205F..0x205F,
99
- 0x3000..0x3000
100
- ).freeze
101
- end
102
-
103
- def emoji
104
- @emoji ||= from_ranges(
105
- 0x23..0x23,
106
- 0x2A..0x2A,
107
- 0x30..0x39,
108
- 0xA9..0xA9,
109
- 0xAE..0xAE,
110
- 0x203C..0x203C,
111
- 0x2049..0x2049,
112
- 0x2122..0x2122,
113
- 0x2139..0x2139,
114
- 0x2194..0x2199,
115
- 0x21A9..0x21AA,
116
- 0x231A..0x231B,
117
- 0x2328..0x2328,
118
- 0x23CF..0x23CF,
119
- 0x23E9..0x23F3,
120
- 0x23F8..0x23FA,
121
- 0x24C2..0x24C2,
122
- 0x25AA..0x25AB,
123
- 0x25B6..0x25B6,
124
- 0x25C0..0x25C0,
125
- 0x25FB..0x25FE,
126
- 0x2600..0x2604,
127
- 0x260E..0x260E,
128
- 0x2611..0x2611,
129
- 0x2614..0x2615,
130
- 0x2618..0x2618,
131
- 0x261D..0x261D,
132
- 0x2620..0x2620,
133
- 0x2622..0x2623,
134
- 0x2626..0x2626,
135
- 0x262A..0x262A,
136
- 0x262E..0x262F,
137
- 0x2638..0x263A,
138
- 0x2640..0x2640,
139
- 0x2642..0x2642,
140
- 0x2648..0x2653,
141
- 0x2660..0x2660,
142
- 0x2663..0x2663,
143
- 0x2665..0x2666,
144
- 0x2668..0x2668,
145
- 0x267B..0x267B,
146
- 0x267F..0x267F,
147
- 0x2692..0x2697,
148
- 0x2699..0x2699,
149
- 0x269B..0x269C,
150
- 0x26A0..0x26A1,
151
- 0x26AA..0x26AB,
152
- 0x26B0..0x26B1,
153
- 0x26BD..0x26BE,
154
- 0x26C4..0x26C5,
155
- 0x26C8..0x26C8,
156
- 0x26CE..0x26CF,
157
- 0x26D1..0x26D1,
158
- 0x26D3..0x26D4,
159
- 0x26E9..0x26EA,
160
- 0x26F0..0x26F5,
161
- 0x26F7..0x26FA,
162
- 0x26FD..0x26FD,
163
- 0x2702..0x2702,
164
- 0x2705..0x2705,
165
- 0x2708..0x270D,
166
- 0x270F..0x270F,
167
- 0x2712..0x2712,
168
- 0x2714..0x2714,
169
- 0x2716..0x2716,
170
- 0x271D..0x271D,
171
- 0x2721..0x2721,
172
- 0x2728..0x2728,
173
- 0x2733..0x2734,
174
- 0x2744..0x2744,
175
- 0x2747..0x2747,
176
- 0x274C..0x274C,
177
- 0x274E..0x274E,
178
- 0x2753..0x2755,
179
- 0x2757..0x2757,
180
- 0x2763..0x2764,
181
- 0x2795..0x2797,
182
- 0x27A1..0x27A1,
183
- 0x27B0..0x27B0,
184
- 0x27BF..0x27BF,
185
- 0x2934..0x2935,
186
- 0x2B05..0x2B07,
187
- 0x2B1B..0x2B1C,
188
- 0x2B50..0x2B50,
189
- 0x2B55..0x2B55,
190
- 0x3030..0x3030,
191
- 0x303D..0x303D,
192
- 0x3297..0x3297,
193
- 0x3299..0x3299,
194
- 0x1F004..0x1F004,
195
- 0x1F0CF..0x1F0CF,
196
- 0x1F170..0x1F171,
197
- 0x1F17E..0x1F17F,
198
- 0x1F18E..0x1F18E,
199
- 0x1F191..0x1F19A,
200
- 0x1F1E6..0x1F1FF,
201
- 0x1F201..0x1F202,
202
- 0x1F21A..0x1F21A,
203
- 0x1F22F..0x1F22F,
204
- 0x1F232..0x1F23A,
205
- 0x1F250..0x1F251,
206
- 0x1F300..0x1F321,
207
- 0x1F324..0x1F393,
208
- 0x1F396..0x1F397,
209
- 0x1F399..0x1F39B,
210
- 0x1F39E..0x1F3F0,
211
- 0x1F3F3..0x1F3F5,
212
- 0x1F3F7..0x1F4FD,
213
- 0x1F4FF..0x1F53D,
214
- 0x1F549..0x1F54E,
215
- 0x1F550..0x1F567,
216
- 0x1F56F..0x1F570,
217
- 0x1F573..0x1F57A,
218
- 0x1F587..0x1F587,
219
- 0x1F58A..0x1F58D,
220
- 0x1F590..0x1F590,
221
- 0x1F595..0x1F596,
222
- 0x1F5A4..0x1F5A5,
223
- 0x1F5A8..0x1F5A8,
224
- 0x1F5B1..0x1F5B2,
225
- 0x1F5BC..0x1F5BC,
226
- 0x1F5C2..0x1F5C4,
227
- 0x1F5D1..0x1F5D3,
228
- 0x1F5DC..0x1F5DE,
229
- 0x1F5E1..0x1F5E1,
230
- 0x1F5E3..0x1F5E3,
231
- 0x1F5E8..0x1F5E8,
232
- 0x1F5EF..0x1F5EF,
233
- 0x1F5F3..0x1F5F3,
234
- 0x1F5FA..0x1F64F,
235
- 0x1F680..0x1F6C5,
236
- 0x1F6CB..0x1F6D2,
237
- 0x1F6E0..0x1F6E5,
238
- 0x1F6E9..0x1F6E9,
239
- 0x1F6EB..0x1F6EC,
240
- 0x1F6F0..0x1F6F0,
241
- 0x1F6F3..0x1F6F8,
242
- 0x1F910..0x1F93A,
243
- 0x1F93C..0x1F93E,
244
- 0x1F940..0x1F945,
245
- 0x1F947..0x1F94C,
246
- 0x1F950..0x1F96B,
247
- 0x1F980..0x1F997,
248
- 0x1F9C0..0x1F9C0,
249
- 0x1F9D0..0x1F9E6
250
- ).freeze
251
- end
252
-
253
- def respond_to_missing?(method_name, include_private = false)
254
- (base = method_name[/^non_(.*)/, 1]) && respond_to?(base) || super
255
- end
256
-
257
- def method_missing(method_name, *args, &block)
258
- if (base = method_name[/^non_(.*)/, 1])
259
- ivar_name = "@#{method_name}"
260
- return instance_variable_get(ivar_name) ||
261
- instance_variable_set(ivar_name, send(base).inversion.freeze)
3
+ Dir[File.join(__dir__, 'predefined_sets', '*.cps')].each do |path|
4
+ set_name = File.basename(path, '.cps')
5
+
6
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
7
+ def #{set_name}
8
+ @#{set_name} ||= build_from_cps_file('#{path}').freeze
9
+ end
10
+
11
+ def non_#{set_name}
12
+ @non_#{set_name} ||= build_from_cps_file('#{path}').inversion.freeze
13
+ end
14
+ RUBY
15
+ end
16
+
17
+ alias all any
18
+ alias ascii_letters ascii_letter
19
+ alias basic_multilingual_plane bmp
20
+ alias blank whitespace
21
+ alias invalid surrogate
22
+ alias valid unicode
23
+
24
+ def build_from_cps_file(path)
25
+ File.readlines(path).inject(new) do |set, line|
26
+ range_start, range_end = line.split(',')
27
+ set.merge((range_start.to_i(16))..(range_end.to_i(16)))
262
28
  end
263
- super
264
29
  end
265
30
  end
266
31
  end
@@ -6,9 +6,13 @@ class CharacterSet
6
6
  new(Array(ranges).flat_map(&:to_a))
7
7
  end
8
8
 
9
- def of(string)
10
- raise ArgumentError, 'pass a String' unless string.is_a?(String)
11
- new(string.codepoints)
9
+ def of(*strings)
10
+ new_set = new
11
+ strings.each do |str|
12
+ raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
13
+ str.codepoints.each { |cp| new_set << cp }
14
+ end
15
+ new_set
12
16
  end
13
17
  end
14
18
 
@@ -31,7 +35,7 @@ class CharacterSet
31
35
  end
32
36
 
33
37
  def ranges
34
- CharacterSet.require_optional_dependency('range_compressor')
38
+ CharacterSet.require_optional_dependency('range_compressor', __method__)
35
39
  RangeCompressor.compress(self)
36
40
  end
37
41
 
@@ -39,9 +43,8 @@ class CharacterSet
39
43
  count.nil? ? to_a(true).sample : to_a(true).sample(count)
40
44
  end
41
45
 
42
- def used_by?(string)
43
- str!(string).each_codepoint { |cp| return true if include?(cp) }
44
- false
46
+ def count_in(string)
47
+ str!(string).each_codepoint.count { |cp| include?(cp) }
45
48
  end
46
49
 
47
50
  def cover?(string)
@@ -67,16 +70,64 @@ class CharacterSet
67
70
  result.size == string.size ? nil : string.replace(result)
68
71
  end
69
72
 
73
+ def scan(string)
74
+ encoding = str!(string).encoding
75
+ string.each_codepoint.inject([]) do |arr, cp|
76
+ include?(cp) ? arr.push(cp.chr(encoding)) : arr
77
+ end
78
+ end
79
+
80
+ def used_by?(string)
81
+ str!(string).each_codepoint { |cp| return true if include?(cp) }
82
+ false
83
+ end
84
+
85
+ def section(from:, upto: 0x10FFFF)
86
+ dup.keep_if { |cp| cp >= from && cp <= upto }
87
+ end
88
+
89
+ def count_in_section(from:, upto: 0x10FFFF)
90
+ count { |cp| cp >= from && cp <= upto }
91
+ end
92
+
93
+ def section?(from:, upto: 0x10FFFF)
94
+ any? { |cp| cp >= from && cp <= upto }
95
+ end
96
+
97
+ def section_ratio(from:, upto: 0x10FFFF)
98
+ section(from: from, upto: upto).count / count.to_f
99
+ end
100
+
101
+ def planes
102
+ plane_size = 0x10000.to_f
103
+ inject({}) { |hash, cp| hash.merge((cp / plane_size).floor => 1) }.keys
104
+ end
105
+
106
+ def plane(num)
107
+ validate_plane_number(num)
108
+ section(from: (num * 0x10000), upto: ((num + 1) * 0x10000) - 1)
109
+ end
110
+
111
+ def member_in_plane?(num)
112
+ validate_plane_number(num)
113
+ ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
114
+ end
115
+
70
116
  private
71
117
 
118
+ def validate_plane_number(num)
119
+ num >= 0 && num <= 16 or raise ArgumentError, 'plane must be between 0 and 16'
120
+ end
121
+
72
122
  def str!(obj)
73
123
  raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
74
124
  obj
75
125
  end
76
126
 
77
127
  def make_new_str(original, &block)
78
- new_string = str!(original).each_codepoint.each_with_object('', &block)
79
- original.tainted? ? new_string.taint : new_string
128
+ str!(original)
129
+ .each_codepoint
130
+ .each_with_object(''.encode(original.encoding), &block)
80
131
  end
81
132
  end
82
133
  end
@@ -1,7 +1,9 @@
1
1
  class CharacterSet
2
2
  module RubyFallback
3
3
  module SetMethods
4
- Enumerable.instance_methods.concat(%w[empty? length size]).each do |mthd|
4
+ (Enumerable.instance_methods -
5
+ %i[include? member? to_a] +
6
+ %i[empty? length size]).each do |mthd|
5
7
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
6
8
  def #{mthd}(*args, &block)
7
9
  @__set.#{mthd}(*args, &block)
@@ -9,7 +11,7 @@ class CharacterSet
9
11
  RUBY
10
12
  end
11
13
 
12
- %w[< <= > >= disjoint? intersect? proper_subset? proper_superset?
14
+ %i[< <= > >= disjoint? intersect? proper_subset? proper_superset?
13
15
  subset? superset?].each do |mthd|
14
16
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
15
17
  def #{mthd}(enum, &block)
@@ -21,8 +23,8 @@ class CharacterSet
21
23
  RUBY
22
24
  end
23
25
 
24
- %w[<< === add add? clear collect! delete delete? delete_if
25
- each filter! hash include? map! member? keep_if reject!
26
+ %i[<< add add? clear collect! delete delete? delete_if
27
+ each filter! map! keep_if reject!
26
28
  select! subtract].each do |mthd|
27
29
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
28
30
  def #{mthd}(*args, &block)
@@ -32,22 +34,22 @@ class CharacterSet
32
34
  RUBY
33
35
  end
34
36
 
35
- %w[& + - ^ | difference intersection union].each do |mthd|
37
+ # revert if https://github.com/knu/sorted_set/issues/2 is resolved
38
+ %i[=== include? member?].each do |mthd|
36
39
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
37
- def #{mthd}(enum, &block)
38
- if enum.respond_to?(:map)
39
- enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
40
- end
41
- self.class.new(@__set.#{mthd}(enum, &block).to_a)
40
+ def #{mthd}(*args, &block)
41
+ !!@__set.#{mthd}(*args, &block)
42
42
  end
43
43
  RUBY
44
44
  end
45
45
 
46
- %w[taint untaint].each do |mthd|
46
+ %i[& + - ^ | difference intersection union].each do |mthd|
47
47
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
48
- def #{mthd}
49
- @__set.#{mthd}
50
- super
48
+ def #{mthd}(enum, &block)
49
+ if enum.respond_to?(:map)
50
+ enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
51
+ end
52
+ self.class.new(@__set.#{mthd}(enum, &block).to_a)
51
53
  end
52
54
  RUBY
53
55
  end
@@ -72,8 +74,8 @@ class CharacterSet
72
74
  true
73
75
  elsif other.instance_of?(self.class)
74
76
  @__set == other.instance_variable_get(:@__set)
75
- elsif other.is_a?(self.class) && size == other.size
76
- other.all? { |cp| @__set.include?(cp) }
77
+ elsif other.is_a?(CharacterSet) || other.is_a?(CharacterSet::Pure)
78
+ size == other.size && other.all? { |cp| @__set.include?(cp) }
77
79
  else
78
80
  false
79
81
  end
@@ -81,7 +83,13 @@ class CharacterSet
81
83
 
82
84
  def eql?(other)
83
85
  return false unless other.is_a?(self.class)
84
- @__set.eql?(other.instance_variable_get(:@__set))
86
+ # revert if https://github.com/knu/sorted_set/issues/3 is resolved
87
+ hash == other.hash
88
+ end
89
+
90
+ # revert if https://github.com/knu/sorted_set/issues/3 is resolved
91
+ def hash
92
+ @__set.to_a.hash
85
93
  end
86
94
 
87
95
  def initialize_dup(orig)