character_set 1.2.0-java → 1.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +22 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +17 -0
- data/BENCHMARK.md +53 -17
- data/CHANGELOG.md +54 -0
- data/README.md +51 -12
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +5 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +17 -4
- data/ext/character_set/character_set.c +969 -415
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/character.rb +1 -1
- data/lib/character_set/core_ext/regexp_ext.rb +1 -1
- data/lib/character_set/core_ext/string_ext.rb +3 -1
- data/lib/character_set/expression_converter.rb +41 -43
- data/lib/character_set/parser.rb +1 -1
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +677 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +152 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
- data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
- data/lib/character_set/ruby_fallback.rb +5 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +69 -50
- data/lib/character_set/version.rb +1 -1
- data/lib/character_set/writer.rb +98 -27
- metadata +114 -17
- data/.travis.yml +0 -8
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -0,0 +1,152 @@
|
|
1
|
+
23,23
|
2
|
+
2A,2A
|
3
|
+
30,39
|
4
|
+
A9,A9
|
5
|
+
AE,AE
|
6
|
+
203C,203C
|
7
|
+
2049,2049
|
8
|
+
2122,2122
|
9
|
+
2139,2139
|
10
|
+
2194,2199
|
11
|
+
21A9,21AA
|
12
|
+
231A,231B
|
13
|
+
2328,2328
|
14
|
+
23CF,23CF
|
15
|
+
23E9,23F3
|
16
|
+
23F8,23FA
|
17
|
+
24C2,24C2
|
18
|
+
25AA,25AB
|
19
|
+
25B6,25B6
|
20
|
+
25C0,25C0
|
21
|
+
25FB,25FE
|
22
|
+
2600,2604
|
23
|
+
260E,260E
|
24
|
+
2611,2611
|
25
|
+
2614,2615
|
26
|
+
2618,2618
|
27
|
+
261D,261D
|
28
|
+
2620,2620
|
29
|
+
2622,2623
|
30
|
+
2626,2626
|
31
|
+
262A,262A
|
32
|
+
262E,262F
|
33
|
+
2638,263A
|
34
|
+
2640,2640
|
35
|
+
2642,2642
|
36
|
+
2648,2653
|
37
|
+
265F,2660
|
38
|
+
2663,2663
|
39
|
+
2665,2666
|
40
|
+
2668,2668
|
41
|
+
267B,267B
|
42
|
+
267E,267F
|
43
|
+
2692,2697
|
44
|
+
2699,2699
|
45
|
+
269B,269C
|
46
|
+
26A0,26A1
|
47
|
+
26A7,26A7
|
48
|
+
26AA,26AB
|
49
|
+
26B0,26B1
|
50
|
+
26BD,26BE
|
51
|
+
26C4,26C5
|
52
|
+
26C8,26C8
|
53
|
+
26CE,26CF
|
54
|
+
26D1,26D1
|
55
|
+
26D3,26D4
|
56
|
+
26E9,26EA
|
57
|
+
26F0,26F5
|
58
|
+
26F7,26FA
|
59
|
+
26FD,26FD
|
60
|
+
2702,2702
|
61
|
+
2705,2705
|
62
|
+
2708,270D
|
63
|
+
270F,270F
|
64
|
+
2712,2712
|
65
|
+
2714,2714
|
66
|
+
2716,2716
|
67
|
+
271D,271D
|
68
|
+
2721,2721
|
69
|
+
2728,2728
|
70
|
+
2733,2734
|
71
|
+
2744,2744
|
72
|
+
2747,2747
|
73
|
+
274C,274C
|
74
|
+
274E,274E
|
75
|
+
2753,2755
|
76
|
+
2757,2757
|
77
|
+
2763,2764
|
78
|
+
2795,2797
|
79
|
+
27A1,27A1
|
80
|
+
27B0,27B0
|
81
|
+
27BF,27BF
|
82
|
+
2934,2935
|
83
|
+
2B05,2B07
|
84
|
+
2B1B,2B1C
|
85
|
+
2B50,2B50
|
86
|
+
2B55,2B55
|
87
|
+
3030,3030
|
88
|
+
303D,303D
|
89
|
+
3297,3297
|
90
|
+
3299,3299
|
91
|
+
1F004,1F004
|
92
|
+
1F0CF,1F0CF
|
93
|
+
1F170,1F171
|
94
|
+
1F17E,1F17F
|
95
|
+
1F18E,1F18E
|
96
|
+
1F191,1F19A
|
97
|
+
1F1E6,1F1FF
|
98
|
+
1F201,1F202
|
99
|
+
1F21A,1F21A
|
100
|
+
1F22F,1F22F
|
101
|
+
1F232,1F23A
|
102
|
+
1F250,1F251
|
103
|
+
1F300,1F321
|
104
|
+
1F324,1F393
|
105
|
+
1F396,1F397
|
106
|
+
1F399,1F39B
|
107
|
+
1F39E,1F3F0
|
108
|
+
1F3F3,1F3F5
|
109
|
+
1F3F7,1F4FD
|
110
|
+
1F4FF,1F53D
|
111
|
+
1F549,1F54E
|
112
|
+
1F550,1F567
|
113
|
+
1F56F,1F570
|
114
|
+
1F573,1F57A
|
115
|
+
1F587,1F587
|
116
|
+
1F58A,1F58D
|
117
|
+
1F590,1F590
|
118
|
+
1F595,1F596
|
119
|
+
1F5A4,1F5A5
|
120
|
+
1F5A8,1F5A8
|
121
|
+
1F5B1,1F5B2
|
122
|
+
1F5BC,1F5BC
|
123
|
+
1F5C2,1F5C4
|
124
|
+
1F5D1,1F5D3
|
125
|
+
1F5DC,1F5DE
|
126
|
+
1F5E1,1F5E1
|
127
|
+
1F5E3,1F5E3
|
128
|
+
1F5E8,1F5E8
|
129
|
+
1F5EF,1F5EF
|
130
|
+
1F5F3,1F5F3
|
131
|
+
1F5FA,1F64F
|
132
|
+
1F680,1F6C5
|
133
|
+
1F6CB,1F6D2
|
134
|
+
1F6D5,1F6D7
|
135
|
+
1F6E0,1F6E5
|
136
|
+
1F6E9,1F6E9
|
137
|
+
1F6EB,1F6EC
|
138
|
+
1F6F0,1F6F0
|
139
|
+
1F6F3,1F6FC
|
140
|
+
1F7E0,1F7EB
|
141
|
+
1F90C,1F93A
|
142
|
+
1F93C,1F945
|
143
|
+
1F947,1F978
|
144
|
+
1F97A,1F9CB
|
145
|
+
1F9CD,1F9FF
|
146
|
+
1FA70,1FA74
|
147
|
+
1FA78,1FA7A
|
148
|
+
1FA80,1FA86
|
149
|
+
1FA90,1FAA8
|
150
|
+
1FAB0,1FAB6
|
151
|
+
1FAC0,1FAC2
|
152
|
+
1FAD0,1FAD6
|
@@ -0,0 +1 @@
|
|
1
|
+
D800,DFFF
|
@@ -1,266 +1,31 @@
|
|
1
1
|
class CharacterSet
|
2
2
|
module PredefinedSets
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
def unicode
|
30
|
-
@unicode ||= from_ranges(0..0xD7FF, 0xE000..0x10FFFF).freeze
|
31
|
-
end
|
32
|
-
|
33
|
-
def url_fragment
|
34
|
-
@url_fragment ||= from_ranges(
|
35
|
-
0x21..0x21,
|
36
|
-
0x24..0x24,
|
37
|
-
0x26..0x3B,
|
38
|
-
0x3D..0x3D,
|
39
|
-
0x3F..0x5A,
|
40
|
-
0x5F..0x5F,
|
41
|
-
0x61..0x7A,
|
42
|
-
0x7E..0x7E
|
43
|
-
).freeze
|
44
|
-
end
|
45
|
-
|
46
|
-
def url_host
|
47
|
-
@url_host ||= from_ranges(
|
48
|
-
0x21..0x21,
|
49
|
-
0x24..0x24,
|
50
|
-
0x26..0x2E,
|
51
|
-
0x30..0x3B,
|
52
|
-
0x3D..0x3D,
|
53
|
-
0x41..0x5B,
|
54
|
-
0x5D..0x5D,
|
55
|
-
0x5F..0x5F,
|
56
|
-
0x61..0x7A,
|
57
|
-
0x7E..0x7E
|
58
|
-
).freeze
|
59
|
-
end
|
60
|
-
|
61
|
-
def url_path
|
62
|
-
@url_path ||= from_ranges(
|
63
|
-
0x21..0x21,
|
64
|
-
0x24..0x3A,
|
65
|
-
0x3D..0x3D,
|
66
|
-
0x40..0x5A,
|
67
|
-
0x5F..0x5F,
|
68
|
-
0x61..0x7A,
|
69
|
-
0x7E..0x7E
|
70
|
-
).freeze
|
71
|
-
end
|
72
|
-
|
73
|
-
def url_query
|
74
|
-
@url_query ||= from_ranges(
|
75
|
-
0x21..0x21,
|
76
|
-
0x24..0x24,
|
77
|
-
0x26..0x3B,
|
78
|
-
0x3D..0x3D,
|
79
|
-
0x3F..0x5A,
|
80
|
-
0x5F..0x5F,
|
81
|
-
0x61..0x7A,
|
82
|
-
0x7E..0x7E
|
83
|
-
).freeze
|
84
|
-
end
|
85
|
-
|
86
|
-
def whitespace
|
87
|
-
@whitespace ||= from_ranges(
|
88
|
-
0x9..0x9,
|
89
|
-
0xA..0xD,
|
90
|
-
0x20..0x20,
|
91
|
-
0x85..0x85,
|
92
|
-
0xA0..0xA0,
|
93
|
-
0x1680..0x1680,
|
94
|
-
0x180E..0x180E,
|
95
|
-
0x2000..0x200A,
|
96
|
-
0x2028..0x2029,
|
97
|
-
0x202F..0x202F,
|
98
|
-
0x205F..0x205F,
|
99
|
-
0x3000..0x3000
|
100
|
-
).freeze
|
101
|
-
end
|
102
|
-
|
103
|
-
def emoji
|
104
|
-
@emoji ||= from_ranges(
|
105
|
-
0x23..0x23,
|
106
|
-
0x2A..0x2A,
|
107
|
-
0x30..0x39,
|
108
|
-
0xA9..0xA9,
|
109
|
-
0xAE..0xAE,
|
110
|
-
0x203C..0x203C,
|
111
|
-
0x2049..0x2049,
|
112
|
-
0x2122..0x2122,
|
113
|
-
0x2139..0x2139,
|
114
|
-
0x2194..0x2199,
|
115
|
-
0x21A9..0x21AA,
|
116
|
-
0x231A..0x231B,
|
117
|
-
0x2328..0x2328,
|
118
|
-
0x23CF..0x23CF,
|
119
|
-
0x23E9..0x23F3,
|
120
|
-
0x23F8..0x23FA,
|
121
|
-
0x24C2..0x24C2,
|
122
|
-
0x25AA..0x25AB,
|
123
|
-
0x25B6..0x25B6,
|
124
|
-
0x25C0..0x25C0,
|
125
|
-
0x25FB..0x25FE,
|
126
|
-
0x2600..0x2604,
|
127
|
-
0x260E..0x260E,
|
128
|
-
0x2611..0x2611,
|
129
|
-
0x2614..0x2615,
|
130
|
-
0x2618..0x2618,
|
131
|
-
0x261D..0x261D,
|
132
|
-
0x2620..0x2620,
|
133
|
-
0x2622..0x2623,
|
134
|
-
0x2626..0x2626,
|
135
|
-
0x262A..0x262A,
|
136
|
-
0x262E..0x262F,
|
137
|
-
0x2638..0x263A,
|
138
|
-
0x2640..0x2640,
|
139
|
-
0x2642..0x2642,
|
140
|
-
0x2648..0x2653,
|
141
|
-
0x2660..0x2660,
|
142
|
-
0x2663..0x2663,
|
143
|
-
0x2665..0x2666,
|
144
|
-
0x2668..0x2668,
|
145
|
-
0x267B..0x267B,
|
146
|
-
0x267F..0x267F,
|
147
|
-
0x2692..0x2697,
|
148
|
-
0x2699..0x2699,
|
149
|
-
0x269B..0x269C,
|
150
|
-
0x26A0..0x26A1,
|
151
|
-
0x26AA..0x26AB,
|
152
|
-
0x26B0..0x26B1,
|
153
|
-
0x26BD..0x26BE,
|
154
|
-
0x26C4..0x26C5,
|
155
|
-
0x26C8..0x26C8,
|
156
|
-
0x26CE..0x26CF,
|
157
|
-
0x26D1..0x26D1,
|
158
|
-
0x26D3..0x26D4,
|
159
|
-
0x26E9..0x26EA,
|
160
|
-
0x26F0..0x26F5,
|
161
|
-
0x26F7..0x26FA,
|
162
|
-
0x26FD..0x26FD,
|
163
|
-
0x2702..0x2702,
|
164
|
-
0x2705..0x2705,
|
165
|
-
0x2708..0x270D,
|
166
|
-
0x270F..0x270F,
|
167
|
-
0x2712..0x2712,
|
168
|
-
0x2714..0x2714,
|
169
|
-
0x2716..0x2716,
|
170
|
-
0x271D..0x271D,
|
171
|
-
0x2721..0x2721,
|
172
|
-
0x2728..0x2728,
|
173
|
-
0x2733..0x2734,
|
174
|
-
0x2744..0x2744,
|
175
|
-
0x2747..0x2747,
|
176
|
-
0x274C..0x274C,
|
177
|
-
0x274E..0x274E,
|
178
|
-
0x2753..0x2755,
|
179
|
-
0x2757..0x2757,
|
180
|
-
0x2763..0x2764,
|
181
|
-
0x2795..0x2797,
|
182
|
-
0x27A1..0x27A1,
|
183
|
-
0x27B0..0x27B0,
|
184
|
-
0x27BF..0x27BF,
|
185
|
-
0x2934..0x2935,
|
186
|
-
0x2B05..0x2B07,
|
187
|
-
0x2B1B..0x2B1C,
|
188
|
-
0x2B50..0x2B50,
|
189
|
-
0x2B55..0x2B55,
|
190
|
-
0x3030..0x3030,
|
191
|
-
0x303D..0x303D,
|
192
|
-
0x3297..0x3297,
|
193
|
-
0x3299..0x3299,
|
194
|
-
0x1F004..0x1F004,
|
195
|
-
0x1F0CF..0x1F0CF,
|
196
|
-
0x1F170..0x1F171,
|
197
|
-
0x1F17E..0x1F17F,
|
198
|
-
0x1F18E..0x1F18E,
|
199
|
-
0x1F191..0x1F19A,
|
200
|
-
0x1F1E6..0x1F1FF,
|
201
|
-
0x1F201..0x1F202,
|
202
|
-
0x1F21A..0x1F21A,
|
203
|
-
0x1F22F..0x1F22F,
|
204
|
-
0x1F232..0x1F23A,
|
205
|
-
0x1F250..0x1F251,
|
206
|
-
0x1F300..0x1F321,
|
207
|
-
0x1F324..0x1F393,
|
208
|
-
0x1F396..0x1F397,
|
209
|
-
0x1F399..0x1F39B,
|
210
|
-
0x1F39E..0x1F3F0,
|
211
|
-
0x1F3F3..0x1F3F5,
|
212
|
-
0x1F3F7..0x1F4FD,
|
213
|
-
0x1F4FF..0x1F53D,
|
214
|
-
0x1F549..0x1F54E,
|
215
|
-
0x1F550..0x1F567,
|
216
|
-
0x1F56F..0x1F570,
|
217
|
-
0x1F573..0x1F57A,
|
218
|
-
0x1F587..0x1F587,
|
219
|
-
0x1F58A..0x1F58D,
|
220
|
-
0x1F590..0x1F590,
|
221
|
-
0x1F595..0x1F596,
|
222
|
-
0x1F5A4..0x1F5A5,
|
223
|
-
0x1F5A8..0x1F5A8,
|
224
|
-
0x1F5B1..0x1F5B2,
|
225
|
-
0x1F5BC..0x1F5BC,
|
226
|
-
0x1F5C2..0x1F5C4,
|
227
|
-
0x1F5D1..0x1F5D3,
|
228
|
-
0x1F5DC..0x1F5DE,
|
229
|
-
0x1F5E1..0x1F5E1,
|
230
|
-
0x1F5E3..0x1F5E3,
|
231
|
-
0x1F5E8..0x1F5E8,
|
232
|
-
0x1F5EF..0x1F5EF,
|
233
|
-
0x1F5F3..0x1F5F3,
|
234
|
-
0x1F5FA..0x1F64F,
|
235
|
-
0x1F680..0x1F6C5,
|
236
|
-
0x1F6CB..0x1F6D2,
|
237
|
-
0x1F6E0..0x1F6E5,
|
238
|
-
0x1F6E9..0x1F6E9,
|
239
|
-
0x1F6EB..0x1F6EC,
|
240
|
-
0x1F6F0..0x1F6F0,
|
241
|
-
0x1F6F3..0x1F6F8,
|
242
|
-
0x1F910..0x1F93A,
|
243
|
-
0x1F93C..0x1F93E,
|
244
|
-
0x1F940..0x1F945,
|
245
|
-
0x1F947..0x1F94C,
|
246
|
-
0x1F950..0x1F96B,
|
247
|
-
0x1F980..0x1F997,
|
248
|
-
0x1F9C0..0x1F9C0,
|
249
|
-
0x1F9D0..0x1F9E6
|
250
|
-
).freeze
|
251
|
-
end
|
252
|
-
|
253
|
-
def respond_to_missing?(method_name, include_private = false)
|
254
|
-
(base = method_name[/^non_(.*)/, 1]) && respond_to?(base) || super
|
255
|
-
end
|
256
|
-
|
257
|
-
def method_missing(method_name, *args, &block)
|
258
|
-
if (base = method_name[/^non_(.*)/, 1])
|
259
|
-
ivar_name = "@#{method_name}"
|
260
|
-
return instance_variable_get(ivar_name) ||
|
261
|
-
instance_variable_set(ivar_name, send(base).inversion.freeze)
|
3
|
+
Dir[File.join(__dir__, 'predefined_sets', '*.cps')].each do |path|
|
4
|
+
set_name = File.basename(path, '.cps')
|
5
|
+
|
6
|
+
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
7
|
+
def #{set_name}
|
8
|
+
@#{set_name} ||= build_from_cps_file('#{path}').freeze
|
9
|
+
end
|
10
|
+
|
11
|
+
def non_#{set_name}
|
12
|
+
@non_#{set_name} ||= build_from_cps_file('#{path}').inversion.freeze
|
13
|
+
end
|
14
|
+
RUBY
|
15
|
+
end
|
16
|
+
|
17
|
+
alias all any
|
18
|
+
alias ascii_letters ascii_letter
|
19
|
+
alias basic_multilingual_plane bmp
|
20
|
+
alias blank whitespace
|
21
|
+
alias invalid surrogate
|
22
|
+
alias valid unicode
|
23
|
+
|
24
|
+
def build_from_cps_file(path)
|
25
|
+
File.readlines(path).inject(new) do |set, line|
|
26
|
+
range_start, range_end = line.split(',')
|
27
|
+
set.merge((range_start.to_i(16))..(range_end.to_i(16)))
|
262
28
|
end
|
263
|
-
super
|
264
29
|
end
|
265
30
|
end
|
266
31
|
end
|
@@ -6,9 +6,13 @@ class CharacterSet
|
|
6
6
|
new(Array(ranges).flat_map(&:to_a))
|
7
7
|
end
|
8
8
|
|
9
|
-
def of(
|
10
|
-
|
11
|
-
|
9
|
+
def of(*strings)
|
10
|
+
new_set = new
|
11
|
+
strings.each do |str|
|
12
|
+
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
13
|
+
str.codepoints.each { |cp| new_set << cp }
|
14
|
+
end
|
15
|
+
new_set
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
@@ -31,7 +35,7 @@ class CharacterSet
|
|
31
35
|
end
|
32
36
|
|
33
37
|
def ranges
|
34
|
-
CharacterSet.require_optional_dependency('range_compressor')
|
38
|
+
CharacterSet.require_optional_dependency('range_compressor', __method__)
|
35
39
|
RangeCompressor.compress(self)
|
36
40
|
end
|
37
41
|
|
@@ -39,9 +43,8 @@ class CharacterSet
|
|
39
43
|
count.nil? ? to_a(true).sample : to_a(true).sample(count)
|
40
44
|
end
|
41
45
|
|
42
|
-
def
|
43
|
-
str!(string).each_codepoint { |cp|
|
44
|
-
false
|
46
|
+
def count_in(string)
|
47
|
+
str!(string).each_codepoint.count { |cp| include?(cp) }
|
45
48
|
end
|
46
49
|
|
47
50
|
def cover?(string)
|
@@ -67,16 +70,64 @@ class CharacterSet
|
|
67
70
|
result.size == string.size ? nil : string.replace(result)
|
68
71
|
end
|
69
72
|
|
73
|
+
def scan(string)
|
74
|
+
encoding = str!(string).encoding
|
75
|
+
string.each_codepoint.inject([]) do |arr, cp|
|
76
|
+
include?(cp) ? arr.push(cp.chr(encoding)) : arr
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
def used_by?(string)
|
81
|
+
str!(string).each_codepoint { |cp| return true if include?(cp) }
|
82
|
+
false
|
83
|
+
end
|
84
|
+
|
85
|
+
def section(from:, upto: 0x10FFFF)
|
86
|
+
dup.keep_if { |cp| cp >= from && cp <= upto }
|
87
|
+
end
|
88
|
+
|
89
|
+
def count_in_section(from:, upto: 0x10FFFF)
|
90
|
+
count { |cp| cp >= from && cp <= upto }
|
91
|
+
end
|
92
|
+
|
93
|
+
def section?(from:, upto: 0x10FFFF)
|
94
|
+
any? { |cp| cp >= from && cp <= upto }
|
95
|
+
end
|
96
|
+
|
97
|
+
def section_ratio(from:, upto: 0x10FFFF)
|
98
|
+
section(from: from, upto: upto).count / count.to_f
|
99
|
+
end
|
100
|
+
|
101
|
+
def planes
|
102
|
+
plane_size = 0x10000.to_f
|
103
|
+
inject({}) { |hash, cp| hash.merge((cp / plane_size).floor => 1) }.keys
|
104
|
+
end
|
105
|
+
|
106
|
+
def plane(num)
|
107
|
+
validate_plane_number(num)
|
108
|
+
section(from: (num * 0x10000), upto: ((num + 1) * 0x10000) - 1)
|
109
|
+
end
|
110
|
+
|
111
|
+
def member_in_plane?(num)
|
112
|
+
validate_plane_number(num)
|
113
|
+
((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
|
114
|
+
end
|
115
|
+
|
70
116
|
private
|
71
117
|
|
118
|
+
def validate_plane_number(num)
|
119
|
+
num >= 0 && num <= 16 or raise ArgumentError, 'plane must be between 0 and 16'
|
120
|
+
end
|
121
|
+
|
72
122
|
def str!(obj)
|
73
123
|
raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
|
74
124
|
obj
|
75
125
|
end
|
76
126
|
|
77
127
|
def make_new_str(original, &block)
|
78
|
-
|
79
|
-
|
128
|
+
str!(original)
|
129
|
+
.each_codepoint
|
130
|
+
.each_with_object(''.encode(original.encoding), &block)
|
80
131
|
end
|
81
132
|
end
|
82
133
|
end
|
@@ -1,7 +1,9 @@
|
|
1
1
|
class CharacterSet
|
2
2
|
module RubyFallback
|
3
3
|
module SetMethods
|
4
|
-
Enumerable.instance_methods
|
4
|
+
(Enumerable.instance_methods -
|
5
|
+
%i[include? member? to_a] +
|
6
|
+
%i[empty? length size]).each do |mthd|
|
5
7
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
6
8
|
def #{mthd}(*args, &block)
|
7
9
|
@__set.#{mthd}(*args, &block)
|
@@ -9,7 +11,7 @@ class CharacterSet
|
|
9
11
|
RUBY
|
10
12
|
end
|
11
13
|
|
12
|
-
%
|
14
|
+
%i[< <= > >= disjoint? intersect? proper_subset? proper_superset?
|
13
15
|
subset? superset?].each do |mthd|
|
14
16
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
15
17
|
def #{mthd}(enum, &block)
|
@@ -21,8 +23,8 @@ class CharacterSet
|
|
21
23
|
RUBY
|
22
24
|
end
|
23
25
|
|
24
|
-
%
|
25
|
-
each filter!
|
26
|
+
%i[<< add add? clear collect! delete delete? delete_if
|
27
|
+
each filter! map! keep_if reject!
|
26
28
|
select! subtract].each do |mthd|
|
27
29
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
28
30
|
def #{mthd}(*args, &block)
|
@@ -32,22 +34,22 @@ class CharacterSet
|
|
32
34
|
RUBY
|
33
35
|
end
|
34
36
|
|
35
|
-
|
37
|
+
# revert if https://github.com/knu/sorted_set/issues/2 is resolved
|
38
|
+
%i[=== include? member?].each do |mthd|
|
36
39
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
37
|
-
def #{mthd}(
|
38
|
-
|
39
|
-
enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
|
40
|
-
end
|
41
|
-
self.class.new(@__set.#{mthd}(enum, &block).to_a)
|
40
|
+
def #{mthd}(*args, &block)
|
41
|
+
!!@__set.#{mthd}(*args, &block)
|
42
42
|
end
|
43
43
|
RUBY
|
44
44
|
end
|
45
45
|
|
46
|
-
%
|
46
|
+
%i[& + - ^ | difference intersection union].each do |mthd|
|
47
47
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
48
|
-
def #{mthd}
|
49
|
-
|
50
|
-
|
48
|
+
def #{mthd}(enum, &block)
|
49
|
+
if enum.respond_to?(:map)
|
50
|
+
enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
|
51
|
+
end
|
52
|
+
self.class.new(@__set.#{mthd}(enum, &block).to_a)
|
51
53
|
end
|
52
54
|
RUBY
|
53
55
|
end
|
@@ -72,8 +74,8 @@ class CharacterSet
|
|
72
74
|
true
|
73
75
|
elsif other.instance_of?(self.class)
|
74
76
|
@__set == other.instance_variable_get(:@__set)
|
75
|
-
elsif other.is_a?(
|
76
|
-
other.all? { |cp| @__set.include?(cp) }
|
77
|
+
elsif other.is_a?(CharacterSet) || other.is_a?(CharacterSet::Pure)
|
78
|
+
size == other.size && other.all? { |cp| @__set.include?(cp) }
|
77
79
|
else
|
78
80
|
false
|
79
81
|
end
|
@@ -81,7 +83,13 @@ class CharacterSet
|
|
81
83
|
|
82
84
|
def eql?(other)
|
83
85
|
return false unless other.is_a?(self.class)
|
84
|
-
|
86
|
+
# revert if https://github.com/knu/sorted_set/issues/3 is resolved
|
87
|
+
hash == other.hash
|
88
|
+
end
|
89
|
+
|
90
|
+
# revert if https://github.com/knu/sorted_set/issues/3 is resolved
|
91
|
+
def hash
|
92
|
+
@__set.to_a.hash
|
85
93
|
end
|
86
94
|
|
87
95
|
def initialize_dup(orig)
|