character_set 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.travis.yml +1 -0
- data/BENCHMARK.md +51 -15
- data/CHANGELOG.md +20 -0
- data/README.md +24 -8
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +1 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +2 -0
- data/ext/character_set/character_set.c +963 -413
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/core_ext/string_ext.rb +2 -0
- data/lib/character_set/expression_converter.rb +21 -24
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +666 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +151 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/ruby_fallback.rb +0 -2
- data/lib/character_set/ruby_fallback/character_set_methods.rb +52 -4
- data/lib/character_set/ruby_fallback/set_methods.rb +2 -2
- data/lib/character_set/shared_methods.rb +51 -40
- data/lib/character_set/version.rb +1 -1
- metadata +54 -3
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae7ec84b0727a804bf4d82564e6609fdd0bf070fd0e20c0a5688b579e320bc30
|
4
|
+
data.tar.gz: b73dec9fbd4abf83fae5881de89e4e1876e48bcefc3ef935401d5adbeb9c6c8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2b84916c89dcd6a234cc5acedfc604f664a9e285c92b3bae6bade748ad3d9c275fb3307fb5721142e52dbedc9b16da65285a8ebd87cd686b55391f222ef1b4f8
|
7
|
+
data.tar.gz: 25147010da0adfd869891d50d51e265c2b4f28e1b0cb70727d9784b11c3944b9a06a9844a2068f529e487028c214f44e2ab60271a9a5730cdd40bb04dd989aaf
|
data/.gitattributes
ADDED
data/.travis.yml
CHANGED
data/BENCHMARK.md
CHANGED
@@ -1,46 +1,58 @@
|
|
1
|
-
Results of `rake:benchmark` on ruby 2.6.
|
1
|
+
Results of `rake:benchmark` on ruby 2.6.2p47 (2019-03-13 revision 67232) [x86_64-darwin18]
|
2
2
|
|
3
|
+
```
|
4
|
+
Counting non-letters
|
5
|
+
|
6
|
+
CharacterSet#count_in: 12253693.8 i/s
|
7
|
+
String#count: 1737741.7 i/s - 7.05x slower
|
8
|
+
```
|
3
9
|
```
|
4
10
|
Detecting non-whitespace
|
5
11
|
|
6
|
-
CharacterSet#cover?:
|
7
|
-
Regexp#match?:
|
12
|
+
CharacterSet#cover?: 14058351.9 i/s
|
13
|
+
Regexp#match?: 7907608.1 i/s - 1.78x slower
|
8
14
|
```
|
9
15
|
```
|
10
16
|
Detecting non-letters
|
11
17
|
|
12
|
-
CharacterSet#cover?:
|
13
|
-
Regexp#match?:
|
18
|
+
CharacterSet#cover?: 13341301.6 i/s
|
19
|
+
Regexp#match?: 5187453.3 i/s - 2.57x slower
|
14
20
|
```
|
15
21
|
```
|
16
22
|
Removing whitespace
|
17
23
|
|
18
|
-
CharacterSet#delete_in:
|
19
|
-
String#gsub:
|
24
|
+
CharacterSet#delete_in: 2523184.0 i/s
|
25
|
+
String#gsub: 225804.7 i/s - 11.17x slower
|
20
26
|
```
|
21
27
|
```
|
22
28
|
Removing whitespace, emoji and umlauts
|
23
29
|
|
24
|
-
CharacterSet#delete_in:
|
25
|
-
String#gsub:
|
30
|
+
CharacterSet#delete_in: 1712208.6 i/s
|
31
|
+
String#gsub: 278508.8 i/s - 6.15x slower
|
26
32
|
```
|
27
33
|
```
|
28
34
|
Removing non-whitespace
|
29
35
|
|
30
|
-
CharacterSet#keep_in:
|
31
|
-
String#gsub:
|
36
|
+
CharacterSet#keep_in: 2760158.1 i/s
|
37
|
+
String#gsub: 232797.7 i/s - 11.86x slower
|
32
38
|
```
|
33
39
|
```
|
34
40
|
Extracting emoji
|
35
41
|
|
36
|
-
CharacterSet#keep_in:
|
37
|
-
String#gsub:
|
42
|
+
CharacterSet#keep_in: 1775758.8 i/s
|
43
|
+
String#gsub: 217649.9 i/s - 8.16x slower
|
44
|
+
```
|
45
|
+
```
|
46
|
+
Extracting emoji to an Array
|
47
|
+
|
48
|
+
CharacterSet#scan: 2579030.8 i/s
|
49
|
+
String#scan: 545107.0 i/s - 4.73x slower
|
38
50
|
```
|
39
51
|
```
|
40
52
|
Detecting whitespace
|
41
53
|
|
42
|
-
CharacterSet#used_by?:
|
43
|
-
Regexp#match?:
|
54
|
+
CharacterSet#used_by?: 13847689.0 i/s
|
55
|
+
Regexp#match?: 7533275.2 i/s - 1.84x slower
|
44
56
|
```
|
45
57
|
```
|
46
58
|
Detecting emoji in a large string
|
@@ -48,3 +60,27 @@ Detecting emoji in a large string
|
|
48
60
|
CharacterSet#used_by?: 246527.7 i/s
|
49
61
|
Regexp#match?: 92956.5 i/s - 2.65x slower
|
50
62
|
```
|
63
|
+
```
|
64
|
+
Adding entries
|
65
|
+
|
66
|
+
CharacterSet#add: 3102081.7 i/s
|
67
|
+
SortedSet#add: 1897464.8 i/s - 1.63x slower
|
68
|
+
```
|
69
|
+
```
|
70
|
+
Removing entries
|
71
|
+
|
72
|
+
CharacterSet#delete: 3240924.1 i/s
|
73
|
+
SortedSet#delete: 2887493.9 i/s - 1.12x slower
|
74
|
+
```
|
75
|
+
```
|
76
|
+
Merging entries
|
77
|
+
|
78
|
+
CharacterSet#merge: 536.8 i/s
|
79
|
+
SortedSet#merge: 12.5 i/s - 42.78x slower
|
80
|
+
```
|
81
|
+
```
|
82
|
+
Getting the min and max
|
83
|
+
|
84
|
+
CharacterSet#minmax: 4111960.8 i/s
|
85
|
+
SortedSet#minmax: 756.4 i/s - 5436.39x slower
|
86
|
+
```
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## UNRELEASED
|
8
|
+
|
9
|
+
## [1.3.0] - 2019-04-26
|
10
|
+
|
11
|
+
### Added
|
12
|
+
- improved `String` manipulation speed
|
13
|
+
- improved initialization and `#merge` speed when passing a large `Range`
|
14
|
+
- reduced memory consumption by > 90% for most use cases via dynamic resizing
|
15
|
+
- before, every set instance required 136 KB for codepoints
|
16
|
+
- now, 16 bytes for a CharacterSet in ASCII space, 8 KB for one in BMP space etc.
|
17
|
+
- `#count_in` and `#scan_in` methods for `String` interaction
|
18
|
+
- new predefined sets `::any`/`::all`, `::assigned`, `::surrogate`
|
19
|
+
- conversion methods `#assigned_part`, `#valid_part`
|
20
|
+
- sectioning methods `#ascii_part`, `#plane(n)`
|
21
|
+
- section test methods `#ascii_part?`, `#ascii_ratio`, `#ascii_only?`, `#astral_only?`
|
22
|
+
|
23
|
+
### Fixed
|
24
|
+
- `#count` now supports passing an argument or block as usual
|
25
|
+
- `CharacterSet::Pure#keep_in`, `#delete_in` now preserve the original encoding
|
26
|
+
|
7
27
|
## [1.2.0] - 2019-04-02
|
8
28
|
|
9
29
|
### Added
|
data/README.md
CHANGED
@@ -2,8 +2,11 @@
|
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/character_set)
|
4
4
|
[](https://travis-ci.org/jaynetics/character_set)
|
5
|
+
[](https://codecov.io/gh/jaynetics/character_set)
|
5
6
|
|
6
|
-
|
7
|
+
This is a C-extended Ruby gem to work with sets of Unicode codepoints. It can read and write these sets in various formats and implements the stdlib `Set` interface for them.
|
8
|
+
|
9
|
+
It also offers an alternate paradigm of `String` processing which grants much better performance than `Regexp` and `String` methods from the stdlib where applicable (see [benchmarks](./BENCHMARK.md)).
|
7
10
|
|
8
11
|
Many parts can be used independently, e.g.:
|
9
12
|
- `CharacterSet::Character`
|
@@ -49,7 +52,7 @@ require 'character_set/core_ext/regexp_ext'
|
|
49
52
|
|
50
53
|
### Predefined utility sets
|
51
54
|
|
52
|
-
`ascii`, `ascii_alnum`, `
|
55
|
+
`ascii`, `ascii_alnum`, `ascii_letter`, `assigned`, `bmp`, `crypt`, `emoji`, `newline`, `surrogate`, `unicode`, `url_fragment`, `url_host`, `url_path`, `url_query`, `whitespace`
|
53
56
|
|
54
57
|
```ruby
|
55
58
|
CharacterSet.ascii # => #<CharacterSet (size: 128)>
|
@@ -60,7 +63,7 @@ CharacterSet.non_ascii
|
|
60
63
|
|
61
64
|
### Interact with Strings
|
62
65
|
|
63
|
-
CharacterSet can replace some `
|
66
|
+
`CharacterSet` can replace some types of `String` handling with better performance than the stdlib.
|
64
67
|
|
65
68
|
`#used_by?` and `#cover?` can replace some `Regexp#match?` calls:
|
66
69
|
|
@@ -71,6 +74,7 @@ CharacterSet.ascii.cover?('Tr') # => true
|
|
71
74
|
```
|
72
75
|
|
73
76
|
`#delete_in(!)` and `#keep_in(!)` can replace `String#gsub(!)` and the like:
|
77
|
+
|
74
78
|
```ruby
|
75
79
|
string = 'Tüür'
|
76
80
|
|
@@ -84,6 +88,13 @@ CharacterSet.ascii.keep_in!(string) # => ''
|
|
84
88
|
string # => ''
|
85
89
|
```
|
86
90
|
|
91
|
+
`#count_in` and `#scan_in` can replace `String#count` and `String#scan`:
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
CharacterSet.non_ascii.count_in('Tüür') # => 2
|
95
|
+
CharacterSet.non_ascii.scan_in('Tüür') # => ['ü', 'ü']
|
96
|
+
```
|
97
|
+
|
87
98
|
There is also a core extension for String interaction.
|
88
99
|
```ruby
|
89
100
|
require 'character_set/core_ext/string_ext'
|
@@ -100,7 +111,7 @@ require 'character_set/core_ext/string_ext'
|
|
100
111
|
|
101
112
|
### Manipulate
|
102
113
|
|
103
|
-
Use any
|
114
|
+
Use [any Ruby Set method](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html), e.g. `#+`, `#-`, `#&`, `#^`, `#intersect?`, `#<`, `#>` etc. to interact with other sets. Use `#add`, `#delete`, `#include?` etc. to change or check for members.
|
104
115
|
|
105
116
|
Where appropriate, methods take both chars and codepoints, e.g.:
|
106
117
|
|
@@ -122,13 +133,13 @@ non_a.include?('ü') # => true
|
|
122
133
|
|
123
134
|
# surrogate pair halves are not included by default
|
124
135
|
CharacterSet['a'].inversion(include_surrogates: true)
|
125
|
-
# => #<CharacterSet (size:
|
136
|
+
# => #<CharacterSet (size: 1114112)>
|
126
137
|
```
|
127
138
|
|
128
139
|
`#case_insensitive` can be used to create a `CharacterSet` where upper/lower case codepoints are supplemented:
|
129
140
|
|
130
141
|
```ruby
|
131
|
-
CharacterSet['1', '
|
142
|
+
CharacterSet['1', 'A'].case_insensitive # => CharacterSet['1', 'A', 'a']
|
132
143
|
```
|
133
144
|
|
134
145
|
### Write
|
@@ -157,17 +168,22 @@ set.to_s(escape_all: true) { |c| "<#{c.hex}>" } # => "<61>-<63><258><1F929>"
|
|
157
168
|
set.to_s(abbreviate: false) # => "abc\u0258\u{1F929}"
|
158
169
|
|
159
170
|
# for full js regex compatibility in case of astral members:
|
160
|
-
set.to_s_with_surrogate_alternation # => '(?:[\u0258]|\ud83e\udd29)'
|
171
|
+
set.to_s_with_surrogate_alternation # => '(?:[a-c\u0258]|\ud83e\udd29)'
|
161
172
|
```
|
162
173
|
|
163
174
|
### Unicode plane methods
|
164
175
|
|
165
|
-
There are some methods to check for planes and to handle [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
176
|
+
There are some methods to check for planes and to handle ASCII, [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
166
177
|
```Ruby
|
178
|
+
CharacterSet['a', 'ü', '🤩'].ascii_part # => CharacterSet['a']
|
179
|
+
CharacterSet['a', 'ü', '🤩'].ascii_part? # => true
|
180
|
+
CharacterSet['a', 'ü', '🤩'].ascii_only? # => false
|
181
|
+
CharacterSet['a', 'ü', '🤩'].ascii_ratio # => 0.3333333
|
167
182
|
CharacterSet['a', 'ü', '🤩'].bmp_part # => CharacterSet['a', 'ü']
|
168
183
|
CharacterSet['a', 'ü', '🤩'].astral_part # => CharacterSet['🤩']
|
169
184
|
CharacterSet['a', 'ü', '🤩'].bmp_ratio # => 0.6666666
|
170
185
|
CharacterSet['a', 'ü', '🤩'].planes # => [0, 1]
|
186
|
+
CharacterSet['a', 'ü', '🤩'].plane(1) # => CharacterSet['🤩']
|
171
187
|
CharacterSet['a', 'ü', '🤩'].member_in_plane?(7) # => false
|
172
188
|
CharacterSet::Character.new('a').plane # => 0
|
173
189
|
```
|
data/Rakefile
CHANGED
@@ -7,6 +7,13 @@ RSpec::Core::RakeTask.new(:spec)
|
|
7
7
|
|
8
8
|
task default: :spec
|
9
9
|
|
10
|
+
namespace :spec do
|
11
|
+
task :quick do
|
12
|
+
ENV['SKIP_MEMSAFETY_SPECS'] = '1'
|
13
|
+
Rake::Task[:spec].invoke
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
10
17
|
Rake::ExtensionTask.new('character_set') do |ext|
|
11
18
|
ext.lib_dir = 'lib/character_set'
|
12
19
|
end
|
@@ -106,27 +113,22 @@ task :sync_casefold_data do
|
|
106
113
|
hash[from] = to if type == 'C'
|
107
114
|
end.sort
|
108
115
|
|
109
|
-
File.
|
110
|
-
|
111
|
-
|
112
|
-
// -*-C-*-
|
113
|
-
|
114
|
-
typedef struct casefold_mapping {
|
115
|
-
unsigned long from;
|
116
|
-
unsigned long to;
|
117
|
-
} casefold_mapping;
|
118
|
-
|
119
|
-
#define CASEFOLD_COUNT #{mapping.size}
|
116
|
+
content = File.read(dst_path + '.tmpl')
|
117
|
+
.sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
|
118
|
+
.sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))
|
120
119
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
mapping.each { |from, to| f.puts "{0x#{from},0x#{to}}," }
|
120
|
+
File.write(dst_path, content)
|
121
|
+
File.unlink(src_path)
|
122
|
+
end
|
125
123
|
|
126
|
-
|
124
|
+
desc 'Update codepoint data for predefined sets, based on Onigmo'
|
125
|
+
task :sync_predefined_sets do
|
126
|
+
%w[assigned emoji whitespace].each do |prop|
|
127
|
+
require 'regexp_property_values'
|
128
|
+
ranges = RegexpPropertyValues[prop].matched_ranges
|
129
|
+
str = ranges.map { |r| r.minmax.map { |n| n.to_s(16) }.join(',').upcase + "\n" }.join
|
130
|
+
File.write("./lib/character_set/predefined_sets/#{prop}.cps", str, mode: 'w')
|
127
131
|
end
|
128
|
-
|
129
|
-
File.unlink(src_path)
|
130
132
|
end
|
131
133
|
|
132
134
|
desc 'Run all IPS benchmarks'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
str = 'Lorem ipsum et dolorem'
|
4
|
+
tr = '^A-Za-z'
|
5
|
+
cs = CharacterSet.non_ascii_letter
|
6
|
+
|
7
|
+
benchmark(
|
8
|
+
caption: 'Counting non-letters',
|
9
|
+
cases: {
|
10
|
+
'String#count' => -> { str.count(tr) },
|
11
|
+
'CharacterSet#count_in' => -> { cs.count_in(str) },
|
12
|
+
}
|
13
|
+
)
|
data/benchmarks/delete_in.rb
CHANGED
@@ -14,7 +14,7 @@ benchmark(
|
|
14
14
|
|
15
15
|
str = 'Lörem ipsüm ⛷ et dölörem'
|
16
16
|
rx = /[\s\p{emoji}äüö]/
|
17
|
-
cs = CharacterSet.whitespace + CharacterSet.emoji +
|
17
|
+
cs = CharacterSet.whitespace + CharacterSet.emoji + CharacterSet['ä', 'ö', 'ü']
|
18
18
|
|
19
19
|
benchmark(
|
20
20
|
caption: 'Removing whitespace, emoji and umlauts',
|
data/benchmarks/scan.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
str = 'Lorem ipsum ⛷ et dolorem'
|
4
|
+
rx = /\p{emoji}/
|
5
|
+
cs = CharacterSet.emoji
|
6
|
+
|
7
|
+
benchmark(
|
8
|
+
caption: 'Extracting emoji to an Array',
|
9
|
+
cases: {
|
10
|
+
'String#scan' => -> { str.scan(rx) },
|
11
|
+
'CharacterSet#scan' => -> { cs.scan(str) },
|
12
|
+
}
|
13
|
+
)
|
data/benchmarks/shared.rb
CHANGED
data/benchmarks/z_add.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs = CharacterSet.new(0..0x10FFFF)
|
4
|
+
ss = SortedSet.new(0..0x10FFFF)
|
5
|
+
|
6
|
+
benchmark(
|
7
|
+
caption: 'Removing entries',
|
8
|
+
cases: {
|
9
|
+
'CharacterSet#delete' => -> { cs.delete(rand(0x10FFFF)) },
|
10
|
+
'SortedSet#delete' => -> { ss.delete(rand(0x10FFFF)) },
|
11
|
+
}
|
12
|
+
)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs1 = CharacterSet.new(0...0x88000)
|
4
|
+
cs2 = CharacterSet.new(0x88000..0x10FFFF)
|
5
|
+
|
6
|
+
ss1 = SortedSet.new(0...0x88000)
|
7
|
+
ss2 = SortedSet.new(0x88000..0x10FFFF)
|
8
|
+
|
9
|
+
benchmark(
|
10
|
+
caption: 'Merging entries',
|
11
|
+
cases: {
|
12
|
+
'CharacterSet#merge' => -> { cs1.merge(cs2) },
|
13
|
+
'SortedSet#merge' => -> { ss1.merge(ss2) },
|
14
|
+
}
|
15
|
+
)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs = CharacterSet.new(0..0xFFFF)
|
4
|
+
ss = SortedSet.new(0..0xFFFF)
|
5
|
+
|
6
|
+
benchmark(
|
7
|
+
caption: 'Getting the min and max',
|
8
|
+
cases: {
|
9
|
+
'CharacterSet#minmax' => -> { cs.minmax },
|
10
|
+
'SortedSet#minmax' => -> { ss.minmax },
|
11
|
+
}
|
12
|
+
)
|
data/bin/console
CHANGED
data/character_set.gemspec
CHANGED
@@ -23,6 +23,8 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.required_ruby_version = '>= 2.1.0'
|
24
24
|
|
25
25
|
s.add_development_dependency 'benchmark-ips', '~> 2.7'
|
26
|
+
s.add_development_dependency 'codecov', '~> 0.1'
|
27
|
+
s.add_development_dependency 'get_process_mem', '~> 0.2.3'
|
26
28
|
s.add_development_dependency 'rake', '~> 12.0'
|
27
29
|
s.add_development_dependency 'rake-compiler', '~> 1.0'
|
28
30
|
s.add_development_dependency 'range_compressor', '~> 1.0'
|
@@ -2,81 +2,180 @@
|
|
2
2
|
#include "ruby/encoding.h"
|
3
3
|
#include "unicode_casefold_table.h"
|
4
4
|
|
5
|
-
#define
|
6
|
-
#define
|
7
|
-
#define
|
5
|
+
#define UNICODE_PLANE_SIZE 0x10000
|
6
|
+
#define UNICODE_PLANE_COUNT 17
|
7
|
+
#define UNICODE_CP_COUNT (UNICODE_PLANE_SIZE * UNICODE_PLANE_COUNT)
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
// start at ascii size
|
10
|
+
#define CS_DEFAULT_INITIAL_LEN 128
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
typedef char cs_ar;
|
13
|
+
typedef unsigned long cs_cp;
|
14
|
+
|
15
|
+
struct cs_data
|
16
|
+
{
|
17
|
+
cs_ar *cps;
|
18
|
+
cs_cp len;
|
19
|
+
};
|
20
|
+
|
21
|
+
#define CS_MSIZE(len) (sizeof(cs_ar) * (len / 8))
|
22
|
+
|
23
|
+
static inline void
|
24
|
+
add_memspace_for_another_plane(struct cs_data *data)
|
25
|
+
{
|
26
|
+
data->cps = ruby_xrealloc(data->cps, CS_MSIZE(data->len + UNICODE_PLANE_SIZE));
|
27
|
+
memset(data->cps + CS_MSIZE(data->len), 0, CS_MSIZE(UNICODE_PLANE_SIZE));
|
28
|
+
data->len += UNICODE_PLANE_SIZE;
|
29
|
+
}
|
30
|
+
|
31
|
+
static inline void
|
32
|
+
ensure_memsize_fits(struct cs_data *data, cs_cp target_cp)
|
33
|
+
{
|
34
|
+
while (target_cp >= data->len)
|
35
|
+
{
|
36
|
+
add_memspace_for_another_plane(data);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static inline void
|
41
|
+
set_cp(struct cs_data *data, cs_cp cp)
|
42
|
+
{
|
43
|
+
ensure_memsize_fits(data, cp);
|
44
|
+
data->cps[cp >> 3] |= (1 << (cp & 0x07));
|
45
|
+
}
|
46
|
+
|
47
|
+
static inline int
|
48
|
+
tst_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
49
|
+
{
|
50
|
+
return ((cp < len) && cps[cp >> 3] & (1 << (cp & 0x07)));
|
51
|
+
}
|
52
|
+
|
53
|
+
static inline void
|
54
|
+
clr_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
55
|
+
{
|
56
|
+
if (cp < len)
|
57
|
+
{
|
58
|
+
cps[cp >> 3] &= ~(1 << (cp & 0x07));
|
59
|
+
}
|
60
|
+
}
|
16
61
|
|
17
62
|
static void
|
18
|
-
|
19
|
-
|
63
|
+
cs_free(void *ptr)
|
64
|
+
{
|
65
|
+
struct cs_data *data = ptr;
|
66
|
+
ruby_xfree(data->cps);
|
67
|
+
ruby_xfree(data);
|
20
68
|
}
|
21
69
|
|
22
70
|
static size_t
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
.
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
71
|
+
cs_memsize(const void *ptr)
|
72
|
+
{
|
73
|
+
const struct cs_data *data = ptr;
|
74
|
+
return sizeof(*data) + CS_MSIZE(data->len);
|
75
|
+
}
|
76
|
+
|
77
|
+
static const rb_data_type_t cs_type = {
|
78
|
+
.wrap_struct_name = "character_set",
|
79
|
+
.function = {
|
80
|
+
.dmark = NULL,
|
81
|
+
.dfree = cs_free,
|
82
|
+
.dsize = cs_memsize,
|
83
|
+
},
|
84
|
+
.data = NULL,
|
85
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
37
86
|
};
|
38
87
|
|
39
|
-
|
40
|
-
|
88
|
+
static inline VALUE
|
89
|
+
cs_alloc_len(VALUE klass, struct cs_data **data_ptr, cs_cp len)
|
90
|
+
{
|
91
|
+
VALUE cs;
|
92
|
+
struct cs_data *data;
|
93
|
+
cs = TypedData_Make_Struct(klass, struct cs_data, &cs_type, data);
|
94
|
+
data->cps = ruby_xmalloc(CS_MSIZE(len));
|
95
|
+
memset(data->cps, 0, CS_MSIZE(len));
|
96
|
+
data->len = len;
|
97
|
+
|
98
|
+
if (data_ptr)
|
99
|
+
{
|
100
|
+
*data_ptr = data;
|
101
|
+
}
|
41
102
|
|
42
|
-
|
43
|
-
|
103
|
+
return cs;
|
104
|
+
}
|
44
105
|
|
45
|
-
static VALUE
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
return NEW_CHARACTER_SET(self, cp_arr);
|
106
|
+
static inline VALUE
|
107
|
+
cs_alloc(VALUE klass, struct cs_data **data_ptr)
|
108
|
+
{
|
109
|
+
return cs_alloc_len(klass, data_ptr, CS_DEFAULT_INITIAL_LEN);
|
50
110
|
}
|
51
111
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
112
|
+
static inline struct cs_data *
|
113
|
+
cs_fetch_data(VALUE cs)
|
114
|
+
{
|
115
|
+
struct cs_data *data;
|
116
|
+
TypedData_Get_Struct(cs, struct cs_data, &cs_type, data);
|
117
|
+
return data;
|
118
|
+
}
|
119
|
+
|
120
|
+
static inline cs_ar *
|
121
|
+
cs_fetch_cps(VALUE cs, cs_cp *len_ptr)
|
122
|
+
{
|
123
|
+
struct cs_data *data;
|
124
|
+
data = cs_fetch_data(cs);
|
125
|
+
*len_ptr = data->len;
|
126
|
+
return data->cps;
|
127
|
+
}
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
cs_method_allocate(VALUE self)
|
131
|
+
{
|
132
|
+
return cs_alloc(self, 0);
|
133
|
+
}
|
134
|
+
|
135
|
+
#define FOR_EACH_ACTIVE_CODEPOINT(action) \
|
136
|
+
do \
|
137
|
+
{ \
|
138
|
+
cs_cp cp, len; \
|
139
|
+
cs_ar *cps; \
|
140
|
+
cps = cs_fetch_cps(self, &len); \
|
141
|
+
for (cp = 0; cp < len; cp++) \
|
142
|
+
{ \
|
143
|
+
if (tst_cp(cps, len, cp)) \
|
144
|
+
{ \
|
145
|
+
action; \
|
146
|
+
} \
|
147
|
+
} \
|
148
|
+
} while (0)
|
59
149
|
|
60
150
|
// ***************************
|
61
151
|
// `Set` compatibility methods
|
62
152
|
// ***************************
|
63
153
|
|
64
|
-
static inline
|
65
|
-
|
66
|
-
|
154
|
+
static inline cs_cp
|
155
|
+
cs_active_cp_count(VALUE self)
|
156
|
+
{
|
157
|
+
cs_cp count;
|
67
158
|
count = 0;
|
68
159
|
FOR_EACH_ACTIVE_CODEPOINT(count++);
|
69
|
-
return
|
160
|
+
return count;
|
70
161
|
}
|
71
162
|
|
72
163
|
static VALUE
|
73
|
-
|
74
|
-
|
164
|
+
cs_method_length(VALUE self)
|
165
|
+
{
|
166
|
+
return LONG2FIX(cs_active_cp_count(self));
|
167
|
+
}
|
168
|
+
|
169
|
+
static inline VALUE
|
170
|
+
cs_enumerator_length(VALUE self, VALUE args, VALUE eobj)
|
171
|
+
{
|
172
|
+
return LONG2FIX(cs_active_cp_count(self));
|
75
173
|
}
|
76
174
|
|
77
175
|
static VALUE
|
78
|
-
|
79
|
-
|
176
|
+
cs_method_each(VALUE self)
|
177
|
+
{
|
178
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
80
179
|
FOR_EACH_ACTIVE_CODEPOINT(rb_yield(LONG2FIX(cp)));
|
81
180
|
return self;
|
82
181
|
}
|
@@ -84,16 +183,19 @@ method_each(VALUE self) {
|
|
84
183
|
// returns an Array of codepoint Integers by default.
|
85
184
|
// returns an Array of Strings of length 1 if passed `true`.
|
86
185
|
static VALUE
|
87
|
-
|
186
|
+
cs_method_to_a(int argc, VALUE *argv, VALUE self)
|
187
|
+
{
|
88
188
|
VALUE arr;
|
89
189
|
rb_encoding *enc;
|
90
190
|
rb_check_arity(argc, 0, 1);
|
91
191
|
|
92
192
|
arr = rb_ary_new();
|
93
|
-
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
193
|
+
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
194
|
+
{
|
94
195
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, LONG2FIX(cp)));
|
95
196
|
}
|
96
|
-
else
|
197
|
+
else
|
198
|
+
{
|
97
199
|
enc = rb_utf8_encoding();
|
98
200
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, rb_enc_uint_chr((int)cp, enc)));
|
99
201
|
}
|
@@ -102,302 +204,472 @@ method_to_a(int argc, VALUE *argv, VALUE self) {
|
|
102
204
|
}
|
103
205
|
|
104
206
|
static VALUE
|
105
|
-
|
207
|
+
cs_method_empty_p(VALUE self)
|
208
|
+
{
|
106
209
|
FOR_EACH_ACTIVE_CODEPOINT(return Qfalse);
|
107
210
|
return Qtrue;
|
108
211
|
}
|
109
212
|
|
110
213
|
static VALUE
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
214
|
+
cs_method_hash(VALUE self)
|
215
|
+
{
|
216
|
+
cs_cp cp, len, hash, four_byte_value;
|
217
|
+
cs_ar *cps;
|
218
|
+
cps = cs_fetch_cps(self, &len);
|
115
219
|
|
116
220
|
hash = 17;
|
117
|
-
for (cp = 0; cp <
|
118
|
-
|
119
|
-
|
221
|
+
for (cp = 0; cp < len; cp++)
|
222
|
+
{
|
223
|
+
if (cp % 32 == 0)
|
224
|
+
{
|
225
|
+
if (cp != 0)
|
226
|
+
{
|
227
|
+
hash = hash * 23 + four_byte_value;
|
228
|
+
}
|
120
229
|
four_byte_value = 0;
|
121
230
|
}
|
122
|
-
if (
|
231
|
+
if (tst_cp(cps, len, cp))
|
232
|
+
{
|
233
|
+
four_byte_value++;
|
234
|
+
}
|
123
235
|
}
|
124
236
|
|
125
237
|
return LONG2FIX(hash);
|
126
238
|
}
|
127
239
|
|
128
240
|
static inline VALUE
|
129
|
-
|
241
|
+
cs_delete_if_block_result(VALUE self, int truthy)
|
242
|
+
{
|
130
243
|
VALUE result;
|
131
244
|
rb_need_block();
|
132
245
|
rb_check_frozen(self);
|
133
246
|
FOR_EACH_ACTIVE_CODEPOINT(
|
134
|
-
|
135
|
-
|
136
|
-
);
|
247
|
+
result = rb_yield(LONG2FIX(cp));
|
248
|
+
if ((NIL_P(result) || result == Qfalse) != truthy) clr_cp(cps, len, cp););
|
137
249
|
return self;
|
138
250
|
}
|
139
251
|
|
140
252
|
static VALUE
|
141
|
-
|
142
|
-
|
143
|
-
|
253
|
+
cs_method_delete_if(VALUE self)
|
254
|
+
{
|
255
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
256
|
+
return cs_delete_if_block_result(self, 1);
|
144
257
|
}
|
145
258
|
|
146
259
|
static VALUE
|
147
|
-
|
148
|
-
|
149
|
-
|
260
|
+
cs_method_keep_if(VALUE self)
|
261
|
+
{
|
262
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
263
|
+
return cs_delete_if_block_result(self, 0);
|
150
264
|
}
|
151
265
|
|
152
266
|
static VALUE
|
153
|
-
|
154
|
-
|
155
|
-
|
267
|
+
cs_method_clear(VALUE self)
|
268
|
+
{
|
269
|
+
struct cs_data *data;
|
156
270
|
rb_check_frozen(self);
|
157
|
-
|
158
|
-
|
159
|
-
CLRBIT(cps, cp);
|
160
|
-
}
|
271
|
+
data = cs_fetch_data(self);
|
272
|
+
memset(data->cps, 0, CS_MSIZE(data->len));
|
161
273
|
return self;
|
162
274
|
}
|
163
275
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
276
|
+
static VALUE
|
277
|
+
cs_method_min(VALUE self)
|
278
|
+
{
|
279
|
+
FOR_EACH_ACTIVE_CODEPOINT(return LONG2FIX(cp));
|
280
|
+
return Qnil;
|
281
|
+
}
|
282
|
+
|
283
|
+
static VALUE
|
284
|
+
cs_method_max(VALUE self)
|
285
|
+
{
|
286
|
+
cs_cp len;
|
287
|
+
long reverse_idx;
|
288
|
+
cs_ar *cps;
|
289
|
+
cps = cs_fetch_cps(self, &len);
|
290
|
+
for (reverse_idx = len; reverse_idx >= 0; reverse_idx--)
|
291
|
+
{
|
292
|
+
if (tst_cp(cps, len, reverse_idx))
|
293
|
+
{
|
294
|
+
return LONG2FIX(reverse_idx);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
return Qnil;
|
298
|
+
}
|
299
|
+
|
300
|
+
static VALUE
|
301
|
+
cs_method_minmax(VALUE self)
|
302
|
+
{
|
303
|
+
VALUE arr;
|
304
|
+
arr = rb_ary_new2(2);
|
305
|
+
rb_ary_push(arr, cs_method_min(self));
|
306
|
+
rb_ary_push(arr, cs_method_max(self));
|
307
|
+
return arr;
|
308
|
+
}
|
309
|
+
|
310
|
+
#define RETURN_COMBINED_CS(cs_a, cs_b, comp_op) \
|
311
|
+
do \
|
312
|
+
{ \
|
313
|
+
VALUE new_cs; \
|
314
|
+
cs_cp cp, alen, blen; \
|
315
|
+
cs_ar *acps, *bcps; \
|
316
|
+
struct cs_data *new_data; \
|
317
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data); \
|
318
|
+
acps = cs_fetch_cps(cs_a, &alen); \
|
319
|
+
bcps = cs_fetch_cps(cs_b, &blen); \
|
320
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) \
|
321
|
+
{ \
|
322
|
+
if (tst_cp(acps, alen, cp) comp_op tst_cp(bcps, blen, cp)) \
|
323
|
+
{ \
|
324
|
+
set_cp(new_data, cp); \
|
325
|
+
} \
|
326
|
+
} \
|
327
|
+
return new_cs; \
|
328
|
+
} while (0)
|
174
329
|
|
175
330
|
static VALUE
|
176
|
-
|
177
|
-
|
331
|
+
cs_method_intersection(VALUE self, VALUE other)
|
332
|
+
{
|
333
|
+
RETURN_COMBINED_CS(self, other, &&);
|
178
334
|
}
|
179
335
|
|
180
336
|
static VALUE
|
181
|
-
|
182
|
-
|
337
|
+
cs_method_exclusion(VALUE self, VALUE other)
|
338
|
+
{
|
339
|
+
RETURN_COMBINED_CS(self, other, ^);
|
183
340
|
}
|
184
341
|
|
185
342
|
static VALUE
|
186
|
-
|
187
|
-
|
343
|
+
cs_method_union(VALUE self, VALUE other)
|
344
|
+
{
|
345
|
+
RETURN_COMBINED_CS(self, other, ||);
|
188
346
|
}
|
189
347
|
|
190
348
|
static VALUE
|
191
|
-
|
192
|
-
|
349
|
+
cs_method_difference(VALUE self, VALUE other)
|
350
|
+
{
|
351
|
+
RETURN_COMBINED_CS(self, other, >);
|
193
352
|
}
|
194
353
|
|
195
354
|
static VALUE
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
355
|
+
cs_method_include_p(VALUE self, VALUE num)
|
356
|
+
{
|
357
|
+
cs_ar *cps;
|
358
|
+
cs_cp len;
|
359
|
+
cps = cs_fetch_cps(self, &len);
|
360
|
+
return (tst_cp(cps, len, FIX2ULONG(num)) ? Qtrue : Qfalse);
|
200
361
|
}
|
201
362
|
|
202
|
-
static inline
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
363
|
+
static inline VALUE
|
364
|
+
cs_toggle_codepoint(VALUE cs, VALUE cp_num, int on, int return_nil_if_noop)
|
365
|
+
{
|
366
|
+
cs_cp cp, len;
|
367
|
+
cs_ar *cps;
|
368
|
+
struct cs_data *data;
|
369
|
+
rb_check_frozen(cs);
|
370
|
+
data = cs_fetch_data(cs);
|
371
|
+
cps = data->cps;
|
372
|
+
len = data->len;
|
208
373
|
cp = FIX2ULONG(cp_num);
|
209
|
-
if (
|
210
|
-
|
374
|
+
if (return_nil_if_noop && (!tst_cp(cps, len, cp) == !on))
|
375
|
+
{
|
376
|
+
return Qnil;
|
211
377
|
}
|
212
|
-
else
|
213
|
-
|
214
|
-
|
215
|
-
|
378
|
+
else
|
379
|
+
{
|
380
|
+
if (on)
|
381
|
+
{
|
382
|
+
set_cp(data, cp);
|
383
|
+
}
|
384
|
+
else
|
385
|
+
{
|
386
|
+
clr_cp(cps, len, cp);
|
387
|
+
}
|
388
|
+
return cs;
|
216
389
|
}
|
217
390
|
}
|
218
391
|
|
219
392
|
static VALUE
|
220
|
-
|
221
|
-
|
393
|
+
cs_method_add(VALUE self, VALUE cp_num)
|
394
|
+
{
|
395
|
+
return cs_toggle_codepoint(self, cp_num, 1, 0);
|
222
396
|
}
|
223
397
|
|
224
398
|
static VALUE
|
225
|
-
|
226
|
-
|
399
|
+
cs_method_add_p(VALUE self, VALUE cp_num)
|
400
|
+
{
|
401
|
+
return cs_toggle_codepoint(self, cp_num, 1, 1);
|
227
402
|
}
|
228
403
|
|
229
404
|
static VALUE
|
230
|
-
|
231
|
-
|
405
|
+
cs_method_delete(VALUE self, VALUE cp_num)
|
406
|
+
{
|
407
|
+
return cs_toggle_codepoint(self, cp_num, 0, 0);
|
232
408
|
}
|
233
409
|
|
234
410
|
static VALUE
|
235
|
-
|
236
|
-
|
411
|
+
cs_method_delete_p(VALUE self, VALUE cp_num)
|
412
|
+
{
|
413
|
+
return cs_toggle_codepoint(self, cp_num, 0, 1);
|
237
414
|
}
|
238
415
|
|
239
|
-
#define COMPARE_SETS(action)\
|
240
|
-
cp_index cp;\
|
241
|
-
cp_byte *cps, *other_cps;\
|
242
|
-
FETCH_CODEPOINTS(self, cps);\
|
243
|
-
FETCH_CODEPOINTS(other, other_cps);\
|
244
|
-
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) { action; }\
|
245
|
-
|
246
416
|
static VALUE
|
247
|
-
|
248
|
-
|
417
|
+
cs_method_intersect_p(VALUE self, VALUE other)
|
418
|
+
{
|
419
|
+
cs_cp cp, alen, blen;
|
420
|
+
cs_ar *acps, *bcps;
|
421
|
+
acps = cs_fetch_cps(self, &alen);
|
422
|
+
bcps = cs_fetch_cps(other, &blen);
|
423
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
424
|
+
{
|
425
|
+
if (tst_cp(acps, alen, cp) && tst_cp(bcps, blen, cp))
|
426
|
+
{
|
427
|
+
return Qtrue;
|
428
|
+
}
|
429
|
+
}
|
249
430
|
return Qfalse;
|
250
431
|
}
|
251
432
|
|
252
433
|
static VALUE
|
253
|
-
|
254
|
-
|
434
|
+
cs_method_disjoint_p(VALUE self, VALUE other)
|
435
|
+
{
|
436
|
+
return cs_method_intersect_p(self, other) ? Qfalse : Qtrue;
|
255
437
|
}
|
256
438
|
|
257
439
|
static inline int
|
258
|
-
|
259
|
-
|
440
|
+
cs_check_type(VALUE obj)
|
441
|
+
{
|
442
|
+
return rb_typeddata_is_kind_of(obj, &cs_type);
|
260
443
|
}
|
261
444
|
|
262
445
|
static VALUE
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
446
|
+
cs_cps_eql(VALUE cs_a, VALUE cs_b)
|
447
|
+
{
|
448
|
+
cs_cp cp, alen, blen;
|
449
|
+
cs_ar *acps, *bcps;
|
450
|
+
acps = cs_fetch_cps(cs_a, &alen);
|
451
|
+
bcps = cs_fetch_cps(cs_b, &blen);
|
452
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
453
|
+
{
|
454
|
+
if (tst_cp(acps, alen, cp) != tst_cp(bcps, blen, cp))
|
455
|
+
{
|
456
|
+
return Qfalse;
|
457
|
+
}
|
458
|
+
}
|
269
459
|
return Qtrue;
|
270
460
|
}
|
271
461
|
|
462
|
+
static VALUE
|
463
|
+
cs_method_eql_p(VALUE self, VALUE other)
|
464
|
+
{
|
465
|
+
if (!cs_check_type(other))
|
466
|
+
{
|
467
|
+
return Qfalse;
|
468
|
+
}
|
469
|
+
if (self == other) // same object_id
|
470
|
+
{
|
471
|
+
return Qtrue;
|
472
|
+
}
|
473
|
+
return cs_cps_eql(self, other);
|
474
|
+
}
|
475
|
+
|
272
476
|
static inline VALUE
|
273
|
-
|
274
|
-
|
275
|
-
|
477
|
+
cs_merge_cs(VALUE recipient, VALUE source)
|
478
|
+
{
|
479
|
+
cs_cp cp, source_len;
|
480
|
+
struct cs_data *data;
|
481
|
+
cs_ar *source_cps;
|
482
|
+
data = cs_fetch_data(recipient);
|
483
|
+
source_cps = cs_fetch_cps(source, &source_len);
|
484
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
485
|
+
{
|
486
|
+
if (tst_cp(source_cps, source_len, cp))
|
487
|
+
{
|
488
|
+
set_cp(data, cp);
|
489
|
+
}
|
490
|
+
}
|
491
|
+
return recipient;
|
276
492
|
}
|
277
493
|
|
278
|
-
static inline
|
279
|
-
|
280
|
-
|
494
|
+
static inline cs_cp
|
495
|
+
cs_checked_cp(VALUE object_id)
|
496
|
+
{
|
497
|
+
if (FIXNUM_P(object_id) && object_id > 0 && object_id < 0x220001)
|
498
|
+
{
|
499
|
+
return FIX2ULONG(object_id);
|
500
|
+
}
|
281
501
|
rb_raise(rb_eArgError, "CharacterSet members must be between 0 and 0x10FFFF");
|
282
502
|
}
|
283
503
|
|
284
504
|
// Adds all codepoints of a Ruby Range to the set `self` and returns `self`.
// Raises ArgumentError if `rb_range` is not a Range, or (via cs_checked_cp)
// if one of its ends is not an acceptable codepoint.
static inline VALUE
cs_merge_rb_range(VALUE self, VALUE rb_range)
{
  VALUE from_id, upto_id;
  cs_cp from_cp, upto_cp, cont_len, rem;
  int excl;
  struct cs_data *data;
  data = cs_fetch_data(self);

  if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
  {
    rb_raise(rb_eArgError, "pass a Range");
  }
  if (excl)
  {
    // exclusive range: step the end back by one; this operates on the raw
    // VALUE before it is validated below.
    // NOTE(review): relies on the fixnum tag encoding (n -> 2n+1) - confirm.
    upto_id -= 2;
  }

  from_cp = cs_checked_cp(from_id);
  upto_cp = cs_checked_cp(upto_id);

  // fast path, only taken for ranges spanning more than 7 codepoints
  if (upto_cp > from_cp && (upto_cp - from_cp > 6))
  {
    // set bits in preceding partially toggled bytes individually
    for (/* */; (from_cp <= upto_cp) && (from_cp % 8); from_cp++)
    {
      set_cp(data, from_cp);
    }
    // memset contiguous bits directly
    cont_len = upto_cp - from_cp + 1;
    rem = cont_len % 8;
    ensure_memsize_fits(data, upto_cp);
    // NOTE(review): offset and byte count depend on CS_MSIZE, which is
    // defined outside this view - verify the `/ 8` against its definition.
    memset(data->cps + CS_MSIZE(from_cp), 0xFF, CS_MSIZE(cont_len - rem) / 8);
    // continue with the `rem` trailing codepoints that do not fill a byte
    from_cp = upto_cp - rem + 1;
  }

  // set bits in partially toggled bytes individually
  for (/* */; from_cp <= upto_cp; from_cp++)
  {
    set_cp(data, from_cp);
  }

  return self;
}
|
306
548
|
|
307
549
|
static inline VALUE
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
FETCH_CODEPOINTS(self, cps);
|
550
|
+
cs_merge_rb_array(VALUE self, VALUE rb_array)
|
551
|
+
{
|
552
|
+
VALUE el, array_length, i;
|
553
|
+
struct cs_data *data;
|
313
554
|
Check_Type(rb_array, T_ARRAY);
|
555
|
+
data = cs_fetch_data(self);
|
314
556
|
array_length = RARRAY_LEN(rb_array);
|
315
|
-
for (i = 0; i < array_length; i++)
|
557
|
+
for (i = 0; i < array_length; i++)
|
558
|
+
{
|
316
559
|
el = RARRAY_AREF(rb_array, i);
|
317
|
-
|
318
|
-
SETBIT(cps, FIX2ULONG(el));
|
560
|
+
set_cp(data, cs_checked_cp(el));
|
319
561
|
}
|
320
562
|
return self;
|
321
563
|
}
|
322
564
|
|
323
565
|
static VALUE
|
324
|
-
|
566
|
+
cs_method_merge(VALUE self, VALUE other)
|
567
|
+
{
|
325
568
|
rb_check_frozen(self);
|
326
|
-
if (
|
327
|
-
|
569
|
+
if (cs_check_type(other))
|
570
|
+
{
|
571
|
+
return cs_merge_cs(self, other);
|
328
572
|
}
|
329
|
-
else if (TYPE(other) == T_ARRAY)
|
330
|
-
|
573
|
+
else if (TYPE(other) == T_ARRAY)
|
574
|
+
{
|
575
|
+
return cs_merge_rb_array(self, other);
|
331
576
|
}
|
332
|
-
return
|
577
|
+
return cs_merge_rb_range(self, other);
|
333
578
|
}
|
334
579
|
|
335
580
|
static VALUE
|
336
|
-
|
337
|
-
|
338
|
-
|
581
|
+
cs_method_initialize_copy(VALUE self, VALUE orig)
|
582
|
+
{
|
583
|
+
cs_merge_cs(self, orig);
|
584
|
+
return self;
|
339
585
|
}
|
340
586
|
|
341
587
|
static VALUE
|
342
|
-
|
588
|
+
cs_method_subtract(VALUE self, VALUE other)
|
589
|
+
{
|
590
|
+
cs_cp cp, len, other_len;
|
591
|
+
cs_ar *cps, *other_cps;
|
343
592
|
rb_check_frozen(self);
|
344
|
-
|
593
|
+
cps = cs_fetch_cps(self, &len);
|
594
|
+
other_cps = cs_fetch_cps(other, &other_len);
|
595
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
596
|
+
{
|
597
|
+
if (tst_cp(other_cps, other_len, cp))
|
598
|
+
{
|
599
|
+
clr_cp(cps, len, cp);
|
600
|
+
}
|
601
|
+
}
|
345
602
|
return self;
|
346
603
|
}
|
347
604
|
|
348
605
|
static inline int
|
349
|
-
|
350
|
-
|
351
|
-
|
606
|
+
cs_a_subset_of_b(VALUE cs_a, VALUE cs_b, int *is_proper_ptr)
|
607
|
+
{
|
608
|
+
cs_ar *a, *b;
|
609
|
+
cs_cp cp, alen, blen, count_a, count_b;
|
352
610
|
|
353
|
-
if (!
|
611
|
+
if (!cs_check_type(cs_a) || !cs_check_type(cs_b))
|
612
|
+
{
|
354
613
|
rb_raise(rb_eArgError, "pass a CharacterSet");
|
355
614
|
}
|
356
615
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
if (
|
366
|
-
|
367
|
-
|
368
|
-
|
616
|
+
a = cs_fetch_cps(cs_a, &alen);
|
617
|
+
b = cs_fetch_cps(cs_b, &blen);
|
618
|
+
|
619
|
+
count_a = 0;
|
620
|
+
count_b = 0;
|
621
|
+
|
622
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
623
|
+
{
|
624
|
+
if (tst_cp(a, alen, cp))
|
625
|
+
{
|
626
|
+
if (!tst_cp(b, blen, cp))
|
627
|
+
{
|
628
|
+
return 0;
|
629
|
+
}
|
630
|
+
count_a++;
|
631
|
+
count_b++;
|
632
|
+
}
|
633
|
+
else if (tst_cp(b, blen, cp))
|
634
|
+
{
|
635
|
+
count_b++;
|
369
636
|
}
|
370
|
-
else if (TSTBIT(cps_b, cp)) size_b++;
|
371
637
|
}
|
372
638
|
|
373
|
-
if (
|
639
|
+
if (is_proper_ptr)
|
640
|
+
{
|
641
|
+
*is_proper_ptr = count_b > count_a;
|
642
|
+
}
|
643
|
+
|
374
644
|
return 1;
|
375
645
|
}
|
376
646
|
|
377
647
|
static VALUE
|
378
|
-
|
379
|
-
|
380
|
-
return
|
648
|
+
cs_method_subset_p(VALUE self, VALUE other)
|
649
|
+
{
|
650
|
+
return cs_a_subset_of_b(self, other, NULL) ? Qtrue : Qfalse;
|
381
651
|
}
|
382
652
|
|
383
653
|
static VALUE
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
654
|
+
cs_method_proper_subset_p(VALUE self, VALUE other)
|
655
|
+
{
|
656
|
+
int is_subset, is_proper;
|
657
|
+
is_subset = cs_a_subset_of_b(self, other, &is_proper);
|
658
|
+
return (is_subset && is_proper) ? Qtrue : Qfalse;
|
388
659
|
}
|
389
660
|
|
390
661
|
static VALUE
|
391
|
-
|
392
|
-
|
393
|
-
return
|
662
|
+
cs_method_superset_p(VALUE self, VALUE other)
|
663
|
+
{
|
664
|
+
return cs_a_subset_of_b(other, self, NULL) ? Qtrue : Qfalse;
|
394
665
|
}
|
395
666
|
|
396
667
|
static VALUE
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
668
|
+
cs_method_proper_superset_p(VALUE self, VALUE other)
|
669
|
+
{
|
670
|
+
int is_superset, is_proper;
|
671
|
+
is_superset = cs_a_subset_of_b(other, self, &is_proper);
|
672
|
+
return (is_superset && is_proper) ? Qtrue : Qfalse;
|
401
673
|
}
|
402
674
|
|
403
675
|
// *******************************
|
@@ -405,42 +677,43 @@ method_proper_superset_p(VALUE self, VALUE other) {
|
|
405
677
|
// *******************************
|
406
678
|
|
407
679
|
static VALUE
|
408
|
-
|
409
|
-
|
410
|
-
|
680
|
+
cs_class_method_from_ranges(VALUE self, VALUE ranges)
|
681
|
+
{
|
682
|
+
VALUE new_cs, range_count, i;
|
683
|
+
new_cs = rb_class_new_instance(0, 0, self);
|
411
684
|
range_count = RARRAY_LEN(ranges);
|
412
|
-
for (i = 0; i < range_count; i++)
|
413
|
-
|
685
|
+
for (i = 0; i < range_count; i++)
|
686
|
+
{
|
687
|
+
cs_merge_rb_range(new_cs, RARRAY_AREF(ranges, i));
|
414
688
|
}
|
415
|
-
return
|
689
|
+
return new_cs;
|
416
690
|
}
|
417
691
|
|
418
692
|
static VALUE
|
419
|
-
|
420
|
-
|
693
|
+
cs_method_ranges(VALUE self)
|
694
|
+
{
|
695
|
+
VALUE ranges, cp_num, previous_cp_num, current_start, current_end;
|
421
696
|
|
422
697
|
ranges = rb_ary_new();
|
423
|
-
|
698
|
+
previous_cp_num = 0;
|
424
699
|
current_start = 0;
|
425
700
|
current_end = 0;
|
426
701
|
|
427
702
|
FOR_EACH_ACTIVE_CODEPOINT(
|
428
|
-
|
703
|
+
cp_num = LONG2FIX(cp);
|
429
704
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
current_end = codepoint;
|
439
|
-
previous_codepoint = codepoint;
|
440
|
-
);
|
705
|
+
if (!previous_cp_num) {
|
706
|
+
current_start = cp_num;
|
707
|
+
} else if (previous_cp_num + 2 != cp_num) {
|
708
|
+
// gap found, finalize previous range
|
709
|
+
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
710
|
+
current_start = cp_num;
|
711
|
+
} current_end = cp_num;
|
712
|
+
previous_cp_num = cp_num;);
|
441
713
|
|
442
714
|
// add final range
|
443
|
-
if (current_start)
|
715
|
+
if (current_start)
|
716
|
+
{
|
444
717
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
445
718
|
}
|
446
719
|
|
@@ -448,117 +721,233 @@ method_ranges(VALUE self) {
|
|
448
721
|
}
|
449
722
|
|
450
723
|
static VALUE
|
451
|
-
|
452
|
-
|
724
|
+
cs_method_sample(int argc, VALUE *argv, VALUE self)
|
725
|
+
{
|
726
|
+
VALUE array, to_a_args[1] = {Qtrue};
|
453
727
|
rb_check_arity(argc, 0, 1);
|
454
|
-
|
455
|
-
array = method_to_a(1, to_a_args, self);
|
728
|
+
array = cs_method_to_a(1, to_a_args, self);
|
456
729
|
return rb_funcall(array, rb_intern("sample"), argc, argc ? argv[0] : 0);
|
457
730
|
}
|
458
731
|
|
459
732
|
static inline VALUE
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
733
|
+
cs_from_section(VALUE set, cs_cp from, cs_cp upto)
|
734
|
+
{
|
735
|
+
VALUE new_cs;
|
736
|
+
cs_ar *cps;
|
737
|
+
cs_cp cp, len;
|
738
|
+
struct cs_data *new_data;
|
739
|
+
new_cs = cs_alloc(RBASIC(set)->klass, &new_data);
|
740
|
+
cps = cs_fetch_cps(set, &len);
|
741
|
+
for (cp = from; cp <= upto; cp++)
|
742
|
+
{
|
743
|
+
if (tst_cp(cps, len, cp))
|
744
|
+
{
|
745
|
+
set_cp(new_data, cp);
|
746
|
+
}
|
467
747
|
}
|
468
|
-
return
|
748
|
+
return new_cs;
|
469
749
|
}
|
470
750
|
|
471
751
|
static VALUE
|
472
|
-
|
473
|
-
|
752
|
+
cs_method_ext_section(VALUE self, VALUE from, VALUE upto)
|
753
|
+
{
|
754
|
+
return cs_from_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
755
|
+
}
|
756
|
+
|
757
|
+
static inline cs_cp
|
758
|
+
cs_active_cp_count_in_section(VALUE set, cs_cp from, cs_cp upto)
|
759
|
+
{
|
760
|
+
cs_ar *cps;
|
761
|
+
cs_cp cp, count, len;
|
762
|
+
cps = cs_fetch_cps(set, &len);
|
763
|
+
for (count = 0, cp = from; cp <= upto; cp++)
|
764
|
+
{
|
765
|
+
if (tst_cp(cps, len, cp))
|
766
|
+
{
|
767
|
+
count++;
|
768
|
+
}
|
769
|
+
}
|
770
|
+
return count;
|
474
771
|
}
|
475
772
|
|
476
773
|
static VALUE
|
477
|
-
|
478
|
-
|
774
|
+
cs_method_ext_count_in_section(VALUE self, VALUE from, VALUE upto)
|
775
|
+
{
|
776
|
+
cs_cp count;
|
777
|
+
count = cs_active_cp_count_in_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
778
|
+
return LONG2FIX(count);
|
479
779
|
}
|
480
780
|
|
481
781
|
static inline VALUE
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
782
|
+
cs_has_cp_in_section(cs_ar *cps, cs_cp len, cs_cp from, cs_cp upto)
|
783
|
+
{
|
784
|
+
cs_cp cp;
|
785
|
+
for (cp = from; cp <= upto; cp++)
|
786
|
+
{
|
787
|
+
if (tst_cp(cps, len, cp))
|
788
|
+
{
|
789
|
+
return Qtrue;
|
790
|
+
}
|
490
791
|
}
|
491
792
|
return Qfalse;
|
492
793
|
}
|
493
794
|
|
494
795
|
static VALUE
|
495
|
-
|
796
|
+
cs_method_ext_section_p(VALUE self, VALUE from, VALUE upto)
|
797
|
+
{
|
798
|
+
cs_ar *cps;
|
799
|
+
cs_cp len;
|
800
|
+
cps = cs_fetch_cps(self, &len);
|
801
|
+
return cs_has_cp_in_section(cps, len, FIX2ULONG(from), FIX2ULONG(upto));
|
802
|
+
}
|
803
|
+
|
804
|
+
static inline VALUE
|
805
|
+
cs_ratio_of_section(VALUE set, cs_cp from, cs_cp upto)
|
806
|
+
{
|
807
|
+
double section_count, total_count;
|
808
|
+
section_count = (double)cs_active_cp_count_in_section(set, from, upto);
|
809
|
+
total_count = (double)cs_active_cp_count(set);
|
810
|
+
return DBL2NUM(section_count / total_count);
|
811
|
+
}
|
812
|
+
|
813
|
+
static VALUE
|
814
|
+
cs_method_ext_section_ratio(VALUE self, VALUE from, VALUE upto)
|
815
|
+
{
|
816
|
+
return cs_ratio_of_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
817
|
+
}
|
818
|
+
|
819
|
+
// Codepoint boundaries (inclusive values).
// highest codepoint
#define MAX_CP 0x10FFFF
// highest ASCII codepoint
#define MAX_ASCII_CP 0x7F
// highest codepoint of the BMP (plane 0)
#define MAX_BMP_CP 0xFFFF
// first codepoint above the BMP
#define MIN_ASTRAL_CP 0x10000
|
823
|
+
|
824
|
+
static inline VALUE
|
825
|
+
cs_has_cp_in_plane(cs_ar *cps, cs_cp len, unsigned int plane)
|
826
|
+
{
|
827
|
+
cs_cp plane_beg, plane_end;
|
828
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
829
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
830
|
+
return cs_has_cp_in_section(cps, len, plane_beg, plane_end);
|
831
|
+
}
|
832
|
+
|
833
|
+
static VALUE
|
834
|
+
cs_method_planes(VALUE self)
|
835
|
+
{
|
836
|
+
cs_ar *cps;
|
837
|
+
cs_cp len;
|
496
838
|
unsigned int i;
|
497
839
|
VALUE planes;
|
840
|
+
cps = cs_fetch_cps(self, &len);
|
498
841
|
planes = rb_ary_new();
|
499
|
-
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
500
|
-
|
842
|
+
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
843
|
+
{
|
844
|
+
if (cs_has_cp_in_plane(cps, len, i))
|
845
|
+
{
|
846
|
+
rb_ary_push(planes, INT2FIX(i));
|
847
|
+
}
|
501
848
|
}
|
502
849
|
return planes;
|
503
850
|
}
|
504
851
|
|
505
|
-
static
|
506
|
-
|
852
|
+
static inline int
|
853
|
+
cs_valid_plane_num(VALUE num)
|
854
|
+
{
|
507
855
|
int plane;
|
508
|
-
Check_Type(
|
509
|
-
plane = FIX2INT(
|
510
|
-
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
511
|
-
|
856
|
+
Check_Type(num, T_FIXNUM);
|
857
|
+
plane = FIX2INT(num);
|
858
|
+
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
859
|
+
{
|
860
|
+
rb_raise(rb_eArgError, "plane must be between 0 and %d", UNICODE_PLANE_COUNT - 1);
|
512
861
|
}
|
513
|
-
return
|
862
|
+
return plane;
|
863
|
+
}
|
864
|
+
|
865
|
+
static VALUE
|
866
|
+
cs_method_plane(VALUE self, VALUE plane_num)
|
867
|
+
{
|
868
|
+
cs_cp plane, plane_beg, plane_end;
|
869
|
+
plane = cs_valid_plane_num(plane_num);
|
870
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
871
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
872
|
+
return cs_from_section(self, plane_beg, plane_end);
|
873
|
+
}
|
874
|
+
|
875
|
+
static VALUE
|
876
|
+
cs_method_member_in_plane_p(VALUE self, VALUE plane_num)
|
877
|
+
{
|
878
|
+
cs_ar *cps;
|
879
|
+
cs_cp len;
|
880
|
+
unsigned int plane;
|
881
|
+
plane = cs_valid_plane_num(plane_num);
|
882
|
+
cps = cs_fetch_cps(self, &len);
|
883
|
+
return cs_has_cp_in_plane(cps, len, plane);
|
514
884
|
}
|
515
885
|
|
516
886
|
// True for codepoints outside the surrogate range 0xD800..0xDFFF.
// The argument is parenthesized so that expression arguments are evaluated
// as a whole; note that it is still evaluated twice.
#define NON_SURROGATE(cp) ((cp) > 0xDFFF || (cp) < 0xD800)
|
517
887
|
|
518
888
|
static VALUE
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
889
|
+
cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
|
890
|
+
{
|
891
|
+
int inc_surr;
|
892
|
+
cs_cp upto, cp, len;
|
893
|
+
cs_ar *cps;
|
894
|
+
VALUE new_cs;
|
895
|
+
struct cs_data *new_data;
|
896
|
+
|
524
897
|
rb_check_arity(argc, 0, 2);
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
898
|
+
|
899
|
+
cps = cs_fetch_cps(self, &len);
|
900
|
+
inc_surr = argc && argv[0] == Qtrue;
|
901
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
902
|
+
upto = argc > 1 && FIXNUM_P(argv[1]) ? FIX2ULONG(argv[1]) : UNICODE_CP_COUNT;
|
903
|
+
|
904
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
905
|
+
{
|
906
|
+
if (cp <= upto && !tst_cp(cps, len, cp) && (inc_surr || NON_SURROGATE(cp)))
|
907
|
+
{
|
908
|
+
set_cp(new_data, cp);
|
909
|
+
}
|
531
910
|
}
|
532
|
-
|
533
|
-
|
534
|
-
);
|
911
|
+
|
912
|
+
return new_cs;
|
535
913
|
}
|
536
914
|
|
537
|
-
typedef int(*str_cp_handler)(unsigned int,
|
915
|
+
// Callback invoked once per codepoint of a string by the each_*_cp iterators.
// Receives the codepoint plus the codepoint array, its length, the set data
// and the memo pointer handed to the iterator. A zero return value stops the
// iteration.
typedef int (*str_cp_handler)(unsigned int, cs_ar *, cs_cp len, struct cs_data *data, VALUE *memo);
|
538
916
|
|
539
917
|
static inline int
|
540
|
-
add_str_cp_to_arr(unsigned int str_cp,
|
541
|
-
|
918
|
+
add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
919
|
+
{
|
920
|
+
set_cp(data, str_cp);
|
542
921
|
return 1;
|
543
922
|
}
|
544
923
|
|
545
924
|
static VALUE
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
925
|
+
cs_method_case_insensitive(VALUE self)
|
926
|
+
{
|
927
|
+
cs_cp i, len;
|
928
|
+
cs_ar *cps;
|
929
|
+
VALUE new_cs;
|
930
|
+
struct cs_data *new_data;
|
551
931
|
|
552
|
-
|
932
|
+
cps = cs_fetch_cps(self, &len);
|
933
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
934
|
+
cs_merge_cs(new_cs, self);
|
553
935
|
|
554
|
-
for (i = 0; i < CASEFOLD_COUNT; i++)
|
936
|
+
for (i = 0; i < CASEFOLD_COUNT; i++)
|
937
|
+
{
|
555
938
|
casefold_mapping m = unicode_casefold_table[i];
|
556
939
|
|
557
|
-
if
|
558
|
-
|
940
|
+
if (tst_cp(cps, len, m.from))
|
941
|
+
{
|
942
|
+
set_cp(new_data, m.to);
|
943
|
+
}
|
944
|
+
else if (tst_cp(cps, len, m.to))
|
945
|
+
{
|
946
|
+
set_cp(new_data, m.from);
|
947
|
+
}
|
559
948
|
}
|
560
949
|
|
561
|
-
return
|
950
|
+
return new_cs;
|
562
951
|
|
563
952
|
// OnigCaseFoldType flags;
|
564
953
|
// rb_encoding *enc;
|
@@ -573,20 +962,27 @@ method_case_insensitive(VALUE self) {
|
|
573
962
|
}
|
574
963
|
|
575
964
|
static inline VALUE
|
576
|
-
each_sb_cp(VALUE str, str_cp_handler func,
|
577
|
-
|
965
|
+
each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
966
|
+
{
|
967
|
+
long i, str_len;
|
578
968
|
unsigned int str_cp;
|
969
|
+
str_len = RSTRING_LEN(str);
|
579
970
|
|
580
|
-
for (i = 0; i <
|
971
|
+
for (i = 0; i < str_len; i++)
|
972
|
+
{
|
581
973
|
str_cp = (RSTRING_PTR(str)[i] & 0xff);
|
582
|
-
if (!(*func)(str_cp, cp_arr))
|
974
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
975
|
+
{
|
976
|
+
return Qfalse;
|
977
|
+
}
|
583
978
|
}
|
584
979
|
|
585
980
|
return Qtrue;
|
586
981
|
}
|
587
982
|
|
588
983
|
static inline VALUE
|
589
|
-
each_mb_cp(VALUE str, str_cp_handler func,
|
984
|
+
each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
985
|
+
{
|
590
986
|
int n;
|
591
987
|
unsigned int str_cp;
|
592
988
|
const char *ptr, *end;
|
@@ -597,9 +993,13 @@ each_mb_cp(VALUE str, str_cp_handler func, cp_byte *cp_arr) {
|
|
597
993
|
end = RSTRING_END(str);
|
598
994
|
enc = rb_enc_get(str);
|
599
995
|
|
600
|
-
while (ptr < end)
|
996
|
+
while (ptr < end)
|
997
|
+
{
|
601
998
|
str_cp = rb_enc_codepoint_len(ptr, end, &n, enc);
|
602
|
-
if (!(*func)(str_cp, cp_arr))
|
999
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
1000
|
+
{
|
1001
|
+
return Qfalse;
|
1002
|
+
}
|
603
1003
|
ptr += n;
|
604
1004
|
}
|
605
1005
|
|
@@ -611,105 +1011,238 @@ static inline int
|
|
611
1011
|
single_byte_optimizable(VALUE str)
|
612
1012
|
{
|
613
1013
|
rb_encoding *enc;
|
614
|
-
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1014
|
+
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1015
|
+
{
|
1016
|
+
return 1;
|
1017
|
+
}
|
615
1018
|
|
616
1019
|
enc = rb_enc_get(str);
|
617
|
-
if (rb_enc_mbmaxlen(enc) == 1)
|
1020
|
+
if (rb_enc_mbmaxlen(enc) == 1)
|
1021
|
+
{
|
1022
|
+
return 1;
|
1023
|
+
}
|
618
1024
|
|
619
1025
|
return 0;
|
620
1026
|
}
|
621
1027
|
|
622
1028
|
static inline VALUE
|
623
|
-
each_cp(VALUE str, str_cp_handler func,
|
624
|
-
|
625
|
-
|
1029
|
+
each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1030
|
+
{
|
1031
|
+
if (single_byte_optimizable(str))
|
1032
|
+
{
|
1033
|
+
return each_sb_cp(str, func, cp_arr, len, data, memo);
|
626
1034
|
}
|
627
|
-
return each_mb_cp(str, func, cp_arr);
|
1035
|
+
return each_mb_cp(str, func, cp_arr, len, data, memo);
|
628
1036
|
}
|
629
1037
|
|
630
1038
|
static inline void
|
631
|
-
raise_arg_err_unless_string(VALUE val)
|
632
|
-
|
1039
|
+
raise_arg_err_unless_string(VALUE val)
|
1040
|
+
{
|
1041
|
+
if (!RB_TYPE_P(val, T_STRING))
|
1042
|
+
{
|
1043
|
+
rb_raise(rb_eArgError, "pass a String");
|
1044
|
+
}
|
633
1045
|
}
|
634
1046
|
|
635
1047
|
static VALUE
|
636
|
-
|
637
|
-
|
1048
|
+
cs_class_method_of(VALUE self, VALUE str)
|
1049
|
+
{
|
1050
|
+
VALUE new_cs;
|
1051
|
+
struct cs_data *new_data;
|
1052
|
+
new_cs = cs_alloc(self, &new_data);
|
638
1053
|
raise_arg_err_unless_string(str);
|
639
|
-
|
640
|
-
|
641
|
-
return NEW_CHARACTER_SET(self, cp_arr);
|
1054
|
+
each_cp(str, add_str_cp_to_arr, 0, 0, new_data, 0);
|
1055
|
+
return new_cs;
|
642
1056
|
}
|
643
1057
|
|
644
1058
|
static inline int
|
645
|
-
|
646
|
-
|
1059
|
+
count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1060
|
+
{
|
1061
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1062
|
+
{
|
1063
|
+
*memo += 1;
|
1064
|
+
}
|
1065
|
+
return 1;
|
647
1066
|
}
|
648
1067
|
|
649
1068
|
static VALUE
|
650
|
-
|
651
|
-
|
652
|
-
VALUE
|
1069
|
+
cs_method_count_in(VALUE self, VALUE str)
|
1070
|
+
{
|
1071
|
+
VALUE count;
|
1072
|
+
struct cs_data *data;
|
653
1073
|
raise_arg_err_unless_string(str);
|
654
|
-
|
655
|
-
|
656
|
-
|
1074
|
+
data = cs_fetch_data(self);
|
1075
|
+
count = 0;
|
1076
|
+
each_cp(str, count_str_cp, data->cps, data->len, data, &count);
|
1077
|
+
return INT2NUM(count);
|
1078
|
+
}
|
1079
|
+
|
1080
|
+
static inline int
|
1081
|
+
str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1082
|
+
{
|
1083
|
+
return tst_cp(cp_arr, len, str_cp);
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
static VALUE
|
1087
|
+
cs_method_cover_p(VALUE self, VALUE str)
|
1088
|
+
{
|
1089
|
+
struct cs_data *data;
|
1090
|
+
raise_arg_err_unless_string(str);
|
1091
|
+
data = cs_fetch_data(self);
|
1092
|
+
return each_cp(str, str_cp_in_arr, data->cps, data->len, data, 0);
|
1093
|
+
}
|
1094
|
+
|
1095
|
+
static inline int
|
1096
|
+
add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1097
|
+
{
|
1098
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1099
|
+
{
|
1100
|
+
rb_ary_push(memo[0], rb_enc_uint_chr((int)str_cp, (rb_encoding *)memo[1]));
|
1101
|
+
}
|
1102
|
+
return 1;
|
1103
|
+
}
|
1104
|
+
|
1105
|
+
static VALUE
|
1106
|
+
cs_method_scan(VALUE self, VALUE str)
|
1107
|
+
{
|
1108
|
+
VALUE memo[2];
|
1109
|
+
struct cs_data *data;
|
1110
|
+
raise_arg_err_unless_string(str);
|
1111
|
+
data = cs_fetch_data(self);
|
1112
|
+
memo[0] = rb_ary_new();
|
1113
|
+
memo[1] = (VALUE)rb_enc_get(str);
|
1114
|
+
each_cp(str, add_str_cp_to_str_arr, data->cps, data->len, data, memo);
|
1115
|
+
return memo[0];
|
657
1116
|
}
|
658
1117
|
|
659
1118
|
static inline int
|
660
|
-
|
661
|
-
|
1119
|
+
str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1120
|
+
{
|
1121
|
+
return !tst_cp(cp_arr, len, str_cp);
|
662
1122
|
}
|
663
1123
|
|
664
1124
|
static VALUE
|
665
|
-
|
666
|
-
|
1125
|
+
cs_method_used_by_p(VALUE self, VALUE str)
|
1126
|
+
{
|
1127
|
+
VALUE only_uses_other_cps;
|
1128
|
+
struct cs_data *data;
|
667
1129
|
raise_arg_err_unless_string(str);
|
668
|
-
|
669
|
-
|
1130
|
+
data = cs_fetch_data(self);
|
1131
|
+
only_uses_other_cps = each_cp(str, str_cp_not_in_arr, data->cps, data->len, data, 0);
|
1132
|
+
return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
|
1133
|
+
}
|
1134
|
+
|
1135
|
+
// Appends `len` bytes from `ptr` to the end of the String `str` by writing
// into its heap buffer directly and bumping its heap length.
// There is no capacity check, no reallocation and no terminator written here.
// NOTE(review): reads/writes `as.heap` unconditionally, so this presumably
// requires a non-embedded string with enough spare capacity - confirm that
// every caller guarantees this.
static void
cs_str_buf_cat(VALUE str, const char *ptr, long len)
{
  long total, olen;
  char *sptr;

  // NOTE(review): both values fetched here are overwritten right below
  RSTRING_GETMEM(str, sptr, olen);
  sptr = RSTRING(str)->as.heap.ptr;
  olen = RSTRING(str)->as.heap.len;
  total = olen + len;
  memcpy(sptr + olen, ptr, len);
  RSTRING(str)->as.heap.len = total;
}
|
1148
|
+
|
1149
|
+
#ifndef TERM_FILL
// Writes a string terminator of `termlen` bytes at `ptr`: always one NUL
// byte, and `termlen` zero bytes when the terminator is wider than one byte.
// Only defined here if not already provided elsewhere.
#define TERM_FILL(ptr, termlen)               \
  do                                          \
  {                                           \
    char *const fill_at = (ptr);              \
    const int fill_width = (termlen);         \
    *fill_at = '\0';                          \
    if (__builtin_expect(fill_width > 1, 0))  \
    {                                         \
      memset(fill_at, 0, fill_width);         \
    }                                         \
  } while (0)
#endif
|
1160
|
+
|
1161
|
+
static void
|
1162
|
+
cs_str_buf_terminate(VALUE str, rb_encoding *enc)
|
1163
|
+
{
|
1164
|
+
char *ptr;
|
1165
|
+
long len;
|
1166
|
+
|
1167
|
+
ptr = RSTRING(str)->as.heap.ptr;
|
1168
|
+
len = RSTRING(str)->as.heap.len;
|
1169
|
+
TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
|
670
1170
|
}
|
671
1171
|
|
672
1172
|
static inline VALUE
|
673
|
-
|
674
|
-
|
1173
|
+
cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
|
1174
|
+
{
|
1175
|
+
cs_ar *cps;
|
1176
|
+
cs_cp len;
|
675
1177
|
rb_encoding *str_enc;
|
676
|
-
VALUE orig_len,
|
677
|
-
int
|
1178
|
+
VALUE orig_len, new_str_buf;
|
1179
|
+
int cp_len;
|
678
1180
|
unsigned int str_cp;
|
679
1181
|
const char *ptr, *end;
|
680
1182
|
|
681
1183
|
raise_arg_err_unless_string(str);
|
682
1184
|
|
683
|
-
|
1185
|
+
cps = cs_fetch_cps(set, &len);
|
684
1186
|
|
685
1187
|
orig_len = RSTRING_LEN(str);
|
686
|
-
|
687
|
-
|
1188
|
+
if (orig_len < 1) // empty string, will never change
|
1189
|
+
{
|
1190
|
+
if (bang)
|
1191
|
+
{
|
1192
|
+
return Qnil;
|
1193
|
+
}
|
1194
|
+
return rb_str_dup(str);
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
new_str_buf = rb_str_buf_new(orig_len);
|
688
1198
|
str_enc = rb_enc_get(str);
|
689
1199
|
rb_enc_associate(new_str_buf, str_enc);
|
690
|
-
|
691
|
-
|
1200
|
+
rb_str_modify(new_str_buf);
|
1201
|
+
ENC_CODERANGE_SET(new_str_buf, rb_enc_asciicompat(str_enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
|
692
1202
|
|
693
1203
|
ptr = RSTRING_PTR(str);
|
694
1204
|
end = RSTRING_END(str);
|
695
1205
|
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
1206
|
+
if (single_byte_optimizable(str))
|
1207
|
+
{
|
1208
|
+
while (ptr < end)
|
1209
|
+
{
|
1210
|
+
str_cp = *ptr & 0xff;
|
1211
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1212
|
+
{
|
1213
|
+
cs_str_buf_cat(new_str_buf, ptr, 1);
|
1214
|
+
}
|
1215
|
+
ptr++;
|
1216
|
+
}
|
1217
|
+
}
|
1218
|
+
else // likely to be multibyte string
|
1219
|
+
{
|
1220
|
+
while (ptr < end)
|
1221
|
+
{
|
1222
|
+
str_cp = rb_enc_codepoint_len(ptr, end, &cp_len, str_enc);
|
1223
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1224
|
+
{
|
1225
|
+
cs_str_buf_cat(new_str_buf, ptr, cp_len);
|
1226
|
+
}
|
1227
|
+
ptr += cp_len;
|
701
1228
|
}
|
702
|
-
ptr += n;
|
703
1229
|
}
|
704
1230
|
|
705
|
-
|
706
|
-
|
1231
|
+
cs_str_buf_terminate(new_str_buf, str_enc);
|
1232
|
+
|
1233
|
+
if (bang)
|
1234
|
+
{
|
1235
|
+
if (RSTRING_LEN(new_str_buf) == (long)orig_len) // string unchanged
|
1236
|
+
{
|
1237
|
+
return Qnil;
|
1238
|
+
}
|
707
1239
|
rb_str_shared_replace(str, new_str_buf);
|
708
1240
|
}
|
709
|
-
else
|
1241
|
+
else
|
1242
|
+
{
|
710
1243
|
RB_OBJ_WRITE(new_str_buf, &(RBASIC(new_str_buf))->klass, rb_obj_class(str));
|
711
1244
|
// slightly cumbersome approach needed for compatibility with Ruby < 2.3:
|
712
|
-
RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags&(FL_TAINT));
|
1245
|
+
RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags & (FL_TAINT));
|
713
1246
|
str = new_str_buf;
|
714
1247
|
}
|
715
1248
|
|
@@ -717,98 +1250,115 @@ apply_to_str(VALUE set, VALUE str, int delete, int bang) {
|
|
717
1250
|
}
|
718
1251
|
|
719
1252
|
static VALUE
|
720
|
-
|
721
|
-
|
1253
|
+
cs_method_delete_in(VALUE self, VALUE str)
|
1254
|
+
{
|
1255
|
+
return cs_apply_to_str(self, str, 1, 0);
|
1256
|
+
}
|
1257
|
+
|
1258
|
+
static VALUE
|
1259
|
+
cs_method_delete_in_bang(VALUE self, VALUE str)
|
1260
|
+
{
|
1261
|
+
return cs_apply_to_str(self, str, 1, 1);
|
722
1262
|
}
|
723
1263
|
|
724
1264
|
static VALUE
|
725
|
-
|
726
|
-
|
1265
|
+
cs_method_keep_in(VALUE self, VALUE str)
|
1266
|
+
{
|
1267
|
+
return cs_apply_to_str(self, str, 0, 0);
|
727
1268
|
}
|
728
1269
|
|
729
1270
|
static VALUE
|
730
|
-
|
731
|
-
|
1271
|
+
cs_method_keep_in_bang(VALUE self, VALUE str)
|
1272
|
+
{
|
1273
|
+
return cs_apply_to_str(self, str, 0, 1);
|
732
1274
|
}
|
733
1275
|
|
734
1276
|
static VALUE
|
735
|
-
|
736
|
-
|
1277
|
+
cs_method_allocated_length(VALUE self)
|
1278
|
+
{
|
1279
|
+
return LONG2FIX(cs_fetch_data(self)->len);
|
737
1280
|
}
|
738
1281
|
|
739
1282
|
// ****
|
740
1283
|
// init
|
741
1284
|
// ****
|
742
1285
|
|
743
|
-
void
|
744
|
-
Init_character_set()
|
1286
|
+
void Init_character_set()
|
745
1287
|
{
|
746
1288
|
VALUE cs = rb_define_class("CharacterSet", rb_cObject);
|
747
1289
|
|
748
|
-
rb_define_alloc_func(cs,
|
1290
|
+
rb_define_alloc_func(cs, cs_method_allocate);
|
749
1291
|
|
750
1292
|
// `Set` compatibility methods
|
751
1293
|
|
752
|
-
rb_define_method(cs, "each",
|
753
|
-
rb_define_method(cs, "to_a",
|
754
|
-
rb_define_method(cs, "length",
|
755
|
-
rb_define_method(cs, "size",
|
756
|
-
rb_define_method(cs, "
|
757
|
-
rb_define_method(cs, "
|
758
|
-
rb_define_method(cs, "
|
759
|
-
rb_define_method(cs, "
|
760
|
-
rb_define_method(cs, "
|
761
|
-
rb_define_method(cs, "
|
762
|
-
rb_define_method(cs, "
|
763
|
-
rb_define_method(cs, "
|
764
|
-
rb_define_method(cs, "
|
765
|
-
rb_define_method(cs, "
|
766
|
-
rb_define_method(cs, "
|
767
|
-
rb_define_method(cs, "
|
768
|
-
rb_define_method(cs, "
|
769
|
-
rb_define_method(cs, "
|
770
|
-
rb_define_method(cs, "
|
771
|
-
rb_define_method(cs, "
|
772
|
-
rb_define_method(cs, "
|
773
|
-
rb_define_method(cs, "
|
774
|
-
rb_define_method(cs, "
|
775
|
-
rb_define_method(cs, "add
|
776
|
-
rb_define_method(cs, "
|
777
|
-
rb_define_method(cs, "
|
778
|
-
rb_define_method(cs, "
|
779
|
-
rb_define_method(cs, "
|
780
|
-
rb_define_method(cs, "
|
781
|
-
rb_define_method(cs, "
|
782
|
-
rb_define_method(cs, "
|
783
|
-
rb_define_method(cs, "
|
784
|
-
rb_define_method(cs, "
|
785
|
-
rb_define_method(cs, "
|
786
|
-
rb_define_method(cs, "
|
787
|
-
rb_define_method(cs, "
|
788
|
-
rb_define_method(cs, "
|
789
|
-
rb_define_method(cs, "
|
790
|
-
rb_define_method(cs, "
|
791
|
-
rb_define_method(cs, "
|
792
|
-
rb_define_method(cs, "
|
793
|
-
rb_define_method(cs, "
|
1294
|
+
rb_define_method(cs, "each", cs_method_each, 0);
|
1295
|
+
rb_define_method(cs, "to_a", cs_method_to_a, -1);
|
1296
|
+
rb_define_method(cs, "length", cs_method_length, 0);
|
1297
|
+
rb_define_method(cs, "size", cs_method_length, 0);
|
1298
|
+
rb_define_method(cs, "empty?", cs_method_empty_p, 0);
|
1299
|
+
rb_define_method(cs, "hash", cs_method_hash, 0);
|
1300
|
+
rb_define_method(cs, "keep_if", cs_method_keep_if, 0);
|
1301
|
+
rb_define_method(cs, "delete_if", cs_method_delete_if, 0);
|
1302
|
+
rb_define_method(cs, "clear", cs_method_clear, 0);
|
1303
|
+
rb_define_method(cs, "min", cs_method_min, 0);
|
1304
|
+
rb_define_method(cs, "max", cs_method_max, 0);
|
1305
|
+
rb_define_method(cs, "minmax", cs_method_minmax, 0);
|
1306
|
+
rb_define_method(cs, "intersection", cs_method_intersection, 1);
|
1307
|
+
rb_define_method(cs, "&", cs_method_intersection, 1);
|
1308
|
+
rb_define_method(cs, "union", cs_method_union, 1);
|
1309
|
+
rb_define_method(cs, "+", cs_method_union, 1);
|
1310
|
+
rb_define_method(cs, "|", cs_method_union, 1);
|
1311
|
+
rb_define_method(cs, "difference", cs_method_difference, 1);
|
1312
|
+
rb_define_method(cs, "-", cs_method_difference, 1);
|
1313
|
+
rb_define_method(cs, "^", cs_method_exclusion, 1);
|
1314
|
+
rb_define_method(cs, "include?", cs_method_include_p, 1);
|
1315
|
+
rb_define_method(cs, "member?", cs_method_include_p, 1);
|
1316
|
+
rb_define_method(cs, "===", cs_method_include_p, 1);
|
1317
|
+
rb_define_method(cs, "add", cs_method_add, 1);
|
1318
|
+
rb_define_method(cs, "<<", cs_method_add, 1);
|
1319
|
+
rb_define_method(cs, "add?", cs_method_add_p, 1);
|
1320
|
+
rb_define_method(cs, "delete", cs_method_delete, 1);
|
1321
|
+
rb_define_method(cs, "delete?", cs_method_delete_p, 1);
|
1322
|
+
rb_define_method(cs, "intersect?", cs_method_intersect_p, 1);
|
1323
|
+
rb_define_method(cs, "disjoint?", cs_method_disjoint_p, 1);
|
1324
|
+
rb_define_method(cs, "eql?", cs_method_eql_p, 1);
|
1325
|
+
rb_define_method(cs, "==", cs_method_eql_p, 1);
|
1326
|
+
rb_define_method(cs, "merge", cs_method_merge, 1);
|
1327
|
+
rb_define_method(cs, "initialize_clone", cs_method_initialize_copy, 1);
|
1328
|
+
rb_define_method(cs, "initialize_dup", cs_method_initialize_copy, 1);
|
1329
|
+
rb_define_method(cs, "subtract", cs_method_subtract, 1);
|
1330
|
+
rb_define_method(cs, "subset?", cs_method_subset_p, 1);
|
1331
|
+
rb_define_method(cs, "<=", cs_method_subset_p, 1);
|
1332
|
+
rb_define_method(cs, "proper_subset?", cs_method_proper_subset_p, 1);
|
1333
|
+
rb_define_method(cs, "<", cs_method_proper_subset_p, 1);
|
1334
|
+
rb_define_method(cs, "superset?", cs_method_superset_p, 1);
|
1335
|
+
rb_define_method(cs, ">=", cs_method_superset_p, 1);
|
1336
|
+
rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
|
1337
|
+
rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
|
794
1338
|
|
795
1339
|
// `CharacterSet`-specific methods
|
796
1340
|
|
797
|
-
rb_define_singleton_method(cs, "from_ranges",
|
798
|
-
rb_define_singleton_method(cs, "of",
|
799
|
-
|
800
|
-
rb_define_method(cs, "ranges",
|
801
|
-
rb_define_method(cs, "sample",
|
802
|
-
rb_define_method(cs, "
|
803
|
-
rb_define_method(cs, "
|
804
|
-
rb_define_method(cs, "
|
805
|
-
rb_define_method(cs, "
|
806
|
-
rb_define_method(cs, "
|
807
|
-
rb_define_method(cs, "
|
808
|
-
rb_define_method(cs, "
|
809
|
-
rb_define_method(cs, "
|
810
|
-
rb_define_method(cs, "
|
811
|
-
rb_define_method(cs, "
|
812
|
-
rb_define_method(cs, "
|
813
|
-
rb_define_method(cs, "
|
1341
|
+
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1342
|
+
rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
|
1343
|
+
|
1344
|
+
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1345
|
+
rb_define_method(cs, "sample", cs_method_sample, -1);
|
1346
|
+
rb_define_method(cs, "ext_section", cs_method_ext_section, 2);
|
1347
|
+
rb_define_method(cs, "ext_count_in_section", cs_method_ext_count_in_section, 2);
|
1348
|
+
rb_define_method(cs, "ext_section?", cs_method_ext_section_p, 2);
|
1349
|
+
rb_define_method(cs, "ext_section_ratio", cs_method_ext_section_ratio, 2);
|
1350
|
+
rb_define_method(cs, "planes", cs_method_planes, 0);
|
1351
|
+
rb_define_method(cs, "plane", cs_method_plane, 1);
|
1352
|
+
rb_define_method(cs, "member_in_plane?", cs_method_member_in_plane_p, 1);
|
1353
|
+
rb_define_method(cs, "ext_inversion", cs_method_ext_inversion, -1);
|
1354
|
+
rb_define_method(cs, "case_insensitive", cs_method_case_insensitive, 0);
|
1355
|
+
rb_define_method(cs, "count_in", cs_method_count_in, 1);
|
1356
|
+
rb_define_method(cs, "cover?", cs_method_cover_p, 1);
|
1357
|
+
rb_define_method(cs, "delete_in", cs_method_delete_in, 1);
|
1358
|
+
rb_define_method(cs, "delete_in!", cs_method_delete_in_bang, 1);
|
1359
|
+
rb_define_method(cs, "keep_in", cs_method_keep_in, 1);
|
1360
|
+
rb_define_method(cs, "keep_in!", cs_method_keep_in_bang, 1);
|
1361
|
+
rb_define_method(cs, "scan", cs_method_scan, 1);
|
1362
|
+
rb_define_method(cs, "used_by?", cs_method_used_by_p, 1);
|
1363
|
+
rb_define_method(cs, "allocated_length", cs_method_allocated_length, 0);
|
814
1364
|
}
|