character_set 1.2.0-java → 1.3.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.travis.yml +1 -0
- data/BENCHMARK.md +51 -15
- data/CHANGELOG.md +20 -0
- data/README.md +24 -8
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +1 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +2 -0
- data/ext/character_set/character_set.c +963 -413
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/core_ext/string_ext.rb +2 -0
- data/lib/character_set/expression_converter.rb +21 -24
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +666 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +151 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/ruby_fallback.rb +0 -2
- data/lib/character_set/ruby_fallback/character_set_methods.rb +52 -4
- data/lib/character_set/ruby_fallback/set_methods.rb +2 -2
- data/lib/character_set/shared_methods.rb +51 -40
- data/lib/character_set/version.rb +1 -1
- metadata +54 -3
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cd336b705f4a2c9dc5af1fc0d841bbfb6324a5c8eb955bb72747c9c4c8a8431d
|
4
|
+
data.tar.gz: 965a1f84fe364d1e0d44039f2947f5c91dae2bcd485201fa262d1fbd41ba7dea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dbef79700f9cc6d00387d373fdd0d307c1b2ebbcdd78d3efd25cbdc54e067b0576c53ec2b7b46eacde8def6a0feb2acd58396af62b462666dde213664c400d73
|
7
|
+
data.tar.gz: f66e839c472188f52511a4ff3a4fda3becbe0af177c8cc4c4d7aeeaf65bb55b256f10a03a4aec453c6800ccd0bd697b4adb514f6621177f08e750d448150333b
|
data/.gitattributes
ADDED
data/.travis.yml
CHANGED
data/BENCHMARK.md
CHANGED
@@ -1,46 +1,58 @@
|
|
1
|
-
Results of `rake:benchmark` on ruby 2.6.
|
1
|
+
Results of `rake:benchmark` on ruby 2.6.2p47 (2019-03-13 revision 67232) [x86_64-darwin18]
|
2
2
|
|
3
|
+
```
|
4
|
+
Counting non-letters
|
5
|
+
|
6
|
+
CharacterSet#count_in: 12253693.8 i/s
|
7
|
+
String#count: 1737741.7 i/s - 7.05x slower
|
8
|
+
```
|
3
9
|
```
|
4
10
|
Detecting non-whitespace
|
5
11
|
|
6
|
-
CharacterSet#cover?:
|
7
|
-
Regexp#match?:
|
12
|
+
CharacterSet#cover?: 14058351.9 i/s
|
13
|
+
Regexp#match?: 7907608.1 i/s - 1.78x slower
|
8
14
|
```
|
9
15
|
```
|
10
16
|
Detecting non-letters
|
11
17
|
|
12
|
-
CharacterSet#cover?:
|
13
|
-
Regexp#match?:
|
18
|
+
CharacterSet#cover?: 13341301.6 i/s
|
19
|
+
Regexp#match?: 5187453.3 i/s - 2.57x slower
|
14
20
|
```
|
15
21
|
```
|
16
22
|
Removing whitespace
|
17
23
|
|
18
|
-
CharacterSet#delete_in:
|
19
|
-
String#gsub:
|
24
|
+
CharacterSet#delete_in: 2523184.0 i/s
|
25
|
+
String#gsub: 225804.7 i/s - 11.17x slower
|
20
26
|
```
|
21
27
|
```
|
22
28
|
Removing whitespace, emoji and umlauts
|
23
29
|
|
24
|
-
CharacterSet#delete_in:
|
25
|
-
String#gsub:
|
30
|
+
CharacterSet#delete_in: 1712208.6 i/s
|
31
|
+
String#gsub: 278508.8 i/s - 6.15x slower
|
26
32
|
```
|
27
33
|
```
|
28
34
|
Removing non-whitespace
|
29
35
|
|
30
|
-
CharacterSet#keep_in:
|
31
|
-
String#gsub:
|
36
|
+
CharacterSet#keep_in: 2760158.1 i/s
|
37
|
+
String#gsub: 232797.7 i/s - 11.86x slower
|
32
38
|
```
|
33
39
|
```
|
34
40
|
Extracting emoji
|
35
41
|
|
36
|
-
CharacterSet#keep_in:
|
37
|
-
String#gsub:
|
42
|
+
CharacterSet#keep_in: 1775758.8 i/s
|
43
|
+
String#gsub: 217649.9 i/s - 8.16x slower
|
44
|
+
```
|
45
|
+
```
|
46
|
+
Extracting emoji to an Array
|
47
|
+
|
48
|
+
CharacterSet#scan: 2579030.8 i/s
|
49
|
+
String#scan: 545107.0 i/s - 4.73x slower
|
38
50
|
```
|
39
51
|
```
|
40
52
|
Detecting whitespace
|
41
53
|
|
42
|
-
CharacterSet#used_by?:
|
43
|
-
Regexp#match?:
|
54
|
+
CharacterSet#used_by?: 13847689.0 i/s
|
55
|
+
Regexp#match?: 7533275.2 i/s - 1.84x slower
|
44
56
|
```
|
45
57
|
```
|
46
58
|
Detecting emoji in a large string
|
@@ -48,3 +60,27 @@ Detecting emoji in a large string
|
|
48
60
|
CharacterSet#used_by?: 246527.7 i/s
|
49
61
|
Regexp#match?: 92956.5 i/s - 2.65x slower
|
50
62
|
```
|
63
|
+
```
|
64
|
+
Adding entries
|
65
|
+
|
66
|
+
CharacterSet#add: 3102081.7 i/s
|
67
|
+
SortedSet#add: 1897464.8 i/s - 1.63x slower
|
68
|
+
```
|
69
|
+
```
|
70
|
+
Removing entries
|
71
|
+
|
72
|
+
CharacterSet#delete: 3240924.1 i/s
|
73
|
+
SortedSet#delete: 2887493.9 i/s - 1.12x slower
|
74
|
+
```
|
75
|
+
```
|
76
|
+
Merging entries
|
77
|
+
|
78
|
+
CharacterSet#merge: 536.8 i/s
|
79
|
+
SortedSet#merge: 12.5 i/s - 42.78x slower
|
80
|
+
```
|
81
|
+
```
|
82
|
+
Getting the min and max
|
83
|
+
|
84
|
+
CharacterSet#minmax: 4111960.8 i/s
|
85
|
+
SortedSet#minmax: 756.4 i/s - 5436.39x slower
|
86
|
+
```
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## UNRELEASED
|
8
|
+
|
9
|
+
## [1.3.0] - 2019-04-26
|
10
|
+
|
11
|
+
### Added
|
12
|
+
- improved `String` manipulation speed
|
13
|
+
- improved initialization and `#merge` speed when passing a large `Range`
|
14
|
+
- reduced memory consumption by > 90% for most use cases via dynamic resizing
|
15
|
+
- before, every set instance required 136 KB for codepoints
|
16
|
+
- now, 16 bytes for a CharacterSet in ASCII space, 8 KB for one in BMP space etc.
|
17
|
+
- `#count_in` and `#scan_in` methods for `String` interaction
|
18
|
+
- new predefined sets `::any`/`::all`, `::assigned`, `::surrogate`
|
19
|
+
- conversion methods `#assigned_part`, `#valid_part`
|
20
|
+
- sectioning methods `#ascii_part`, `#plane(n)`
|
21
|
+
- section test methods `#ascii_part?`, `#ascii_ratio`, `#ascii_only?`, `#astral_only?`
|
22
|
+
|
23
|
+
### Fixed
|
24
|
+
- `#count` now supports passing an argument or block as usual
|
25
|
+
- `CharacterSet::Pure#keep_in`, `#delete_in` now preserve the original encoding
|
26
|
+
|
7
27
|
## [1.2.0] - 2019-04-02
|
8
28
|
|
9
29
|
### Added
|
data/README.md
CHANGED
@@ -2,8 +2,11 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/character_set.svg)](http://badge.fury.io/rb/character_set)
|
4
4
|
[![Build Status](https://travis-ci.org/jaynetics/character_set.svg?branch=master)](https://travis-ci.org/jaynetics/character_set)
|
5
|
+
[![codecov](https://codecov.io/gh/jaynetics/character_set/branch/master/graph/badge.svg)](https://codecov.io/gh/jaynetics/character_set)
|
5
6
|
|
6
|
-
|
7
|
+
This is a C-extended Ruby gem to work with sets of Unicode codepoints. It can read and write these sets in various formats and implements the stdlib `Set` interface for them.
|
8
|
+
|
9
|
+
It also offers an alternate paradigm of `String` processing which grants much better performance than `Regexp` and `String` methods from the stdlib where applicable (see [benchmarks](./BENCHMARK.md)).
|
7
10
|
|
8
11
|
Many parts can be used independently, e.g.:
|
9
12
|
- `CharacterSet::Character`
|
@@ -49,7 +52,7 @@ require 'character_set/core_ext/regexp_ext'
|
|
49
52
|
|
50
53
|
### Predefined utility sets
|
51
54
|
|
52
|
-
`ascii`, `ascii_alnum`, `
|
55
|
+
`ascii`, `ascii_alnum`, `ascii_letter`, `assigned`, `bmp`, `crypt`, `emoji`, `newline`, `surrogate`, `unicode`, `url_fragment`, `url_host`, `url_path`, `url_query`, `whitespace`
|
53
56
|
|
54
57
|
```ruby
|
55
58
|
CharacterSet.ascii # => #<CharacterSet (size: 128)>
|
@@ -60,7 +63,7 @@ CharacterSet.non_ascii
|
|
60
63
|
|
61
64
|
### Interact with Strings
|
62
65
|
|
63
|
-
CharacterSet can replace some `
|
66
|
+
`CharacterSet` can replace some types of `String` handling with better performance than the stdlib.
|
64
67
|
|
65
68
|
`#used_by?` and `#cover?` can replace some `Regexp#match?` calls:
|
66
69
|
|
@@ -71,6 +74,7 @@ CharacterSet.ascii.cover?('Tr') # => true
|
|
71
74
|
```
|
72
75
|
|
73
76
|
`#delete_in(!)` and `#keep_in(!)` can replace `String#gsub(!)` and the like:
|
77
|
+
|
74
78
|
```ruby
|
75
79
|
string = 'Tüür'
|
76
80
|
|
@@ -84,6 +88,13 @@ CharacterSet.ascii.keep_in!(string) # => ''
|
|
84
88
|
string # => ''
|
85
89
|
```
|
86
90
|
|
91
|
+
`#count_in` and `#scan` can replace `String#count` and `String#scan`:
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
CharacterSet.non_ascii.count_in('Tüür') # => 2
|
95
|
+
CharacterSet.non_ascii.scan_in('Tüür') # => ['ü', 'ü']
|
96
|
+
```
|
97
|
+
|
87
98
|
There is also a core extension for String interaction.
|
88
99
|
```ruby
|
89
100
|
require 'character_set/core_ext/string_ext'
|
@@ -100,7 +111,7 @@ require 'character_set/core_ext/string_ext'
|
|
100
111
|
|
101
112
|
### Manipulate
|
102
113
|
|
103
|
-
Use any
|
114
|
+
Use [any Ruby Set method](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html), e.g. `#+`, `#-`, `#&`, `#^`, `#intersect?`, `#<`, `#>` etc. to interact with other sets. Use `#add`, `#delete`, `#include?` etc. to change or check for members.
|
104
115
|
|
105
116
|
Where appropriate, methods take both chars and codepoints, e.g.:
|
106
117
|
|
@@ -122,13 +133,13 @@ non_a.include?('ü') # => true
|
|
122
133
|
|
123
134
|
# surrogate pair halves are not included by default
|
124
135
|
CharacterSet['a'].inversion(include_surrogates: true)
|
125
|
-
# => #<CharacterSet (size:
|
136
|
+
# => #<CharacterSet (size: 1114112)>
|
126
137
|
```
|
127
138
|
|
128
139
|
`#case_insensitive` can be used to create a `CharacterSet` where upper/lower case codepoints are supplemented:
|
129
140
|
|
130
141
|
```ruby
|
131
|
-
CharacterSet['1', '
|
142
|
+
CharacterSet['1', 'A'].case_insensitive # => CharacterSet['1', 'A', 'a']
|
132
143
|
```
|
133
144
|
|
134
145
|
### Write
|
@@ -157,17 +168,22 @@ set.to_s(escape_all: true) { |c| "<#{c.hex}>" } # => "<61>-<63><258><1F929>"
|
|
157
168
|
set.to_s(abbreviate: false) # => "abc\u0258\u{1F929}"
|
158
169
|
|
159
170
|
# for full js regex compatibility in case of astral members:
|
160
|
-
set.to_s_with_surrogate_alternation # => '(?:[\u0258]|\ud83e\udd29)'
|
171
|
+
set.to_s_with_surrogate_alternation # => '(?:[a-c\u0258]|\ud83e\udd29)'
|
161
172
|
```
|
162
173
|
|
163
174
|
### Unicode plane methods
|
164
175
|
|
165
|
-
There are some methods to check for planes and to handle [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
176
|
+
There are some methods to check for planes and to handle ASCII, [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
166
177
|
```Ruby
|
178
|
+
CharacterSet['a', 'ü', '🤩'].ascii_part # => CharacterSet['a']
|
179
|
+
CharacterSet['a', 'ü', '🤩'].ascii_part? # => true
|
180
|
+
CharacterSet['a', 'ü', '🤩'].ascii_only? # => false
|
181
|
+
CharacterSet['a', 'ü', '🤩'].ascii_ratio # => 0.3333333
|
167
182
|
CharacterSet['a', 'ü', '🤩'].bmp_part # => CharacterSet['a', 'ü']
|
168
183
|
CharacterSet['a', 'ü', '🤩'].astral_part # => CharacterSet['🤩']
|
169
184
|
CharacterSet['a', 'ü', '🤩'].bmp_ratio # => 0.6666666
|
170
185
|
CharacterSet['a', 'ü', '🤩'].planes # => [0, 1]
|
186
|
+
CharacterSet['a', 'ü', '🤩'].plane(1) # => CharacterSet['🤩']
|
171
187
|
CharacterSet['a', 'ü', '🤩'].member_in_plane?(7) # => false
|
172
188
|
CharacterSet::Character.new('a').plane # => 0
|
173
189
|
```
|
data/Rakefile
CHANGED
@@ -7,6 +7,13 @@ RSpec::Core::RakeTask.new(:spec)
|
|
7
7
|
|
8
8
|
task default: :spec
|
9
9
|
|
10
|
+
namespace :spec do
|
11
|
+
task :quick do
|
12
|
+
ENV['SKIP_MEMSAFETY_SPECS'] = '1'
|
13
|
+
Rake::Task[:spec].invoke
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
10
17
|
Rake::ExtensionTask.new('character_set') do |ext|
|
11
18
|
ext.lib_dir = 'lib/character_set'
|
12
19
|
end
|
@@ -106,27 +113,22 @@ task :sync_casefold_data do
|
|
106
113
|
hash[from] = to if type == 'C'
|
107
114
|
end.sort
|
108
115
|
|
109
|
-
File.
|
110
|
-
|
111
|
-
|
112
|
-
// -*-C-*-
|
113
|
-
|
114
|
-
typedef struct casefold_mapping {
|
115
|
-
unsigned long from;
|
116
|
-
unsigned long to;
|
117
|
-
} casefold_mapping;
|
118
|
-
|
119
|
-
#define CASEFOLD_COUNT #{mapping.size}
|
116
|
+
content = File.read(dst_path + '.tmpl')
|
117
|
+
.sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
|
118
|
+
.sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))
|
120
119
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
mapping.each { |from, to| f.puts "{0x#{from},0x#{to}}," }
|
120
|
+
File.write(dst_path, content)
|
121
|
+
File.unlink(src_path)
|
122
|
+
end
|
125
123
|
|
126
|
-
|
124
|
+
desc 'Update codepoint data for predefined sets, based on Onigmo'
|
125
|
+
task :sync_predefined_sets do
|
126
|
+
%w[assigned emoji whitespace].each do |prop|
|
127
|
+
require 'regexp_property_values'
|
128
|
+
ranges = RegexpPropertyValues[prop].matched_ranges
|
129
|
+
str = ranges.map { |r| r.minmax.map { |n| n.to_s(16) }.join(',').upcase + "\n" }.join
|
130
|
+
File.write("./lib/character_set/predefined_sets/#{prop}.cps", str, mode: 'w')
|
127
131
|
end
|
128
|
-
|
129
|
-
File.unlink(src_path)
|
130
132
|
end
|
131
133
|
|
132
134
|
desc 'Run all IPS benchmarks'
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
str = 'Lorem ipsum et dolorem'
|
4
|
+
tr = '^A-Za-z'
|
5
|
+
cs = CharacterSet.non_ascii_letter
|
6
|
+
|
7
|
+
benchmark(
|
8
|
+
caption: 'Counting non-letters',
|
9
|
+
cases: {
|
10
|
+
'String#count' => -> { str.count(tr) },
|
11
|
+
'CharacterSet#count_in' => -> { cs.count_in(str) },
|
12
|
+
}
|
13
|
+
)
|
data/benchmarks/delete_in.rb
CHANGED
@@ -14,7 +14,7 @@ benchmark(
|
|
14
14
|
|
15
15
|
str = 'Lörem ipsüm ⛷ et dölörem'
|
16
16
|
rx = /[\s\p{emoji}äüö]/
|
17
|
-
cs = CharacterSet.whitespace + CharacterSet.emoji +
|
17
|
+
cs = CharacterSet.whitespace + CharacterSet.emoji + CharacterSet['ä', 'ö', 'ü']
|
18
18
|
|
19
19
|
benchmark(
|
20
20
|
caption: 'Removing whitespace, emoji and umlauts',
|
data/benchmarks/scan.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
str = 'Lorem ipsum ⛷ et dolorem'
|
4
|
+
rx = /\p{emoji}/
|
5
|
+
cs = CharacterSet.emoji
|
6
|
+
|
7
|
+
benchmark(
|
8
|
+
caption: 'Extracting emoji to an Array',
|
9
|
+
cases: {
|
10
|
+
'String#scan' => -> { str.scan(rx) },
|
11
|
+
'CharacterSet#scan' => -> { cs.scan(str) },
|
12
|
+
}
|
13
|
+
)
|
data/benchmarks/shared.rb
CHANGED
data/benchmarks/z_add.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs = CharacterSet.new(0..0x10FFFF)
|
4
|
+
ss = SortedSet.new(0..0x10FFFF)
|
5
|
+
|
6
|
+
benchmark(
|
7
|
+
caption: 'Removing entries',
|
8
|
+
cases: {
|
9
|
+
'CharacterSet#delete' => -> { cs.delete(rand(0x10FFFF)) },
|
10
|
+
'SortedSet#delete' => -> { ss.delete(rand(0x10FFFF)) },
|
11
|
+
}
|
12
|
+
)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs1 = CharacterSet.new(0...0x88000)
|
4
|
+
cs2 = CharacterSet.new(0x88000..0x10FFFF)
|
5
|
+
|
6
|
+
ss1 = SortedSet.new(0...0x88000)
|
7
|
+
ss2 = SortedSet.new(0x88000..0x10FFFF)
|
8
|
+
|
9
|
+
benchmark(
|
10
|
+
caption: 'Merging entries',
|
11
|
+
cases: {
|
12
|
+
'CharacterSet#merge' => -> { cs1.merge(cs2) },
|
13
|
+
'SortedSet#merge' => -> { ss1.merge(ss2) },
|
14
|
+
}
|
15
|
+
)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require_relative './shared'
|
2
|
+
|
3
|
+
cs = CharacterSet.new(0..0xFFFF)
|
4
|
+
ss = SortedSet.new(0..0xFFFF)
|
5
|
+
|
6
|
+
benchmark(
|
7
|
+
caption: 'Getting the min and max',
|
8
|
+
cases: {
|
9
|
+
'CharacterSet#minmax' => -> { cs.minmax },
|
10
|
+
'SortedSet#minmax' => -> { ss.minmax },
|
11
|
+
}
|
12
|
+
)
|
data/bin/console
CHANGED
data/character_set.gemspec
CHANGED
@@ -23,6 +23,8 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.required_ruby_version = '>= 2.1.0'
|
24
24
|
|
25
25
|
s.add_development_dependency 'benchmark-ips', '~> 2.7'
|
26
|
+
s.add_development_dependency 'codecov', '~> 0.1'
|
27
|
+
s.add_development_dependency 'get_process_mem', '~> 0.2.3'
|
26
28
|
s.add_development_dependency 'rake', '~> 12.0'
|
27
29
|
s.add_development_dependency 'rake-compiler', '~> 1.0'
|
28
30
|
s.add_development_dependency 'range_compressor', '~> 1.0'
|
@@ -2,81 +2,180 @@
|
|
2
2
|
#include "ruby/encoding.h"
|
3
3
|
#include "unicode_casefold_table.h"
|
4
4
|
|
5
|
-
#define
|
6
|
-
#define
|
7
|
-
#define
|
5
|
+
#define UNICODE_PLANE_SIZE 0x10000
|
6
|
+
#define UNICODE_PLANE_COUNT 17
|
7
|
+
#define UNICODE_CP_COUNT (UNICODE_PLANE_SIZE * UNICODE_PLANE_COUNT)
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
// start at ascii size
|
10
|
+
#define CS_DEFAULT_INITIAL_LEN 128
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
typedef char cs_ar;
|
13
|
+
typedef unsigned long cs_cp;
|
14
|
+
|
15
|
+
struct cs_data
|
16
|
+
{
|
17
|
+
cs_ar *cps;
|
18
|
+
cs_cp len;
|
19
|
+
};
|
20
|
+
|
21
|
+
#define CS_MSIZE(len) (sizeof(cs_ar) * (len / 8))
|
22
|
+
|
23
|
+
static inline void
|
24
|
+
add_memspace_for_another_plane(struct cs_data *data)
|
25
|
+
{
|
26
|
+
data->cps = ruby_xrealloc(data->cps, CS_MSIZE(data->len + UNICODE_PLANE_SIZE));
|
27
|
+
memset(data->cps + CS_MSIZE(data->len), 0, CS_MSIZE(UNICODE_PLANE_SIZE));
|
28
|
+
data->len += UNICODE_PLANE_SIZE;
|
29
|
+
}
|
30
|
+
|
31
|
+
static inline void
|
32
|
+
ensure_memsize_fits(struct cs_data *data, cs_cp target_cp)
|
33
|
+
{
|
34
|
+
while (target_cp >= data->len)
|
35
|
+
{
|
36
|
+
add_memspace_for_another_plane(data);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static inline void
|
41
|
+
set_cp(struct cs_data *data, cs_cp cp)
|
42
|
+
{
|
43
|
+
ensure_memsize_fits(data, cp);
|
44
|
+
data->cps[cp >> 3] |= (1 << (cp & 0x07));
|
45
|
+
}
|
46
|
+
|
47
|
+
static inline int
|
48
|
+
tst_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
49
|
+
{
|
50
|
+
return ((cp < len) && cps[cp >> 3] & (1 << (cp & 0x07)));
|
51
|
+
}
|
52
|
+
|
53
|
+
static inline void
|
54
|
+
clr_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
55
|
+
{
|
56
|
+
if (cp < len)
|
57
|
+
{
|
58
|
+
cps[cp >> 3] &= ~(1 << (cp & 0x07));
|
59
|
+
}
|
60
|
+
}
|
16
61
|
|
17
62
|
static void
|
18
|
-
|
19
|
-
|
63
|
+
cs_free(void *ptr)
|
64
|
+
{
|
65
|
+
struct cs_data *data = ptr;
|
66
|
+
ruby_xfree(data->cps);
|
67
|
+
ruby_xfree(data);
|
20
68
|
}
|
21
69
|
|
22
70
|
static size_t
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
.
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
71
|
+
cs_memsize(const void *ptr)
|
72
|
+
{
|
73
|
+
const struct cs_data *data = ptr;
|
74
|
+
return sizeof(*data) + CS_MSIZE(data->len);
|
75
|
+
}
|
76
|
+
|
77
|
+
static const rb_data_type_t cs_type = {
|
78
|
+
.wrap_struct_name = "character_set",
|
79
|
+
.function = {
|
80
|
+
.dmark = NULL,
|
81
|
+
.dfree = cs_free,
|
82
|
+
.dsize = cs_memsize,
|
83
|
+
},
|
84
|
+
.data = NULL,
|
85
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
37
86
|
};
|
38
87
|
|
39
|
-
|
40
|
-
|
88
|
+
static inline VALUE
|
89
|
+
cs_alloc_len(VALUE klass, struct cs_data **data_ptr, cs_cp len)
|
90
|
+
{
|
91
|
+
VALUE cs;
|
92
|
+
struct cs_data *data;
|
93
|
+
cs = TypedData_Make_Struct(klass, struct cs_data, &cs_type, data);
|
94
|
+
data->cps = ruby_xmalloc(CS_MSIZE(len));
|
95
|
+
memset(data->cps, 0, CS_MSIZE(len));
|
96
|
+
data->len = len;
|
97
|
+
|
98
|
+
if (data_ptr)
|
99
|
+
{
|
100
|
+
*data_ptr = data;
|
101
|
+
}
|
41
102
|
|
42
|
-
|
43
|
-
|
103
|
+
return cs;
|
104
|
+
}
|
44
105
|
|
45
|
-
static VALUE
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
return NEW_CHARACTER_SET(self, cp_arr);
|
106
|
+
static inline VALUE
|
107
|
+
cs_alloc(VALUE klass, struct cs_data **data_ptr)
|
108
|
+
{
|
109
|
+
return cs_alloc_len(klass, data_ptr, CS_DEFAULT_INITIAL_LEN);
|
50
110
|
}
|
51
111
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
112
|
+
static inline struct cs_data *
|
113
|
+
cs_fetch_data(VALUE cs)
|
114
|
+
{
|
115
|
+
struct cs_data *data;
|
116
|
+
TypedData_Get_Struct(cs, struct cs_data, &cs_type, data);
|
117
|
+
return data;
|
118
|
+
}
|
119
|
+
|
120
|
+
static inline cs_ar *
|
121
|
+
cs_fetch_cps(VALUE cs, cs_cp *len_ptr)
|
122
|
+
{
|
123
|
+
struct cs_data *data;
|
124
|
+
data = cs_fetch_data(cs);
|
125
|
+
*len_ptr = data->len;
|
126
|
+
return data->cps;
|
127
|
+
}
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
cs_method_allocate(VALUE self)
|
131
|
+
{
|
132
|
+
return cs_alloc(self, 0);
|
133
|
+
}
|
134
|
+
|
135
|
+
#define FOR_EACH_ACTIVE_CODEPOINT(action) \
|
136
|
+
do \
|
137
|
+
{ \
|
138
|
+
cs_cp cp, len; \
|
139
|
+
cs_ar *cps; \
|
140
|
+
cps = cs_fetch_cps(self, &len); \
|
141
|
+
for (cp = 0; cp < len; cp++) \
|
142
|
+
{ \
|
143
|
+
if (tst_cp(cps, len, cp)) \
|
144
|
+
{ \
|
145
|
+
action; \
|
146
|
+
} \
|
147
|
+
} \
|
148
|
+
} while (0)
|
59
149
|
|
60
150
|
// ***************************
|
61
151
|
// `Set` compatibility methods
|
62
152
|
// ***************************
|
63
153
|
|
64
|
-
static inline
|
65
|
-
|
66
|
-
|
154
|
+
static inline cs_cp
|
155
|
+
cs_active_cp_count(VALUE self)
|
156
|
+
{
|
157
|
+
cs_cp count;
|
67
158
|
count = 0;
|
68
159
|
FOR_EACH_ACTIVE_CODEPOINT(count++);
|
69
|
-
return
|
160
|
+
return count;
|
70
161
|
}
|
71
162
|
|
72
163
|
static VALUE
|
73
|
-
|
74
|
-
|
164
|
+
cs_method_length(VALUE self)
|
165
|
+
{
|
166
|
+
return LONG2FIX(cs_active_cp_count(self));
|
167
|
+
}
|
168
|
+
|
169
|
+
static inline VALUE
|
170
|
+
cs_enumerator_length(VALUE self, VALUE args, VALUE eobj)
|
171
|
+
{
|
172
|
+
return LONG2FIX(cs_active_cp_count(self));
|
75
173
|
}
|
76
174
|
|
77
175
|
static VALUE
|
78
|
-
|
79
|
-
|
176
|
+
cs_method_each(VALUE self)
|
177
|
+
{
|
178
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
80
179
|
FOR_EACH_ACTIVE_CODEPOINT(rb_yield(LONG2FIX(cp)));
|
81
180
|
return self;
|
82
181
|
}
|
@@ -84,16 +183,19 @@ method_each(VALUE self) {
|
|
84
183
|
// returns an Array of codepoint Integers by default.
|
85
184
|
// returns an Array of Strings of length 1 if passed `true`.
|
86
185
|
static VALUE
|
87
|
-
|
186
|
+
cs_method_to_a(int argc, VALUE *argv, VALUE self)
|
187
|
+
{
|
88
188
|
VALUE arr;
|
89
189
|
rb_encoding *enc;
|
90
190
|
rb_check_arity(argc, 0, 1);
|
91
191
|
|
92
192
|
arr = rb_ary_new();
|
93
|
-
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
193
|
+
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
194
|
+
{
|
94
195
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, LONG2FIX(cp)));
|
95
196
|
}
|
96
|
-
else
|
197
|
+
else
|
198
|
+
{
|
97
199
|
enc = rb_utf8_encoding();
|
98
200
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, rb_enc_uint_chr((int)cp, enc)));
|
99
201
|
}
|
@@ -102,302 +204,472 @@ method_to_a(int argc, VALUE *argv, VALUE self) {
|
|
102
204
|
}
|
103
205
|
|
104
206
|
static VALUE
|
105
|
-
|
207
|
+
cs_method_empty_p(VALUE self)
|
208
|
+
{
|
106
209
|
FOR_EACH_ACTIVE_CODEPOINT(return Qfalse);
|
107
210
|
return Qtrue;
|
108
211
|
}
|
109
212
|
|
110
213
|
static VALUE
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
214
|
+
cs_method_hash(VALUE self)
|
215
|
+
{
|
216
|
+
cs_cp cp, len, hash, four_byte_value;
|
217
|
+
cs_ar *cps;
|
218
|
+
cps = cs_fetch_cps(self, &len);
|
115
219
|
|
116
220
|
hash = 17;
|
117
|
-
for (cp = 0; cp <
|
118
|
-
|
119
|
-
|
221
|
+
for (cp = 0; cp < len; cp++)
|
222
|
+
{
|
223
|
+
if (cp % 32 == 0)
|
224
|
+
{
|
225
|
+
if (cp != 0)
|
226
|
+
{
|
227
|
+
hash = hash * 23 + four_byte_value;
|
228
|
+
}
|
120
229
|
four_byte_value = 0;
|
121
230
|
}
|
122
|
-
if (
|
231
|
+
if (tst_cp(cps, len, cp))
|
232
|
+
{
|
233
|
+
four_byte_value++;
|
234
|
+
}
|
123
235
|
}
|
124
236
|
|
125
237
|
return LONG2FIX(hash);
|
126
238
|
}
|
127
239
|
|
128
240
|
static inline VALUE
|
129
|
-
|
241
|
+
cs_delete_if_block_result(VALUE self, int truthy)
|
242
|
+
{
|
130
243
|
VALUE result;
|
131
244
|
rb_need_block();
|
132
245
|
rb_check_frozen(self);
|
133
246
|
FOR_EACH_ACTIVE_CODEPOINT(
|
134
|
-
|
135
|
-
|
136
|
-
);
|
247
|
+
result = rb_yield(LONG2FIX(cp));
|
248
|
+
if ((NIL_P(result) || result == Qfalse) != truthy) clr_cp(cps, len, cp););
|
137
249
|
return self;
|
138
250
|
}
|
139
251
|
|
140
252
|
static VALUE
|
141
|
-
|
142
|
-
|
143
|
-
|
253
|
+
cs_method_delete_if(VALUE self)
|
254
|
+
{
|
255
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
256
|
+
return cs_delete_if_block_result(self, 1);
|
144
257
|
}
|
145
258
|
|
146
259
|
static VALUE
|
147
|
-
|
148
|
-
|
149
|
-
|
260
|
+
cs_method_keep_if(VALUE self)
|
261
|
+
{
|
262
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
263
|
+
return cs_delete_if_block_result(self, 0);
|
150
264
|
}
|
151
265
|
|
152
266
|
static VALUE
|
153
|
-
|
154
|
-
|
155
|
-
|
267
|
+
cs_method_clear(VALUE self)
|
268
|
+
{
|
269
|
+
struct cs_data *data;
|
156
270
|
rb_check_frozen(self);
|
157
|
-
|
158
|
-
|
159
|
-
CLRBIT(cps, cp);
|
160
|
-
}
|
271
|
+
data = cs_fetch_data(self);
|
272
|
+
memset(data->cps, 0, CS_MSIZE(data->len));
|
161
273
|
return self;
|
162
274
|
}
|
163
275
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
276
|
+
static VALUE
|
277
|
+
cs_method_min(VALUE self)
|
278
|
+
{
|
279
|
+
FOR_EACH_ACTIVE_CODEPOINT(return LONG2FIX(cp));
|
280
|
+
return Qnil;
|
281
|
+
}
|
282
|
+
|
283
|
+
static VALUE
|
284
|
+
cs_method_max(VALUE self)
|
285
|
+
{
|
286
|
+
cs_cp len;
|
287
|
+
long reverse_idx;
|
288
|
+
cs_ar *cps;
|
289
|
+
cps = cs_fetch_cps(self, &len);
|
290
|
+
for (reverse_idx = len; reverse_idx >= 0; reverse_idx--)
|
291
|
+
{
|
292
|
+
if (tst_cp(cps, len, reverse_idx))
|
293
|
+
{
|
294
|
+
return LONG2FIX(reverse_idx);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
return Qnil;
|
298
|
+
}
|
299
|
+
|
300
|
+
static VALUE
|
301
|
+
cs_method_minmax(VALUE self)
|
302
|
+
{
|
303
|
+
VALUE arr;
|
304
|
+
arr = rb_ary_new2(2);
|
305
|
+
rb_ary_push(arr, cs_method_min(self));
|
306
|
+
rb_ary_push(arr, cs_method_max(self));
|
307
|
+
return arr;
|
308
|
+
}
|
309
|
+
|
310
|
+
#define RETURN_COMBINED_CS(cs_a, cs_b, comp_op) \
|
311
|
+
do \
|
312
|
+
{ \
|
313
|
+
VALUE new_cs; \
|
314
|
+
cs_cp cp, alen, blen; \
|
315
|
+
cs_ar *acps, *bcps; \
|
316
|
+
struct cs_data *new_data; \
|
317
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data); \
|
318
|
+
acps = cs_fetch_cps(cs_a, &alen); \
|
319
|
+
bcps = cs_fetch_cps(cs_b, &blen); \
|
320
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) \
|
321
|
+
{ \
|
322
|
+
if (tst_cp(acps, alen, cp) comp_op tst_cp(bcps, blen, cp)) \
|
323
|
+
{ \
|
324
|
+
set_cp(new_data, cp); \
|
325
|
+
} \
|
326
|
+
} \
|
327
|
+
return new_cs; \
|
328
|
+
} while (0)
|
174
329
|
|
175
330
|
static VALUE
|
176
|
-
|
177
|
-
|
331
|
+
cs_method_intersection(VALUE self, VALUE other)
|
332
|
+
{
|
333
|
+
RETURN_COMBINED_CS(self, other, &&);
|
178
334
|
}
|
179
335
|
|
180
336
|
static VALUE
|
181
|
-
|
182
|
-
|
337
|
+
cs_method_exclusion(VALUE self, VALUE other)
|
338
|
+
{
|
339
|
+
RETURN_COMBINED_CS(self, other, ^);
|
183
340
|
}
|
184
341
|
|
185
342
|
static VALUE
|
186
|
-
|
187
|
-
|
343
|
+
cs_method_union(VALUE self, VALUE other)
|
344
|
+
{
|
345
|
+
RETURN_COMBINED_CS(self, other, ||);
|
188
346
|
}
|
189
347
|
|
190
348
|
static VALUE
|
191
|
-
|
192
|
-
|
349
|
+
cs_method_difference(VALUE self, VALUE other)
|
350
|
+
{
|
351
|
+
RETURN_COMBINED_CS(self, other, >);
|
193
352
|
}
|
194
353
|
|
195
354
|
static VALUE
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
355
|
+
cs_method_include_p(VALUE self, VALUE num)
|
356
|
+
{
|
357
|
+
cs_ar *cps;
|
358
|
+
cs_cp len;
|
359
|
+
cps = cs_fetch_cps(self, &len);
|
360
|
+
return (tst_cp(cps, len, FIX2ULONG(num)) ? Qtrue : Qfalse);
|
200
361
|
}
|
201
362
|
|
202
|
-
static inline
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
363
|
+
static inline VALUE
|
364
|
+
cs_toggle_codepoint(VALUE cs, VALUE cp_num, int on, int return_nil_if_noop)
|
365
|
+
{
|
366
|
+
cs_cp cp, len;
|
367
|
+
cs_ar *cps;
|
368
|
+
struct cs_data *data;
|
369
|
+
rb_check_frozen(cs);
|
370
|
+
data = cs_fetch_data(cs);
|
371
|
+
cps = data->cps;
|
372
|
+
len = data->len;
|
208
373
|
cp = FIX2ULONG(cp_num);
|
209
|
-
if (
|
210
|
-
|
374
|
+
if (return_nil_if_noop && (!tst_cp(cps, len, cp) == !on))
|
375
|
+
{
|
376
|
+
return Qnil;
|
211
377
|
}
|
212
|
-
else
|
213
|
-
|
214
|
-
|
215
|
-
|
378
|
+
else
|
379
|
+
{
|
380
|
+
if (on)
|
381
|
+
{
|
382
|
+
set_cp(data, cp);
|
383
|
+
}
|
384
|
+
else
|
385
|
+
{
|
386
|
+
clr_cp(cps, len, cp);
|
387
|
+
}
|
388
|
+
return cs;
|
216
389
|
}
|
217
390
|
}
|
218
391
|
|
219
392
|
static VALUE
|
220
|
-
|
221
|
-
|
393
|
+
cs_method_add(VALUE self, VALUE cp_num)
|
394
|
+
{
|
395
|
+
return cs_toggle_codepoint(self, cp_num, 1, 0);
|
222
396
|
}
|
223
397
|
|
224
398
|
static VALUE
|
225
|
-
|
226
|
-
|
399
|
+
cs_method_add_p(VALUE self, VALUE cp_num)
|
400
|
+
{
|
401
|
+
return cs_toggle_codepoint(self, cp_num, 1, 1);
|
227
402
|
}
|
228
403
|
|
229
404
|
static VALUE
|
230
|
-
|
231
|
-
|
405
|
+
cs_method_delete(VALUE self, VALUE cp_num)
|
406
|
+
{
|
407
|
+
return cs_toggle_codepoint(self, cp_num, 0, 0);
|
232
408
|
}
|
233
409
|
|
234
410
|
static VALUE
|
235
|
-
|
236
|
-
|
411
|
+
cs_method_delete_p(VALUE self, VALUE cp_num)
|
412
|
+
{
|
413
|
+
return cs_toggle_codepoint(self, cp_num, 0, 1);
|
237
414
|
}
|
238
415
|
|
239
|
-
#define COMPARE_SETS(action)\
|
240
|
-
cp_index cp;\
|
241
|
-
cp_byte *cps, *other_cps;\
|
242
|
-
FETCH_CODEPOINTS(self, cps);\
|
243
|
-
FETCH_CODEPOINTS(other, other_cps);\
|
244
|
-
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) { action; }\
|
245
|
-
|
246
416
|
static VALUE
|
247
|
-
|
248
|
-
|
417
|
+
cs_method_intersect_p(VALUE self, VALUE other)
|
418
|
+
{
|
419
|
+
cs_cp cp, alen, blen;
|
420
|
+
cs_ar *acps, *bcps;
|
421
|
+
acps = cs_fetch_cps(self, &alen);
|
422
|
+
bcps = cs_fetch_cps(other, &blen);
|
423
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
424
|
+
{
|
425
|
+
if (tst_cp(acps, alen, cp) && tst_cp(bcps, blen, cp))
|
426
|
+
{
|
427
|
+
return Qtrue;
|
428
|
+
}
|
429
|
+
}
|
249
430
|
return Qfalse;
|
250
431
|
}
|
251
432
|
|
252
433
|
static VALUE
|
253
|
-
|
254
|
-
|
434
|
+
cs_method_disjoint_p(VALUE self, VALUE other)
|
435
|
+
{
|
436
|
+
return cs_method_intersect_p(self, other) ? Qfalse : Qtrue;
|
255
437
|
}
|
256
438
|
|
257
439
|
static inline int
|
258
|
-
|
259
|
-
|
440
|
+
cs_check_type(VALUE obj)
|
441
|
+
{
|
442
|
+
return rb_typeddata_is_kind_of(obj, &cs_type);
|
260
443
|
}
|
261
444
|
|
262
445
|
static VALUE
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
446
|
+
cs_cps_eql(VALUE cs_a, VALUE cs_b)
|
447
|
+
{
|
448
|
+
cs_cp cp, alen, blen;
|
449
|
+
cs_ar *acps, *bcps;
|
450
|
+
acps = cs_fetch_cps(cs_a, &alen);
|
451
|
+
bcps = cs_fetch_cps(cs_b, &blen);
|
452
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
453
|
+
{
|
454
|
+
if (tst_cp(acps, alen, cp) != tst_cp(bcps, blen, cp))
|
455
|
+
{
|
456
|
+
return Qfalse;
|
457
|
+
}
|
458
|
+
}
|
269
459
|
return Qtrue;
|
270
460
|
}
|
271
461
|
|
462
|
+
static VALUE
|
463
|
+
cs_method_eql_p(VALUE self, VALUE other)
|
464
|
+
{
|
465
|
+
if (!cs_check_type(other))
|
466
|
+
{
|
467
|
+
return Qfalse;
|
468
|
+
}
|
469
|
+
if (self == other) // same object_id
|
470
|
+
{
|
471
|
+
return Qtrue;
|
472
|
+
}
|
473
|
+
return cs_cps_eql(self, other);
|
474
|
+
}
|
475
|
+
|
272
476
|
static inline VALUE
|
273
|
-
|
274
|
-
|
275
|
-
|
477
|
+
cs_merge_cs(VALUE recipient, VALUE source)
|
478
|
+
{
|
479
|
+
cs_cp cp, source_len;
|
480
|
+
struct cs_data *data;
|
481
|
+
cs_ar *source_cps;
|
482
|
+
data = cs_fetch_data(recipient);
|
483
|
+
source_cps = cs_fetch_cps(source, &source_len);
|
484
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
485
|
+
{
|
486
|
+
if (tst_cp(source_cps, source_len, cp))
|
487
|
+
{
|
488
|
+
set_cp(data, cp);
|
489
|
+
}
|
490
|
+
}
|
491
|
+
return recipient;
|
276
492
|
}
|
277
493
|
|
278
|
-
static inline
|
279
|
-
|
280
|
-
|
494
|
+
static inline cs_cp
|
495
|
+
cs_checked_cp(VALUE object_id)
|
496
|
+
{
|
497
|
+
if (FIXNUM_P(object_id) && object_id > 0 && object_id < 0x220001)
|
498
|
+
{
|
499
|
+
return FIX2ULONG(object_id);
|
500
|
+
}
|
281
501
|
rb_raise(rb_eArgError, "CharacterSet members must be between 0 and 0x10FFFF");
|
282
502
|
}
|
283
503
|
|
284
504
|
static inline VALUE
|
285
|
-
|
505
|
+
cs_merge_rb_range(VALUE self, VALUE rb_range)
|
506
|
+
{
|
286
507
|
VALUE from_id, upto_id;
|
508
|
+
cs_cp from_cp, upto_cp, cont_len, rem;
|
287
509
|
int excl;
|
288
|
-
|
289
|
-
|
290
|
-
FETCH_CODEPOINTS(self, cps);
|
510
|
+
struct cs_data *data;
|
511
|
+
data = cs_fetch_data(self);
|
291
512
|
|
292
|
-
if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
|
513
|
+
if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
|
514
|
+
{
|
293
515
|
rb_raise(rb_eArgError, "pass a Range");
|
294
516
|
}
|
295
|
-
if (excl)
|
517
|
+
if (excl)
|
518
|
+
{
|
519
|
+
upto_id -= 2;
|
520
|
+
}
|
521
|
+
|
522
|
+
from_cp = cs_checked_cp(from_id);
|
523
|
+
upto_cp = cs_checked_cp(upto_id);
|
296
524
|
|
297
|
-
|
298
|
-
|
525
|
+
if (upto_cp > from_cp && (upto_cp - from_cp > 6))
|
526
|
+
{
|
527
|
+
// set bits in preceding partially toggled bytes individually
|
528
|
+
for (/* */; (from_cp <= upto_cp) && (from_cp % 8); from_cp++)
|
529
|
+
{
|
530
|
+
set_cp(data, from_cp);
|
531
|
+
}
|
532
|
+
// memset contiguous bits directly
|
533
|
+
cont_len = upto_cp - from_cp + 1;
|
534
|
+
rem = cont_len % 8;
|
535
|
+
ensure_memsize_fits(data, upto_cp);
|
536
|
+
memset(data->cps + CS_MSIZE(from_cp), 0xFF, CS_MSIZE(cont_len - rem) / 8);
|
537
|
+
from_cp = upto_cp - rem + 1;
|
538
|
+
}
|
299
539
|
|
300
|
-
|
301
|
-
|
302
|
-
|
540
|
+
// set bits in partially toggled bytes individually
|
541
|
+
for (/* */; from_cp <= upto_cp; from_cp++)
|
542
|
+
{
|
543
|
+
set_cp(data, from_cp);
|
303
544
|
}
|
545
|
+
|
304
546
|
return self;
|
305
547
|
}
|
306
548
|
|
307
549
|
static inline VALUE
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
FETCH_CODEPOINTS(self, cps);
|
550
|
+
cs_merge_rb_array(VALUE self, VALUE rb_array)
|
551
|
+
{
|
552
|
+
VALUE el, array_length, i;
|
553
|
+
struct cs_data *data;
|
313
554
|
Check_Type(rb_array, T_ARRAY);
|
555
|
+
data = cs_fetch_data(self);
|
314
556
|
array_length = RARRAY_LEN(rb_array);
|
315
|
-
for (i = 0; i < array_length; i++)
|
557
|
+
for (i = 0; i < array_length; i++)
|
558
|
+
{
|
316
559
|
el = RARRAY_AREF(rb_array, i);
|
317
|
-
|
318
|
-
SETBIT(cps, FIX2ULONG(el));
|
560
|
+
set_cp(data, cs_checked_cp(el));
|
319
561
|
}
|
320
562
|
return self;
|
321
563
|
}
|
322
564
|
|
323
565
|
static VALUE
|
324
|
-
|
566
|
+
cs_method_merge(VALUE self, VALUE other)
|
567
|
+
{
|
325
568
|
rb_check_frozen(self);
|
326
|
-
if (
|
327
|
-
|
569
|
+
if (cs_check_type(other))
|
570
|
+
{
|
571
|
+
return cs_merge_cs(self, other);
|
328
572
|
}
|
329
|
-
else if (TYPE(other) == T_ARRAY)
|
330
|
-
|
573
|
+
else if (TYPE(other) == T_ARRAY)
|
574
|
+
{
|
575
|
+
return cs_merge_rb_array(self, other);
|
331
576
|
}
|
332
|
-
return
|
577
|
+
return cs_merge_rb_range(self, other);
|
333
578
|
}
|
334
579
|
|
335
580
|
static VALUE
|
336
|
-
|
337
|
-
|
338
|
-
|
581
|
+
cs_method_initialize_copy(VALUE self, VALUE orig)
|
582
|
+
{
|
583
|
+
cs_merge_cs(self, orig);
|
584
|
+
return self;
|
339
585
|
}
|
340
586
|
|
341
587
|
static VALUE
|
342
|
-
|
588
|
+
cs_method_subtract(VALUE self, VALUE other)
|
589
|
+
{
|
590
|
+
cs_cp cp, len, other_len;
|
591
|
+
cs_ar *cps, *other_cps;
|
343
592
|
rb_check_frozen(self);
|
344
|
-
|
593
|
+
cps = cs_fetch_cps(self, &len);
|
594
|
+
other_cps = cs_fetch_cps(other, &other_len);
|
595
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
596
|
+
{
|
597
|
+
if (tst_cp(other_cps, other_len, cp))
|
598
|
+
{
|
599
|
+
clr_cp(cps, len, cp);
|
600
|
+
}
|
601
|
+
}
|
345
602
|
return self;
|
346
603
|
}
|
347
604
|
|
348
605
|
static inline int
|
349
|
-
|
350
|
-
|
351
|
-
|
606
|
+
cs_a_subset_of_b(VALUE cs_a, VALUE cs_b, int *is_proper_ptr)
|
607
|
+
{
|
608
|
+
cs_ar *a, *b;
|
609
|
+
cs_cp cp, alen, blen, count_a, count_b;
|
352
610
|
|
353
|
-
if (!
|
611
|
+
if (!cs_check_type(cs_a) || !cs_check_type(cs_b))
|
612
|
+
{
|
354
613
|
rb_raise(rb_eArgError, "pass a CharacterSet");
|
355
614
|
}
|
356
615
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
if (
|
366
|
-
|
367
|
-
|
368
|
-
|
616
|
+
a = cs_fetch_cps(cs_a, &alen);
|
617
|
+
b = cs_fetch_cps(cs_b, &blen);
|
618
|
+
|
619
|
+
count_a = 0;
|
620
|
+
count_b = 0;
|
621
|
+
|
622
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
623
|
+
{
|
624
|
+
if (tst_cp(a, alen, cp))
|
625
|
+
{
|
626
|
+
if (!tst_cp(b, blen, cp))
|
627
|
+
{
|
628
|
+
return 0;
|
629
|
+
}
|
630
|
+
count_a++;
|
631
|
+
count_b++;
|
632
|
+
}
|
633
|
+
else if (tst_cp(b, blen, cp))
|
634
|
+
{
|
635
|
+
count_b++;
|
369
636
|
}
|
370
|
-
else if (TSTBIT(cps_b, cp)) size_b++;
|
371
637
|
}
|
372
638
|
|
373
|
-
if (
|
639
|
+
if (is_proper_ptr)
|
640
|
+
{
|
641
|
+
*is_proper_ptr = count_b > count_a;
|
642
|
+
}
|
643
|
+
|
374
644
|
return 1;
|
375
645
|
}
|
376
646
|
|
377
647
|
static VALUE
|
378
|
-
|
379
|
-
|
380
|
-
return
|
648
|
+
cs_method_subset_p(VALUE self, VALUE other)
|
649
|
+
{
|
650
|
+
return cs_a_subset_of_b(self, other, NULL) ? Qtrue : Qfalse;
|
381
651
|
}
|
382
652
|
|
383
653
|
static VALUE
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
654
|
+
cs_method_proper_subset_p(VALUE self, VALUE other)
|
655
|
+
{
|
656
|
+
int is_subset, is_proper;
|
657
|
+
is_subset = cs_a_subset_of_b(self, other, &is_proper);
|
658
|
+
return (is_subset && is_proper) ? Qtrue : Qfalse;
|
388
659
|
}
|
389
660
|
|
390
661
|
static VALUE
|
391
|
-
|
392
|
-
|
393
|
-
return
|
662
|
+
cs_method_superset_p(VALUE self, VALUE other)
|
663
|
+
{
|
664
|
+
return cs_a_subset_of_b(other, self, NULL) ? Qtrue : Qfalse;
|
394
665
|
}
|
395
666
|
|
396
667
|
static VALUE
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
668
|
+
cs_method_proper_superset_p(VALUE self, VALUE other)
|
669
|
+
{
|
670
|
+
int is_superset, is_proper;
|
671
|
+
is_superset = cs_a_subset_of_b(other, self, &is_proper);
|
672
|
+
return (is_superset && is_proper) ? Qtrue : Qfalse;
|
401
673
|
}
|
402
674
|
|
403
675
|
// *******************************
|
@@ -405,42 +677,43 @@ method_proper_superset_p(VALUE self, VALUE other) {
|
|
405
677
|
// *******************************
|
406
678
|
|
407
679
|
static VALUE
|
408
|
-
|
409
|
-
|
410
|
-
|
680
|
+
cs_class_method_from_ranges(VALUE self, VALUE ranges)
|
681
|
+
{
|
682
|
+
VALUE new_cs, range_count, i;
|
683
|
+
new_cs = rb_class_new_instance(0, 0, self);
|
411
684
|
range_count = RARRAY_LEN(ranges);
|
412
|
-
for (i = 0; i < range_count; i++)
|
413
|
-
|
685
|
+
for (i = 0; i < range_count; i++)
|
686
|
+
{
|
687
|
+
cs_merge_rb_range(new_cs, RARRAY_AREF(ranges, i));
|
414
688
|
}
|
415
|
-
return
|
689
|
+
return new_cs;
|
416
690
|
}
|
417
691
|
|
418
692
|
static VALUE
|
419
|
-
|
420
|
-
|
693
|
+
cs_method_ranges(VALUE self)
|
694
|
+
{
|
695
|
+
VALUE ranges, cp_num, previous_cp_num, current_start, current_end;
|
421
696
|
|
422
697
|
ranges = rb_ary_new();
|
423
|
-
|
698
|
+
previous_cp_num = 0;
|
424
699
|
current_start = 0;
|
425
700
|
current_end = 0;
|
426
701
|
|
427
702
|
FOR_EACH_ACTIVE_CODEPOINT(
|
428
|
-
|
703
|
+
cp_num = LONG2FIX(cp);
|
429
704
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
current_end = codepoint;
|
439
|
-
previous_codepoint = codepoint;
|
440
|
-
);
|
705
|
+
if (!previous_cp_num) {
|
706
|
+
current_start = cp_num;
|
707
|
+
} else if (previous_cp_num + 2 != cp_num) {
|
708
|
+
// gap found, finalize previous range
|
709
|
+
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
710
|
+
current_start = cp_num;
|
711
|
+
} current_end = cp_num;
|
712
|
+
previous_cp_num = cp_num;);
|
441
713
|
|
442
714
|
// add final range
|
443
|
-
if (current_start)
|
715
|
+
if (current_start)
|
716
|
+
{
|
444
717
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
445
718
|
}
|
446
719
|
|
@@ -448,117 +721,233 @@ method_ranges(VALUE self) {
|
|
448
721
|
}
|
449
722
|
|
450
723
|
static VALUE
|
451
|
-
|
452
|
-
|
724
|
+
cs_method_sample(int argc, VALUE *argv, VALUE self)
|
725
|
+
{
|
726
|
+
VALUE array, to_a_args[1] = {Qtrue};
|
453
727
|
rb_check_arity(argc, 0, 1);
|
454
|
-
|
455
|
-
array = method_to_a(1, to_a_args, self);
|
728
|
+
array = cs_method_to_a(1, to_a_args, self);
|
456
729
|
return rb_funcall(array, rb_intern("sample"), argc, argc ? argv[0] : 0);
|
457
730
|
}
|
458
731
|
|
459
732
|
static inline VALUE
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
733
|
+
cs_from_section(VALUE set, cs_cp from, cs_cp upto)
|
734
|
+
{
|
735
|
+
VALUE new_cs;
|
736
|
+
cs_ar *cps;
|
737
|
+
cs_cp cp, len;
|
738
|
+
struct cs_data *new_data;
|
739
|
+
new_cs = cs_alloc(RBASIC(set)->klass, &new_data);
|
740
|
+
cps = cs_fetch_cps(set, &len);
|
741
|
+
for (cp = from; cp <= upto; cp++)
|
742
|
+
{
|
743
|
+
if (tst_cp(cps, len, cp))
|
744
|
+
{
|
745
|
+
set_cp(new_data, cp);
|
746
|
+
}
|
467
747
|
}
|
468
|
-
return
|
748
|
+
return new_cs;
|
469
749
|
}
|
470
750
|
|
471
751
|
static VALUE
|
472
|
-
|
473
|
-
|
752
|
+
cs_method_ext_section(VALUE self, VALUE from, VALUE upto)
|
753
|
+
{
|
754
|
+
return cs_from_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
755
|
+
}
|
756
|
+
|
757
|
+
static inline cs_cp
|
758
|
+
cs_active_cp_count_in_section(VALUE set, cs_cp from, cs_cp upto)
|
759
|
+
{
|
760
|
+
cs_ar *cps;
|
761
|
+
cs_cp cp, count, len;
|
762
|
+
cps = cs_fetch_cps(set, &len);
|
763
|
+
for (count = 0, cp = from; cp <= upto; cp++)
|
764
|
+
{
|
765
|
+
if (tst_cp(cps, len, cp))
|
766
|
+
{
|
767
|
+
count++;
|
768
|
+
}
|
769
|
+
}
|
770
|
+
return count;
|
474
771
|
}
|
475
772
|
|
476
773
|
static VALUE
|
477
|
-
|
478
|
-
|
774
|
+
cs_method_ext_count_in_section(VALUE self, VALUE from, VALUE upto)
|
775
|
+
{
|
776
|
+
cs_cp count;
|
777
|
+
count = cs_active_cp_count_in_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
778
|
+
return LONG2FIX(count);
|
479
779
|
}
|
480
780
|
|
481
781
|
static inline VALUE
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
782
|
+
cs_has_cp_in_section(cs_ar *cps, cs_cp len, cs_cp from, cs_cp upto)
|
783
|
+
{
|
784
|
+
cs_cp cp;
|
785
|
+
for (cp = from; cp <= upto; cp++)
|
786
|
+
{
|
787
|
+
if (tst_cp(cps, len, cp))
|
788
|
+
{
|
789
|
+
return Qtrue;
|
790
|
+
}
|
490
791
|
}
|
491
792
|
return Qfalse;
|
492
793
|
}
|
493
794
|
|
494
795
|
static VALUE
|
495
|
-
|
796
|
+
cs_method_ext_section_p(VALUE self, VALUE from, VALUE upto)
|
797
|
+
{
|
798
|
+
cs_ar *cps;
|
799
|
+
cs_cp len;
|
800
|
+
cps = cs_fetch_cps(self, &len);
|
801
|
+
return cs_has_cp_in_section(cps, len, FIX2ULONG(from), FIX2ULONG(upto));
|
802
|
+
}
|
803
|
+
|
804
|
+
static inline VALUE
|
805
|
+
cs_ratio_of_section(VALUE set, cs_cp from, cs_cp upto)
|
806
|
+
{
|
807
|
+
double section_count, total_count;
|
808
|
+
section_count = (double)cs_active_cp_count_in_section(set, from, upto);
|
809
|
+
total_count = (double)cs_active_cp_count(set);
|
810
|
+
return DBL2NUM(section_count / total_count);
|
811
|
+
}
|
812
|
+
|
813
|
+
static VALUE
|
814
|
+
cs_method_ext_section_ratio(VALUE self, VALUE from, VALUE upto)
|
815
|
+
{
|
816
|
+
return cs_ratio_of_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
817
|
+
}
|
818
|
+
|
819
|
+
// Codepoint boundary constants.
#define MAX_CP 0x10FFFF               // value named in cs_checked_cp's error message as the upper limit
#define MAX_ASCII_CP 0x7F             // highest 7-bit value
#define MAX_BMP_CP 0xFFFF             // highest 16-bit value
#define MIN_ASTRAL_CP (MAX_BMP_CP + 1) // 0x10000, first value above MAX_BMP_CP
|
823
|
+
|
824
|
+
static inline VALUE
|
825
|
+
cs_has_cp_in_plane(cs_ar *cps, cs_cp len, unsigned int plane)
|
826
|
+
{
|
827
|
+
cs_cp plane_beg, plane_end;
|
828
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
829
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
830
|
+
return cs_has_cp_in_section(cps, len, plane_beg, plane_end);
|
831
|
+
}
|
832
|
+
|
833
|
+
static VALUE
|
834
|
+
cs_method_planes(VALUE self)
|
835
|
+
{
|
836
|
+
cs_ar *cps;
|
837
|
+
cs_cp len;
|
496
838
|
unsigned int i;
|
497
839
|
VALUE planes;
|
840
|
+
cps = cs_fetch_cps(self, &len);
|
498
841
|
planes = rb_ary_new();
|
499
|
-
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
500
|
-
|
842
|
+
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
843
|
+
{
|
844
|
+
if (cs_has_cp_in_plane(cps, len, i))
|
845
|
+
{
|
846
|
+
rb_ary_push(planes, INT2FIX(i));
|
847
|
+
}
|
501
848
|
}
|
502
849
|
return planes;
|
503
850
|
}
|
504
851
|
|
505
|
-
static
|
506
|
-
|
852
|
+
static inline int
|
853
|
+
cs_valid_plane_num(VALUE num)
|
854
|
+
{
|
507
855
|
int plane;
|
508
|
-
Check_Type(
|
509
|
-
plane = FIX2INT(
|
510
|
-
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
511
|
-
|
856
|
+
Check_Type(num, T_FIXNUM);
|
857
|
+
plane = FIX2INT(num);
|
858
|
+
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
859
|
+
{
|
860
|
+
rb_raise(rb_eArgError, "plane must be between 0 and %d", UNICODE_PLANE_COUNT - 1);
|
512
861
|
}
|
513
|
-
return
|
862
|
+
return plane;
|
863
|
+
}
|
864
|
+
|
865
|
+
static VALUE
|
866
|
+
cs_method_plane(VALUE self, VALUE plane_num)
|
867
|
+
{
|
868
|
+
cs_cp plane, plane_beg, plane_end;
|
869
|
+
plane = cs_valid_plane_num(plane_num);
|
870
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
871
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
872
|
+
return cs_from_section(self, plane_beg, plane_end);
|
873
|
+
}
|
874
|
+
|
875
|
+
static VALUE
|
876
|
+
cs_method_member_in_plane_p(VALUE self, VALUE plane_num)
|
877
|
+
{
|
878
|
+
cs_ar *cps;
|
879
|
+
cs_cp len;
|
880
|
+
unsigned int plane;
|
881
|
+
plane = cs_valid_plane_num(plane_num);
|
882
|
+
cps = cs_fetch_cps(self, &len);
|
883
|
+
return cs_has_cp_in_plane(cps, len, plane);
|
514
884
|
}
|
515
885
|
|
516
886
|
// True when `cp` lies outside 0xD800..0xDFFF. The argument is parenthesized
// so that expression arguments (e.g. `a | b`) are compared as a whole; note
// that it is evaluated up to twice, so it must not have side effects.
#define NON_SURROGATE(cp) ((cp) > 0xDFFF || (cp) < 0xD800)
|
517
887
|
|
518
888
|
static VALUE
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
889
|
+
cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
|
890
|
+
{
|
891
|
+
int inc_surr;
|
892
|
+
cs_cp upto, cp, len;
|
893
|
+
cs_ar *cps;
|
894
|
+
VALUE new_cs;
|
895
|
+
struct cs_data *new_data;
|
896
|
+
|
524
897
|
rb_check_arity(argc, 0, 2);
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
898
|
+
|
899
|
+
cps = cs_fetch_cps(self, &len);
|
900
|
+
inc_surr = argc && argv[0] == Qtrue;
|
901
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
902
|
+
upto = argc > 1 && FIXNUM_P(argv[1]) ? FIX2ULONG(argv[1]) : UNICODE_CP_COUNT;
|
903
|
+
|
904
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
905
|
+
{
|
906
|
+
if (cp <= upto && !tst_cp(cps, len, cp) && (inc_surr || NON_SURROGATE(cp)))
|
907
|
+
{
|
908
|
+
set_cp(new_data, cp);
|
909
|
+
}
|
531
910
|
}
|
532
|
-
|
533
|
-
|
534
|
-
);
|
911
|
+
|
912
|
+
return new_cs;
|
535
913
|
}
|
536
914
|
|
537
|
-
typedef int(*str_cp_handler)(unsigned int,
|
915
|
+
// Callback invoked once per string codepoint by the each_*_cp iterators.
// It receives the string codepoint, a codepoint array with its length, a
// cs_data pointer and a caller-defined memo; which of these a handler
// actually uses is up to the handler. Returning 0 stops the iteration.
typedef int (*str_cp_handler)(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo);
|
538
916
|
|
539
917
|
static inline int
|
540
|
-
add_str_cp_to_arr(unsigned int str_cp,
|
541
|
-
|
918
|
+
add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
919
|
+
{
|
920
|
+
set_cp(data, str_cp);
|
542
921
|
return 1;
|
543
922
|
}
|
544
923
|
|
545
924
|
static VALUE
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
925
|
+
cs_method_case_insensitive(VALUE self)
|
926
|
+
{
|
927
|
+
cs_cp i, len;
|
928
|
+
cs_ar *cps;
|
929
|
+
VALUE new_cs;
|
930
|
+
struct cs_data *new_data;
|
551
931
|
|
552
|
-
|
932
|
+
cps = cs_fetch_cps(self, &len);
|
933
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
934
|
+
cs_merge_cs(new_cs, self);
|
553
935
|
|
554
|
-
for (i = 0; i < CASEFOLD_COUNT; i++)
|
936
|
+
for (i = 0; i < CASEFOLD_COUNT; i++)
|
937
|
+
{
|
555
938
|
casefold_mapping m = unicode_casefold_table[i];
|
556
939
|
|
557
|
-
if
|
558
|
-
|
940
|
+
if (tst_cp(cps, len, m.from))
|
941
|
+
{
|
942
|
+
set_cp(new_data, m.to);
|
943
|
+
}
|
944
|
+
else if (tst_cp(cps, len, m.to))
|
945
|
+
{
|
946
|
+
set_cp(new_data, m.from);
|
947
|
+
}
|
559
948
|
}
|
560
949
|
|
561
|
-
return
|
950
|
+
return new_cs;
|
562
951
|
|
563
952
|
// OnigCaseFoldType flags;
|
564
953
|
// rb_encoding *enc;
|
@@ -573,20 +962,27 @@ method_case_insensitive(VALUE self) {
|
|
573
962
|
}
|
574
963
|
|
575
964
|
static inline VALUE
|
576
|
-
each_sb_cp(VALUE str, str_cp_handler func,
|
577
|
-
|
965
|
+
each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
966
|
+
{
|
967
|
+
long i, str_len;
|
578
968
|
unsigned int str_cp;
|
969
|
+
str_len = RSTRING_LEN(str);
|
579
970
|
|
580
|
-
for (i = 0; i <
|
971
|
+
for (i = 0; i < str_len; i++)
|
972
|
+
{
|
581
973
|
str_cp = (RSTRING_PTR(str)[i] & 0xff);
|
582
|
-
if (!(*func)(str_cp, cp_arr))
|
974
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
975
|
+
{
|
976
|
+
return Qfalse;
|
977
|
+
}
|
583
978
|
}
|
584
979
|
|
585
980
|
return Qtrue;
|
586
981
|
}
|
587
982
|
|
588
983
|
static inline VALUE
|
589
|
-
each_mb_cp(VALUE str, str_cp_handler func,
|
984
|
+
each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
985
|
+
{
|
590
986
|
int n;
|
591
987
|
unsigned int str_cp;
|
592
988
|
const char *ptr, *end;
|
@@ -597,9 +993,13 @@ each_mb_cp(VALUE str, str_cp_handler func, cp_byte *cp_arr) {
|
|
597
993
|
end = RSTRING_END(str);
|
598
994
|
enc = rb_enc_get(str);
|
599
995
|
|
600
|
-
while (ptr < end)
|
996
|
+
while (ptr < end)
|
997
|
+
{
|
601
998
|
str_cp = rb_enc_codepoint_len(ptr, end, &n, enc);
|
602
|
-
if (!(*func)(str_cp, cp_arr))
|
999
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
1000
|
+
{
|
1001
|
+
return Qfalse;
|
1002
|
+
}
|
603
1003
|
ptr += n;
|
604
1004
|
}
|
605
1005
|
|
@@ -611,105 +1011,238 @@ static inline int
|
|
611
1011
|
single_byte_optimizable(VALUE str)
|
612
1012
|
{
|
613
1013
|
rb_encoding *enc;
|
614
|
-
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1014
|
+
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1015
|
+
{
|
1016
|
+
return 1;
|
1017
|
+
}
|
615
1018
|
|
616
1019
|
enc = rb_enc_get(str);
|
617
|
-
if (rb_enc_mbmaxlen(enc) == 1)
|
1020
|
+
if (rb_enc_mbmaxlen(enc) == 1)
|
1021
|
+
{
|
1022
|
+
return 1;
|
1023
|
+
}
|
618
1024
|
|
619
1025
|
return 0;
|
620
1026
|
}
|
621
1027
|
|
622
1028
|
static inline VALUE
|
623
|
-
each_cp(VALUE str, str_cp_handler func,
|
624
|
-
|
625
|
-
|
1029
|
+
each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1030
|
+
{
|
1031
|
+
if (single_byte_optimizable(str))
|
1032
|
+
{
|
1033
|
+
return each_sb_cp(str, func, cp_arr, len, data, memo);
|
626
1034
|
}
|
627
|
-
return each_mb_cp(str, func, cp_arr);
|
1035
|
+
return each_mb_cp(str, func, cp_arr, len, data, memo);
|
628
1036
|
}
|
629
1037
|
|
630
1038
|
static inline void
|
631
|
-
raise_arg_err_unless_string(VALUE val)
|
632
|
-
|
1039
|
+
raise_arg_err_unless_string(VALUE val)
|
1040
|
+
{
|
1041
|
+
if (!RB_TYPE_P(val, T_STRING))
|
1042
|
+
{
|
1043
|
+
rb_raise(rb_eArgError, "pass a String");
|
1044
|
+
}
|
633
1045
|
}
|
634
1046
|
|
635
1047
|
static VALUE
|
636
|
-
|
637
|
-
|
1048
|
+
cs_class_method_of(VALUE self, VALUE str)
|
1049
|
+
{
|
1050
|
+
VALUE new_cs;
|
1051
|
+
struct cs_data *new_data;
|
1052
|
+
new_cs = cs_alloc(self, &new_data);
|
638
1053
|
raise_arg_err_unless_string(str);
|
639
|
-
|
640
|
-
|
641
|
-
return NEW_CHARACTER_SET(self, cp_arr);
|
1054
|
+
each_cp(str, add_str_cp_to_arr, 0, 0, new_data, 0);
|
1055
|
+
return new_cs;
|
642
1056
|
}
|
643
1057
|
|
644
1058
|
static inline int
|
645
|
-
|
646
|
-
|
1059
|
+
count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1060
|
+
{
|
1061
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1062
|
+
{
|
1063
|
+
*memo += 1;
|
1064
|
+
}
|
1065
|
+
return 1;
|
647
1066
|
}
|
648
1067
|
|
649
1068
|
static VALUE
|
650
|
-
|
651
|
-
|
652
|
-
VALUE
|
1069
|
+
cs_method_count_in(VALUE self, VALUE str)
|
1070
|
+
{
|
1071
|
+
VALUE count;
|
1072
|
+
struct cs_data *data;
|
653
1073
|
raise_arg_err_unless_string(str);
|
654
|
-
|
655
|
-
|
656
|
-
|
1074
|
+
data = cs_fetch_data(self);
|
1075
|
+
count = 0;
|
1076
|
+
each_cp(str, count_str_cp, data->cps, data->len, data, &count);
|
1077
|
+
return INT2NUM(count);
|
1078
|
+
}
|
1079
|
+
|
1080
|
+
static inline int
|
1081
|
+
str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1082
|
+
{
|
1083
|
+
return tst_cp(cp_arr, len, str_cp);
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
static VALUE
|
1087
|
+
cs_method_cover_p(VALUE self, VALUE str)
|
1088
|
+
{
|
1089
|
+
struct cs_data *data;
|
1090
|
+
raise_arg_err_unless_string(str);
|
1091
|
+
data = cs_fetch_data(self);
|
1092
|
+
return each_cp(str, str_cp_in_arr, data->cps, data->len, data, 0);
|
1093
|
+
}
|
1094
|
+
|
1095
|
+
static inline int
|
1096
|
+
add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1097
|
+
{
|
1098
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1099
|
+
{
|
1100
|
+
rb_ary_push(memo[0], rb_enc_uint_chr((int)str_cp, (rb_encoding *)memo[1]));
|
1101
|
+
}
|
1102
|
+
return 1;
|
1103
|
+
}
|
1104
|
+
|
1105
|
+
static VALUE
|
1106
|
+
cs_method_scan(VALUE self, VALUE str)
|
1107
|
+
{
|
1108
|
+
VALUE memo[2];
|
1109
|
+
struct cs_data *data;
|
1110
|
+
raise_arg_err_unless_string(str);
|
1111
|
+
data = cs_fetch_data(self);
|
1112
|
+
memo[0] = rb_ary_new();
|
1113
|
+
memo[1] = (VALUE)rb_enc_get(str);
|
1114
|
+
each_cp(str, add_str_cp_to_str_arr, data->cps, data->len, data, memo);
|
1115
|
+
return memo[0];
|
657
1116
|
}
|
658
1117
|
|
659
1118
|
static inline int
|
660
|
-
|
661
|
-
|
1119
|
+
str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1120
|
+
{
|
1121
|
+
return !tst_cp(cp_arr, len, str_cp);
|
662
1122
|
}
|
663
1123
|
|
664
1124
|
static VALUE
|
665
|
-
|
666
|
-
|
1125
|
+
cs_method_used_by_p(VALUE self, VALUE str)
|
1126
|
+
{
|
1127
|
+
VALUE only_uses_other_cps;
|
1128
|
+
struct cs_data *data;
|
667
1129
|
raise_arg_err_unless_string(str);
|
668
|
-
|
669
|
-
|
1130
|
+
data = cs_fetch_data(self);
|
1131
|
+
only_uses_other_cps = each_cp(str, str_cp_not_in_arr, data->cps, data->len, data, 0);
|
1132
|
+
return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
|
1133
|
+
}
|
1134
|
+
|
1135
|
+
static void
|
1136
|
+
cs_str_buf_cat(VALUE str, const char *ptr, long len)
|
1137
|
+
{
|
1138
|
+
long total, olen;
|
1139
|
+
char *sptr;
|
1140
|
+
|
1141
|
+
RSTRING_GETMEM(str, sptr, olen);
|
1142
|
+
sptr = RSTRING(str)->as.heap.ptr;
|
1143
|
+
olen = RSTRING(str)->as.heap.len;
|
1144
|
+
total = olen + len;
|
1145
|
+
memcpy(sptr + olen, ptr, len);
|
1146
|
+
RSTRING(str)->as.heap.len = total;
|
1147
|
+
}
|
1148
|
+
|
1149
|
+
// Fallback definition, used only when TERM_FILL is not defined already.
// Writes a terminator at `ptr`: a single '\0' byte, or `termlen` zero bytes
// when `termlen` is greater than 1 (a case hinted to the compiler as
// unlikely). Each argument is evaluated exactly once.
#ifndef TERM_FILL
#define TERM_FILL(ptr, termlen)                  \
  do                                             \
  {                                              \
    char *const tf_dst = (ptr);                  \
    const int tf_width = (termlen);              \
    *tf_dst = '\0';                              \
    if (__builtin_expect(!!(tf_width > 1), 0))   \
    {                                            \
      memset(tf_dst, 0, tf_width);               \
    }                                            \
  } while (0)
#endif
|
1160
|
+
|
1161
|
+
static void
|
1162
|
+
cs_str_buf_terminate(VALUE str, rb_encoding *enc)
|
1163
|
+
{
|
1164
|
+
char *ptr;
|
1165
|
+
long len;
|
1166
|
+
|
1167
|
+
ptr = RSTRING(str)->as.heap.ptr;
|
1168
|
+
len = RSTRING(str)->as.heap.len;
|
1169
|
+
TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
|
670
1170
|
}
|
671
1171
|
|
672
1172
|
static inline VALUE
|
673
|
-
|
674
|
-
|
1173
|
+
cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
|
1174
|
+
{
|
1175
|
+
cs_ar *cps;
|
1176
|
+
cs_cp len;
|
675
1177
|
rb_encoding *str_enc;
|
676
|
-
VALUE orig_len,
|
677
|
-
int
|
1178
|
+
VALUE orig_len, new_str_buf;
|
1179
|
+
int cp_len;
|
678
1180
|
unsigned int str_cp;
|
679
1181
|
const char *ptr, *end;
|
680
1182
|
|
681
1183
|
raise_arg_err_unless_string(str);
|
682
1184
|
|
683
|
-
|
1185
|
+
cps = cs_fetch_cps(set, &len);
|
684
1186
|
|
685
1187
|
orig_len = RSTRING_LEN(str);
|
686
|
-
|
687
|
-
|
1188
|
+
if (orig_len < 1) // empty string, will never change
|
1189
|
+
{
|
1190
|
+
if (bang)
|
1191
|
+
{
|
1192
|
+
return Qnil;
|
1193
|
+
}
|
1194
|
+
return rb_str_dup(str);
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
new_str_buf = rb_str_buf_new(orig_len);
|
688
1198
|
str_enc = rb_enc_get(str);
|
689
1199
|
rb_enc_associate(new_str_buf, str_enc);
|
690
|
-
|
691
|
-
|
1200
|
+
rb_str_modify(new_str_buf);
|
1201
|
+
ENC_CODERANGE_SET(new_str_buf, rb_enc_asciicompat(str_enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
|
692
1202
|
|
693
1203
|
ptr = RSTRING_PTR(str);
|
694
1204
|
end = RSTRING_END(str);
|
695
1205
|
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
1206
|
+
if (single_byte_optimizable(str))
|
1207
|
+
{
|
1208
|
+
while (ptr < end)
|
1209
|
+
{
|
1210
|
+
str_cp = *ptr & 0xff;
|
1211
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1212
|
+
{
|
1213
|
+
cs_str_buf_cat(new_str_buf, ptr, 1);
|
1214
|
+
}
|
1215
|
+
ptr++;
|
1216
|
+
}
|
1217
|
+
}
|
1218
|
+
else // likely to be multibyte string
|
1219
|
+
{
|
1220
|
+
while (ptr < end)
|
1221
|
+
{
|
1222
|
+
str_cp = rb_enc_codepoint_len(ptr, end, &cp_len, str_enc);
|
1223
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1224
|
+
{
|
1225
|
+
cs_str_buf_cat(new_str_buf, ptr, cp_len);
|
1226
|
+
}
|
1227
|
+
ptr += cp_len;
|
701
1228
|
}
|
702
|
-
ptr += n;
|
703
1229
|
}
|
704
1230
|
|
705
|
-
|
706
|
-
|
1231
|
+
cs_str_buf_terminate(new_str_buf, str_enc);
|
1232
|
+
|
1233
|
+
if (bang)
|
1234
|
+
{
|
1235
|
+
if (RSTRING_LEN(new_str_buf) == (long)orig_len) // string unchanged
|
1236
|
+
{
|
1237
|
+
return Qnil;
|
1238
|
+
}
|
707
1239
|
rb_str_shared_replace(str, new_str_buf);
|
708
1240
|
}
|
709
|
-
else
|
1241
|
+
else
|
1242
|
+
{
|
710
1243
|
RB_OBJ_WRITE(new_str_buf, &(RBASIC(new_str_buf))->klass, rb_obj_class(str));
|
711
1244
|
// slightly cumbersome approach needed for compatibility with Ruby < 2.3:
|
712
|
-
RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags&(FL_TAINT));
|
1245
|
+
RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags & (FL_TAINT));
|
713
1246
|
str = new_str_buf;
|
714
1247
|
}
|
715
1248
|
|
@@ -717,98 +1250,115 @@ apply_to_str(VALUE set, VALUE str, int delete, int bang) {
|
|
717
1250
|
}
|
718
1251
|
|
719
1252
|
static VALUE
|
720
|
-
|
721
|
-
|
1253
|
+
cs_method_delete_in(VALUE self, VALUE str)
|
1254
|
+
{
|
1255
|
+
return cs_apply_to_str(self, str, 1, 0);
|
1256
|
+
}
|
1257
|
+
|
1258
|
+
static VALUE
|
1259
|
+
cs_method_delete_in_bang(VALUE self, VALUE str)
|
1260
|
+
{
|
1261
|
+
return cs_apply_to_str(self, str, 1, 1);
|
722
1262
|
}
|
723
1263
|
|
724
1264
|
static VALUE
|
725
|
-
|
726
|
-
|
1265
|
+
cs_method_keep_in(VALUE self, VALUE str)
|
1266
|
+
{
|
1267
|
+
return cs_apply_to_str(self, str, 0, 0);
|
727
1268
|
}
|
728
1269
|
|
729
1270
|
static VALUE
|
730
|
-
|
731
|
-
|
1271
|
+
cs_method_keep_in_bang(VALUE self, VALUE str)
|
1272
|
+
{
|
1273
|
+
return cs_apply_to_str(self, str, 0, 1);
|
732
1274
|
}
|
733
1275
|
|
734
1276
|
static VALUE
|
735
|
-
|
736
|
-
|
1277
|
+
cs_method_allocated_length(VALUE self)
|
1278
|
+
{
|
1279
|
+
return LONG2FIX(cs_fetch_data(self)->len);
|
737
1280
|
}
|
738
1281
|
|
739
1282
|
// ****
|
740
1283
|
// init
|
741
1284
|
// ****
|
742
1285
|
|
743
|
-
void
|
744
|
-
Init_character_set()
|
1286
|
+
void Init_character_set()
|
745
1287
|
{
|
746
1288
|
VALUE cs = rb_define_class("CharacterSet", rb_cObject);
|
747
1289
|
|
748
|
-
rb_define_alloc_func(cs,
|
1290
|
+
rb_define_alloc_func(cs, cs_method_allocate);
|
749
1291
|
|
750
1292
|
// `Set` compatibility methods
|
751
1293
|
|
752
|
-
rb_define_method(cs, "each",
|
753
|
-
rb_define_method(cs, "to_a",
|
754
|
-
rb_define_method(cs, "length",
|
755
|
-
rb_define_method(cs, "size",
|
756
|
-
rb_define_method(cs, "
|
757
|
-
rb_define_method(cs, "
|
758
|
-
rb_define_method(cs, "
|
759
|
-
rb_define_method(cs, "
|
760
|
-
rb_define_method(cs, "
|
761
|
-
rb_define_method(cs, "
|
762
|
-
rb_define_method(cs, "
|
763
|
-
rb_define_method(cs, "
|
764
|
-
rb_define_method(cs, "
|
765
|
-
rb_define_method(cs, "
|
766
|
-
rb_define_method(cs, "
|
767
|
-
rb_define_method(cs, "
|
768
|
-
rb_define_method(cs, "
|
769
|
-
rb_define_method(cs, "
|
770
|
-
rb_define_method(cs, "
|
771
|
-
rb_define_method(cs, "
|
772
|
-
rb_define_method(cs, "
|
773
|
-
rb_define_method(cs, "
|
774
|
-
rb_define_method(cs, "
|
775
|
-
rb_define_method(cs, "add
|
776
|
-
rb_define_method(cs, "
|
777
|
-
rb_define_method(cs, "
|
778
|
-
rb_define_method(cs, "
|
779
|
-
rb_define_method(cs, "
|
780
|
-
rb_define_method(cs, "
|
781
|
-
rb_define_method(cs, "
|
782
|
-
rb_define_method(cs, "
|
783
|
-
rb_define_method(cs, "
|
784
|
-
rb_define_method(cs, "
|
785
|
-
rb_define_method(cs, "
|
786
|
-
rb_define_method(cs, "
|
787
|
-
rb_define_method(cs, "
|
788
|
-
rb_define_method(cs, "
|
789
|
-
rb_define_method(cs, "
|
790
|
-
rb_define_method(cs, "
|
791
|
-
rb_define_method(cs, "
|
792
|
-
rb_define_method(cs, "
|
793
|
-
rb_define_method(cs, "
|
1294
|
+
rb_define_method(cs, "each", cs_method_each, 0);
|
1295
|
+
rb_define_method(cs, "to_a", cs_method_to_a, -1);
|
1296
|
+
rb_define_method(cs, "length", cs_method_length, 0);
|
1297
|
+
rb_define_method(cs, "size", cs_method_length, 0);
|
1298
|
+
rb_define_method(cs, "empty?", cs_method_empty_p, 0);
|
1299
|
+
rb_define_method(cs, "hash", cs_method_hash, 0);
|
1300
|
+
rb_define_method(cs, "keep_if", cs_method_keep_if, 0);
|
1301
|
+
rb_define_method(cs, "delete_if", cs_method_delete_if, 0);
|
1302
|
+
rb_define_method(cs, "clear", cs_method_clear, 0);
|
1303
|
+
rb_define_method(cs, "min", cs_method_min, 0);
|
1304
|
+
rb_define_method(cs, "max", cs_method_max, 0);
|
1305
|
+
rb_define_method(cs, "minmax", cs_method_minmax, 0);
|
1306
|
+
rb_define_method(cs, "intersection", cs_method_intersection, 1);
|
1307
|
+
rb_define_method(cs, "&", cs_method_intersection, 1);
|
1308
|
+
rb_define_method(cs, "union", cs_method_union, 1);
|
1309
|
+
rb_define_method(cs, "+", cs_method_union, 1);
|
1310
|
+
rb_define_method(cs, "|", cs_method_union, 1);
|
1311
|
+
rb_define_method(cs, "difference", cs_method_difference, 1);
|
1312
|
+
rb_define_method(cs, "-", cs_method_difference, 1);
|
1313
|
+
rb_define_method(cs, "^", cs_method_exclusion, 1);
|
1314
|
+
rb_define_method(cs, "include?", cs_method_include_p, 1);
|
1315
|
+
rb_define_method(cs, "member?", cs_method_include_p, 1);
|
1316
|
+
rb_define_method(cs, "===", cs_method_include_p, 1);
|
1317
|
+
rb_define_method(cs, "add", cs_method_add, 1);
|
1318
|
+
rb_define_method(cs, "<<", cs_method_add, 1);
|
1319
|
+
rb_define_method(cs, "add?", cs_method_add_p, 1);
|
1320
|
+
rb_define_method(cs, "delete", cs_method_delete, 1);
|
1321
|
+
rb_define_method(cs, "delete?", cs_method_delete_p, 1);
|
1322
|
+
rb_define_method(cs, "intersect?", cs_method_intersect_p, 1);
|
1323
|
+
rb_define_method(cs, "disjoint?", cs_method_disjoint_p, 1);
|
1324
|
+
rb_define_method(cs, "eql?", cs_method_eql_p, 1);
|
1325
|
+
rb_define_method(cs, "==", cs_method_eql_p, 1);
|
1326
|
+
rb_define_method(cs, "merge", cs_method_merge, 1);
|
1327
|
+
rb_define_method(cs, "initialize_clone", cs_method_initialize_copy, 1);
|
1328
|
+
rb_define_method(cs, "initialize_dup", cs_method_initialize_copy, 1);
|
1329
|
+
rb_define_method(cs, "subtract", cs_method_subtract, 1);
|
1330
|
+
rb_define_method(cs, "subset?", cs_method_subset_p, 1);
|
1331
|
+
rb_define_method(cs, "<=", cs_method_subset_p, 1);
|
1332
|
+
rb_define_method(cs, "proper_subset?", cs_method_proper_subset_p, 1);
|
1333
|
+
rb_define_method(cs, "<", cs_method_proper_subset_p, 1);
|
1334
|
+
rb_define_method(cs, "superset?", cs_method_superset_p, 1);
|
1335
|
+
rb_define_method(cs, ">=", cs_method_superset_p, 1);
|
1336
|
+
rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
|
1337
|
+
rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
|
794
1338
|
|
795
1339
|
// `CharacterSet`-specific methods
|
796
1340
|
|
797
|
-
rb_define_singleton_method(cs, "from_ranges",
|
798
|
-
rb_define_singleton_method(cs, "of",
|
799
|
-
|
800
|
-
rb_define_method(cs, "ranges",
|
801
|
-
rb_define_method(cs, "sample",
|
802
|
-
rb_define_method(cs, "
|
803
|
-
rb_define_method(cs, "
|
804
|
-
rb_define_method(cs, "
|
805
|
-
rb_define_method(cs, "
|
806
|
-
rb_define_method(cs, "
|
807
|
-
rb_define_method(cs, "
|
808
|
-
rb_define_method(cs, "
|
809
|
-
rb_define_method(cs, "
|
810
|
-
rb_define_method(cs, "
|
811
|
-
rb_define_method(cs, "
|
812
|
-
rb_define_method(cs, "
|
813
|
-
rb_define_method(cs, "
|
1341
|
+
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1342
|
+
rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
|
1343
|
+
|
1344
|
+
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1345
|
+
rb_define_method(cs, "sample", cs_method_sample, -1);
|
1346
|
+
rb_define_method(cs, "ext_section", cs_method_ext_section, 2);
|
1347
|
+
rb_define_method(cs, "ext_count_in_section", cs_method_ext_count_in_section, 2);
|
1348
|
+
rb_define_method(cs, "ext_section?", cs_method_ext_section_p, 2);
|
1349
|
+
rb_define_method(cs, "ext_section_ratio", cs_method_ext_section_ratio, 2);
|
1350
|
+
rb_define_method(cs, "planes", cs_method_planes, 0);
|
1351
|
+
rb_define_method(cs, "plane", cs_method_plane, 1);
|
1352
|
+
rb_define_method(cs, "member_in_plane?", cs_method_member_in_plane_p, 1);
|
1353
|
+
rb_define_method(cs, "ext_inversion", cs_method_ext_inversion, -1);
|
1354
|
+
rb_define_method(cs, "case_insensitive", cs_method_case_insensitive, 0);
|
1355
|
+
rb_define_method(cs, "count_in", cs_method_count_in, 1);
|
1356
|
+
rb_define_method(cs, "cover?", cs_method_cover_p, 1);
|
1357
|
+
rb_define_method(cs, "delete_in", cs_method_delete_in, 1);
|
1358
|
+
rb_define_method(cs, "delete_in!", cs_method_delete_in_bang, 1);
|
1359
|
+
rb_define_method(cs, "keep_in", cs_method_keep_in, 1);
|
1360
|
+
rb_define_method(cs, "keep_in!", cs_method_keep_in_bang, 1);
|
1361
|
+
rb_define_method(cs, "scan", cs_method_scan, 1);
|
1362
|
+
rb_define_method(cs, "used_by?", cs_method_used_by_p, 1);
|
1363
|
+
rb_define_method(cs, "allocated_length", cs_method_allocated_length, 0);
|
814
1364
|
}
|