utf8_proc 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.rubocop.yml +2 -0
- data/.travis.yml +1 -4
- data/README.md +10 -2
- data/Rakefile +6 -3
- data/ext/utf8_proc/extconf.rb +12 -3
- data/ext/utf8_proc/utf8_proc.c +74 -7
- data/lib/utf8_proc/core_ext/string.rb +11 -0
- data/lib/utf8_proc/core_ext/string_jruby.rb +33 -0
- data/lib/utf8_proc/version.rb +1 -1
- data/lib/utf8_proc.rb +1 -1
- data/utf8_proc.gemspec +5 -1
- data/vendor/libutf8proc/utf8proc.c +755 -0
- data/vendor/libutf8proc/utf8proc.h +699 -0
- data/vendor/libutf8proc/utf8proc_data.c +14386 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0f876c27547cb55eb9c0dd4ae3e4dc9686b76bd
|
4
|
+
data.tar.gz: ca58c8a85dbce81d92cbd724f38f29c81383349e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56e03b1892309039f455c07cbc857076460a66bd910775df4eebfa95c597829731839ae2049d3006210531a85c3d2aa2720a0265997eb237782aba1e5b074163
|
7
|
+
data.tar.gz: f0a52ac5e29de9dc47c276ea8a4bda42a06579540331ff22dd8b89411f4a290d5c138894617b91f7756629fb7adc31ddf1d86be2912612232e0000c4c3f064c9
|
data/.gitmodules
ADDED
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
@@ -15,8 +15,5 @@ matrix:
|
|
15
15
|
- rvm: ruby-head
|
16
16
|
- rvm: jruby-head
|
17
17
|
before_install:
|
18
|
-
-
|
19
|
-
- unzip utf8proc.zip
|
20
|
-
- pushd utf8proc* && sudo make install prefix=/usr && popd
|
21
|
-
- gem install bundler -v 1.14.4
|
18
|
+
- gem install bundler -v 1.14.5
|
22
19
|
script: bundle exec rake
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
[](https://gemnasium.com/github.com/nomoon/utf8_proc)
|
5
5
|
[](https://badge.fury.io/rb/utf8_proc)
|
6
6
|
|
7
|
-
A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings.
|
7
|
+
A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. It will use the `utf8proc` shared library and headers installed on your system if they are available *(packages are available — OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*. Failing that, it will fall back to compiling the library into the extension.
|
8
8
|
|
9
9
|
Currently supports UTF-8/ASCII string input and NFC, NFD, NFKC, NFKD, and NFKC-Casefold forms. Handles Unicode 9.0 and includes the current official full suite of 9.0 normalization tests.
|
10
10
|
|
@@ -17,7 +17,7 @@ Quick benchmarks against the [UNF](https://github.com/knu/ruby-unf) gem show it
|
|
17
17
|
Add this line to your application's Gemfile:
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
gem
|
20
|
+
gem "utf8_proc"
|
21
21
|
```
|
22
22
|
|
23
23
|
And then execute:
|
@@ -53,6 +53,14 @@ UTF8Proc.normalize(utf8_string, form = :nfc)
|
|
53
53
|
|
54
54
|
# Version string of loaded libutf8proc
|
55
55
|
UTF8Proc::LIBRARY_VERSION
|
56
|
+
|
57
|
+
# Add normalization methods directly to String class
|
58
|
+
require "utf8_proc/core_ext/string"
|
59
|
+
|
60
|
+
# This enables:
|
61
|
+
"String".NFC
|
62
|
+
"String".normalize(:nfc)
|
63
|
+
|
56
64
|
```
|
57
65
|
|
58
66
|
(Like `unf`) on JRuby the gem will fall back to using `java.text.Normalizer`. The interface remains the same.
|
data/Rakefile
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require "bundler/gem_tasks"
|
3
|
+
require "rubocop/rake_task"
|
3
4
|
require "rake/testtask"
|
4
5
|
|
6
|
+
RuboCop::RakeTask.new
|
7
|
+
|
5
8
|
Rake::TestTask.new(:test) do |t|
|
6
9
|
t.libs << "test"
|
7
10
|
t.libs << "lib"
|
8
11
|
t.test_files = FileList["test/**/*_test.rb"]
|
9
12
|
end
|
10
13
|
|
11
|
-
if
|
12
|
-
task default:
|
14
|
+
if RUBY_ENGINE == "jruby"
|
15
|
+
task default: %i[rubocop test]
|
13
16
|
else
|
14
17
|
require "rake/extensiontask"
|
15
18
|
|
@@ -19,5 +22,5 @@ else
|
|
19
22
|
ext.lib_dir = "lib/utf8_proc"
|
20
23
|
end
|
21
24
|
|
22
|
-
task default: %i[clobber compile test]
|
25
|
+
task default: %i[rubocop clobber compile test]
|
23
26
|
end
|
data/ext/utf8_proc/extconf.rb
CHANGED
@@ -2,10 +2,19 @@
|
|
2
2
|
# rubocop:disable GlobalVars
|
3
3
|
require "mkmf"
|
4
4
|
|
5
|
-
$CFLAGS << " -std=c99 -Wno-declaration-after-statement"
|
6
|
-
|
7
5
|
pkg_config("utf8proc")
|
6
|
+
unless have_library("utf8proc")
|
7
|
+
puts "Compiling local libutf8proc..."
|
8
|
+
|
9
|
+
libutf8proc_dir = File.expand_path(
|
10
|
+
File.join(File.dirname(__FILE__), "../../vendor/libutf8proc")
|
11
|
+
)
|
8
12
|
|
9
|
-
|
13
|
+
$VPATH << libutf8proc_dir
|
14
|
+
$srcs = ["utf8_proc.c", "utf8proc.c"]
|
15
|
+
$CFLAGS << " -I#{libutf8proc_dir}"
|
16
|
+
end
|
17
|
+
|
18
|
+
$CFLAGS << " -std=c99 -Wno-declaration-after-statement"
|
10
19
|
|
11
20
|
create_makefile("utf8_proc/utf8_proc")
|
data/ext/utf8_proc/utf8_proc.c
CHANGED
@@ -32,29 +32,60 @@ static inline VALUE normInternal(VALUE string, utf8proc_option_t options) {
|
|
32
32
|
return new_str;
|
33
33
|
}
|
34
34
|
|
35
|
+
// NFC
|
35
36
|
|
36
|
-
VALUE toNFC(VALUE self, VALUE string) {
|
37
|
+
static VALUE toNFC(VALUE self, VALUE string) {
|
37
38
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
38
39
|
}
|
39
40
|
|
40
|
-
VALUE
|
41
|
+
static VALUE StoNFC(VALUE string) {
|
42
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
43
|
+
}
|
44
|
+
|
45
|
+
// NFD
|
46
|
+
|
47
|
+
static VALUE toNFD(VALUE self, VALUE string) {
|
48
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
|
49
|
+
}
|
50
|
+
|
51
|
+
static VALUE StoNFD(VALUE string) {
|
41
52
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
|
42
53
|
}
|
43
54
|
|
44
|
-
|
55
|
+
// NFKC
|
56
|
+
|
57
|
+
static VALUE toNFKC(VALUE self, VALUE string) {
|
45
58
|
return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
46
59
|
}
|
47
60
|
|
48
|
-
VALUE
|
61
|
+
static VALUE StoNFKC(VALUE string) {
|
62
|
+
return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
63
|
+
}
|
64
|
+
|
65
|
+
// NFKD
|
66
|
+
|
67
|
+
static VALUE toNFKD(VALUE self, VALUE string) {
|
68
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE StoNFKD(VALUE string) {
|
49
72
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
50
73
|
}
|
51
74
|
|
52
|
-
|
75
|
+
// NFKC_CF
|
76
|
+
|
77
|
+
static VALUE toNFKC_CF(VALUE self, VALUE string) {
|
78
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
|
79
|
+
}
|
80
|
+
|
81
|
+
static VALUE StoNFKC_CF(VALUE string) {
|
53
82
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
|
54
83
|
}
|
55
84
|
|
85
|
+
// Parameterized normalization
|
56
86
|
|
57
|
-
|
87
|
+
|
88
|
+
static VALUE toNorm(int argc, VALUE* argv, VALUE self){
|
58
89
|
VALUE string;
|
59
90
|
VALUE form;
|
60
91
|
rb_scan_args(argc, argv, "11", &string, &form);
|
@@ -82,6 +113,33 @@ VALUE norm(int argc, VALUE* argv, VALUE self){
|
|
82
113
|
}
|
83
114
|
}
|
84
115
|
|
116
|
+
static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
|
117
|
+
VALUE form;
|
118
|
+
rb_scan_args(argc, argv, "01", &form);
|
119
|
+
|
120
|
+
if (NIL_P(form)) {
|
121
|
+
return StoNFC(string);
|
122
|
+
}
|
123
|
+
|
124
|
+
ID s_form;
|
125
|
+
s_form = SYM2ID(form);
|
126
|
+
if (s_form == NFC) {
|
127
|
+
return StoNFC(string);
|
128
|
+
}else if(s_form == NFD) {
|
129
|
+
return StoNFD(string);
|
130
|
+
}else if(s_form == NFKC) {
|
131
|
+
return StoNFKC(string);
|
132
|
+
}else if(s_form == NFKD) {
|
133
|
+
return StoNFKD(string);
|
134
|
+
}else if(s_form == NFKC_CF) {
|
135
|
+
return StoNFKC_CF(string);
|
136
|
+
}else{
|
137
|
+
rb_raise(rb_eArgError, "%s",
|
138
|
+
"Argument must be one of [:nfc (default), :nfd, :nfkc, " \
|
139
|
+
":nfkd, :nfkc_cf]");
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
85
143
|
void Init_utf8_proc(void) {
|
86
144
|
VALUE rb_mBase;
|
87
145
|
rb_mBase = rb_define_module("UTF8Proc");
|
@@ -105,5 +163,14 @@ void Init_utf8_proc(void) {
|
|
105
163
|
rb_define_singleton_method(rb_mBase, "NFKC", toNFKC, 1);
|
106
164
|
rb_define_singleton_method(rb_mBase, "NFKD", toNFKD, 1);
|
107
165
|
rb_define_singleton_method(rb_mBase, "NFKC_CF", toNFKC_CF, 1);
|
108
|
-
rb_define_singleton_method(rb_mBase, "normalize",
|
166
|
+
rb_define_singleton_method(rb_mBase, "normalize", toNorm, -1);
|
167
|
+
|
168
|
+
VALUE rb_mStringExt;
|
169
|
+
rb_mStringExt = rb_define_module_under(rb_mBase, "StringExtension");
|
170
|
+
rb_define_method(rb_mStringExt, "NFC", StoNFC, 0);
|
171
|
+
rb_define_method(rb_mStringExt, "NFD", StoNFD, 0);
|
172
|
+
rb_define_method(rb_mStringExt, "NFKC", StoNFKC, 0);
|
173
|
+
rb_define_method(rb_mStringExt, "NFKD", StoNFKD, 0);
|
174
|
+
rb_define_method(rb_mStringExt, "NFKC_CF", StoNFKC_CF, 0);
|
175
|
+
rb_define_method(rb_mStringExt, "normalize", StoNorm, -1);
|
109
176
|
}
|
data/lib/utf8_proc/core_ext/string_jruby.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# rubocop:disable MethodName
|
3
|
+
|
4
|
+
# This file should only be required within JRuby
|
5
|
+
|
6
|
+
require "java"
|
7
|
+
require "utf8_proc"
|
8
|
+
|
9
|
+
class String
|
10
|
+
def NFC
|
11
|
+
::UTF8Proc.NFC(self)
|
12
|
+
end
|
13
|
+
|
14
|
+
def NFD
|
15
|
+
::UTF8Proc.NFD(self)
|
16
|
+
end
|
17
|
+
|
18
|
+
def NFKC
|
19
|
+
::UTF8Proc.NFKC(self)
|
20
|
+
end
|
21
|
+
|
22
|
+
def NFKD
|
23
|
+
::UTF8Proc.NFKD(self)
|
24
|
+
end
|
25
|
+
|
26
|
+
def NFKC_CF
|
27
|
+
::UTF8Proc.NFKC_CF(self)
|
28
|
+
end
|
29
|
+
|
30
|
+
def normalize(form = :nfc)
|
31
|
+
::UTF8Proc.normalize(self, form)
|
32
|
+
end
|
33
|
+
end
|
data/lib/utf8_proc/version.rb
CHANGED
data/lib/utf8_proc.rb
CHANGED
data/utf8_proc.gemspec
CHANGED
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
20
20
|
f.match(%r{^(test|spec|features)/})
|
21
21
|
end
|
22
|
+
spec.files += ["vendor/libutf8proc/utf8proc.c",
|
23
|
+
"vendor/libutf8proc/utf8proc.h",
|
24
|
+
"vendor/libutf8proc/utf8proc_data.c"]
|
25
|
+
|
22
26
|
spec.require_paths = ["lib"]
|
23
27
|
|
24
28
|
spec.add_development_dependency "bundler", "~> 1.14"
|
@@ -27,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
27
31
|
spec.add_development_dependency "minitest", "~> 5.10"
|
28
32
|
spec.add_development_dependency "rubocop", "~> 0.47"
|
29
33
|
|
30
|
-
unless
|
34
|
+
unless RUBY_ENGINE == "jruby"
|
31
35
|
spec.extensions = ["ext/utf8_proc/extconf.rb"]
|
32
36
|
spec.add_development_dependency "rake-compiler", "~> 1.0"
|
33
37
|
end
|