utf8_proc 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.rubocop.yml +2 -0
- data/.travis.yml +1 -4
- data/README.md +10 -2
- data/Rakefile +6 -3
- data/ext/utf8_proc/extconf.rb +12 -3
- data/ext/utf8_proc/utf8_proc.c +74 -7
- data/lib/utf8_proc/core_ext/string.rb +11 -0
- data/lib/utf8_proc/core_ext/string_jruby.rb +33 -0
- data/lib/utf8_proc/version.rb +1 -1
- data/lib/utf8_proc.rb +1 -1
- data/utf8_proc.gemspec +5 -1
- data/vendor/libutf8proc/utf8proc.c +755 -0
- data/vendor/libutf8proc/utf8proc.h +699 -0
- data/vendor/libutf8proc/utf8proc_data.c +14386 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0f876c27547cb55eb9c0dd4ae3e4dc9686b76bd
|
4
|
+
data.tar.gz: ca58c8a85dbce81d92cbd724f38f29c81383349e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56e03b1892309039f455c07cbc857076460a66bd910775df4eebfa95c597829731839ae2049d3006210531a85c3d2aa2720a0265997eb237782aba1e5b074163
|
7
|
+
data.tar.gz: f0a52ac5e29de9dc47c276ea8a4bda42a06579540331ff22dd8b89411f4a290d5c138894617b91f7756629fb7adc31ddf1d86be2912612232e0000c4c3f064c9
|
data/.gitmodules
ADDED
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
@@ -15,8 +15,5 @@ matrix:
|
|
15
15
|
- rvm: ruby-head
|
16
16
|
- rvm: jruby-head
|
17
17
|
before_install:
|
18
|
-
-
|
19
|
-
- unzip utf8proc.zip
|
20
|
-
- pushd utf8proc* && sudo make install prefix=/usr && popd
|
21
|
-
- gem install bundler -v 1.14.4
|
18
|
+
- gem install bundler -v 1.14.5
|
22
19
|
script: bundle exec rake
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
[![Dependency Status](https://gemnasium.com/badges/github.com/nomoon/utf8_proc.svg)](https://gemnasium.com/github.com/nomoon/utf8_proc)
|
5
5
|
[![Gem Version](https://badge.fury.io/rb/utf8_proc.svg)](https://badge.fury.io/rb/utf8_proc)
|
6
6
|
|
7
|
-
A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings.
|
7
|
+
A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. It will use the `utf8proc` shared library and headers installed on your system if they are available *(packages are available — OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*. Failing that, it will fall back to compiling the library into the extension.
|
8
8
|
|
9
9
|
Currently supports UTF-8/ASCII string input and NFC, NFD, NFKC, NFKD, and NFKC-Casefold forms. Handles Unicode 9.0 and includes the current official full suite of 9.0 normalization tests.
|
10
10
|
|
@@ -17,7 +17,7 @@ Quick benchmarks against the [UNF](https://github.com/knu/ruby-unf) gem show it
|
|
17
17
|
Add this line to your application's Gemfile:
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
gem
|
20
|
+
gem "utf8_proc"
|
21
21
|
```
|
22
22
|
|
23
23
|
And then execute:
|
@@ -53,6 +53,14 @@ UTF8Proc.normalize(utf8_string, form = :nfc)
|
|
53
53
|
|
54
54
|
# Version string of loaded libutf8proc
|
55
55
|
UTF8Proc::LIBRARY_VERSION
|
56
|
+
|
57
|
+
# Add normalization methods directly to String class
|
58
|
+
require "utf8_proc/core_ext/string"
|
59
|
+
|
60
|
+
# This enables:
|
61
|
+
"String".NFC
|
62
|
+
"String".normalize(:nfc)
|
63
|
+
|
56
64
|
```
|
57
65
|
|
58
66
|
(Like `unf`) on JRuby the gem will fall back to using `java.text.Normalizer`. The interface remains the same.
|
data/Rakefile
CHANGED
@@ -1,15 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require "bundler/gem_tasks"
|
3
|
+
require "rubocop/rake_task"
|
3
4
|
require "rake/testtask"
|
4
5
|
|
6
|
+
RuboCop::RakeTask.new
|
7
|
+
|
5
8
|
Rake::TestTask.new(:test) do |t|
|
6
9
|
t.libs << "test"
|
7
10
|
t.libs << "lib"
|
8
11
|
t.test_files = FileList["test/**/*_test.rb"]
|
9
12
|
end
|
10
13
|
|
11
|
-
if
|
12
|
-
task default:
|
14
|
+
if RUBY_ENGINE == "jruby"
|
15
|
+
task default: %i[rubocop test]
|
13
16
|
else
|
14
17
|
require "rake/extensiontask"
|
15
18
|
|
@@ -19,5 +22,5 @@ else
|
|
19
22
|
ext.lib_dir = "lib/utf8_proc"
|
20
23
|
end
|
21
24
|
|
22
|
-
task default: %i[clobber compile test]
|
25
|
+
task default: %i[rubocop clobber compile test]
|
23
26
|
end
|
data/ext/utf8_proc/extconf.rb
CHANGED
@@ -2,10 +2,19 @@
|
|
2
2
|
# rubocop:disable GlobalVars
|
3
3
|
require "mkmf"
|
4
4
|
|
5
|
-
$CFLAGS << " -std=c99 -Wno-declaration-after-statement"
|
6
|
-
|
7
5
|
pkg_config("utf8proc")
|
6
|
+
unless have_library("utf8proc")
|
7
|
+
puts "Compiling local libutf8proc..."
|
8
|
+
|
9
|
+
libutf8proc_dir = File.expand_path(
|
10
|
+
File.join(File.dirname(__FILE__), "../../vendor/libutf8proc")
|
11
|
+
)
|
8
12
|
|
9
|
-
|
13
|
+
$VPATH << libutf8proc_dir
|
14
|
+
$srcs = ["utf8_proc.c", "utf8proc.c"]
|
15
|
+
$CFLAGS << " -I#{libutf8proc_dir}"
|
16
|
+
end
|
17
|
+
|
18
|
+
$CFLAGS << " -std=c99 -Wno-declaration-after-statement"
|
10
19
|
|
11
20
|
create_makefile("utf8_proc/utf8_proc")
|
data/ext/utf8_proc/utf8_proc.c
CHANGED
@@ -32,29 +32,60 @@ static inline VALUE normInternal(VALUE string, utf8proc_option_t options) {
|
|
32
32
|
return new_str;
|
33
33
|
}
|
34
34
|
|
35
|
+
// NFC
|
35
36
|
|
36
|
-
VALUE toNFC(VALUE self, VALUE string) {
|
37
|
+
static VALUE toNFC(VALUE self, VALUE string) {
|
37
38
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
38
39
|
}
|
39
40
|
|
40
|
-
VALUE
|
41
|
+
static VALUE StoNFC(VALUE string) {
|
42
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
|
43
|
+
}
|
44
|
+
|
45
|
+
// NFD
|
46
|
+
|
47
|
+
static VALUE toNFD(VALUE self, VALUE string) {
|
48
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
|
49
|
+
}
|
50
|
+
|
51
|
+
static VALUE StoNFD(VALUE string) {
|
41
52
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
|
42
53
|
}
|
43
54
|
|
44
|
-
|
55
|
+
// NFKC
|
56
|
+
|
57
|
+
static VALUE toNFKC(VALUE self, VALUE string) {
|
45
58
|
return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
46
59
|
}
|
47
60
|
|
48
|
-
VALUE
|
61
|
+
static VALUE StoNFKC(VALUE string) {
|
62
|
+
return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
|
63
|
+
}
|
64
|
+
|
65
|
+
// NFKD
|
66
|
+
|
67
|
+
static VALUE toNFKD(VALUE self, VALUE string) {
|
68
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
69
|
+
}
|
70
|
+
|
71
|
+
static VALUE StoNFKD(VALUE string) {
|
49
72
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
|
50
73
|
}
|
51
74
|
|
52
|
-
|
75
|
+
// NFKC_CF
|
76
|
+
|
77
|
+
static VALUE toNFKC_CF(VALUE self, VALUE string) {
|
78
|
+
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
|
79
|
+
}
|
80
|
+
|
81
|
+
static VALUE StoNFKC_CF(VALUE string) {
|
53
82
|
return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
|
54
83
|
}
|
55
84
|
|
85
|
+
// Parameterized normalization
|
56
86
|
|
57
|
-
|
87
|
+
|
88
|
+
static VALUE toNorm(int argc, VALUE* argv, VALUE self){
|
58
89
|
VALUE string;
|
59
90
|
VALUE form;
|
60
91
|
rb_scan_args(argc, argv, "11", &string, &form);
|
@@ -82,6 +113,33 @@ VALUE norm(int argc, VALUE* argv, VALUE self){
|
|
82
113
|
}
|
83
114
|
}
|
84
115
|
|
116
|
+
static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
|
117
|
+
VALUE form;
|
118
|
+
rb_scan_args(argc, argv, "01", &form);
|
119
|
+
|
120
|
+
if (NIL_P(form)) {
|
121
|
+
return StoNFC(string);
|
122
|
+
}
|
123
|
+
|
124
|
+
ID s_form;
|
125
|
+
s_form = SYM2ID(form);
|
126
|
+
if (s_form == NFC) {
|
127
|
+
return StoNFC(string);
|
128
|
+
}else if(s_form == NFD) {
|
129
|
+
return StoNFD(string);
|
130
|
+
}else if(s_form == NFKC) {
|
131
|
+
return StoNFKC(string);
|
132
|
+
}else if(s_form == NFKD) {
|
133
|
+
return StoNFKD(string);
|
134
|
+
}else if(s_form == NFKC_CF) {
|
135
|
+
return StoNFKC_CF(string);
|
136
|
+
}else{
|
137
|
+
rb_raise(rb_eArgError, "%s",
|
138
|
+
"Argument must be one of [:nfc (default), :nfd, :nfkc, " \
|
139
|
+
":nfkd, :nfkc_cf]");
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
85
143
|
void Init_utf8_proc(void) {
|
86
144
|
VALUE rb_mBase;
|
87
145
|
rb_mBase = rb_define_module("UTF8Proc");
|
@@ -105,5 +163,14 @@ void Init_utf8_proc(void) {
|
|
105
163
|
rb_define_singleton_method(rb_mBase, "NFKC", toNFKC, 1);
|
106
164
|
rb_define_singleton_method(rb_mBase, "NFKD", toNFKD, 1);
|
107
165
|
rb_define_singleton_method(rb_mBase, "NFKC_CF", toNFKC_CF, 1);
|
108
|
-
rb_define_singleton_method(rb_mBase, "normalize",
|
166
|
+
rb_define_singleton_method(rb_mBase, "normalize", toNorm, -1);
|
167
|
+
|
168
|
+
VALUE rb_mStringExt;
|
169
|
+
rb_mStringExt = rb_define_module_under(rb_mBase, "StringExtension");
|
170
|
+
rb_define_method(rb_mStringExt, "NFC", StoNFC, 0);
|
171
|
+
rb_define_method(rb_mStringExt, "NFD", StoNFD, 0);
|
172
|
+
rb_define_method(rb_mStringExt, "NFKC", StoNFKC, 0);
|
173
|
+
rb_define_method(rb_mStringExt, "NFKD", StoNFKD, 0);
|
174
|
+
rb_define_method(rb_mStringExt, "NFKC_CF", StoNFKC_CF, 0);
|
175
|
+
rb_define_method(rb_mStringExt, "normalize", StoNorm, -1);
|
109
176
|
}
|
data/lib/utf8_proc/core_ext/string_jruby.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# rubocop:disable MethodName
|
3
|
+
|
4
|
+
# This file should only be required within JRuby
|
5
|
+
|
6
|
+
require "java"
|
7
|
+
require "utf8_proc"
|
8
|
+
|
9
|
+
class String
|
10
|
+
def NFC
|
11
|
+
::UTF8Proc.NFC(self)
|
12
|
+
end
|
13
|
+
|
14
|
+
def NFD
|
15
|
+
::UTF8Proc.NFD(self)
|
16
|
+
end
|
17
|
+
|
18
|
+
def NFKC
|
19
|
+
::UTF8Proc.NFKC(self)
|
20
|
+
end
|
21
|
+
|
22
|
+
def NFKD
|
23
|
+
::UTF8Proc.NFKD(self)
|
24
|
+
end
|
25
|
+
|
26
|
+
def NFKC_CF
|
27
|
+
::UTF8Proc.NFKC_CF(self)
|
28
|
+
end
|
29
|
+
|
30
|
+
def normalize(form = :nfc)
|
31
|
+
::UTF8Proc.normalize(self, form)
|
32
|
+
end
|
33
|
+
end
|
data/lib/utf8_proc/version.rb
CHANGED
data/lib/utf8_proc.rb
CHANGED
data/utf8_proc.gemspec
CHANGED
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
20
20
|
f.match(%r{^(test|spec|features)/})
|
21
21
|
end
|
22
|
+
spec.files += ["vendor/libutf8proc/utf8proc.c",
|
23
|
+
"vendor/libutf8proc/utf8proc.h",
|
24
|
+
"vendor/libutf8proc/utf8proc_data.c"]
|
25
|
+
|
22
26
|
spec.require_paths = ["lib"]
|
23
27
|
|
24
28
|
spec.add_development_dependency "bundler", "~> 1.14"
|
@@ -27,7 +31,7 @@ Gem::Specification.new do |spec|
|
|
27
31
|
spec.add_development_dependency "minitest", "~> 5.10"
|
28
32
|
spec.add_development_dependency "rubocop", "~> 0.47"
|
29
33
|
|
30
|
-
unless
|
34
|
+
unless RUBY_ENGINE == "jruby"
|
31
35
|
spec.extensions = ["ext/utf8_proc/extconf.rb"]
|
32
36
|
spec.add_development_dependency "rake-compiler", "~> 1.0"
|
33
37
|
end
|