utf8_proc 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51fd48320809134499eb6cc1ec7d84f91034df30
4
- data.tar.gz: 8b25e229470de1b9e666d8bf55cebd4b647797ed
3
+ metadata.gz: b0f876c27547cb55eb9c0dd4ae3e4dc9686b76bd
4
+ data.tar.gz: ca58c8a85dbce81d92cbd724f38f29c81383349e
5
5
  SHA512:
6
- metadata.gz: 03dee7d55aa9c396d079f1267033a3978ace36717c94b05a32d8225cda0e2c2d09f22e5f6fa058510b923796e948e031fabc48c6331b95d06a10ae0634400897
7
- data.tar.gz: 2b3af9256deaa1cfea16eea4e6470e36edb37b091c8dd9e009d7e420e6ef50125e03f81c2921ee4ece325834f1c6dd698920ede15bbf7bb9de591713ea53ec65
6
+ metadata.gz: 56e03b1892309039f455c07cbc857076460a66bd910775df4eebfa95c597829731839ae2049d3006210531a85c3d2aa2720a0265997eb237782aba1e5b074163
7
+ data.tar.gz: f0a52ac5e29de9dc47c276ea8a4bda42a06579540331ff22dd8b89411f4a290d5c138894617b91f7756629fb7adc31ddf1d86be2912612232e0000c4c3f064c9
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "vendor/libutf8proc"]
2
+ path = vendor/libutf8proc
3
+ url = https://github.com/JuliaLang/utf8proc.git
data/.rubocop.yml CHANGED
@@ -1,5 +1,7 @@
1
1
  AllCops:
2
2
  TargetRubyVersion: 2.0
3
+ Exclude:
4
+ - "vendor/**/*"
3
5
 
4
6
  # Metrics
5
7
  Metrics/AbcSize:
data/.travis.yml CHANGED
@@ -15,8 +15,5 @@ matrix:
15
15
  - rvm: ruby-head
16
16
  - rvm: jruby-head
17
17
  before_install:
18
- - wget -O utf8proc.zip https://github.com/JuliaLang/utf8proc/archive/v2.1.0.zip
19
- - unzip utf8proc.zip
20
- - pushd utf8proc* && sudo make install prefix=/usr && popd
21
- - gem install bundler -v 1.14.4
18
+ - gem install bundler -v 1.14.5
22
19
  script: bundle exec rake
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Dependency Status](https://gemnasium.com/badges/github.com/nomoon/utf8_proc.svg)](https://gemnasium.com/github.com/nomoon/utf8_proc)
5
5
  [![Gem Version](https://badge.fury.io/rb/utf8_proc.svg)](https://badge.fury.io/rb/utf8_proc)
6
6
 
7
- A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. Requires the `utf8proc` library and headers to be installed on your system. *(Packages are available. OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*
7
+ A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. Will use the `utf8proc` shared library and headers installed on your system if they are available *(Packages are available. OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*. Failing that, it will fall back to compiling the library into the extension.
8
8
 
9
9
  Currently supports UTF-8/ASCII string input and NFC, NFD, NFKC, NFKD, and NFKC-Casefold forms. Handles Unicode 9.0 and includes the current official full suite of 9.0 normalization tests.
10
10
 
@@ -17,7 +17,7 @@ Quick benchmarks against the [UNF](https://github.com/knu/ruby-unf) gem show it
17
17
  Add this line to your application's Gemfile:
18
18
 
19
19
  ```ruby
20
- gem 'utf8_proc'
20
+ gem "utf8_proc"
21
21
  ```
22
22
 
23
23
  And then execute:
@@ -53,6 +53,14 @@ UTF8Proc.normalize(utf8_string, form = :nfc)
53
53
 
54
54
  # Version string of loaded libutf8proc
55
55
  UTF8Proc::LIBRARY_VERSION
56
+
57
+ # Add normalization methods directly to String class
58
+ require "utf8_proc/core_ext/string"
59
+
60
+ # This enables:
61
+ "String".NFC
62
+ "String".normalize(:nfc)
63
+
56
64
  ```
57
65
 
58
66
  (Like `unf`) on JRuby the gem will fall back to using `java.text.Normalizer`. The interface remains the same.
data/Rakefile CHANGED
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
  require "bundler/gem_tasks"
3
+ require "rubocop/rake_task"
3
4
  require "rake/testtask"
4
5
 
6
+ RuboCop::RakeTask.new
7
+
5
8
  Rake::TestTask.new(:test) do |t|
6
9
  t.libs << "test"
7
10
  t.libs << "lib"
8
11
  t.test_files = FileList["test/**/*_test.rb"]
9
12
  end
10
13
 
11
- if defined?(JRUBY_VERSION)
12
- task default: :test
14
+ if RUBY_ENGINE == "jruby"
15
+ task default: %i[rubocop test]
13
16
  else
14
17
  require "rake/extensiontask"
15
18
 
@@ -19,5 +22,5 @@ else
19
22
  ext.lib_dir = "lib/utf8_proc"
20
23
  end
21
24
 
22
- task default: %i[clobber compile test]
25
+ task default: %i[rubocop clobber compile test]
23
26
  end
@@ -2,10 +2,19 @@
2
2
  # rubocop:disable GlobalVars
3
3
  require "mkmf"
4
4
 
5
- $CFLAGS << " -std=c99 -Wno-declaration-after-statement"
6
-
7
5
  pkg_config("utf8proc")
6
+ unless have_library("utf8proc")
7
+ puts "Compiling local libutf8proc..."
8
+
9
+ libutf8proc_dir = File.expand_path(
10
+ File.join(File.dirname(__FILE__), "../../vendor/libutf8proc")
11
+ )
8
12
 
9
- have_library("utf8proc") || abort("This extension requires the utf8proc library.")
13
+ $VPATH << libutf8proc_dir
14
+ $srcs = ["utf8_proc.c", "utf8proc.c"]
15
+ $CFLAGS << " -I#{libutf8proc_dir}"
16
+ end
17
+
18
+ $CFLAGS << " -std=c99 -Wno-declaration-after-statement"
10
19
 
11
20
  create_makefile("utf8_proc/utf8_proc")
@@ -32,29 +32,60 @@ static inline VALUE normInternal(VALUE string, utf8proc_option_t options) {
32
32
  return new_str;
33
33
  }
34
34
 
35
+ // NFC
35
36
 
36
- VALUE toNFC(VALUE self, VALUE string) {
37
+ static VALUE toNFC(VALUE self, VALUE string) {
37
38
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
38
39
  }
39
40
 
40
- VALUE toNFD(VALUE self, VALUE string) {
41
+ static VALUE StoNFC(VALUE string) {
42
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
43
+ }
44
+
45
+ // NFD
46
+
47
+ static VALUE toNFD(VALUE self, VALUE string) {
48
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
49
+ }
50
+
51
+ static VALUE StoNFD(VALUE string) {
41
52
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
42
53
  }
43
54
 
44
- VALUE toNFKC(VALUE self, VALUE string) {
55
+ // NFKC
56
+
57
+ static VALUE toNFKC(VALUE self, VALUE string) {
45
58
  return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
46
59
  }
47
60
 
48
- VALUE toNFKD(VALUE self, VALUE string) {
61
+ static VALUE StoNFKC(VALUE string) {
62
+ return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
63
+ }
64
+
65
+ // NFKD
66
+
67
+ static VALUE toNFKD(VALUE self, VALUE string) {
68
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
69
+ }
70
+
71
+ static VALUE StoNFKD(VALUE string) {
49
72
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
50
73
  }
51
74
 
52
- VALUE toNFKC_CF(VALUE self, VALUE string) {
75
+ // NFKC_CF
76
+
77
+ static VALUE toNFKC_CF(VALUE self, VALUE string) {
78
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
79
+ }
80
+
81
+ static VALUE StoNFKC_CF(VALUE string) {
53
82
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
54
83
  }
55
84
 
85
+ // Parameterized normalization
56
86
 
57
- VALUE norm(int argc, VALUE* argv, VALUE self){
87
+
88
+ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
58
89
  VALUE string;
59
90
  VALUE form;
60
91
  rb_scan_args(argc, argv, "11", &string, &form);
@@ -82,6 +113,33 @@ VALUE norm(int argc, VALUE* argv, VALUE self){
82
113
  }
83
114
  }
84
115
 
116
+ static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
117
+ VALUE form;
118
+ rb_scan_args(argc, argv, "01", &form);
119
+
120
+ if (NIL_P(form)) {
121
+ return StoNFC(string);
122
+ }
123
+
124
+ ID s_form;
125
+ s_form = SYM2ID(form);
126
+ if (s_form == NFC) {
127
+ return StoNFC(string);
128
+ }else if(s_form == NFD) {
129
+ return StoNFD(string);
130
+ }else if(s_form == NFKC) {
131
+ return StoNFKC(string);
132
+ }else if(s_form == NFKD) {
133
+ return StoNFKD(string);
134
+ }else if(s_form == NFKC_CF) {
135
+ return StoNFKC_CF(string);
136
+ }else{
137
+ rb_raise(rb_eArgError, "%s",
138
+ "Argument must be one of [:nfc (default), :nfd, :nfkc, " \
139
+ ":nfkd, :nfkc_cf]");
140
+ }
141
+ }
142
+
85
143
  void Init_utf8_proc(void) {
86
144
  VALUE rb_mBase;
87
145
  rb_mBase = rb_define_module("UTF8Proc");
@@ -105,5 +163,14 @@ void Init_utf8_proc(void) {
105
163
  rb_define_singleton_method(rb_mBase, "NFKC", toNFKC, 1);
106
164
  rb_define_singleton_method(rb_mBase, "NFKD", toNFKD, 1);
107
165
  rb_define_singleton_method(rb_mBase, "NFKC_CF", toNFKC_CF, 1);
108
- rb_define_singleton_method(rb_mBase, "normalize", norm, -1);
166
+ rb_define_singleton_method(rb_mBase, "normalize", toNorm, -1);
167
+
168
+ VALUE rb_mStringExt;
169
+ rb_mStringExt = rb_define_module_under(rb_mBase, "StringExtension");
170
+ rb_define_method(rb_mStringExt, "NFC", StoNFC, 0);
171
+ rb_define_method(rb_mStringExt, "NFD", StoNFD, 0);
172
+ rb_define_method(rb_mStringExt, "NFKC", StoNFKC, 0);
173
+ rb_define_method(rb_mStringExt, "NFKD", StoNFKD, 0);
174
+ rb_define_method(rb_mStringExt, "NFKC_CF", StoNFKC_CF, 0);
175
+ rb_define_method(rb_mStringExt, "normalize", StoNorm, -1);
109
176
  }
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "utf8_proc"
4
+
5
+ class String
6
+ if RUBY_ENGINE == "jruby"
7
+ require "utf8_proc/core_ext/string_jruby"
8
+ else
9
+ include ::UTF8Proc::StringExtension
10
+ end
11
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ # rubocop:disable MethodName
3
+
4
+ # This file should only be required within JRuby
5
+
6
+ require "java"
7
+ require "utf8_proc"
8
+
9
+ class String
10
+ def NFC
11
+ ::UTF8Proc.NFC(self)
12
+ end
13
+
14
+ def NFD
15
+ ::UTF8Proc.NFD(self)
16
+ end
17
+
18
+ def NFKC
19
+ ::UTF8Proc.NFKC(self)
20
+ end
21
+
22
+ def NFKD
23
+ ::UTF8Proc.NFKD(self)
24
+ end
25
+
26
+ def NFKC_CF
27
+ ::UTF8Proc.NFKC_CF(self)
28
+ end
29
+
30
+ def normalize(form = :nfc)
31
+ ::UTF8Proc.normalize(self, form)
32
+ end
33
+ end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module UTF8Proc
3
- VERSION = "0.3.0".freeze
3
+ VERSION = "0.4.0".freeze
4
4
  end
data/lib/utf8_proc.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  require "utf8_proc/version"
3
3
 
4
4
  module UTF8Proc
5
- if defined?(JRUBY_VERSION)
5
+ if RUBY_ENGINE == "jruby"
6
6
  require "utf8_proc/jruby"
7
7
  include JRuby
8
8
  else
data/utf8_proc.gemspec CHANGED
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
20
20
  f.match(%r{^(test|spec|features)/})
21
21
  end
22
+ spec.files += ["vendor/libutf8proc/utf8proc.c",
23
+ "vendor/libutf8proc/utf8proc.h",
24
+ "vendor/libutf8proc/utf8proc_data.c"]
25
+
22
26
  spec.require_paths = ["lib"]
23
27
 
24
28
  spec.add_development_dependency "bundler", "~> 1.14"
@@ -27,7 +31,7 @@ Gem::Specification.new do |spec|
27
31
  spec.add_development_dependency "minitest", "~> 5.10"
28
32
  spec.add_development_dependency "rubocop", "~> 0.47"
29
33
 
30
- unless defined?(JRUBY_VERSION)
34
+ unless RUBY_ENGINE == "jruby"
31
35
  spec.extensions = ["ext/utf8_proc/extconf.rb"]
32
36
  spec.add_development_dependency "rake-compiler", "~> 1.0"
33
37
  end