utf8_proc 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51fd48320809134499eb6cc1ec7d84f91034df30
4
- data.tar.gz: 8b25e229470de1b9e666d8bf55cebd4b647797ed
3
+ metadata.gz: b0f876c27547cb55eb9c0dd4ae3e4dc9686b76bd
4
+ data.tar.gz: ca58c8a85dbce81d92cbd724f38f29c81383349e
5
5
  SHA512:
6
- metadata.gz: 03dee7d55aa9c396d079f1267033a3978ace36717c94b05a32d8225cda0e2c2d09f22e5f6fa058510b923796e948e031fabc48c6331b95d06a10ae0634400897
7
- data.tar.gz: 2b3af9256deaa1cfea16eea4e6470e36edb37b091c8dd9e009d7e420e6ef50125e03f81c2921ee4ece325834f1c6dd698920ede15bbf7bb9de591713ea53ec65
6
+ metadata.gz: 56e03b1892309039f455c07cbc857076460a66bd910775df4eebfa95c597829731839ae2049d3006210531a85c3d2aa2720a0265997eb237782aba1e5b074163
7
+ data.tar.gz: f0a52ac5e29de9dc47c276ea8a4bda42a06579540331ff22dd8b89411f4a290d5c138894617b91f7756629fb7adc31ddf1d86be2912612232e0000c4c3f064c9
data/.gitmodules ADDED
@@ -0,0 +1,3 @@
1
+ [submodule "vendor/libutf8proc"]
2
+ path = vendor/libutf8proc
3
+ url = https://github.com/JuliaLang/utf8proc.git
data/.rubocop.yml CHANGED
@@ -1,5 +1,7 @@
1
1
  AllCops:
2
2
  TargetRubyVersion: 2.0
3
+ Exclude:
4
+ - "vendor/**/*"
3
5
 
4
6
  # Metrics
5
7
  Metrics/AbcSize:
data/.travis.yml CHANGED
@@ -15,8 +15,5 @@ matrix:
15
15
  - rvm: ruby-head
16
16
  - rvm: jruby-head
17
17
  before_install:
18
- - wget -O utf8proc.zip https://github.com/JuliaLang/utf8proc/archive/v2.1.0.zip
19
- - unzip utf8proc.zip
20
- - pushd utf8proc* && sudo make install prefix=/usr && popd
21
- - gem install bundler -v 1.14.4
18
+ - gem install bundler -v 1.14.5
22
19
  script: bundle exec rake
data/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
  [![Dependency Status](https://gemnasium.com/badges/github.com/nomoon/utf8_proc.svg)](https://gemnasium.com/github.com/nomoon/utf8_proc)
5
5
  [![Gem Version](https://badge.fury.io/rb/utf8_proc.svg)](https://badge.fury.io/rb/utf8_proc)
6
6
 
7
- A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. Requires the `utf8proc` library and headers to be installed on your system. *(Packages are available. OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*
7
+ A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. It will use the `utf8proc` shared library and headers installed on your system if they are available *(Packages are available. OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*. Failing that, it will fall back to compiling the library into the extension.
8
8
 
9
9
  Currently supports UTF-8/ASCII string input and NFC, NFD, NFKC, NFKD, and NFKC-Casefold forms. Handles Unicode 9.0 and includes the current official full suite of 9.0 normalization tests.
10
10
 
@@ -17,7 +17,7 @@ Quick benchmarks against the [UNF](https://github.com/knu/ruby-unf) gem show it
17
17
  Add this line to your application's Gemfile:
18
18
 
19
19
  ```ruby
20
- gem 'utf8_proc'
20
+ gem "utf8_proc"
21
21
  ```
22
22
 
23
23
  And then execute:
@@ -53,6 +53,14 @@ UTF8Proc.normalize(utf8_string, form = :nfc)
53
53
 
54
54
  # Version string of loaded libutf8proc
55
55
  UTF8Proc::LIBRARY_VERSION
56
+
57
+ # Add normalization methods directly to String class
58
+ require "utf8_proc/core_ext/string"
59
+
60
+ # This enables:
61
+ "String".NFC
62
+ "String".normalize(:nfc)
63
+
56
64
  ```
57
65
 
58
66
  (Like `unf`) on JRuby the gem will fall back to using `java.text.Normalizer`. The interface remains the same.
data/Rakefile CHANGED
@@ -1,15 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
  require "bundler/gem_tasks"
3
+ require "rubocop/rake_task"
3
4
  require "rake/testtask"
4
5
 
6
+ RuboCop::RakeTask.new
7
+
5
8
  Rake::TestTask.new(:test) do |t|
6
9
  t.libs << "test"
7
10
  t.libs << "lib"
8
11
  t.test_files = FileList["test/**/*_test.rb"]
9
12
  end
10
13
 
11
- if defined?(JRUBY_VERSION)
12
- task default: :test
14
+ if RUBY_ENGINE == "jruby"
15
+ task default: %i[rubocop test]
13
16
  else
14
17
  require "rake/extensiontask"
15
18
 
@@ -19,5 +22,5 @@ else
19
22
  ext.lib_dir = "lib/utf8_proc"
20
23
  end
21
24
 
22
- task default: %i[clobber compile test]
25
+ task default: %i[rubocop clobber compile test]
23
26
  end
@@ -2,10 +2,19 @@
2
2
  # rubocop:disable GlobalVars
3
3
  require "mkmf"
4
4
 
5
- $CFLAGS << " -std=c99 -Wno-declaration-after-statement"
6
-
7
5
  pkg_config("utf8proc")
6
+ unless have_library("utf8proc")
7
+ puts "Compiling local libutf8proc..."
8
+
9
+ libutf8proc_dir = File.expand_path(
10
+ File.join(File.dirname(__FILE__), "../../vendor/libutf8proc")
11
+ )
8
12
 
9
- have_library("utf8proc") || abort("This extension requires the utf8proc library.")
13
+ $VPATH << libutf8proc_dir
14
+ $srcs = ["utf8_proc.c", "utf8proc.c"]
15
+ $CFLAGS << " -I#{libutf8proc_dir}"
16
+ end
17
+
18
+ $CFLAGS << " -std=c99 -Wno-declaration-after-statement"
10
19
 
11
20
  create_makefile("utf8_proc/utf8_proc")
@@ -32,29 +32,60 @@ static inline VALUE normInternal(VALUE string, utf8proc_option_t options) {
32
32
  return new_str;
33
33
  }
34
34
 
35
+ // NFC
35
36
 
36
- VALUE toNFC(VALUE self, VALUE string) {
37
+ static VALUE toNFC(VALUE self, VALUE string) {
37
38
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
38
39
  }
39
40
 
40
- VALUE toNFD(VALUE self, VALUE string) {
41
+ static VALUE StoNFC(VALUE string) {
42
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
43
+ }
44
+
45
+ // NFD
46
+
47
+ static VALUE toNFD(VALUE self, VALUE string) {
48
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
49
+ }
50
+
51
+ static VALUE StoNFD(VALUE string) {
41
52
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
42
53
  }
43
54
 
44
- VALUE toNFKC(VALUE self, VALUE string) {
55
+ // NFKC
56
+
57
+ static VALUE toNFKC(VALUE self, VALUE string) {
45
58
  return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
46
59
  }
47
60
 
48
- VALUE toNFKD(VALUE self, VALUE string) {
61
+ static VALUE StoNFKC(VALUE string) {
62
+ return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
63
+ }
64
+
65
+ // NFKD
66
+
67
+ static VALUE toNFKD(VALUE self, VALUE string) {
68
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
69
+ }
70
+
71
+ static VALUE StoNFKD(VALUE string) {
49
72
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
50
73
  }
51
74
 
52
- VALUE toNFKC_CF(VALUE self, VALUE string) {
75
+ // NFKC_CF
76
+
77
+ static VALUE toNFKC_CF(VALUE self, VALUE string) {
78
+ return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
79
+ }
80
+
81
+ static VALUE StoNFKC_CF(VALUE string) {
53
82
  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
54
83
  }
55
84
 
85
+ // Parameterized normalization
56
86
 
57
- VALUE norm(int argc, VALUE* argv, VALUE self){
87
+
88
+ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
58
89
  VALUE string;
59
90
  VALUE form;
60
91
  rb_scan_args(argc, argv, "11", &string, &form);
@@ -82,6 +113,33 @@ VALUE norm(int argc, VALUE* argv, VALUE self){
82
113
  }
83
114
  }
84
115
 
116
+ static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
117
+ VALUE form;
118
+ rb_scan_args(argc, argv, "01", &form);
119
+
120
+ if (NIL_P(form)) {
121
+ return StoNFC(string);
122
+ }
123
+
124
+ ID s_form;
125
+ s_form = SYM2ID(form);
126
+ if (s_form == NFC) {
127
+ return StoNFC(string);
128
+ }else if(s_form == NFD) {
129
+ return StoNFD(string);
130
+ }else if(s_form == NFKC) {
131
+ return StoNFKC(string);
132
+ }else if(s_form == NFKD) {
133
+ return StoNFKD(string);
134
+ }else if(s_form == NFKC_CF) {
135
+ return StoNFKC_CF(string);
136
+ }else{
137
+ rb_raise(rb_eArgError, "%s",
138
+ "Argument must be one of [:nfc (default), :nfd, :nfkc, " \
139
+ ":nfkd, :nfkc_cf]");
140
+ }
141
+ }
142
+
85
143
  void Init_utf8_proc(void) {
86
144
  VALUE rb_mBase;
87
145
  rb_mBase = rb_define_module("UTF8Proc");
@@ -105,5 +163,14 @@ void Init_utf8_proc(void) {
105
163
  rb_define_singleton_method(rb_mBase, "NFKC", toNFKC, 1);
106
164
  rb_define_singleton_method(rb_mBase, "NFKD", toNFKD, 1);
107
165
  rb_define_singleton_method(rb_mBase, "NFKC_CF", toNFKC_CF, 1);
108
- rb_define_singleton_method(rb_mBase, "normalize", norm, -1);
166
+ rb_define_singleton_method(rb_mBase, "normalize", toNorm, -1);
167
+
168
+ VALUE rb_mStringExt;
169
+ rb_mStringExt = rb_define_module_under(rb_mBase, "StringExtension");
170
+ rb_define_method(rb_mStringExt, "NFC", StoNFC, 0);
171
+ rb_define_method(rb_mStringExt, "NFD", StoNFD, 0);
172
+ rb_define_method(rb_mStringExt, "NFKC", StoNFKC, 0);
173
+ rb_define_method(rb_mStringExt, "NFKD", StoNFKD, 0);
174
+ rb_define_method(rb_mStringExt, "NFKC_CF", StoNFKC_CF, 0);
175
+ rb_define_method(rb_mStringExt, "normalize", StoNorm, -1);
109
176
  }
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "utf8_proc"
4
+
5
+ class String
6
+ if RUBY_ENGINE == "jruby"
7
+ require "utf8_proc/core_ext/string_jruby"
8
+ else
9
+ include ::UTF8Proc::StringExtension
10
+ end
11
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+ # rubocop:disable MethodName
3
+
4
+ # This file should only be required within JRuby
5
+
6
+ require "java"
7
+ require "utf8_proc"
8
+
9
+ class String
10
+ def NFC
11
+ ::UTF8Proc.NFC(self)
12
+ end
13
+
14
+ def NFD
15
+ ::UTF8Proc.NFD(self)
16
+ end
17
+
18
+ def NFKC
19
+ ::UTF8Proc.NFKC(self)
20
+ end
21
+
22
+ def NFKD
23
+ ::UTF8Proc.NFKD(self)
24
+ end
25
+
26
+ def NFKC_CF
27
+ ::UTF8Proc.NFKC_CF(self)
28
+ end
29
+
30
+ def normalize(form = :nfc)
31
+ ::UTF8Proc.normalize(self, form)
32
+ end
33
+ end
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module UTF8Proc
3
- VERSION = "0.3.0".freeze
3
+ VERSION = "0.4.0".freeze
4
4
  end
data/lib/utf8_proc.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  require "utf8_proc/version"
3
3
 
4
4
  module UTF8Proc
5
- if defined?(JRUBY_VERSION)
5
+ if RUBY_ENGINE == "jruby"
6
6
  require "utf8_proc/jruby"
7
7
  include JRuby
8
8
  else
data/utf8_proc.gemspec CHANGED
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
20
20
  f.match(%r{^(test|spec|features)/})
21
21
  end
22
+ spec.files += ["vendor/libutf8proc/utf8proc.c",
23
+ "vendor/libutf8proc/utf8proc.h",
24
+ "vendor/libutf8proc/utf8proc_data.c"]
25
+
22
26
  spec.require_paths = ["lib"]
23
27
 
24
28
  spec.add_development_dependency "bundler", "~> 1.14"
@@ -27,7 +31,7 @@ Gem::Specification.new do |spec|
27
31
  spec.add_development_dependency "minitest", "~> 5.10"
28
32
  spec.add_development_dependency "rubocop", "~> 0.47"
29
33
 
30
- unless defined?(JRUBY_VERSION)
34
+ unless RUBY_ENGINE == "jruby"
31
35
  spec.extensions = ["ext/utf8_proc/extconf.rb"]
32
36
  spec.add_development_dependency "rake-compiler", "~> 1.0"
33
37
  end