utf8_proc 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef828b3bc3e2413f1ef0df107c8924eaa84cac4a
4
- data.tar.gz: c4fe505d4cdb442a921df2bc3bd2d1bd0a08d08b
3
+ metadata.gz: 33591974889df9c707aed4f9cb97b04f623736c1
4
+ data.tar.gz: c7d78939a0a7b9a7f4c5c5f74047aa05a75eee61
5
5
  SHA512:
6
- metadata.gz: ea8ec35886cf54a9ce7047ba26e66a0904d4171a26a2697a656dc4d746c3509a6123e7e926beb27f26960bd5d218ade3377b0c38efcc2cafb1acf2ce5ea612d6
7
- data.tar.gz: 886eb31311b4d4f5559632d0347cd81ab6378d62285a958f0d2a2fcc193ae7f82aeefe945a192eee4d77d56ebdffa68426a0e4993890e83eec2bb305da3dbb5c
6
+ metadata.gz: 62c02a23182fe04aae49a257b8a5d40a7b344d3ea7ffb3070cd763a8224be9e045eb427eab04d7ed5e85a20e02e906f3f610a6f46404abcdb6d4dd5e08c4d8e0
7
+ data.tar.gz: 9ec79b0312bb78e1e0e3575432c52024d4cce0456cfd009bf2a8cf8acca196e8a53d7cd1012ee54444d0887425c6f2641fa86efe91a1e85207c9e1e476cba6a4
data/README.md CHANGED
@@ -28,6 +28,8 @@ Or install it yourself as:
28
28
 
29
29
  ## Usage
30
30
 
31
+ YARD documentation is available at [rubydoc.info](http://www.rubydoc.info/github/nomoon/utf8_proc)
32
+
31
33
  ```ruby
32
34
  require "utf8_proc"
33
35
 
@@ -65,58 +65,133 @@ static inline VALUE normInternal(VALUE *string, utf8proc_option_t options) {
65
65
  return new_str;
66
66
  }
67
67
 
68
- // NFC
69
-
68
+ /**
69
+ * Normalizes a String using NFC (Canonical Decomposition, followed by Canonical
70
+ * Composition)
71
+ *
72
+ * @param string [String] the String to normalize
73
+ *
74
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
75
+ * @return [String] a normalized string
76
+ */
70
77
  static VALUE toNFC(VALUE self, VALUE string) {
71
78
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
72
79
  }
73
80
 
81
+ /**
82
+ * Normalizes self using NFC (Canonical Decomposition, followed by Canonical
83
+ * Composition)
84
+ *
85
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
86
+ * @return [String] a normalized copy of the string
87
+ */
74
88
  static VALUE StoNFC(VALUE string) {
75
89
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
76
90
  }
77
91
 
78
- // NFD
79
-
92
+ /**
93
+ * Normalizes a string using NFD (Canonical Decomposition)
94
+ *
95
+ * @param string [String] the String to normalize
96
+ *
97
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
98
+ * @return [String] a normalized string
99
+ */
80
100
  static VALUE toNFD(VALUE self, VALUE string) {
81
101
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
82
102
  }
83
103
 
104
+ /**
105
+ * Normalizes self using NFD (Canonical Decomposition)
106
+ *
107
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
108
+ * @return [String] a normalized copy of the string
109
+ */
84
110
  static VALUE StoNFD(VALUE string) {
85
111
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
86
112
  }
87
113
 
88
- // NFKC
89
-
114
+ /**
115
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
116
+ * Canonical Composition)
117
+ *
118
+ * @param string [String] the String to normalize
119
+ *
120
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
121
+ * @return [String] a normalized string
122
+ */
90
123
  static VALUE toNFKC(VALUE self, VALUE string) {
91
124
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
92
125
  }
93
126
 
127
+ /**
128
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
129
+ * Canonical Composition)
130
+ *
131
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
132
+ * @return [String] a normalized copy of the string
133
+ */
94
134
  static VALUE StoNFKC(VALUE string) {
95
135
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
96
136
  }
97
137
 
98
- // NFKD
99
-
138
+ /**
139
+ * Normalizes a string using NFKD (Compatibility Decomposition)
140
+ *
141
+ * @param string [String] the String to normalize
142
+ *
143
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
144
+ * @return [String] a normalized string
145
+ */
100
146
  static VALUE toNFKD(VALUE self, VALUE string) {
101
147
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
102
148
  }
103
149
 
150
+ /**
151
+ * Normalizes self using NFKD (Compatibility Decomposition)
152
+ *
153
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
154
+ * @return [String] a normalized copy of the string
155
+ */
104
156
  static VALUE StoNFKD(VALUE string) {
105
157
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
106
158
  }
107
159
 
108
- // NFKC_CF
109
-
160
+ /**
161
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
162
+ * Canonical Composition) with case-folding
163
+ *
164
+ * @param string [String] the String to normalize
165
+ *
166
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
167
+ * @return [String] a normalized string
168
+ */
110
169
  static VALUE toNFKC_CF(VALUE self, VALUE string) {
111
170
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
112
171
  }
113
172
 
173
+ /**
174
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
175
+ * Canonical Composition) with case-folding
176
+ *
177
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
178
+ * @return [String] a normalized copy of the string
179
+ */
114
180
  static VALUE StoNFKC_CF(VALUE string) {
115
181
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
116
182
  }
117
183
 
118
- // Parameterized normalization
119
-
184
+ /**
185
+ * @overload normalize(string, form = :nfc)
186
+ * Normalizes a string according to one of the 5 possible forms
187
+ *
188
+ * @param string [String] the String to normalize
189
+ * @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
190
+ *
191
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
192
+ * @raise [ArgumentError] if *form* is not one of the 5 valid forms
193
+ * @return [String] a normalized string
194
+ */
120
195
  static VALUE toNorm(int argc, VALUE* argv, VALUE self){
121
196
  VALUE string;
122
197
  VALUE form;
@@ -145,6 +220,16 @@ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
145
220
  }
146
221
  }
147
222
 
223
+ /**
224
+ * @overload normalize(form = :nfc)
225
+ * Normalizes self according to one of the 5 possible forms
226
+ *
227
+ * @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
228
+ *
229
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
230
+ * @raise [ArgumentError] if *form* is not one of the 5 valid forms
231
+ * @return [String] a normalized copy of the string
232
+ */
148
233
  static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
149
234
  VALUE form;
150
235
  rb_scan_args(argc, argv, "01", &form);
@@ -186,6 +271,7 @@ void Init_utf8_proc(void) {
186
271
 
187
272
  const char *libVersion;
188
273
  libVersion = utf8proc_version();
274
+ // Version string reported by the linked utf8proc library
189
275
  rb_define_const(rb_mBase, "LIBRARY_VERSION", rb_str_freeze(
190
276
  rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8)
191
277
  ));
@@ -2,6 +2,7 @@
2
2
  require "utf8_proc/version"
3
3
  require "utf8_proc/benchmark"
4
4
 
5
+ # Unicode string normalization library using UTF8Proc
5
6
  module UTF8Proc
6
7
  if RUBY_ENGINE == "jruby"
7
8
  require "utf8_proc/jruby"
@@ -9,4 +10,13 @@ module UTF8Proc
9
10
  else
10
11
  require "utf8_proc/utf8_proc"
11
12
  end
13
+
14
+ # Add lowercase name aliases for normalization methods
15
+ class << self
16
+ alias nfc NFC
17
+ alias nfd NFD
18
+ alias nfkc NFKC
19
+ alias nfkd NFKD
20
+ alias nfkc_cf NFKC_CF
21
+ end
12
22
  end
@@ -1,10 +1,12 @@
1
1
  # frozen_string_literal: true
2
- # rubocop:disable MethodLength
2
+
3
3
  module UTF8Proc
4
+ # Benchmark module for comparing the speed of *UTF8Proc* and *UNF*
4
5
  module Benchmark
5
6
  module_function
6
7
 
7
- def run
8
+ # Runs the benchmark and displays the results.
9
+ def run # rubocop:disable MethodLength
8
10
  require "benchmark/ips"
9
11
  require "unf"
10
12
  # Various different normalizations of Unicode characters.
@@ -2,10 +2,23 @@
2
2
 
3
3
  require "utf8_proc"
4
4
 
5
- class String
6
- if RUBY_ENGINE == "jruby"
7
- require "utf8_proc/core_ext/string_jruby"
8
- else
9
- include ::UTF8Proc::StringExtension
5
+ module UTF8Proc
6
+ # Module containing C core extension methods for the {::String} class.
7
+ #
8
+ # You can activate this by using:
9
+ # require "utf8_proc/core_ext/string"
10
+ #
11
+ # It will load either C or Java extensions, depending on your Ruby engine.
12
+ module StringExtension
13
+ if RUBY_ENGINE == "jruby"
14
+ require "utf8_proc/core_ext/string_jruby"
15
+ else
16
+ alias nfc NFC
17
+ alias nfd NFD
18
+ alias nfkc NFKC
19
+ alias nfkd NFKD
20
+ alias nfkc_cf NFKC_CF
21
+ String.send(:include, ::UTF8Proc::StringExtension)
22
+ end
10
23
  end
11
24
  end
@@ -6,28 +6,51 @@
6
6
  require "java"
7
7
  require "utf8_proc"
8
8
 
9
- class String
10
- def NFC
11
- ::UTF8Proc.NFC(self)
12
- end
13
-
14
- def NFD
15
- ::UTF8Proc.NFD(self)
16
- end
17
-
18
- def NFKC
19
- ::UTF8Proc.NFKC(self)
20
- end
21
-
22
- def NFKD
23
- ::UTF8Proc.NFKD(self)
24
- end
25
-
26
- def NFKC_CF
27
- ::UTF8Proc.NFKC_CF(self)
28
- end
29
-
30
- def normalize(form = :nfc)
31
- ::UTF8Proc.normalize(self, form)
9
+ module UTF8Proc
10
+ module JRuby
11
+ # Module containing JRuby core extension methods for the {::String} class.
12
+ #
13
+ # You can activate this by using:
14
+ # require "utf8_proc/core_ext/string"
15
+ #
16
+ # It will load either C or Java extensions, depending on your Ruby engine.
17
+ module StringExtension
18
+ # @see UTF8Proc::StringExtension#NFC
19
+ def NFC
20
+ ::UTF8Proc.NFC(self)
21
+ end
22
+ alias nfc NFC
23
+
24
+ # @see UTF8Proc::StringExtension#NFD
25
+ def NFD
26
+ ::UTF8Proc.NFD(self)
27
+ end
28
+ alias nfd NFD
29
+
30
+ # @see UTF8Proc::StringExtension#NFKC
31
+ def NFKC
32
+ ::UTF8Proc.NFKC(self)
33
+ end
34
+ alias nfkc NFKC
35
+
36
+ # @see UTF8Proc::StringExtension#NFKD
37
+ def NFKD
38
+ ::UTF8Proc.NFKD(self)
39
+ end
40
+ alias nfkd NFKD
41
+
42
+ # @see UTF8Proc::StringExtension#NFKC_CF
43
+ def NFKC_CF
44
+ ::UTF8Proc.NFKC_CF(self)
45
+ end
46
+ alias nfkc_cf NFKC_CF
47
+
48
+ # @see UTF8Proc::StringExtension#normalize
49
+ def normalize(form = :nfc)
50
+ ::UTF8Proc.normalize(self, form)
51
+ end
52
+ end
32
53
  end
33
54
  end
55
+
56
+ String.send(:include, ::UTF8Proc::JRuby::StringExtension)
@@ -6,37 +6,49 @@
6
6
  require "java"
7
7
 
8
8
  module UTF8Proc
9
+ # JRuby normalization module.
10
+ #
11
+ # This module will load automatically when running under the JRuby engine.
9
12
  module JRuby
13
+ # Displays your version of the Java VM
10
14
  LIBRARY_VERSION = "Java #{ENV_JAVA['java.version']}".freeze
11
15
 
12
16
  JTNORM = java.text.Normalizer
13
17
  private_constant :JTNORM
14
18
 
19
+ # @!visibility private
15
20
  def self.included(receiver)
16
21
  receiver.extend(ClassMethods)
17
22
  end
18
23
 
24
+ # Methods added to the {::UTF8Proc} module in JRuby (instead of the C ones)
19
25
  module ClassMethods
26
+ # @see UTF8Proc.NFC
20
27
  def NFC(string)
21
28
  JTNORM.normalize(string, JTNORM::Form::NFC)
22
29
  end
23
30
 
31
+ # @see UTF8Proc.NFD
24
32
  def NFD(string)
25
33
  JTNORM.normalize(string, JTNORM::Form::NFD)
26
34
  end
27
35
 
36
+ # @see UTF8Proc.NFKC
28
37
  def NFKC(string)
29
38
  JTNORM.normalize(string, JTNORM::Form::NFKC)
30
39
  end
31
40
 
41
+ # @see UTF8Proc.NFKD
32
42
  def NFKD(string)
33
43
  JTNORM.normalize(string, JTNORM::Form::NFKD)
34
44
  end
35
45
 
46
+ # @see UTF8Proc.NFKC_CF
36
47
  def NFKC_CF(string)
37
48
  NFKC(string).to_java(:string).toLowerCase
38
49
  end
39
50
 
51
+ # @see UTF8Proc.normalize
40
52
  def normalize(string, form = :nfc)
41
53
  case form
42
54
  when :nfc
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  module UTF8Proc
3
- VERSION = "0.5.2".freeze
3
+ # The gem version
4
+ VERSION = "0.6.0".freeze
4
5
  end
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency "pry", "~> 0.10"
33
33
  spec.add_development_dependency "minitest", "~> 5.10"
34
34
  spec.add_development_dependency "rubocop", "~> 0.47"
35
- spec.add_development_dependency "benchmark-ips"
36
- spec.add_development_dependency "unf"
35
+ spec.add_development_dependency "yard", "~> 0.9"
36
+ spec.add_development_dependency "benchmark-ips", "~> 2.7"
37
+ spec.add_development_dependency "unf", "~> 0.1"
37
38
 
38
39
  unless RUBY_ENGINE == "jruby"
39
40
  spec.extensions = ["ext/utf8_proc/extconf.rb"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
@@ -80,34 +80,48 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.47'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.9'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.9'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: benchmark-ips
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - ">="
101
+ - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '0'
103
+ version: '2.7'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - ">="
108
+ - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '0'
110
+ version: '2.7'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: unf
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - ">="
115
+ - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: '0'
117
+ version: '0.1'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - ">="
122
+ - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: '0'
124
+ version: '0.1'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rake-compiler
113
127
  requirement: !ruby/object:Gem::Requirement