utf8_proc 0.5.2 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ef828b3bc3e2413f1ef0df107c8924eaa84cac4a
4
- data.tar.gz: c4fe505d4cdb442a921df2bc3bd2d1bd0a08d08b
3
+ metadata.gz: 33591974889df9c707aed4f9cb97b04f623736c1
4
+ data.tar.gz: c7d78939a0a7b9a7f4c5c5f74047aa05a75eee61
5
5
  SHA512:
6
- metadata.gz: ea8ec35886cf54a9ce7047ba26e66a0904d4171a26a2697a656dc4d746c3509a6123e7e926beb27f26960bd5d218ade3377b0c38efcc2cafb1acf2ce5ea612d6
7
- data.tar.gz: 886eb31311b4d4f5559632d0347cd81ab6378d62285a958f0d2a2fcc193ae7f82aeefe945a192eee4d77d56ebdffa68426a0e4993890e83eec2bb305da3dbb5c
6
+ metadata.gz: 62c02a23182fe04aae49a257b8a5d40a7b344d3ea7ffb3070cd763a8224be9e045eb427eab04d7ed5e85a20e02e906f3f610a6f46404abcdb6d4dd5e08c4d8e0
7
+ data.tar.gz: 9ec79b0312bb78e1e0e3575432c52024d4cce0456cfd009bf2a8cf8acca196e8a53d7cd1012ee54444d0887425c6f2641fa86efe91a1e85207c9e1e476cba6a4
data/README.md CHANGED
@@ -28,6 +28,8 @@ Or install it yourself as:
28
28
 
29
29
  ## Usage
30
30
 
31
+ YARD documentation is available at [rubydoc.info](http://www.rubydoc.info/github/nomoon/utf8_proc)
32
+
31
33
  ```ruby
32
34
  require "utf8_proc"
33
35
 
@@ -65,58 +65,133 @@ static inline VALUE normInternal(VALUE *string, utf8proc_option_t options) {
65
65
  return new_str;
66
66
  }
67
67
 
68
- // NFC
69
-
68
+ /**
69
+ * Normalizes a String using NFC (Canonical Decomposition, followed by Canonical
70
+ * Composition)
71
+ *
72
+ * @param string [String] the String to normalize
73
+ *
74
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
75
+ * @return [String] a normalized string
76
+ */
70
77
  static VALUE toNFC(VALUE self, VALUE string) {
71
78
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
72
79
  }
73
80
 
81
+ /**
82
+ * Normalizes self using NFC (Canonical Decomposition, followed by Canonical
83
+ * Composition)
84
+ *
85
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
86
+ * @return [String] a normalized copy of the string
87
+ */
74
88
  static VALUE StoNFC(VALUE string) {
75
89
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
76
90
  }
77
91
 
78
- // NFD
79
-
92
+ /**
93
+ * Normalizes a string using NFD (Canonical Decomposition)
94
+ *
95
+ * @param string [String] the String to normalize
96
+ *
97
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
98
+ * @return [String] a normalized string
99
+ */
80
100
  static VALUE toNFD(VALUE self, VALUE string) {
81
101
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
82
102
  }
83
103
 
104
+ /**
105
+ * Normalizes self using NFD (Canonical Decomposition)
106
+ *
107
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
108
+ * @return [String] a normalized copy of the string
109
+ */
84
110
  static VALUE StoNFD(VALUE string) {
85
111
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
86
112
  }
87
113
 
88
- // NFKC
89
-
114
+ /**
115
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
116
+ * Canonical Composition)
117
+ *
118
+ * @param string [String] the String to normalize
119
+ *
120
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
121
+ * @return [String] a normalized string
122
+ */
90
123
  static VALUE toNFKC(VALUE self, VALUE string) {
91
124
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
92
125
  }
93
126
 
127
+ /**
128
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
129
+ * Canonical Composition)
130
+ *
131
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
132
+ * @return [String] a normalized copy of the string
133
+ */
94
134
  static VALUE StoNFKC(VALUE string) {
95
135
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
96
136
  }
97
137
 
98
- // NFKD
99
-
138
+ /**
139
+ * Normalizes a string using NFKD (Compatibility Decomposition)
140
+ *
141
+ * @param string [String] the String to normalize
142
+ *
143
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
144
+ * @return [String] a normalized string
145
+ */
100
146
  static VALUE toNFKD(VALUE self, VALUE string) {
101
147
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
102
148
  }
103
149
 
150
+ /**
151
+ * Normalizes self using NFKD (Compatibility Decomposition)
152
+ *
153
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
154
+ * @return [String] a normalized copy of the string
155
+ */
104
156
  static VALUE StoNFKD(VALUE string) {
105
157
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
106
158
  }
107
159
 
108
- // NFKC_CF
109
-
160
+ /**
161
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
162
+ * Canonical Composition) with case-folding
163
+ *
164
+ * @param string [String] the String to normalize
165
+ *
166
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
167
+ * @return [String] a normalized string
168
+ */
110
169
  static VALUE toNFKC_CF(VALUE self, VALUE string) {
111
170
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
112
171
  }
113
172
 
173
+ /**
174
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
175
+ * Canonical Composition) with case-folding
176
+ *
177
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
178
+ * @return [String] a normalized copy of the string
179
+ */
114
180
  static VALUE StoNFKC_CF(VALUE string) {
115
181
  return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
116
182
  }
117
183
 
118
- // Parameterized normalization
119
-
184
+ /**
185
+ * @overload normalize(string, form = :nfc)
186
+ * Normalizes a string according to one of the 5 possible forms
187
+ *
188
+ * @param string [String] the String to normalize
189
+ * @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
190
+ *
191
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
192
+ * @raise [ArgumentError] if *form* is not one of the 5 valid forms
193
+ * @return [String] a normalized string
194
+ */
120
195
  static VALUE toNorm(int argc, VALUE* argv, VALUE self){
121
196
  VALUE string;
122
197
  VALUE form;
@@ -145,6 +220,16 @@ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
145
220
  }
146
221
  }
147
222
 
223
+ /**
224
+ * @overload normalize(form = :nfc)
225
+ * Normalizes self according to one of the 5 possible forms
226
+ *
227
+ * @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
228
+ *
229
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
230
+ * @raise [ArgumentError] if *form* is not one of the 5 valid forms
231
+ * @return [String] a normalized copy of the string
232
+ */
148
233
  static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
149
234
  VALUE form;
150
235
  rb_scan_args(argc, argv, "01", &form);
@@ -186,6 +271,7 @@ void Init_utf8_proc(void) {
186
271
 
187
272
  const char *libVersion;
188
273
  libVersion = utf8proc_version();
274
+ // The version string reported by the utf8proc library
189
275
  rb_define_const(rb_mBase, "LIBRARY_VERSION", rb_str_freeze(
190
276
  rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8)
191
277
  ));
@@ -2,6 +2,7 @@
2
2
  require "utf8_proc/version"
3
3
  require "utf8_proc/benchmark"
4
4
 
5
+ # Unicode string normalization library using UTF8Proc
5
6
  module UTF8Proc
6
7
  if RUBY_ENGINE == "jruby"
7
8
  require "utf8_proc/jruby"
@@ -9,4 +10,13 @@ module UTF8Proc
9
10
  else
10
11
  require "utf8_proc/utf8_proc"
11
12
  end
13
+
14
+ # Add lowercase name aliases for normalization methods
15
+ class << self
16
+ alias nfc NFC
17
+ alias nfd NFD
18
+ alias nfkc NFKC
19
+ alias nfkd NFKD
20
+ alias nfkc_cf NFKC_CF
21
+ end
12
22
  end
@@ -1,10 +1,12 @@
1
1
  # frozen_string_literal: true
2
- # rubocop:disable MethodLength
2
+
3
3
  module UTF8Proc
4
+ # Benchmark module for comparing the speed of *UTF8Proc* and *UNF*
4
5
  module Benchmark
5
6
  module_function
6
7
 
7
- def run
8
+ # Runs the benchmark and displays the results.
9
+ def run # rubocop:disable MethodLength
8
10
  require "benchmark/ips"
9
11
  require "unf"
10
12
  # Various different normalizations of Unicode characters.
@@ -2,10 +2,23 @@
2
2
 
3
3
  require "utf8_proc"
4
4
 
5
- class String
6
- if RUBY_ENGINE == "jruby"
7
- require "utf8_proc/core_ext/string_jruby"
8
- else
9
- include ::UTF8Proc::StringExtension
5
+ module UTF8Proc
6
+ # Module containing C core extension methods for the {::String} class.
7
+ #
8
+ # You can activate this by using:
9
+ # require "utf8_proc/core_ext/string"
10
+ #
11
+ # It will load either the C or the Java extension, depending on your Ruby engine.
12
+ module StringExtension
13
+ if RUBY_ENGINE == "jruby"
14
+ require "utf8_proc/core_ext/string_jruby"
15
+ else
16
+ alias nfc NFC
17
+ alias nfd NFD
18
+ alias nfkc NFKC
19
+ alias nfkd NFKD
20
+ alias nfkc_cf NFKC_CF
21
+ String.send(:include, ::UTF8Proc::StringExtension)
22
+ end
10
23
  end
11
24
  end
@@ -6,28 +6,51 @@
6
6
  require "java"
7
7
  require "utf8_proc"
8
8
 
9
- class String
10
- def NFC
11
- ::UTF8Proc.NFC(self)
12
- end
13
-
14
- def NFD
15
- ::UTF8Proc.NFD(self)
16
- end
17
-
18
- def NFKC
19
- ::UTF8Proc.NFKC(self)
20
- end
21
-
22
- def NFKD
23
- ::UTF8Proc.NFKD(self)
24
- end
25
-
26
- def NFKC_CF
27
- ::UTF8Proc.NFKC_CF(self)
28
- end
29
-
30
- def normalize(form = :nfc)
31
- ::UTF8Proc.normalize(self, form)
9
+ module UTF8Proc
10
+ module JRuby
11
+ # Module containing JRuby core extension methods for the {::String} class.
12
+ #
13
+ # You can activate this by using:
14
+ # require "utf8_proc/core_ext/string"
15
+ #
16
+ # It will load either the C or the Java extension, depending on your Ruby engine.
17
+ module StringExtension
18
+ # @see UTF8Proc::StringExtension#NFC
19
+ def NFC
20
+ ::UTF8Proc.NFC(self)
21
+ end
22
+ alias nfc NFC
23
+
24
+ # @see UTF8Proc::StringExtension#NFD
25
+ def NFD
26
+ ::UTF8Proc.NFD(self)
27
+ end
28
+ alias nfd NFD
29
+
30
+ # @see UTF8Proc::StringExtension#NFKC
31
+ def NFKC
32
+ ::UTF8Proc.NFKC(self)
33
+ end
34
+ alias nfkc NFKC
35
+
36
+ # @see UTF8Proc::StringExtension#NFKD
37
+ def NFKD
38
+ ::UTF8Proc.NFKD(self)
39
+ end
40
+ alias nfkd NFKD
41
+
42
+ # @see UTF8Proc::StringExtension#NFKC_CF
43
+ def NFKC_CF
44
+ ::UTF8Proc.NFKC_CF(self)
45
+ end
46
+ alias nfkc_cf NFKC_CF
47
+
48
+ # @see UTF8Proc::StringExtension#normalize
49
+ def normalize(form = :nfc)
50
+ ::UTF8Proc.normalize(self, form)
51
+ end
52
+ end
32
53
  end
33
54
  end
55
+
56
+ String.send(:include, ::UTF8Proc::JRuby::StringExtension)
@@ -6,37 +6,49 @@
6
6
  require "java"
7
7
 
8
8
  module UTF8Proc
9
+ # JRuby normalization module.
10
+ #
11
+ # This module is loaded automatically when running under JRuby.
9
12
  module JRuby
13
+ # The Java version string, taken from the `java.version` entry of ENV_JAVA
10
14
  LIBRARY_VERSION = "Java #{ENV_JAVA['java.version']}".freeze
11
15
 
12
16
  JTNORM = java.text.Normalizer
13
17
  private_constant :JTNORM
14
18
 
19
+ # @!visibility private
15
20
  def self.included(receiver)
16
21
  receiver.extend(ClassMethods)
17
22
  end
18
23
 
24
+ # Methods added to the {::UTF8Proc} module in JRuby (instead of the C ones)
19
25
  module ClassMethods
26
+ # @see UTF8Proc.NFC
20
27
  def NFC(string)
21
28
  JTNORM.normalize(string, JTNORM::Form::NFC)
22
29
  end
23
30
 
31
+ # @see UTF8Proc.NFD
24
32
  def NFD(string)
25
33
  JTNORM.normalize(string, JTNORM::Form::NFD)
26
34
  end
27
35
 
36
+ # @see UTF8Proc.NFKC
28
37
  def NFKC(string)
29
38
  JTNORM.normalize(string, JTNORM::Form::NFKC)
30
39
  end
31
40
 
41
+ # @see UTF8Proc.NFKD
32
42
  def NFKD(string)
33
43
  JTNORM.normalize(string, JTNORM::Form::NFKD)
34
44
  end
35
45
 
46
+ # @see UTF8Proc.NFKC_CF
36
47
  def NFKC_CF(string)
37
48
  NFKC(string).to_java(:string).toLowerCase
38
49
  end
39
50
 
51
+ # @see UTF8Proc.normalize
40
52
  def normalize(string, form = :nfc)
41
53
  case form
42
54
  when :nfc
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
  module UTF8Proc
3
- VERSION = "0.5.2".freeze
3
+ # The gem version
4
+ VERSION = "0.6.0".freeze
4
5
  end
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
32
32
  spec.add_development_dependency "pry", "~> 0.10"
33
33
  spec.add_development_dependency "minitest", "~> 5.10"
34
34
  spec.add_development_dependency "rubocop", "~> 0.47"
35
- spec.add_development_dependency "benchmark-ips"
36
- spec.add_development_dependency "unf"
35
+ spec.add_development_dependency "yard", "~> 0.9"
36
+ spec.add_development_dependency "benchmark-ips", "~> 2.7"
37
+ spec.add_development_dependency "unf", "~> 0.1"
37
38
 
38
39
  unless RUBY_ENGINE == "jruby"
39
40
  spec.extensions = ["ext/utf8_proc/extconf.rb"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8_proc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
@@ -80,34 +80,48 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.47'
83
+ - !ruby/object:Gem::Dependency
84
+ name: yard
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.9'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.9'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: benchmark-ips
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - ">="
101
+ - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '0'
103
+ version: '2.7'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - ">="
108
+ - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '0'
110
+ version: '2.7'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: unf
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
- - - ">="
115
+ - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: '0'
117
+ version: '0.1'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
- - - ">="
122
+ - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: '0'
124
+ version: '0.1'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rake-compiler
113
127
  requirement: !ruby/object:Gem::Requirement