active_normalizer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9b0c6ce529b81184c34259463d9cd709a79151be7aac3e21badfb18cbdc12e27
4
+ data.tar.gz: 39f0b93dbdc3d63ff75f348d563cab7045830076a62a60760ce5005f7cda73c7
5
+ SHA512:
6
+ metadata.gz: 46473393acf00fa67a10b9f753ef6fe6950357b25571241e98bf6f440916c416f32c24a4a1d096339bd3485b102e5684dc3c4cd70fdf67e2776f9408c229722c
7
+ data.tar.gz: 2c95c902942790809940c079692f6b1bd3d7f0d7e0275a71d16b48ecaaa889183fd68265fa447f79da0244b0c19aaf6dab0549f29666633c21d3cec62e6c44cc
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,4 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.1
@@ -0,0 +1,7 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0
4
+
5
+ 2018.06.15
6
+
7
+ Released v1.0.0.
data/Gemfile ADDED
@@ -0,0 +1,22 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in active_normalizer.gemspec
6
+ gemspec
7
+
8
+
9
+ group :development do
10
+ gem "bundler", "~> 1.0"
11
+ gem "rake", "~> 10.0"
12
+ gem "rspec", "~> 3.7"
13
+ gem "pry"
14
+ gem "benchmark-ips"
15
+ end
16
+
17
+ group :test do
18
+ gem "unf"
19
+ gem "unicode"
20
+ gem "unicode_utils"
21
+ gem "activesupport"
22
+ end
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ active_normalizer (0.0.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (5.2.0)
10
+ concurrent-ruby (~> 1.0, >= 1.0.2)
11
+ i18n (>= 0.7, < 2)
12
+ minitest (~> 5.1)
13
+ tzinfo (~> 1.1)
14
+ benchmark-ips (2.7.2)
15
+ coderay (1.1.2)
16
+ concurrent-ruby (1.0.5)
17
+ diff-lcs (1.3)
18
+ i18n (1.0.1)
19
+ concurrent-ruby (~> 1.0)
20
+ method_source (0.9.0)
21
+ minitest (5.11.3)
22
+ pry (0.11.3)
23
+ coderay (~> 1.1.0)
24
+ method_source (~> 0.9.0)
25
+ rake (10.5.0)
26
+ rspec (3.7.0)
27
+ rspec-core (~> 3.7.0)
28
+ rspec-expectations (~> 3.7.0)
29
+ rspec-mocks (~> 3.7.0)
30
+ rspec-core (3.7.1)
31
+ rspec-support (~> 3.7.0)
32
+ rspec-expectations (3.7.0)
33
+ diff-lcs (>= 1.2.0, < 2.0)
34
+ rspec-support (~> 3.7.0)
35
+ rspec-mocks (3.7.0)
36
+ diff-lcs (>= 1.2.0, < 2.0)
37
+ rspec-support (~> 3.7.0)
38
+ rspec-support (3.7.1)
39
+ thread_safe (0.3.6)
40
+ tzinfo (1.2.5)
41
+ thread_safe (~> 0.1)
42
+ unf (0.1.4)
43
+ unf_ext
44
+ unf_ext (0.0.7.5)
45
+ unicode (0.4.4.4)
46
+ unicode_utils (1.4.0)
47
+
48
+ PLATFORMS
49
+ ruby
50
+
51
+ DEPENDENCIES
52
+ active_normalizer!
53
+ activesupport
54
+ benchmark-ips
55
+ bundler (~> 1.0)
56
+ pry
57
+ rake (~> 10.0)
58
+ rspec (~> 3.7)
59
+ unf
60
+ unicode
61
+ unicode_utils
62
+
63
+ BUNDLED WITH
64
+ 1.16.2
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Juanito Fatas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,184 @@
1
+ # Active Normalizer
2
+
3
+ Normalize weird Japanese characters, see [tests](/spec) for examples.
4
+
5
+ Normalize fullwidth, halfwidth hiragana, katakana, symbols.
6
+
7
+ ## Usage
8
+
9
+ Each normalizer class accepts one of the normalization forms `:nfc`, `:nfd`, `:nfkd`, or `:nfkc` as its option (see [Normalization Forms][unicode-nf] for more information).
10
+ Each normalizer instance responds to `run`.
11
+
12
+ ```ruby
13
+ require "active_normalizer/normalizers/ruby"
14
+ nfkc_normalizer = ActiveNormalizer.new(
15
+ ActiveNormalizer::Normalizers::Ruby,
16
+ options: :nfkc
17
+ )
18
+ nfkc_normalizer.run(input)
19
+ ```
20
+
21
+ ## Benchmark
22
+
23
+ ```
24
+ Benchmarking simple string: 800ー12345
25
+ Warming up --------------------------------------
26
+ UNF 92.981k i/100ms
27
+ Unicode 36.002k i/100ms
28
+ Ruby 17.044k i/100ms
29
+ UnicodeUtils 12.681k i/100ms
30
+ ActiveSupport 7.482k i/100ms
31
+ Calculating -------------------------------------
32
+ UNF 1.173M (±17.6%) i/s - 5.672M in 5.041037s
33
+ Unicode 404.502k (± 6.8%) i/s - 2.016M in 5.008748s
34
+ Ruby 191.562k (±30.3%) i/s - 835.156k in 5.106057s
35
+ UnicodeUtils 132.477k (± 5.3%) i/s - 672.093k in 5.088759s
36
+ ActiveSupport 75.011k (±34.9%) i/s - 329.208k in 5.058559s
37
+
38
+ Comparison:
39
+ UNF: 1172663.8 i/s
40
+ Unicode: 404502.1 i/s - 2.90x slower
41
+ Ruby: 191562.4 i/s - 6.12x slower
42
+ UnicodeUtils: 132477.3 i/s - 8.85x slower
43
+ ActiveSupport: 75010.6 i/s - 15.63x slower
44
+
45
+ Warming up --------------------------------------
46
+ UNF 67.181k i/100ms
47
+ Unicode 31.572k i/100ms
48
+ Ruby 14.947k i/100ms
49
+ UnicodeUtils 12.443k i/100ms
50
+ ActiveSupport 5.561k i/100ms
51
+ Calculating -------------------------------------
52
+ UNF 997.098k (±25.2%) i/s - 27.477M in 30.052018s
53
+ Unicode 328.071k (±19.5%) i/s - 9.503M in 30.090451s
54
+ Ruby 177.045k (±32.8%) i/s - 4.529M in 30.071040s
55
+ UnicodeUtils 134.513k (± 6.7%) i/s - 4.019M in 30.059621s
56
+ ActiveSupport 68.063k (±44.7%) i/s - 1.668M in 30.131968s
57
+
58
+ Comparison:
59
+ UNF: 997097.6 i/s
60
+ Unicode: 328070.8 i/s - 3.04x slower
61
+ Ruby: 177044.6 i/s - 5.63x slower
62
+ UnicodeUtils: 134512.7 i/s - 7.41x slower
63
+ ActiveSupport: 68063.1 i/s - 14.65x slower
64
+
65
+
66
+ Benchmarking longer string: ㍻㍼㍽㍾㌀㌁㌂㌃㌄㌅㌆㌇㌈㌉㌊㌋㌌㌍㌎㌏㌐㌑㌒㌓㌔㌕㌖㌗㌘㌙㌚㌛㌜㌝㌞㌟㌠㌡㌢㌣㌤㌥㌦㌧㌨㌩㌪㌫㌬㌭㌮㌯㌰㌱㌲㌳㌴㌵㌶㌷㌸㌹㌺㌻㌼㌽㌾㌿㍀㍁㍂㍃㍄㍅㍆㍇㍈㍉㍊㍋㍌㍍㍎㍏㍐㍑㍒㍓㍔㍕㍖㍗
67
+ Warming up --------------------------------------
68
+ UNF 6.023k i/100ms
69
+ Unicode 1.238k i/100ms
70
+ Ruby 1.068k i/100ms
71
+ UnicodeUtils 319.000 i/100ms
72
+ ActiveSupport 258.000 i/100ms
73
+ Calculating -------------------------------------
74
+ UNF 59.891k (± 6.8%) i/s - 301.150k in 5.055411s
75
+ Unicode 11.740k (± 9.0%) i/s - 59.424k in 5.103353s
76
+ Ruby 10.655k (±10.9%) i/s - 53.400k in 5.091860s
77
+ UnicodeUtils 3.087k (± 8.9%) i/s - 15.312k in 5.004688s
78
+ ActiveSupport 2.533k (±11.1%) i/s - 12.642k in 5.064477s
79
+
80
+ Comparison:
81
+ UNF: 59890.8 i/s
82
+ Unicode: 11740.2 i/s - 5.10x slower
83
+ Ruby: 10655.0 i/s - 5.62x slower
84
+ UnicodeUtils: 3087.4 i/s - 19.40x slower
85
+ ActiveSupport: 2532.6 i/s - 23.65x slower
86
+
87
+ Warming up --------------------------------------
88
+ UNF 5.739k i/100ms
89
+ Unicode 1.122k i/100ms
90
+ Ruby 1.113k i/100ms
91
+ UnicodeUtils 312.000 i/100ms
92
+ ActiveSupport 254.000 i/100ms
93
+ Calculating -------------------------------------
94
+ UNF 59.371k (± 4.4%) i/s - 1.779M in 30.026571s
95
+ Unicode 10.780k (±17.3%) i/s - 310.794k in 30.106556s
96
+ Ruby 11.144k (± 6.7%) i/s - 332.787k in 30.034689s
97
+ UnicodeUtils 3.164k (± 4.9%) i/s - 94.848k in 30.056928s
98
+ ActiveSupport 2.635k (± 8.8%) i/s - 78.486k in 30.075836s
99
+
100
+ Comparison:
101
+ UNF: 59371.2 i/s
102
+ Ruby: 11143.9 i/s - 5.33x slower
103
+ Unicode: 10779.6 i/s - 5.51x slower
104
+ UnicodeUtils: 3163.5 i/s - 18.77x slower
105
+ ActiveSupport: 2635.3 i/s - 22.53x slower
106
+ ```
107
+
108
+ Benchmark code can be found at [bin/benchmark](bin/benchmark).
109
+
110
+ ## Installation
111
+
112
+ Add this line to your application's Gemfile:
113
+
114
+ ```ruby
115
+ gem "active_normalizer"
116
+ ```
117
+
118
+ And then execute:
119
+
120
+ $ bundle
121
+
122
+ Or install it yourself as:
123
+
124
+ $ gem install active_normalizer
125
+
126
+ ## Dependencies
127
+
128
+ Active Normalizer provides a handful of normalizers. Only the Ruby normalizer works out of the box, because it relies on the standard library; the gems behind the other normalizers are not bundled, so you must add the gem for the normalizer you want to your own Gemfile.
129
+
130
+ #### ActiveNormalizer::Normalizers::Ruby
131
+
132
+ ```ruby
133
+ # no dependency required, standard library
134
+
135
+ require "active_normalizer/normalizers/ruby"
136
+ ```
137
+
138
+ #### ActiveNormalizer::Normalizers::UNF - unf
139
+
140
+ ```ruby
141
+ gem "unf"
142
+
143
+ require "active_normalizer/normalizers/unf"
144
+ ```
145
+
146
+ #### ActiveNormalizer::Normalizers::Unicode - unicode
147
+
148
+ ```ruby
149
+ gem "unicode"
150
+
151
+ require "active_normalizer/normalizers/unicode"
152
+ ```
153
+
154
+ #### ActiveNormalizer::Normalizers::UnicodeUtils - unicode_utils
155
+
156
+ ```ruby
157
+ gem "unicode_utils"
158
+
159
+ require "active_normalizer/normalizers/unicode_utils"
160
+ ```
161
+
162
+ #### ActiveNormalizer::Normalizers::ActiveSupport - activesupport
163
+
164
+ ```ruby
165
+ gem "activesupport"
166
+
167
+ require "active_normalizer/normalizers/active_support"
168
+ ```
169
+
170
+ ## Development
171
+
172
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/hack` for an interactive prompt that will allow you to experiment.
173
+
174
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
175
+
176
+ ## Contributing
177
+
178
+ Bug reports and pull requests are welcome on GitHub at https://github.com/JuanitoFatas/active_normalizer.
179
+
180
+ ## License
181
+
182
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
183
+
184
+ [unicode-nf]: http://unicode.org/reports/tr15/#Norm_Forms
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "active_normalizer"
5
+
6
+ require "active_normalizer/normalizers/unf"
7
+ require "active_normalizer/normalizers/unicode"
8
+ require "active_normalizer/normalizers/ruby"
9
+ require "active_normalizer/normalizers/unicode_utils"
10
+ require "active_normalizer/normalizers/active_support"
11
+
12
+ require "benchmark/ips"
13
+
14
+ class GCSuite
15
+ def warming(*); run_gc; end
16
+ def running(*); run_gc; end
17
+ def warmup_stats(*); end
18
+ def add_report(*); end
19
+ private
20
+ def run_gc
21
+ GC.enable
22
+ GC.start
23
+ GC.disable
24
+ end
25
+ end
26
+
27
+ def benchmark(input, suite)
28
+ Benchmark.ips do |x|
29
+ x.config(time: 5, warmup: 2)
30
+
31
+ x.config(suite: suite)
32
+ x.report("UNF") { ActiveNormalizer::Normalizers::UNF.new(:nfkc).run(input) }
33
+ x.report("Unicode") { ActiveNormalizer::Normalizers::Unicode.new(:nfkc).run(input) }
34
+ x.report("Ruby") { ActiveNormalizer::Normalizers::Ruby.new(:nfkc).run(input) }
35
+ x.report("UnicodeUtils") { ActiveNormalizer::Normalizers::UnicodeUtils.new(:nfkc).run(input) }
36
+ x.report("ActiveSupport") { ActiveNormalizer::Normalizers::ActiveSupport.new(:nfkc).run(input) }
37
+
38
+ x.compare!
39
+ end
40
+
41
+ Benchmark.ips do |x|
42
+ x.config(time: 30, warmup: 10)
43
+
44
+ x.config(suite: suite)
45
+ x.report("UNF") { ActiveNormalizer::Normalizers::UNF.new(:nfkc).run(input) }
46
+ x.report("Unicode") { ActiveNormalizer::Normalizers::Unicode.new(:nfkc).run(input) }
47
+ x.report("Ruby") { ActiveNormalizer::Normalizers::Ruby.new(:nfkc).run(input) }
48
+ x.report("UnicodeUtils") { ActiveNormalizer::Normalizers::UnicodeUtils.new(:nfkc).run(input) }
49
+ x.report("ActiveSupport") { ActiveNormalizer::Normalizers::ActiveSupport.new(:nfkc).run(input) }
50
+
51
+ x.compare!
52
+ end
53
+ end
54
+
55
+ suite = GCSuite.new
56
+
57
+ puts "Benchmarking simple string: 800ー12345"
58
+ benchmark("800ー12345", suite)
59
+
60
+ puts "\nBenchmarking longer string: ㍻㍼㍽㍾㌀㌁㌂㌃㌄㌅㌆㌇㌈㌉㌊㌋㌌㌍㌎㌏㌐㌑㌒㌓㌔㌕㌖㌗㌘㌙㌚㌛㌜㌝㌞㌟㌠㌡㌢㌣㌤㌥㌦㌧㌨㌩㌪㌫㌬㌭㌮㌯㌰㌱㌲㌳㌴㌵㌶㌷㌸㌹㌺㌻㌼㌽㌾㌿㍀㍁㍂㍃㍄㍅㍆㍇㍈㍉㍊㍋㍌㍍㍎㍏㍐㍑㍒㍓㍔㍕㍖㍗"
61
+ benchmark("㍻㍼㍽㍾㌀㌁㌂㌃㌄㌅㌆㌇㌈㌉㌊㌋㌌㌍㌎㌏㌐㌑㌒㌓㌔㌕㌖㌗㌘㌙㌚㌛㌜㌝㌞㌟㌠㌡㌢㌣㌤㌥㌦㌧㌨㌩㌪㌫㌬㌭㌮㌯㌰㌱㌲㌳㌴㌵㌶㌷㌸㌹㌺㌻㌼㌽㌾㌿㍀㍁㍂㍃㍄㍅㍆㍇㍈㍉㍊㍋㍌㍍㍎㍏㍐㍑㍒㍓㍔㍕㍖㍗", suite)
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "active_normalizer"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ require "pry"
10
+ Pry.start(__FILE__)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Installs the gem's development dependencies with Bundler.

# Abort on the first failing command, on any unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail
# Split words on newline and tab only. ANSI-C quoting ($'...') is required for
# the escapes to be interpreted; with $"..." the value would be the literal
# characters backslash, "n" and "t".
IFS=$'\n\t'
# Echo each input line as it is read and each command as it is executed.
set -vx

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_normalizer/version"
4
+ require "active_normalizer/normalizers/normalizer"
5
+
6
# Thin facade around a single normalizer: builds an instance of the given
# normalizer class and delegates #run to it.
class ActiveNormalizer
  # Raised by .require_dependency when a required library cannot be loaded.
  class MissingDependencyError < RuntimeError; end

  class << self
    # Requires +name+, converting a LoadError into a MissingDependencyError
    # whose message names the dependency and the class that needs it.
    #
    # @param name [String] feature passed to Kernel#require
    # @param klass [#to_s] name of the class that needs the dependency
    # @return [Boolean] whatever Kernel#require returns
    # @raise [MissingDependencyError] when +name+ cannot be loaded
    def require_dependency(name, klass)
      begin
        require name
      rescue LoadError => load_error
        message =
          %(Missing dependency '#{name}' for #{klass}. See README.md for details.\n#{load_error.class.name}: #{load_error})
        raise MissingDependencyError, message
      end
    end
  end

  # @param normalizer_klass [Class] class whose instances respond to #run
  # @param options [Object, nil] passed as the only argument to
  #   +normalizer_klass.new+
  def initialize(normalizer_klass, options: nil)
    @normalizer = normalizer_klass.new(options)
  end

  # Normalizes +text+ with the wrapped normalizer.
  #
  # @param text [String]
  # @return the result of the wrapped normalizer's #run
  def run(text)
    normalizer.run(text)
  end

  attr_reader :normalizer
  private :normalizer
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ ActiveNormalizer.require_dependency(
4
+ "active_support/multibyte/unicode",
5
+ "ActiveNormalizer::Normalizers::ActiveSupport"
6
+ )
7
+
8
+ # http://api.rubyonrails.org/classes/ActiveSupport/Multibyte/Unicode.html#method-i-normalize
9
class ActiveNormalizer
  module Normalizers
    # Normalizer that delegates to ::ActiveSupport::Multibyte::Unicode.normalize.
    class ActiveSupport < Normalizer
      # Maps this gem's form names to the form symbols passed to
      # ::ActiveSupport::Multibyte::Unicode.normalize.
      FORM_ALIASES = { nfd: :d, nfc: :c, nfkd: :kd, nfkc: :kc }.freeze
      private_constant :FORM_ALIASES

      # @param normalization_form [Symbol, nil] one of :nfd, :nfc, :nfkd, :nfkc
      def initialize(normalization_form = nil)
        @normalization_form = normalization_form
      end

      # Normalizes +text+ using the configured form.
      #
      # @param text [String]
      # @return the result of ::ActiveSupport::Multibyte::Unicode.normalize
      # @raise [Normalizer::UnknownNormalizationFormError] when the configured
      #   form is not one of the four supported symbols
      def run(text)
        active_support_form = FORM_ALIASES[normalization_form]
        raise_unknown_form_error(normalization_form) unless active_support_form

        ::ActiveSupport::Multibyte::Unicode.normalize(text, active_support_form)
      end

      private

      attr_reader :normalization_form
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
class ActiveNormalizer
  module Normalizers
    # Base class for normalizers; provides the shared "unknown form" error.
    class Normalizer
      # Raised when a normalizer is asked to use a form other than
      # :nfd, :nfc, :nfkd or :nfkc.
      class UnknownNormalizationFormError < StandardError
        # @param form [Object] the rejected form; interpolated into the message
        def initialize(form)
          super(
            "Unexpected normalization form: '#{form}'. Expected symbols :nfd, :nfc, :nfkd, :nfkc. See http://unicode.org/reports/tr15/#Norm_Forms for more information."
          )
        end
      end

      # Raises UnknownNormalizationFormError for +form+.
      #
      # @param form [Object] the rejected normalization form
      # @raise [UnknownNormalizationFormError] always
      def raise_unknown_form_error(form)
        raise UnknownNormalizationFormError, form
      end
    end
  end
end
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # @since Ruby 2.2.0-preview2
4
class ActiveNormalizer
  module Normalizers
    # Normalizer that delegates to String#unicode_normalize.
    class Ruby < Normalizer
      # Normalization forms accepted by #run.
      FORMS = %i[nfd nfc nfkd nfkc].freeze
      private_constant :FORMS

      # @param normalization_form [Symbol, nil] one of :nfd, :nfc, :nfkd, :nfkc
      def initialize(normalization_form = nil)
        @normalization_form = normalization_form
      end

      # Normalizes +text+ using the configured form.
      #
      # @param text [String]
      # @return [String] the result of +text.unicode_normalize+
      # @raise [Normalizer::UnknownNormalizationFormError] when the configured
      #   form is not one of the four supported symbols
      def run(text)
        form = normalization_form
        return text.unicode_normalize(form) if FORMS.include?(form)

        raise_unknown_form_error(form)
      end

      private

      attr_reader :normalization_form
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ ActiveNormalizer.require_dependency(
4
+ "unf",
5
+ "ActiveNormalizer::Normalizers::UNF"
6
+ )
7
+
8
+ # https://github.com/knu/ruby-unf
9
class ActiveNormalizer
  module Normalizers
    # Normalizer that delegates to ::UNF::Normalizer#normalize.
    class UNF < Normalizer
      # Normalization forms accepted by #run; each is passed to
      # ::UNF::Normalizer#normalize unchanged.
      FORMS = %i[nfd nfc nfkd nfkc].freeze
      private_constant :FORMS

      # @param normalization_form [Symbol, nil] one of :nfd, :nfc, :nfkd, :nfkc
      def initialize(normalization_form = nil)
        @normalization_form = normalization_form
      end

      # Normalizes +text+ using the configured form.
      #
      # @param text [String]
      # @return the result of ::UNF::Normalizer#normalize
      # @raise [Normalizer::UnknownNormalizationFormError] when the configured
      #   form is not one of the four supported symbols
      def run(text)
        form = normalization_form
        raise_unknown_form_error(form) unless FORMS.include?(form)

        unf_normalizer.normalize(text, form)
      end

      private

      # Only the form gets a generated reader. A generated reader for
      # unf_normalizer would be redefined by the method below (Ruby warns
      # about the redefinition) and would read a different instance variable.
      attr_reader :normalization_form

      # Memoized ::UNF::Normalizer.instance.
      def unf_normalizer
        @_unf_normalizer ||= ::UNF::Normalizer.instance
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ ActiveNormalizer.require_dependency(
4
+ "unicode",
5
+ "ActiveNormalizer::Normalizers::Unicode"
6
+ )
7
+
8
+ # https://github.com/blackwinter/unicode
9
class ActiveNormalizer
  module Normalizers
    # Normalizer that delegates to the ::Unicode module functions
    # nfd, nfc, nfkd and nfkc.
    class Unicode < Normalizer
      # Normalization forms accepted by #run; each is also the name of the
      # ::Unicode method that performs it.
      FORMS = %i[nfd nfc nfkd nfkc].freeze
      private_constant :FORMS

      # @param normalization_form [Symbol, nil] one of :nfd, :nfc, :nfkd, :nfkc
      def initialize(normalization_form = nil)
        @normalization_form = normalization_form
      end

      # Normalizes +text+ using the configured form.
      #
      # @param text [String]
      # @return the result of the matching ::Unicode method
      # @raise [Normalizer::UnknownNormalizationFormError] when the configured
      #   form is not one of the four supported symbols
      def run(text)
        form = normalization_form
        raise_unknown_form_error(form) unless FORMS.include?(form)

        ::Unicode.public_send(form, text)
      end

      private

      attr_reader :normalization_form
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ ActiveNormalizer.require_dependency(
4
+ "unicode_utils",
5
+ "ActiveNormalizer::Normalizers::UnicodeUtils"
6
+ )
7
+
8
+ # https://github.com/lang/unicode_utils
9
class ActiveNormalizer
  module Normalizers
    # Normalizer that delegates to the ::UnicodeUtils module functions
    # nfd, nfc, nfkd and nfkc.
    class UnicodeUtils < Normalizer
      # Normalization forms accepted by #run; each is also the name of the
      # ::UnicodeUtils method that performs it.
      FORMS = %i[nfd nfc nfkd nfkc].freeze
      private_constant :FORMS

      # @param normalization_form [Symbol, nil] one of :nfd, :nfc, :nfkd, :nfkc
      def initialize(normalization_form = nil)
        @normalization_form = normalization_form
      end

      # Normalizes +text+ using the configured form.
      #
      # @param text [String]
      # @return the result of the matching ::UnicodeUtils method
      # @raise [Normalizer::UnknownNormalizationFormError] when the configured
      #   form is not one of the four supported symbols
      def run(text)
        form = normalization_form
        raise_unknown_form_error(form) unless FORMS.include?(form)

        ::UnicodeUtils.public_send(form, text)
      end

      private

      attr_reader :normalization_form
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
class ActiveNormalizer
  # Version of the gem; read by the gemspec as ActiveNormalizer::VERSION.
  VERSION = "1.0.0"
end
@@ -0,0 +1,25 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "active_normalizer/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "active_normalizer"
8
+ spec.version = ActiveNormalizer::VERSION
9
+ spec.authors = ["Juanito Fatas"]
10
+ spec.email = ["me@juanitofatas.com"]
11
+
12
+ spec.summary = %q{Normalize japanese characters}
13
+ spec.description = spec.summary
14
+ spec.homepage = "https://github.com/juanitofatas/active_normalizer"
15
+ spec.license = "MIT"
16
+
17
+ # Specify which files should be added to the gem when it is released.
18
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
19
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
20
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ end
22
+ spec.bindir = "exe"
23
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
+ spec.require_paths = ["lib"]
25
+ end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: active_normalizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Juanito Fatas
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-06-17 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Normalize japanese characters
14
+ email:
15
+ - me@juanitofatas.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ".gitignore"
21
+ - ".rspec"
22
+ - ".travis.yml"
23
+ - CHANGELOG.md
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - bin/benchmark
30
+ - bin/hack
31
+ - bin/setup
32
+ - lib/active_normalizer.rb
33
+ - lib/active_normalizer/normalizers/active_support.rb
34
+ - lib/active_normalizer/normalizers/normalizer.rb
35
+ - lib/active_normalizer/normalizers/ruby.rb
36
+ - lib/active_normalizer/normalizers/unf.rb
37
+ - lib/active_normalizer/normalizers/unicode.rb
38
+ - lib/active_normalizer/normalizers/unicode_utils.rb
39
+ - lib/active_normalizer/version.rb
40
+ - nomogiri.gemspec
41
+ homepage: https://github.com/juanitofatas/active_normalizer
42
+ licenses:
43
+ - MIT
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 2.7.3
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Normalize japanese characters
65
+ test_files: []