charwidth 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/charwidth.gemspec +1 -0
- data/lib/charwidth.rb +76 -25
- data/lib/charwidth/string.rb +8 -0
- data/lib/charwidth/version.rb +1 -1
- data/spec/charwidth_spec.rb +76 -32
- data/spec/spec_helper.rb +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d80ef9c4f6abc327fc91b3551c412da1be6f0f2
|
4
|
+
data.tar.gz: e8f1793a2de0a8edb0f71a4c50f1f3732801ebb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5408b2545d686f3fc9fab6bb327ceae284bcdc9d05389dde12ac9a593c1b7f1a7d5de77666b6db2e5cc3626eab8e7b85f9c2d362ff9ab5c919dc8ad41858f74d
|
7
|
+
data.tar.gz: 31053172fbda051a8516b7f5e6e41fbfb98852340c9bd0d9f97f38213dd059a33e643133bf0a2e32977ddf7704537f329a72f1fb4a96e2d9c3888ab766405df3
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters.
|
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem '
|
9
|
+
gem 'charwidth'
|
10
10
|
|
11
11
|
|
12
12
|
And then execute:
|
@@ -15,7 +15,7 @@ And then execute:
|
|
15
15
|
|
16
16
|
Or install it yourself as:
|
17
17
|
|
18
|
-
$ gem install
|
18
|
+
$ gem install charwidth
|
19
19
|
|
20
20
|
# Usage
|
21
21
|
|
data/charwidth.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_development_dependency "rspec-its"
|
24
25
|
spec.add_development_dependency "simplecov"
|
25
26
|
spec.add_development_dependency "activerecord", ">= 3"
|
26
27
|
spec.add_development_dependency "sqlite3"
|
data/lib/charwidth.rb
CHANGED
@@ -6,6 +6,41 @@ module Charwidth
|
|
6
6
|
autoload :CLI, "charwidth/cli"
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
+
HALFWIDTH_TO_FULLWIDTH = {
|
10
|
+
ascii: [
|
11
|
+
Characters::ASCII_PUNCTUATION_AND_SYMBOLS,
|
12
|
+
Characters::FULLWIDTH_ASCII_VARIANTS,
|
13
|
+
],
|
14
|
+
white_parenthesis: [
|
15
|
+
Characters::WHITE_PARENTHESIS,
|
16
|
+
Characters::FULLWIDTH_BRACKETS,
|
17
|
+
],
|
18
|
+
cjk_punctuation: [
|
19
|
+
Characters::HALFWIDTH_CJK_PUCTUATION,
|
20
|
+
Characters::CJK_SYMBOLS_AND_PUNCTUATION,
|
21
|
+
],
|
22
|
+
katakana: [
|
23
|
+
Characters::HALFWIDTH_KATAKANA_VARIANTS,
|
24
|
+
Characters::KATAKANA,
|
25
|
+
],
|
26
|
+
hangul: [
|
27
|
+
Characters::HALFWIDTH_HANGUL_VARIANTS,
|
28
|
+
Characters::HANGUL,
|
29
|
+
],
|
30
|
+
latin_1_punctuation_and_symbols: [
|
31
|
+
Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS,
|
32
|
+
Characters::FULLWIDTH_SYMBOL_VARIANTS,
|
33
|
+
],
|
34
|
+
mathematical_symbols: [
|
35
|
+
Characters::HALFWIDTH_SYMBOL_VARIANTS,
|
36
|
+
Characters::MATHEMATICAL_SYMBOLS,
|
37
|
+
],
|
38
|
+
space: [
|
39
|
+
Characters::SPACE,
|
40
|
+
Characters::IDEOGRAPHIC_SPACE,
|
41
|
+
],
|
42
|
+
}
|
43
|
+
|
9
44
|
# Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters
|
10
45
|
# options: {
|
11
46
|
# only: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space],
|
@@ -21,6 +56,24 @@ module Charwidth
|
|
21
56
|
normalize_charwidth!(string, options)
|
22
57
|
end
|
23
58
|
|
59
|
+
def to_full_width(string)
|
60
|
+
to_full_width!(string.dup)
|
61
|
+
end
|
62
|
+
|
63
|
+
def to_full_width!(src)
|
64
|
+
unify_voiced_katakana!(src)
|
65
|
+
|
66
|
+
before, after = "", ""
|
67
|
+
HALFWIDTH_TO_FULLWIDTH.each_value do |half, full|
|
68
|
+
before << half
|
69
|
+
after << full
|
70
|
+
end
|
71
|
+
|
72
|
+
escape_for_tr!(before)
|
73
|
+
escape_for_tr!(after)
|
74
|
+
src.tr!(before, after) || src
|
75
|
+
end
|
76
|
+
|
24
77
|
private
|
25
78
|
TYPES = [
|
26
79
|
:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :hangul,
|
@@ -51,34 +104,23 @@ module Charwidth
|
|
51
104
|
before, after = "", ""
|
52
105
|
types.each do |type|
|
53
106
|
case type
|
54
|
-
when :ascii
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
when :cjk_punctuation
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
when :hangul
|
67
|
-
before << Characters::HALFWIDTH_HANGUL_VARIANTS
|
68
|
-
after << Characters::HANGUL
|
69
|
-
when :latin_1_punctuation_and_symbols
|
70
|
-
before << Characters::FULLWIDTH_SYMBOL_VARIANTS
|
71
|
-
after << Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS
|
72
|
-
when :mathematical_symbols
|
73
|
-
before << Characters::HALFWIDTH_SYMBOL_VARIANTS
|
74
|
-
after << Characters::MATHEMATICAL_SYMBOLS
|
75
|
-
when :space
|
76
|
-
before << Characters::IDEOGRAPHIC_SPACE
|
77
|
-
after << Characters::SPACE
|
107
|
+
when :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols, :space
|
108
|
+
# convert fullwidth to halfwidth
|
109
|
+
HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
|
110
|
+
before << full
|
111
|
+
after << half
|
112
|
+
}
|
113
|
+
when :cjk_punctuation, :katakana, :hangul, :mathematical_symbols
|
114
|
+
# convert halfwidth to fullwidth
|
115
|
+
HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
|
116
|
+
before << half
|
117
|
+
after << full
|
118
|
+
}
|
78
119
|
end
|
79
120
|
end
|
80
121
|
|
81
|
-
|
122
|
+
escape_for_tr!(before)
|
123
|
+
escape_for_tr!(after)
|
82
124
|
src.tr!(before, after) || src
|
83
125
|
end
|
84
126
|
|
@@ -94,6 +136,15 @@ module Charwidth
|
|
94
136
|
str.gsub!(h, f) || str
|
95
137
|
end
|
96
138
|
end
|
139
|
+
|
140
|
+
def escape_for_tr!(s)
|
141
|
+
s.gsub!('\\', '\\\\')
|
142
|
+
s.gsub!('-', '\\-')
|
143
|
+
s.gsub!('^', '\\^')
|
144
|
+
s.gsub!('[', '\\[')
|
145
|
+
s.gsub!(']', '\\]')
|
146
|
+
s
|
147
|
+
end
|
97
148
|
end
|
98
149
|
|
99
150
|
extend ClassMethods
|
data/lib/charwidth/string.rb
CHANGED
data/lib/charwidth/version.rb
CHANGED
data/spec/charwidth_spec.rb
CHANGED
@@ -2,43 +2,87 @@
|
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
4
|
describe "Charwidth" do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
5
|
+
describe ".normalize" do
|
6
|
+
it "should convert full-width alphabet to half-width" do
|
7
|
+
expect(Charwidth.normalize("ABCabc")).to eq("ABCabc")
|
8
|
+
end
|
9
|
+
it "should convert full-width number to half-width" do
|
10
|
+
expect(Charwidth.normalize("123")).to eq("123")
|
11
|
+
end
|
12
|
+
it "should convert full-width ASCII symbol before numbers to half-width" do
|
13
|
+
expect(Charwidth.normalize("!"#")).to eq("!\"#")
|
14
|
+
end
|
15
|
+
it "should convert full-width ASCII symbol between numbers and upper-case to half-width" do
|
16
|
+
expect(Charwidth.normalize(":;")).to eq(":;")
|
17
|
+
end
|
18
|
+
it "should convert full-width ASCII symbol between upper-case and lower-case to half-width" do
|
19
|
+
expect(Charwidth.normalize("[\]")).to eq("[\\]")
|
20
|
+
end
|
21
|
+
it "should convert full-width ASCII symbol after lower-case to half-width" do
|
22
|
+
expect(Charwidth.normalize("{|}")).to eq("{|}")
|
23
|
+
end
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
25
|
+
it "should convert half-width CJK punctuation to full-width" do
|
26
|
+
expect(Charwidth.normalize("、。「」")).to eq("、。「」")
|
27
|
+
end
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
it "should convert half-width katakana to full-width" do
|
30
|
+
expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
31
|
+
expect(Charwidth.normalize("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
|
32
|
+
end
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
|
35
|
+
expect(Charwidth.normalize("ガザダバパ")).to eq("ガザダバパ")
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should convert half-width hangul to full-width" do
|
39
|
+
expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
40
|
+
end
|
36
41
|
|
37
|
-
|
38
|
-
|
42
|
+
it "should convert IDIOGRAPHIC-SPACE to SPACE" do
|
43
|
+
expect(Charwidth.normalize("\u3000")).to eq(" ")
|
44
|
+
end
|
39
45
|
end
|
40
46
|
|
41
|
-
|
42
|
-
|
47
|
+
describe ".to_full_width" do
|
48
|
+
it "should convert half-width alphabet to full-width" do
|
49
|
+
expect(Charwidth.to_full_width("ABCabc")).to eq("ABCabc")
|
50
|
+
end
|
51
|
+
it "should convert half-width number to full-width" do
|
52
|
+
expect(Charwidth.to_full_width("123")).to eq("123")
|
53
|
+
end
|
54
|
+
it "should convert half-width ASCII symbol before numbers to full-width" do
|
55
|
+
expect(Charwidth.to_full_width("!\"#")).to eq("!"#")
|
56
|
+
end
|
57
|
+
it "should convert half-width ASCII symbol between numbers and upper-case to full-width" do
|
58
|
+
expect(Charwidth.to_full_width(":;")).to eq(":;")
|
59
|
+
end
|
60
|
+
it "should convert half-width ASCII symbol between upper-case and lower-case to full-width" do
|
61
|
+
expect(Charwidth.to_full_width("[\\]")).to eq("[\]")
|
62
|
+
end
|
63
|
+
it "should convert half-width ASCII symbol after lower-case to full-width" do
|
64
|
+
expect(Charwidth.to_full_width("{|}")).to eq("{|}")
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should convert half-width CJK punctuation to full-width" do
|
68
|
+
expect(Charwidth.to_full_width("、。「」")).to eq("、。「」")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should convert half-width katakana to full-width" do
|
72
|
+
expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
73
|
+
expect(Charwidth.to_full_width("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
|
77
|
+
expect(Charwidth.to_full_width("ガザダバパ")).to eq("ガザダバパ")
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should convert half-width hangul to full-width" do
|
81
|
+
expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should convert SPACE to IDIOGRAPHIC-SPACE" do
|
85
|
+
expect(Charwidth.to_full_width(" ")).to eq("\u3000")
|
86
|
+
end
|
43
87
|
end
|
44
88
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charwidth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- labocho
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec-its
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: simplecov
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
157
|
version: '0'
|
144
158
|
requirements: []
|
145
159
|
rubyforge_project:
|
146
|
-
rubygems_version: 2.
|
160
|
+
rubygems_version: 2.6.10
|
147
161
|
signing_key:
|
148
162
|
specification_version: 4
|
149
163
|
summary: Normalize Japanese / Korean fullwidth (zenkaku) and halfwidth (hankaku) characters
|