charwidth 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/charwidth.gemspec +1 -0
- data/lib/charwidth.rb +76 -25
- data/lib/charwidth/string.rb +8 -0
- data/lib/charwidth/version.rb +1 -1
- data/spec/charwidth_spec.rb +76 -32
- data/spec/spec_helper.rb +1 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d80ef9c4f6abc327fc91b3551c412da1be6f0f2
|
4
|
+
data.tar.gz: e8f1793a2de0a8edb0f71a4c50f1f3732801ebb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5408b2545d686f3fc9fab6bb327ceae284bcdc9d05389dde12ac9a593c1b7f1a7d5de77666b6db2e5cc3626eab8e7b85f9c2d362ff9ab5c919dc8ad41858f74d
|
7
|
+
data.tar.gz: 31053172fbda051a8516b7f5e6e41fbfb98852340c9bd0d9f97f38213dd059a33e643133bf0a2e32977ddf7704537f329a72f1fb4a96e2d9c3888ab766405df3
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters.
|
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem '
|
9
|
+
gem 'charwidth'
|
10
10
|
|
11
11
|
|
12
12
|
And then execute:
|
@@ -15,7 +15,7 @@ And then execute:
|
|
15
15
|
|
16
16
|
Or install it yourself as:
|
17
17
|
|
18
|
-
$ gem install
|
18
|
+
$ gem install charwidth
|
19
19
|
|
20
20
|
# Usage
|
21
21
|
|
data/charwidth.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_development_dependency "rspec-its"
|
24
25
|
spec.add_development_dependency "simplecov"
|
25
26
|
spec.add_development_dependency "activerecord", ">= 3"
|
26
27
|
spec.add_development_dependency "sqlite3"
|
data/lib/charwidth.rb
CHANGED
@@ -6,6 +6,41 @@ module Charwidth
|
|
6
6
|
autoload :CLI, "charwidth/cli"
|
7
7
|
|
8
8
|
module ClassMethods
|
9
|
+
# Lookup table of character sets, keyed by character type.
#
# Every value is a two-element array in the order
# [halfwidth_characters, fullwidth_characters]; the code that reads this
# table destructures each entry as |half, full|.
# NOTE(review): the two strings of a pair are fed to String#tr as from/to
# sets, so they are presumably position-aligned - confirm against Characters.
#
# The table is frozen so this shared constant cannot be modified by accident
# at runtime (readers only iterate it or look entries up by key).
HALFWIDTH_TO_FULLWIDTH = {
  ascii: [
    Characters::ASCII_PUNCTUATION_AND_SYMBOLS,
    Characters::FULLWIDTH_ASCII_VARIANTS,
  ],
  white_parenthesis: [
    Characters::WHITE_PARENTHESIS,
    Characters::FULLWIDTH_BRACKETS,
  ],
  cjk_punctuation: [
    # "PUCTUATION" (sic): spelled as referenced here; presumably matches the
    # constant's definition in Characters - verify before renaming.
    Characters::HALFWIDTH_CJK_PUCTUATION,
    Characters::CJK_SYMBOLS_AND_PUNCTUATION,
  ],
  katakana: [
    Characters::HALFWIDTH_KATAKANA_VARIANTS,
    Characters::KATAKANA,
  ],
  hangul: [
    Characters::HALFWIDTH_HANGUL_VARIANTS,
    Characters::HANGUL,
  ],
  latin_1_punctuation_and_symbols: [
    Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS,
    Characters::FULLWIDTH_SYMBOL_VARIANTS,
  ],
  mathematical_symbols: [
    Characters::HALFWIDTH_SYMBOL_VARIANTS,
    Characters::MATHEMATICAL_SYMBOLS,
  ],
  space: [
    Characters::SPACE,
    Characters::IDEOGRAPHIC_SPACE,
  ],
}.freeze
|
43
|
+
|
9
44
|
# Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters
|
10
45
|
# options: {
|
11
46
|
# only: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space],
|
@@ -21,6 +56,24 @@ module Charwidth
|
|
21
56
|
normalize_charwidth!(string, options)
|
22
57
|
end
|
23
58
|
|
59
|
+
# Non-destructive counterpart of to_full_width!.
#
# Duplicates +string+ and hands the copy to to_full_width!, so the
# caller's object is left untouched.
#
# @param string [String] text to convert
# @return [String] whatever to_full_width! returns for the copy
def to_full_width(string)
  copy = string.dup
  to_full_width!(copy)
end
|
62
|
+
|
63
|
+
# Destructive conversion of +src+ using every entry of
# HALFWIDTH_TO_FULLWIDTH, in the halfwidth -> fullwidth direction.
#
# Steps, in order:
# 1. unify_voiced_katakana! is applied to +src+ first.
# 2. The halfwidth sets of all table entries are concatenated into one
#    "from" string and the fullwidth sets into one "to" string.
# 3. Both strings are passed through escape_for_tr! and used with String#tr!.
#
# @param src [String] string to convert; modified in place
# @return [String] +src+ itself (String#tr! returns nil when nothing was
#   replaced, in which case +src+ is returned instead)
def to_full_width!(src)
  unify_voiced_katakana!(src)

  halfwidth_chars = ""
  fullwidth_chars = ""
  HALFWIDTH_TO_FULLWIDTH.each_value do |pair|
    halfwidth_chars << pair.first
    fullwidth_chars << pair.last
  end

  escape_for_tr!(halfwidth_chars)
  escape_for_tr!(fullwidth_chars)

  replaced = src.tr!(halfwidth_chars, fullwidth_chars)
  replaced || src
end
|
76
|
+
|
24
77
|
private
|
25
78
|
TYPES = [
|
26
79
|
:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :hangul,
|
@@ -51,34 +104,23 @@ module Charwidth
|
|
51
104
|
before, after = "", ""
|
52
105
|
types.each do |type|
|
53
106
|
case type
|
54
|
-
when :ascii
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
when :cjk_punctuation
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
when :hangul
|
67
|
-
before << Characters::HALFWIDTH_HANGUL_VARIANTS
|
68
|
-
after << Characters::HANGUL
|
69
|
-
when :latin_1_punctuation_and_symbols
|
70
|
-
before << Characters::FULLWIDTH_SYMBOL_VARIANTS
|
71
|
-
after << Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS
|
72
|
-
when :mathematical_symbols
|
73
|
-
before << Characters::HALFWIDTH_SYMBOL_VARIANTS
|
74
|
-
after << Characters::MATHEMATICAL_SYMBOLS
|
75
|
-
when :space
|
76
|
-
before << Characters::IDEOGRAPHIC_SPACE
|
77
|
-
after << Characters::SPACE
|
107
|
+
when :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols, :space
|
108
|
+
# convert fullwidth to halfwidth
|
109
|
+
HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
|
110
|
+
before << full
|
111
|
+
after << half
|
112
|
+
}
|
113
|
+
when :cjk_punctuation, :katakana, :hangul, :mathematical_symbols
|
114
|
+
# convert halfwidth to fullwidth
|
115
|
+
HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
|
116
|
+
before << half
|
117
|
+
after << full
|
118
|
+
}
|
78
119
|
end
|
79
120
|
end
|
80
121
|
|
81
|
-
|
122
|
+
escape_for_tr!(before)
|
123
|
+
escape_for_tr!(after)
|
82
124
|
src.tr!(before, after) || src
|
83
125
|
end
|
84
126
|
|
@@ -94,6 +136,15 @@ module Charwidth
|
|
94
136
|
str.gsub!(h, f) || str
|
95
137
|
end
|
96
138
|
end
|
139
|
+
|
140
|
+
# Escapes +s+ in place so it can be used as a literal character set for
# String#tr: each backslash, "-", "^", "[" and "]" is prefixed with a
# backslash.
#
# The replacement is done in a single pass with the block form of gsub!.
# A replacement *string* such as '\\\\' is itself scanned for backslash
# escapes by gsub, which collapses a doubled backslash back into one; the
# previous `s.gsub!('\\', '\\\\')` therefore left backslashes unescaped.
# A block's return value is inserted verbatim, which avoids that pitfall.
#
# @param s [String] string to escape; modified in place
# @return [String] +s+ itself (also when nothing needed escaping)
def escape_for_tr!(s)
  s.gsub!(/[\\\-\^\[\]]/) { |char| "\\#{char}" }
  s # gsub! returns nil when nothing matched, so return the receiver explicitly
end
|
97
148
|
end
|
98
149
|
|
99
150
|
extend ClassMethods
|
data/lib/charwidth/string.rb
CHANGED
data/lib/charwidth/version.rb
CHANGED
data/spec/charwidth_spec.rb
CHANGED
@@ -2,43 +2,87 @@
|
|
2
2
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
3
3
|
|
4
4
|
describe "Charwidth" do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
5
|
+
describe ".normalize" do
|
6
|
+
it "should convert full-width alphabet to half-width" do
|
7
|
+
expect(Charwidth.normalize("ABCabc")).to eq("ABCabc")
|
8
|
+
end
|
9
|
+
it "should convert full-width number to half-width" do
|
10
|
+
expect(Charwidth.normalize("123")).to eq("123")
|
11
|
+
end
|
12
|
+
it "should convert full-width ASCII symbol before numbers to half-width" do
|
13
|
+
expect(Charwidth.normalize("!"#")).to eq("!\"#")
|
14
|
+
end
|
15
|
+
it "should convert full-width ASCII symbol between numbers and upper-case to half-width" do
|
16
|
+
expect(Charwidth.normalize(":;")).to eq(":;")
|
17
|
+
end
|
18
|
+
it "should convert full-width ASCII symbol between upper-case and lower-case to half-width" do
|
19
|
+
expect(Charwidth.normalize("[\]")).to eq("[\\]")
|
20
|
+
end
|
21
|
+
it "should convert full-width ASCII symbol after lower-case to half-width" do
|
22
|
+
expect(Charwidth.normalize("{|}")).to eq("{|}")
|
23
|
+
end
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
25
|
+
it "should convert half-width CJK punctuation to full-width" do
|
26
|
+
expect(Charwidth.normalize("、。「」")).to eq("、。「」")
|
27
|
+
end
|
27
28
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
29
|
+
it "should convert half-width katakana to full-width" do
|
30
|
+
expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
31
|
+
expect(Charwidth.normalize("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
|
32
|
+
end
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
|
35
|
+
expect(Charwidth.normalize("ガザダバパ")).to eq("ガザダバパ")
|
36
|
+
end
|
37
|
+
|
38
|
+
it "should convert half-width hangul to full-width" do
|
39
|
+
expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
40
|
+
end
|
36
41
|
|
37
|
-
|
38
|
-
|
42
|
+
it "should convert IDIOGRAPHIC-SPACE to SPACE" do
|
43
|
+
expect(Charwidth.normalize("\u3000")).to eq(" ")
|
44
|
+
end
|
39
45
|
end
|
40
46
|
|
41
|
-
|
42
|
-
|
47
|
+
describe ".to_full_width" do
|
48
|
+
it "should convert half-width alphabet to full-width" do
|
49
|
+
expect(Charwidth.to_full_width("ABCabc")).to eq("ABCabc")
|
50
|
+
end
|
51
|
+
it "should convert half-width number to full-width" do
|
52
|
+
expect(Charwidth.to_full_width("123")).to eq("123")
|
53
|
+
end
|
54
|
+
it "should convert half-width ASCII symbol before numbers to full-width" do
|
55
|
+
expect(Charwidth.to_full_width("!\"#")).to eq("!"#")
|
56
|
+
end
|
57
|
+
it "should convert half-width ASCII symbol between numbers and upper-case to full-width" do
|
58
|
+
expect(Charwidth.to_full_width(":;")).to eq(":;")
|
59
|
+
end
|
60
|
+
it "should convert half-width ASCII symbol between upper-case and lower-case to full-width" do
|
61
|
+
expect(Charwidth.to_full_width("[\\]")).to eq("[\]")
|
62
|
+
end
|
63
|
+
it "should convert half-width ASCII symbol after lower-case to full-width" do
|
64
|
+
expect(Charwidth.to_full_width("{|}")).to eq("{|}")
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should convert half-width CJK punctuation to full-width" do
|
68
|
+
expect(Charwidth.to_full_width("、。「」")).to eq("、。「」")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should convert half-width katakana to full-width" do
|
72
|
+
expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
73
|
+
expect(Charwidth.to_full_width("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
|
77
|
+
expect(Charwidth.to_full_width("ガザダバパ")).to eq("ガザダバパ")
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should convert half-width hangul to full-width" do
|
81
|
+
expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should convert SPACE to IDIOGRAPHIC-SPACE" do
|
85
|
+
expect(Charwidth.to_full_width(" ")).to eq("\u3000")
|
86
|
+
end
|
43
87
|
end
|
44
88
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charwidth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- labocho
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec-its
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: simplecov
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
157
|
version: '0'
|
144
158
|
requirements: []
|
145
159
|
rubyforge_project:
|
146
|
-
rubygems_version: 2.
|
160
|
+
rubygems_version: 2.6.10
|
147
161
|
signing_key:
|
148
162
|
specification_version: 4
|
149
163
|
summary: Normalize Japanese / Korean fullwidth (zenkaku) and halfwidth (hankaku) characters
|