charwidth 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0b348719803e3e762056267c4f562cc2b88cd45d
4
- data.tar.gz: 7f6367623657410b4deeb6b3927bfd983ae2d5c6
3
+ metadata.gz: 4d80ef9c4f6abc327fc91b3551c412da1be6f0f2
4
+ data.tar.gz: e8f1793a2de0a8edb0f71a4c50f1f3732801ebb7
5
5
  SHA512:
6
- metadata.gz: 37bbf39dc106e747f4c0a8339e8c1bf58d9de55bea754680e719cbe149570783197cecdfec7a4a5fe5da03d0adf4b3efa6cda28915462d3263eb795bf253d99a
7
- data.tar.gz: 73fe1f134bb17213ce8dd8859c4d96a4801593fa52445876eaaac0d407cbe178d8f159e834e21bbb05d362e3da6eaa746394a7d62afa10829008434d863b6e6a
6
+ metadata.gz: 5408b2545d686f3fc9fab6bb327ceae284bcdc9d05389dde12ac9a593c1b7f1a7d5de77666b6db2e5cc3626eab8e7b85f9c2d362ff9ab5c919dc8ad41858f74d
7
+ data.tar.gz: 31053172fbda051a8516b7f5e6e41fbfb98852340c9bd0d9f97f38213dd059a33e643133bf0a2e32977ddf7704537f329a72f1fb4a96e2d9c3888ab766405df3
data/README.md CHANGED
@@ -6,7 +6,7 @@ Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters.
6
6
 
7
7
  Add this line to your application's Gemfile:
8
8
 
9
- gem 'charwidth_template'
9
+ gem 'charwidth'
10
10
 
11
11
 
12
12
  And then execute:
@@ -15,7 +15,7 @@ And then execute:
15
15
 
16
16
  Or install it yourself as:
17
17
 
18
- $ gem install charwidth_template
18
+ $ gem install charwidth
19
19
 
20
20
  # Usage
21
21
 
data/charwidth.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_development_dependency "rspec"
24
+ spec.add_development_dependency "rspec-its"
24
25
  spec.add_development_dependency "simplecov"
25
26
  spec.add_development_dependency "activerecord", ">= 3"
26
27
  spec.add_development_dependency "sqlite3"
data/lib/charwidth.rb CHANGED
@@ -6,6 +6,41 @@ module Charwidth
6
6
  autoload :CLI, "charwidth/cli"
7
7
 
8
8
  module ClassMethods
9
+ HALFWIDTH_TO_FULLWIDTH = {
10
+ ascii: [
11
+ Characters::ASCII_PUNCTUATION_AND_SYMBOLS,
12
+ Characters::FULLWIDTH_ASCII_VARIANTS,
13
+ ],
14
+ white_parenthesis: [
15
+ Characters::WHITE_PARENTHESIS,
16
+ Characters::FULLWIDTH_BRACKETS,
17
+ ],
18
+ cjk_punctuation: [
19
+ Characters::HALFWIDTH_CJK_PUCTUATION,
20
+ Characters::CJK_SYMBOLS_AND_PUNCTUATION,
21
+ ],
22
+ katakana: [
23
+ Characters::HALFWIDTH_KATAKANA_VARIANTS,
24
+ Characters::KATAKANA,
25
+ ],
26
+ hangul: [
27
+ Characters::HALFWIDTH_HANGUL_VARIANTS,
28
+ Characters::HANGUL,
29
+ ],
30
+ latin_1_punctuation_and_symbols: [
31
+ Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS,
32
+ Characters::FULLWIDTH_SYMBOL_VARIANTS,
33
+ ],
34
+ mathematical_symbols: [
35
+ Characters::HALFWIDTH_SYMBOL_VARIANTS,
36
+ Characters::MATHEMATICAL_SYMBOLS,
37
+ ],
38
+ space: [
39
+ Characters::SPACE,
40
+ Characters::IDEOGRAPHIC_SPACE,
41
+ ],
42
+ }
43
+
9
44
  # Normalize Unicode fullwidth / halfwidth (zenkaku / hankaku) characters
10
45
  # options: {
11
46
  # only: [:ascii, :white_parenthesis, :cjk_punctuation, :katakana, :space],
@@ -21,6 +56,24 @@ module Charwidth
21
56
  normalize_charwidth!(string, options)
22
57
  end
23
58
 
59
+ def to_full_width(string)
60
+ to_full_width!(string.dup)
61
+ end
62
+
63
+ def to_full_width!(src)
64
+ unify_voiced_katakana!(src)
65
+
66
+ before, after = "", ""
67
+ HALFWIDTH_TO_FULLWIDTH.each_value do |half, full|
68
+ before << half
69
+ after << full
70
+ end
71
+
72
+ escape_for_tr!(before)
73
+ escape_for_tr!(after)
74
+ src.tr!(before, after) || src
75
+ end
76
+
24
77
  private
25
78
  TYPES = [
26
79
  :ascii, :white_parenthesis, :cjk_punctuation, :katakana, :hangul,
@@ -51,34 +104,23 @@ module Charwidth
51
104
  before, after = "", ""
52
105
  types.each do |type|
53
106
  case type
54
- when :ascii
55
- before << Characters::FULLWIDTH_ASCII_VARIANTS
56
- after << Characters::ASCII_PUNCTUATION_AND_SYMBOLS
57
- when :white_parenthesis
58
- before << Characters::FULLWIDTH_BRACKETS
59
- after << Characters::WHITE_PARENTHESIS
60
- when :cjk_punctuation
61
- before << Characters::HALFWIDTH_CJK_PUCTUATION
62
- after << Characters::CJK_SYMBOLS_AND_PUNCTUATION
63
- when :katakana
64
- before << Characters::HALFWIDTH_KATAKANA_VARIANTS
65
- after << Characters::KATAKANA
66
- when :hangul
67
- before << Characters::HALFWIDTH_HANGUL_VARIANTS
68
- after << Characters::HANGUL
69
- when :latin_1_punctuation_and_symbols
70
- before << Characters::FULLWIDTH_SYMBOL_VARIANTS
71
- after << Characters::LATIN_1_PUNCTUATION_AND_SYMBOLS
72
- when :mathematical_symbols
73
- before << Characters::HALFWIDTH_SYMBOL_VARIANTS
74
- after << Characters::MATHEMATICAL_SYMBOLS
75
- when :space
76
- before << Characters::IDEOGRAPHIC_SPACE
77
- after << Characters::SPACE
107
+ when :ascii, :white_parenthesis, :latin_1_punctuation_and_symbols, :space
108
+ # convert fullwidth to halfwidth
109
+ HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
110
+ before << full
111
+ after << half
112
+ }
113
+ when :cjk_punctuation, :katakana, :hangul, :mathematical_symbols
114
+ # convert halfwidth to fullwidth
115
+ HALFWIDTH_TO_FULLWIDTH[type].tap{|half, full|
116
+ before << half
117
+ after << full
118
+ }
78
119
  end
79
120
  end
80
121
 
81
- after.sub!('\\', '\\\\\\\\') # escape for tr
122
+ escape_for_tr!(before)
123
+ escape_for_tr!(after)
82
124
  src.tr!(before, after) || src
83
125
  end
84
126
 
@@ -94,6 +136,15 @@ module Charwidth
94
136
  str.gsub!(h, f) || str
95
137
  end
96
138
  end
139
+
140
+ def escape_for_tr!(s)
141
+ s.gsub!('\\', '\\\\')
142
+ s.gsub!('-', '\\-')
143
+ s.gsub!('^', '\\^')
144
+ s.gsub!('[', '\\[')
145
+ s.gsub!(']', '\\]')
146
+ s
147
+ end
97
148
  end
98
149
 
99
150
  extend ClassMethods
@@ -8,6 +8,14 @@ module Charwidth
8
8
  def normalize_charwidth!(options = {})
9
9
  Charwidth.normalize!(self, options)
10
10
  end
11
+
12
+ def to_full_width
13
+ dup.to_full_width!
14
+ end
15
+
16
+ def to_full_width!
17
+ Charwidth.to_full_width!(self)
18
+ end
11
19
  end
12
20
  end
13
21
 
@@ -1,3 +1,3 @@
1
1
  module Charwidth
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -2,43 +2,87 @@
2
2
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
4
  describe "Charwidth" do
5
- it "should convert full-width alphabet to half-width" do
6
- Charwidth.normalize("ABCabc").should == "ABCabc"
7
- end
8
- it "should convert full-width number to half-width" do
9
- Charwidth.normalize("123").should == "123"
10
- end
11
- it "should convert full-width ASCII symbol before numbers to half-width" do
12
- Charwidth.normalize("!"#").should == "!\"#"
13
- end
14
- it "should convert full-width ASCII symbol between numbers and upper-case to half-width" do
15
- Charwidth.normalize(":;").should == ":;"
16
- end
17
- it "should convert full-width ASCII symbol between upper-case and lower-case to half-width" do
18
- Charwidth.normalize("[\]").should == "[\\]"
19
- end
20
- it "should convert full-width ASCII symbol after lower-case to half-width" do
21
- Charwidth.normalize("{|}").should == "{|}"
22
- end
5
+ describe ".normalize" do
6
+ it "should convert full-width alphabet to half-width" do
7
+ expect(Charwidth.normalize("ABCabc")).to eq("ABCabc")
8
+ end
9
+ it "should convert full-width number to half-width" do
10
+ expect(Charwidth.normalize("123")).to eq("123")
11
+ end
12
+ it "should convert full-width ASCII symbol before numbers to half-width" do
13
+ expect(Charwidth.normalize("!"#")).to eq("!\"#")
14
+ end
15
+ it "should convert full-width ASCII symbol between numbers and upper-case to half-width" do
16
+ expect(Charwidth.normalize(":;")).to eq(":;")
17
+ end
18
+ it "should convert full-width ASCII symbol between upper-case and lower-case to half-width" do
19
+ expect(Charwidth.normalize("[\]")).to eq("[\\]")
20
+ end
21
+ it "should convert full-width ASCII symbol after lower-case to half-width" do
22
+ expect(Charwidth.normalize("{|}")).to eq("{|}")
23
+ end
23
24
 
24
- it "should convert half-width CJK punctuation to full-width" do
25
- Charwidth.normalize("、。「」").should == "、。「」"
26
- end
25
+ it "should convert half-width CJK punctuation to full-width" do
26
+ expect(Charwidth.normalize("、。「」")).to eq("、。「」")
27
+ end
27
28
 
28
- it "should convert half-width katakana to full-width" do
29
- Charwidth.normalize("アカサタナハマヤラワヲンァャッー・").should == "アカサタナハマヤラワヲンァャッー・"
30
- Charwidth.normalize("タチツテトナニヌネノ").should == "タチツテトナニヌネノ"
31
- end
29
+ it "should convert half-width katakana to full-width" do
30
+ expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
31
+ expect(Charwidth.normalize("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
32
+ end
32
33
 
33
- it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
34
- Charwidth.normalize("ガザダバパ").should == "ガザダバパ"
35
- end
34
+ it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
35
+ expect(Charwidth.normalize("ガザダバパ")).to eq("ガザダバパ")
36
+ end
37
+
38
+ it "should convert half-width hangul to full-width" do
39
+ expect(Charwidth.normalize("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
40
+ end
36
41
 
37
- it "should convert half-width hangul to full-width" do
38
- Charwidth.normalize("アカサタナハマヤラワヲンァャッー・").should == "アカサタナハマヤラワヲンァャッー・"
42
+ it "should convert IDIOGRAPHIC-SPACE to SPACE" do
43
+ expect(Charwidth.normalize("\u3000")).to eq(" ")
44
+ end
39
45
  end
40
46
 
41
- it "should convert IDIOGRAPHIC-SPACE to SPACE" do
42
- Charwidth.normalize("\u3000").should == " "
47
+ describe ".to_full_width" do
48
+ it "should convert half-width alphabet to full-width" do
49
+ expect(Charwidth.to_full_width("ABCabc")).to eq("ABCabc")
50
+ end
51
+ it "should convert half-width number to full-width" do
52
+ expect(Charwidth.to_full_width("123")).to eq("123")
53
+ end
54
+ it "should convert half-width ASCII symbol before numbers to full-width" do
55
+ expect(Charwidth.to_full_width("!\"#")).to eq("!"#")
56
+ end
57
+ it "should convert half-width ASCII symbol between numbers and upper-case to full-width" do
58
+ expect(Charwidth.to_full_width(":;")).to eq(":;")
59
+ end
60
+ it "should convert half-width ASCII symbol between upper-case and lower-case to full-width" do
61
+ expect(Charwidth.to_full_width("[\\]")).to eq("[\]")
62
+ end
63
+ it "should convert half-width ASCII symbol after lower-case to full-width" do
64
+ expect(Charwidth.to_full_width("{|}")).to eq("{|}")
65
+ end
66
+
67
+ it "should convert half-width CJK punctuation to full-width" do
68
+ expect(Charwidth.to_full_width("、。「」")).to eq("、。「」")
69
+ end
70
+
71
+ it "should convert half-width katakana to full-width" do
72
+ expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
73
+ expect(Charwidth.to_full_width("タチツテトナニヌネノ")).to eq("タチツテトナニヌネノ")
74
+ end
75
+
76
+ it "should unify half-width (semi) voiced katakana with dakuon to full-width" do
77
+ expect(Charwidth.to_full_width("ガザダバパ")).to eq("ガザダバパ")
78
+ end
79
+
80
+ it "should convert half-width hangul to full-width" do
81
+ expect(Charwidth.to_full_width("アカサタナハマヤラワヲンァャッー・")).to eq("アカサタナハマヤラワヲンァャッー・")
82
+ end
83
+
84
+ it "should convert SPACE to IDIOGRAPHIC-SPACE" do
85
+ expect(Charwidth.to_full_width(" ")).to eq("\u3000")
86
+ end
43
87
  end
44
88
  end
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
2
  $LOAD_PATH.unshift(File.dirname(__FILE__))
3
3
  require 'rspec'
4
+ require 'rspec/its'
4
5
  require 'charwidth'
5
6
 
6
7
  # Requires supporting files with custom matchers and macros, etc,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charwidth
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - labocho
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-04 00:00:00.000000000 Z
11
+ date: 2017-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec-its
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: simplecov
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -143,7 +157,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
143
157
  version: '0'
144
158
  requirements: []
145
159
  rubyforge_project:
146
- rubygems_version: 2.2.0
160
+ rubygems_version: 2.6.10
147
161
  signing_key:
148
162
  specification_version: 4
149
163
  summary: Normalize Japanese / Korean fullwidth (zenkaku) and halfwidth (hankaku) characters