zhconv 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/Gemfile.lock +25 -0
- data/Manifest +14 -0
- data/README.md +44 -0
- data/Rakefile +22 -0
- data/data/zh-cn.txt +308 -0
- data/data/zh-hans.txt +2901 -0
- data/data/zh-hant.txt +4538 -0
- data/data/zh-hk.txt +205 -0
- data/data/zh-sg.txt +15 -0
- data/data/zh-tw.txt +809 -0
- data/lib/zhconv.rb +96 -0
- data/test/test_zhconv.rb +64 -0
- data/zhconv.gemspec +30 -0
- metadata +67 -0
data/lib/zhconv.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
path = File.expand_path(File.dirname(__FILE__))
|
3
|
+
$:.unshift(path) unless $:.include?(path)
|
4
|
+
|
5
|
+
require 'open-uri'
|
6
|
+
|
7
|
+
# Converts Chinese text between script and regional variants using
# MediaWiki-style conversion tables, read either from the data files shipped
# next to this library or from zh.wikipedia.org.
module ZhConv
  # Cache of Converter instances, keyed by the table location (path or URL).
  @converters = {}

  # Variant codes accepted by local_converter and web_converter.
  VARIANTS = ["zh-hans", "zh-hant", "zh-cn", "zh-sg", "zh-tw", "zh-hk"]

  # Maps every variant to the generic script table that is applied after the
  # variant's own table.
  SCRIPT_OF = {
    "zh-hans" => "zh-hans", "zh-cn" => "zh-hans", "zh-sg" => "zh-hans",
    "zh-hant" => "zh-hant", "zh-tw" => "zh-hant", "zh-hk" => "zh-hant"
  }

  # A single conversion table: an ordered list of "from => to" replacements.
  class Converter
    # Matches one table entry. The line must start with `*` or `"`, contain
    # `=>`, and end with `;` or `,`. Capture 1 is the source text; capture 2 is
    # the first run of the target that has no whitespace, `/` or `"` in it.
    ENTRY_PATTERN = /^[\*\"]([^\"=]+)\"?\s*=>\s*\"?([^\s\/\"]+)\s?.*\"?[;,]$/

    # Loads the table found at +url+.
    #
    # url - a local file path, or a URL that open-uri knows how to read.
    #
    # Lines that do not look like table entries are ignored.
    def initialize(url)
      @mapping = {}
      read_table(url).each_line do |line|
        matches = line.match(ENTRY_PATTERN)
        @mapping[matches[1].strip] = matches[2].strip if matches
      end
    end

    # Returns a converted copy of +input+; +input+ itself is never modified,
    # so frozen strings are accepted.
    #
    # Entries are applied one after another in table order, which means text
    # produced by an earlier entry can be rewritten by a later one.
    def convert(input)
      output = input.dup
      @mapping.each do |source, target|
        # Block form: the target is inserted verbatim, so a backslash in the
        # table is not treated as a back-reference.
        output.gsub!(source) { target }
      end
      output
    end

    private

    # Returns the raw table text. URLs are fetched through open-uri; anything
    # else is opened strictly as a file (never as a `|command` pipe), read as
    # UTF-8, and closed again before returning.
    def read_table(location)
      name = location.to_s
      if name =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
        remote = URI.parse(name)
        return remote.read if remote.respond_to?(:read)
      end
      File.open(location, "r:UTF-8") { |file| file.read }
    end
  end

  # Converts +message+ to +variant+ and returns the converted string.
  #
  # variant - one of VARIANTS; any other value returns +message+ untouched.
  # message - the text to convert; it is not modified.
  # use_web - when true (the default) each step first applies the table
  #           fetched from zh.wikipedia.org and then the bundled table; when
  #           false only the bundled tables are used and no network access
  #           takes place.
  #
  # Regional variants (zh-cn, zh-sg, zh-tw, zh-hk) are converted with their
  # own table first and then with the table of their script (zh-hans or
  # zh-hant). Errors raised while reading a table are not rescued here.
  def self.convert(variant, message, use_web=true)
    script = SCRIPT_OF[variant]
    return message unless script

    [variant, script].uniq.inject(message) do |text, table|
      text = web_converter(table).convert(text) if use_web
      local_converter(table).convert(text)
    end
  end

  # Returns the cached Converter for the table bundled with the library.
  # Raises RuntimeError when +variant+ is not listed in VARIANTS.
  def self.local_converter(variant)
    converter_for(variant, false)
  end

  # Returns the cached Converter for the table hosted on zh.wikipedia.org.
  # The table is downloaded when the converter is first created.
  # Raises RuntimeError when +variant+ is not listed in VARIANTS.
  def self.web_converter(variant)
    converter_for(variant, true)
  end

  # Returns where the table for +variant+ lives: the Wikipedia raw-page URL
  # when +web_converter+ is true, otherwise the path of the bundled data file.
  def self.converter_url(variant, web_converter=false)
    if web_converter
      # https: Wikimedia redirects plain http, and older open-uri versions
      # refuse to follow an http -> https redirect.
      "https://zh.wikipedia.org/w/index.php?title=MediaWiki:Conversiontable/#{variant}&action=raw&templates=expand"
    else
      File.expand_path("../data/#{variant}.txt", File.dirname(__FILE__))
    end
  end

  # Shared implementation of local_converter and web_converter: validates the
  # variant, then builds the converter once and reuses it afterwards.
  def self.converter_for(variant, web)
    raise "variant #{variant} not supported" unless VARIANTS.include?(variant)
    url = converter_url(variant, web)
    @converters[url] ||= Converter.new(url)
  end
  private_class_method :converter_for

end
|
data/test/test_zhconv.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require "zhconv"
|
5
|
+
|
6
|
+
# Tests for ZhConv.convert. Tests that pass `false` as the third argument use
# only the bundled tables; the `*_web` tests use the default and therefore
# need network access to the live conversion tables.
class TestZhConv < Test::Unit::TestCase
  def test_conv_trad
    check_local("zh-hant", [
      ["歷史", "历史"],
      ["麵包", "面包"],
      ["獅子山", "狮子山"],
      ["五隻", "五只"]
    ])

    assert_equal traditional_sample, ZhConv.convert("zh-hant", simplified_sample, false)
  end

  def test_conv_hk
    check_local("zh-hk", [
      ["歷史", "历史"],
      ["麵包", "面包"],
      ["獅子山", "狮子山"],
      ["羅納爾多", "罗纳尔多"]
    ])
  end

  def test_conv_simp
    check_local("zh-hans", [
      ["历史", "曆史"],
      ["面包", "麵包"],
      ["远程控制", "遠程控制"]
    ])

    assert_equal simplified_sample, ZhConv.convert("zh-hans", traditional_sample, false)
  end

  def test_text
    assert_equal simplified_sample, ZhConv.convert("zh-cn", traditional_sample, false)
  end

  def test_conv_trad_web
    check_web("zh-hant", [["五隻", "五只"]])
  end

  def test_conv_hk_web
    check_web("zh-hk", [["朗拿度", "罗纳尔多"]])
  end

  def test_conv_simp_web
    check_web("zh-hans", [["远程控制", "遠程控制"]])
  end

  def test_conv_cn_web
    check_web("zh-cn", [
      ["冰激凌", "冰淇淋"],
      ["东加拿大", "東加拿大"]
    ])
  end

  def test_conv_sg_web
    check_web("zh-sg", [
      ["健力士世界纪录", "吉尼斯世界纪录"],
      ["纽西兰", "新西蘭"]
    ])
  end

  private

  # Asserts each [expected, input] pair, in order, using the bundled tables.
  def check_local(variant, pairs)
    pairs.each do |expected, input|
      assert_equal expected, ZhConv.convert(variant, input, false)
    end
  end

  # Asserts each [expected, input] pair, in order, using the default lookup.
  def check_web(variant, pairs)
    pairs.each do |expected, input|
      assert_equal expected, ZhConv.convert(variant, input)
    end
  end

  # Sample sentence in traditional script; a new String on every call.
  def traditional_sample
    "蜻蜓,是一種屬於蜻蛉目的昆蟲,於春天到秋天時發生,有細長翅膀與腹部。"
  end

  # The same sentence in simplified script; a new String on every call.
  def simplified_sample
    "蜻蜓,是一种属于蜻蛉目的昆虫,于春天到秋天时发生,有细长翅膀与腹部。"
  end

end
|
data/zhconv.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for zhconv 0.1.1.
Gem::Specification.new do |s|
  s.name = "zhconv"
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Francis Chong"]
  s.date = "2011-11-16"
  s.description = "Convert chinese using mediawiki conversion table"
  s.email = "francis@ignition.hk"
  s.extra_rdoc_files = ["README.md", "lib/zhconv.rb"]
  s.files = ["Gemfile", "Gemfile.lock", "Manifest", "README.md", "Rakefile", "data/zh-cn.txt", "data/zh-hans.txt", "data/zh-hant.txt", "data/zh-hk.txt", "data/zh-sg.txt", "data/zh-tw.txt", "lib/zhconv.rb", "test/test_zhconv.rb", "zhconv.gemspec"]
  s.homepage = "http://github.com/siuying/zhconv"
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Zhconv", "--main", "README.md"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in current RubyGems; guard it the same
  # way as the other optional setters so the file keeps loading without it.
  s.rubyforge_project = "zhconv" if s.respond_to? :rubyforge_project=
  s.rubygems_version = "1.8.10"
  s.summary = "Convert chinese using mediawiki conversion table"
  s.test_files = ["test/test_zhconv.rb"]

  # The gem declares no dependencies, so only the specification version needs
  # to be set here.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zhconv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Francis Chong
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-11-16 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Convert chinese using mediawiki conversion table
|
15
|
+
email: francis@ignition.hk
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files:
|
19
|
+
- README.md
|
20
|
+
- lib/zhconv.rb
|
21
|
+
files:
|
22
|
+
- Gemfile
|
23
|
+
- Gemfile.lock
|
24
|
+
- Manifest
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- data/zh-cn.txt
|
28
|
+
- data/zh-hans.txt
|
29
|
+
- data/zh-hant.txt
|
30
|
+
- data/zh-hk.txt
|
31
|
+
- data/zh-sg.txt
|
32
|
+
- data/zh-tw.txt
|
33
|
+
- lib/zhconv.rb
|
34
|
+
- test/test_zhconv.rb
|
35
|
+
- zhconv.gemspec
|
36
|
+
homepage: http://github.com/siuying/zhconv
|
37
|
+
licenses: []
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options:
|
40
|
+
- --line-numbers
|
41
|
+
- --inline-source
|
42
|
+
- --title
|
43
|
+
- Zhconv
|
44
|
+
- --main
|
45
|
+
- README.md
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '1.2'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project: zhconv
|
62
|
+
rubygems_version: 1.8.10
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: Convert chinese using mediawiki conversion table
|
66
|
+
test_files:
|
67
|
+
- test/test_zhconv.rb
|