zhconv 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ # encoding: utf-8
2
# Put this file's own directory on the load path, but only if it is not
# already listed there.
lib_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
4
+
5
+ require 'open-uri'
6
+
7
# Converts Chinese text between variants using MediaWiki-style conversion
# tables, read either from the zh.wikipedia.org conversion-table pages or
# from the text files under this gem's data/ directory.
module ZhConv
  # Cache of Converter instances, keyed by the table location (URL or file
  # path) they were built from, so each table is loaded at most once.
  @converters = {}
  VARIANTS = ["zh-hans", "zh-hant", "zh-cn", "zh-sg", "zh-tw", "zh-hk"]

  # Script-level table that is applied for each supported variant. Regional
  # variants first use their own table and then the table named here.
  SCRIPT_FOR = {
    "zh-hans" => "zh-hans",
    "zh-cn"   => "zh-hans",
    "zh-sg"   => "zh-hans",
    "zh-hant" => "zh-hant",
    "zh-tw"   => "zh-hant",
    "zh-hk"   => "zh-hant"
  }.freeze

  # Holds one conversion table and applies it to strings.
  class Converter
    # One table entry per line: a source term (introduced by `*` or `"`),
    # then `=>`, then the replacement, terminated by `;` or `,`.
    TABLE_ENTRY = /^[\*\"]([^\"=]+)\"?\s*=>\s*\"?([^\s\/\"]+)\s?.*\"?[;,]$/

    # Table locations that have to be fetched over HTTP(S).
    REMOTE_SOURCE = %r{\Ahttps?://}i

    # Loads the table found at +url+.
    #
    # @param url [String] an http(s) URL or a local file path
    def initialize(url)
      @mapping = {}
      read_table(url).each_line do |line|
        entry = line.match(TABLE_ENTRY)
        @mapping[entry[1].strip] = entry[2].strip if entry
      end
    end

    # Applies every table entry to +input+, in the hash's iteration order;
    # each replacement works on the result of the previous ones.
    #
    # The argument itself is left untouched (so frozen strings are fine);
    # the converted text is returned as a new string.
    #
    # @param input [String] text to convert
    # @return [String] converted copy of +input+
    def convert(input)
      output = input.dup
      @mapping.each { |from, to| output.gsub!(from, to) }
      output
    end

    private

    # Returns the raw table text and always releases the underlying handle.
    # Local paths are read with File.read rather than Kernel#open, which
    # would treat a leading "|" as a command to run. Remote tables go
    # through URI.open where it is available, because Kernel#open stopped
    # fetching URLs in Ruby 3.0; older Rubies fall back to open-uri's
    # Kernel#open.
    def read_table(source)
      return File.read(source) unless source =~ REMOTE_SOURCE

      if URI.respond_to?(:open)
        URI.open(source) { |io| io.read }
      else
        open(source) { |io| io.read }
      end
    end
  end

  # Converts +message+ to +variant+.
  #
  # Regional variants (zh-cn, zh-sg, zh-tw, zh-hk) are run through their own
  # table first and then through the matching script table (zh-hans or
  # zh-hant). For every table the web version is applied before the local
  # one when +use_web+ is true. A variant that is not listed in SCRIPT_FOR
  # returns +message+ unchanged.
  #
  # @param variant [String] target variant, see VARIANTS
  # @param message [String] text to convert
  # @param use_web [Boolean] also apply the tables fetched from the web
  # @return [String] the converted text
  def self.convert(variant, message, use_web=true)
    script = SCRIPT_FOR[variant]
    return message unless script

    tables = variant == script ? [script] : [variant, script]
    tables.each do |table|
      message = web_converter(table).convert(message) if use_web
      message = local_converter(table).convert(message)
    end
    message
  end

  # Returns the cached converter built from the bundled data file.
  #
  # @raise [RuntimeError] when +variant+ is not in VARIANTS
  def self.local_converter(variant)
    converter_for(variant, false)
  end

  # Returns the cached converter built from the online conversion table.
  #
  # @raise [RuntimeError] when +variant+ is not in VARIANTS
  def self.web_converter(variant)
    converter_for(variant, true)
  end

  # Location of the conversion table for +variant+: the raw wiki page when
  # +web_converter+ is true, otherwise the data file shipped next to lib/.
  def self.converter_url(variant, web_converter=false)
    if web_converter
      "http://zh.wikipedia.org/w/index.php?title=MediaWiki:Conversiontable/#{variant}&action=raw&templates=expand"
    else
      File.expand_path(File.dirname(__FILE__)) + "/../data/#{variant}.txt"
    end
  end

  # Validates +variant+ and returns the memoized Converter for its table.
  def self.converter_for(variant, web)
    raise "variant #{variant} not supported" unless VARIANTS.include?(variant)
    url = converter_url(variant, web)
    @converters[url] ||= Converter.new(url)
  end
  private_class_method :converter_for
end
@@ -0,0 +1,64 @@
1
+ # encoding: utf-8
2
+
3
+ require 'test/unit'
4
+ require "zhconv"
5
+
6
# Exercises ZhConv.convert against the bundled tables (offline tests) and
# against the tables fetched from the web (the *_web tests).
class TestZhConv < Test::Unit::TestCase
  def test_conv_trad
    [
      ["历史", "歷史"],
      ["面包", "麵包"],
      ["狮子山", "獅子山"],
      ["五只", "五隻"]
    ].each do |source, expected|
      assert_equal expected, offline("zh-hant", source)
    end

    assert_equal traditional_sample, offline("zh-hant", simplified_sample)
  end

  def test_conv_hk
    [
      ["历史", "歷史"],
      ["面包", "麵包"],
      ["狮子山", "獅子山"],
      ["罗纳尔多", "羅納爾多"]
    ].each do |source, expected|
      assert_equal expected, offline("zh-hk", source)
    end
  end

  def test_conv_simp
    [
      ["曆史", "历史"],
      ["麵包", "面包"],
      ["遠程控制", "远程控制"]
    ].each do |source, expected|
      assert_equal expected, offline("zh-hans", source)
    end

    assert_equal simplified_sample, offline("zh-hans", traditional_sample)
  end

  def test_text
    assert_equal simplified_sample, offline("zh-cn", traditional_sample)
  end

  def test_conv_trad_web
    assert_equal "五隻", ZhConv.convert("zh-hant", "五只")
  end

  def test_conv_hk_web
    assert_equal "朗拿度", ZhConv.convert("zh-hk", "罗纳尔多")
  end

  def test_conv_simp_web
    assert_equal "远程控制", ZhConv.convert("zh-hans", "遠程控制")
  end

  def test_conv_cn_web
    [
      ["冰淇淋", "冰激凌"],
      ["東加拿大", "东加拿大"]
    ].each do |source, expected|
      assert_equal expected, ZhConv.convert("zh-cn", source)
    end
  end

  def test_conv_sg_web
    [
      ["吉尼斯世界纪录", "健力士世界纪录"],
      ["新西蘭", "纽西兰"]
    ].each do |source, expected|
      assert_equal expected, ZhConv.convert("zh-sg", source)
    end
  end

  private

  # Converts using only the local data files (third argument false).
  def offline(variant, text)
    ZhConv.convert(variant, text, false)
  end

  # Traditional-script sample sentence; a new String on every call so no
  # test ever shares a fixture object with another.
  def traditional_sample
    "蜻蜓,是一種屬於蜻蛉目的昆蟲,於春天到秋天時發生,有細長翅膀與腹部。"
  end

  # Simplified-script counterpart of traditional_sample; also a new String
  # on every call.
  def simplified_sample
    "蜻蜓,是一种属于蜻蛉目的昆虫,于春天到秋天时发生,有细长翅膀与腹部。"
  end
end
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for zhconv 0.1.1.
Gem::Specification.new do |s|
  s.name = "zhconv"
  s.version = "0.1.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Francis Chong"]
  s.date = "2011-11-16"
  s.description = "Convert chinese using mediawiki conversion table"
  s.email = "francis@ignition.hk"
  s.extra_rdoc_files = ["README.md", "lib/zhconv.rb"]
  s.files = ["Gemfile", "Gemfile.lock", "Manifest", "README.md", "Rakefile", "data/zh-cn.txt", "data/zh-hans.txt", "data/zh-hant.txt", "data/zh-hk.txt", "data/zh-sg.txt", "data/zh-tw.txt", "lib/zhconv.rb", "test/test_zhconv.rb", "zhconv.gemspec"]
  s.homepage = "http://github.com/siuying/zhconv"
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Zhconv", "--main", "README.md"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in newer RubyGems releases and has been
  # announced for removal, so only set it where the writer still exists;
  # otherwise loading this gemspec would raise NoMethodError.
  s.rubyforge_project = "zhconv" if s.respond_to? :rubyforge_project=
  s.rubygems_version = "1.8.10"
  s.summary = "Convert chinese using mediawiki conversion table"
  s.test_files = ["test/test_zhconv.rb"]

  # The gem declares no dependencies, so the only version-specific step is
  # recording the specification format where RubyGems supports it.
  s.specification_version = 3 if s.respond_to? :specification_version
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: zhconv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Francis Chong
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-16 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Convert chinese using mediawiki conversion table
15
+ email: francis@ignition.hk
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files:
19
+ - README.md
20
+ - lib/zhconv.rb
21
+ files:
22
+ - Gemfile
23
+ - Gemfile.lock
24
+ - Manifest
25
+ - README.md
26
+ - Rakefile
27
+ - data/zh-cn.txt
28
+ - data/zh-hans.txt
29
+ - data/zh-hant.txt
30
+ - data/zh-hk.txt
31
+ - data/zh-sg.txt
32
+ - data/zh-tw.txt
33
+ - lib/zhconv.rb
34
+ - test/test_zhconv.rb
35
+ - zhconv.gemspec
36
+ homepage: http://github.com/siuying/zhconv
37
+ licenses: []
38
+ post_install_message:
39
+ rdoc_options:
40
+ - --line-numbers
41
+ - --inline-source
42
+ - --title
43
+ - Zhconv
44
+ - --main
45
+ - README.md
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '1.2'
60
+ requirements: []
61
+ rubyforge_project: zhconv
62
+ rubygems_version: 1.8.10
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: Convert chinese using mediawiki conversion table
66
+ test_files:
67
+ - test/test_zhconv.rb