chenillen-pinyin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. data/README.rdoc +29 -0
  2. data/Rakefile +14 -0
  3. data/dict/Mandarin.dat +25477 -0
  4. data/lib/pinyin.rb +75 -0
  5. data/pinyin.gemspec +31 -0
  6. metadata +64 -0
@@ -0,0 +1,75 @@
1
+ require 'singleton'
2
+
3
# Converts Chinese text to pinyin using a lookup table read from
# dict/Mandarin.dat (keyed by upper-case hexadecimal code point).
class Pinyin

  # Builds the lookup table from the dictionary file located relative to this file.
  #
  #   py = Pinyin.new
  #
  # Every dictionary line is split on whitespace: the first field is the key
  # and the second field is the reading that gets stored. Any further fields
  # on the line are ignored.
  def initialize
    ma = File.join(File.dirname(File.expand_path(__FILE__)), '../dict/Mandarin.dat')
    @codes = {}
    File.readlines(ma).each do |line|
      code, reading = line.split
      # Blank or incomplete lines carry no usable mapping.
      next unless code && reading
      @codes[code] = reading
    end
  end

  # Generates a permalink-style string whose parts are joined by +separator+.
  #
  #   Interesting-Ruby-Tidbits-That-Dont-Need-Separate-Posts-17
  #
  # The separator defaults to '-'; pass another one if needed:
  #
  #   to_permlink(str, '~')
  def to_permlink(str, separator = '-')
    str_to_pinyin(str, separator)
  end

  # Generates a short form made of the first letter of every character's
  # reading, e.g. "zgyh" for "中国银行".
  def to_pinyin_abbr(str)
    str_to_pinyin(str, '', false, true)
  end

  # Name-style abbreviation: the first character keeps its full reading and
  # every following character contributes only its first letter,
  # e.g. "liudh" for "刘德华".
  def to_pinyin_abbr_else(str)
    str_to_pinyin(str, '', true, nil)
  end

  # General conversion.
  #
  # str::       text to convert
  # separator:: string placed between readings (default: none)
  # tone::      when true the digits in the stored reading are kept,
  #             e.g. "ni3hao3zhong1guo2"
  def to_pinyin(str, separator = '', tone = false)
    str_to_pinyin(str, separator, false, false, tone)
  end


  # Private Methods Started...
  private

  # Returns the stored reading for an upper-case hex code point, or nil.
  def get_value(code)
    @codes[code]
  end

  # Shared worker behind all public conversions.
  #
  # str::       text to convert (unpacked as UTF-8 code points)
  # separator:: string appended after every converted reading
  # abbr_else:: keep the first reading whole, abbreviate the following ones
  # abbr::      abbreviate every reading to its first letter
  # tone::      keep the digits contained in the stored reading
  #
  # Characters without a reading are kept only if they are ASCII letters,
  # digits, underscore or whitespace; everything else (full-width
  # punctuation and the like) is dropped. With a non-empty separator,
  # whitespace becomes the separator, runs of separators are collapsed into
  # one, and a trailing separator is removed.
  def str_to_pinyin(str, separator = '', abbr_else = false, abbr = false, tone = false)
    joining = separator != ''
    res = []
    str.unpack('U*').each_with_index do |t, idx|
      val = get_value(sprintf('%x', t).upcase)
      # Was a pinyin reading found?
      if val
        val = val.gsub(/\d/, '') unless tone
        val = val[0..0] if (abbr && !abbr_else) || (abbr_else && idx != 0)
        res << val.downcase + separator
      else
        tmp = [t].pack('U*')
        # Pass plain characters through; drop special ones.
        next unless tmp =~ /\A[_0-9a-zA-Z\s]*\z/
        # Whitespace is converted here, per character, so that whitespace
        # inside the separator itself is never rewritten afterwards.
        res << (joining && tmp =~ /\s/ ? separator : tmp)
      end
    end

    joined = res.join('')
    return joined unless joining

    # Escape the separator so it is always matched literally, and group it
    # so that "+" applies to the whole separator rather than its last char.
    sep = Regexp.escape(separator)
    joined.gsub(/(?:#{sep})+/) { separator }.sub(/#{sep}\z/, '')
  end
end
75
+
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for the "pinyin" gem, version 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{pinyin}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Allen Chan"]
  s.date = %q{2009-03-11}
  s.description = %q{Convert Chinese Charaters to Pinyin letters.}
  s.email = %q{chenillen@gmail.com}
  s.extra_rdoc_files = ["lib/pinyin.rb", "README.rdoc"]
  s.files = ["dict/Mandarin.dat", "lib/pinyin.rb", "Manifest", "Rakefile", "README.rdoc", "pinyin.gemspec"]
  # Legacy setter: only call it when the running RubyGems still provides it.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  # NOTE(review): the homepage points at a repository named "uniquify" - confirm this is intended.
  s.homepage = %q{http://github.com/chenillen/uniquify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Pinyin", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # Legacy setter: only call it when the running RubyGems still provides it.
  s.rubyforge_project = %q{pinyin} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Convert Chinese Charaters to Pinyin letters.}

  # The gem declares no dependencies, so nothing here varies by RubyGems
  # version; the specification version is set directly, without consulting
  # RubyGems version constants.
  s.specification_version = 2 if s.respond_to? :specification_version
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: chenillen-pinyin
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Allen Chan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-11 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Convert Chinese Charaters to Pinyin letters.
17
+ email: chenillen@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - lib/pinyin.rb
24
+ - README.rdoc
25
+ files:
26
+ - dict/Mandarin.dat
27
+ - lib/pinyin.rb
28
+ - Manifest
29
+ - Rakefile
30
+ - README.rdoc
31
+ - pinyin.gemspec
32
+ has_rdoc: true
33
+ homepage: http://github.com/chenillen/uniquify
34
+ post_install_message:
35
+ rdoc_options:
36
+ - --line-numbers
37
+ - --inline-source
38
+ - --title
39
+ - Pinyin
40
+ - --main
41
+ - README.rdoc
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "1.2"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project: pinyin
59
+ rubygems_version: 1.2.0
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Convert Chinese Charaters to Pinyin letters.
63
+ test_files: []
64
+