chenillen-pinyin 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. data/README.rdoc +29 -0
  2. data/Rakefile +14 -0
  3. data/dict/Mandarin.dat +25477 -0
  4. data/lib/pinyin.rb +75 -0
  5. data/pinyin.gemspec +31 -0
  6. metadata +64 -0
@@ -0,0 +1,75 @@
1
+ require 'singleton'
2
+
3
# Converts strings containing Chinese characters into pinyin by looking up
# each Unicode code point in a table loaded from dict/Mandarin.dat.
#
#   py = Pinyin.new
#   py.to_permlink(str)      # pinyin syllables joined with '-'
#   py.to_pinyin_abbr(str)   # first letter of every syllable
class Pinyin

  # Loads the lookup table from ../dict/Mandarin.dat (relative to this file).
  #
  # Each line is split on whitespace; the first field becomes the key and the
  # second the value. NOTE(review): judging from how the table is queried
  # below, keys are presumably upper-case hexadecimal code points and values
  # are pinyin readings with a trailing tone digit -- confirm against the
  # dictionary file.
  def initialize
    dict_path = File.join(File.dirname(File.expand_path(__FILE__)), '../dict/Mandarin.dat')
    @codes = {}
    # Stream the file line by line instead of loading it all into memory.
    File.foreach(dict_path) do |line|
      code, reading = line.split(/\s/)
      # Blank lines yield no fields; skip them rather than storing nil => nil.
      @codes[code] = reading if code
    end
  end

  # Generates a permalink: pinyin syllables followed by +separator+, runs of
  # whitespace replaced by +separator+, repeated separators collapsed and a
  # trailing separator removed.
  # Example shape: Interesting-Ruby-Tidbits-That-Dont-Need-Separate-Posts-17
  #
  # str       - the String to convert.
  # separator - the String placed between parts (default '-'),
  #             e.g. to_permlink(str, '~').
  #
  # Returns the permalink String.
  def to_permlink(str, separator = '-')
    str_to_pinyin(str, separator)
  end

  # Generates a short form made of the first letter of each character's
  # pinyin, e.g. "zgyh" stands for "中国银行".
  #
  # Returns the abbreviated String.
  def to_pinyin_abbr(str)
    str_to_pinyin(str, '', false, true)
  end

  # Name-style abbreviation: the first character keeps its full pinyin and
  # every following character contributes only its first letter,
  # e.g. "liudh" for "刘德华".
  #
  # Returns the abbreviated String.
  def to_pinyin_abbr_else(str)
    str_to_pinyin(str, '', true, nil)
  end

  # General conversion.
  #
  # str       - the String to convert.
  # separator - the String placed between parts (default '').
  # tone      - when truthy, the tone digits stored in the table are kept,
  #             e.g. "ni3hao3zhong1guo2"; otherwise they are removed.
  #
  # Returns the pinyin String.
  def to_pinyin(str, separator = '', tone = false)
    str_to_pinyin(str, separator, false, false, tone)
  end

  # Private methods start here.
  private

  # Returns the table entry for an upper-case hex code point String, or nil.
  def get_value(code)
    @codes[code]
  end

  # Core conversion shared by all public methods.
  #
  # str       - the String to convert; decoded as UTF-8 code points.
  # separator - the String appended after every converted character.
  # abbr_else - when truthy, every character except the first is reduced to
  #             the first letter of its pinyin.
  # abbr      - when truthy (and abbr_else is not), every character is reduced
  #             to the first letter of its pinyin.
  # tone      - when truthy, digits in the table value are kept.
  #
  # Characters without a table entry are kept only if they are ASCII letters,
  # digits, underscore or whitespace; everything else is dropped.
  #
  # Returns the converted String.
  def str_to_pinyin(str, separator = '', abbr_else = false, abbr = false, tone = false)
    res = []
    str.unpack('U*').each_with_index do |t, idx|
      code = sprintf('%x', t).upcase
      val = get_value(code)
      # Was a pinyin reading found for this code point?
      if val
        val = val.gsub(/\d/, '') unless tone
        val = val[0..0] if (abbr && !abbr_else) || (abbr_else && idx != 0)
        res << val.downcase + separator
      else
        tmp = [t].pack('U*')
        # Keep plain ASCII word characters and whitespace as they are; drop
        # special characters such as full-width punctuation.
        res << tmp if tmp =~ /\A[_0-9a-zA-Z\s]*\z/
      end
    end

    joined = res.join('')
    return joined if separator == ''

    # Escape the separator so it is always matched literally; a bare
    # backslash prefix would turn separators such as 'n', 'd' or 's' into
    # regex escape classes.
    sep = Regexp.escape(separator)
    # Block-form gsub inserts the separator verbatim, without interpreting
    # backslash sequences in it as replacement patterns.
    joined.gsub(/\s+/) { separator }.
           gsub(/(?:#{sep})+/) { separator }.
           gsub(/(?:#{sep})\z/, '')
  end
end
75
+
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for the pinyin gem (version 0.1.0).
#
# Setters that are not available in every RubyGems release are guarded with
# respond_to? so that the file loads regardless of the installed version.
Gem::Specification.new do |s|
  s.name = %q{pinyin}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Allen Chan"]
  s.date = %q{2009-03-11}
  s.description = %q{Convert Chinese Charaters to Pinyin letters.}
  s.email = %q{chenillen@gmail.com}
  s.extra_rdoc_files = ["lib/pinyin.rb", "README.rdoc"]
  s.files = ["dict/Mandarin.dat", "lib/pinyin.rb", "Manifest", "Rakefile", "README.rdoc", "pinyin.gemspec"]
  # has_rdoc= is a deprecated setter; only call it where it still exists.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  # NOTE(review): the homepage points at a "uniquify" repository, which looks
  # like a leftover from another project -- confirm the intended URL.
  s.homepage = %q{http://github.com/chenillen/uniquify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Pinyin", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # rubyforge_project= is a deprecated setter; only call it where it still exists.
  s.rubyforge_project = %q{pinyin} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Convert Chinese Charaters to Pinyin letters.}

  # The generated version-check skeleton that used to follow had empty
  # branches and evaluated Gem::RubyGemsVersion; only the assignment it
  # wrapped is kept.
  s.specification_version = 2 if s.respond_to? :specification_version=
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: chenillen-pinyin
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Allen Chan
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-11 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Convert Chinese Charaters to Pinyin letters.
17
+ email: chenillen@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - lib/pinyin.rb
24
+ - README.rdoc
25
+ files:
26
+ - dict/Mandarin.dat
27
+ - lib/pinyin.rb
28
+ - Manifest
29
+ - Rakefile
30
+ - README.rdoc
31
+ - pinyin.gemspec
32
+ has_rdoc: true
33
+ homepage: http://github.com/chenillen/uniquify
34
+ post_install_message:
35
+ rdoc_options:
36
+ - --line-numbers
37
+ - --inline-source
38
+ - --title
39
+ - Pinyin
40
+ - --main
41
+ - README.rdoc
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "1.2"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project: pinyin
59
+ rubygems_version: 1.2.0
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Convert Chinese Charaters to Pinyin letters.
63
+ test_files: []
64
+