chenillen-pinyin 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +29 -0
- data/Rakefile +14 -0
- data/dict/Mandarin.dat +25477 -0
- data/lib/pinyin.rb +75 -0
- data/pinyin.gemspec +31 -0
- metadata +64 -0
data/lib/pinyin.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
# Converts Chinese characters in a UTF-8 string to pinyin using a lookup
# table loaded from dict/Mandarin.dat.
#
#   py = Pinyin.new
#   py.to_pinyin(str)            # plain pinyin, no separator
#   py.to_pinyin(str, '', true)  # keep tone digits, e.g. ni3hao3zhong1guo2
#   py.to_permlink(str)          # '-' separated, suitable for URLs
class Pinyin

  # Loads the dictionary into memory.
  #
  # Each line of the dictionary is split on whitespace: the first field is
  # used as the lookup key (the upper-case hexadecimal code point of a
  # character, see #str_to_pinyin) and the second field as its pinyin value.
  # Raises the usual File errors when the dictionary cannot be read.
  def initialize
    dict = File.join(File.dirname(File.expand_path(__FILE__)), '../dict/Mandarin.dat')
    @codes = {}
    File.foreach(dict) do |line|
      code, pinyin = line.split(/\s/)
      # Blank lines carry no key; skipping them keeps nil out of the table.
      @codes[code] = pinyin if code
    end
  end

  # Builds a permalink-style string: converted characters and words are
  # joined by +separator+ ('-' by default).
  # Example output: Interesting-Ruby-Tidbits-That-Dont-Need-Separate-Posts-17
  # A different separator can be passed: to_permlink(str, '~')
  def to_permlink(str, separator = '-')
    str_to_pinyin(str, separator)
  end

  # Builds a short form from the first letter of each character's pinyin.
  # Like: zgyh stands for "中国银行"
  def to_pinyin_abbr(str)
    str_to_pinyin(str, '', false, true)
  end

  # Name-style abbreviation: full pinyin for the first character, first
  # letter only for the following ones. e.g. liudh for "刘德华"
  def to_pinyin_abbr_else(str)
    str_to_pinyin(str, '', true, nil)
  end

  # General conversion. +separator+ is inserted between syllables; when
  # +tone+ is true the tone digits from the dictionary are kept,
  # e.g. ni3hao3zhong1guo2.
  def to_pinyin(str, separator = '', tone = false)
    str_to_pinyin(str, separator, false, false, tone)
  end


  # Private Methods Started...
  private

  # Returns the dictionary entry for an upper-case hex code point string,
  # or nil when the character is not in the dictionary.
  def get_value(code)
    @codes[code]
  end

  # Core conversion shared by all public methods.
  #
  # str       - UTF-8 string to convert.
  # separator - string appended after every converted syllable; runs of
  #             whitespace in the result are also replaced by it.
  # abbr_else - keep the character at index 0 in full, abbreviate the rest.
  # abbr      - abbreviate every converted character to its first letter.
  # tone      - keep the digits found in the dictionary value.
  #
  # Characters without a dictionary entry are kept only when they are ASCII
  # letters, digits, underscore or whitespace; everything else (full-width
  # punctuation and the like) is dropped.
  def str_to_pinyin(str, separator = '', abbr_else = false, abbr = false, tone = false)
    res = []
    str.unpack('U*').each_with_index do |codepoint, idx|
      val = get_value(sprintf('%X', codepoint))
      if val
        val = val.gsub(/\d/, '') unless tone
        val = val[0..0] if (abbr && !abbr_else) || (abbr_else && idx != 0)
        res << val.downcase + separator
      else
        char = [codepoint].pack('U*')
        res << char if char =~ /\A[_0-9a-zA-Z\s]*\z/
      end
    end

    joined = res.join('')
    return joined if separator == ''

    # Escape the separator so that any string (letters, regexp
    # metacharacters, several characters) is matched literally, and use the
    # block form of gsub so it is also inserted literally.
    sep = Regexp.escape(separator)
    joined.gsub(/\s+/) { separator }.
           gsub(/(?:#{sep})+/) { separator }.
           gsub(/(?:#{sep})\z/, '')
  end
end
|
75
|
+
|
data/pinyin.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for the pinyin gem (version 0.1.0).
Gem::Specification.new do |s|
  s.name = %q{pinyin}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Allen Chan"]
  s.date = %q{2009-03-11}
  s.description = %q{Convert Chinese Characters to Pinyin letters.}
  s.email = %q{chenillen@gmail.com}
  s.extra_rdoc_files = ["lib/pinyin.rb", "README.rdoc"]
  # NOTE(review): "Manifest" is listed here; confirm the file is actually shipped with the gem.
  s.files = ["dict/Mandarin.dat", "lib/pinyin.rb", "Manifest", "Rakefile", "README.rdoc", "pinyin.gemspec"]
  # has_rdoc= is a legacy setter; only call it where RubyGems still provides it.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  # NOTE(review): the homepage points at a "uniquify" repository -- confirm this is intended for the pinyin gem.
  s.homepage = %q{http://github.com/chenillen/uniquify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Pinyin", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # rubyforge_project= is a legacy setter; only call it where RubyGems still provides it.
  s.rubyforge_project = %q{pinyin} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Convert Chinese Characters to Pinyin letters.}

  # The generated version check against Gem::RubyGemsVersion only guarded
  # empty branches (this gem declares no dependencies), so it is dropped;
  # referencing that constant fails on RubyGems releases that no longer
  # define it.
  s.specification_version = 2 if s.respond_to? :specification_version=
end
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: chenillen-pinyin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Allen Chan
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-03-11 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Convert Chinese Characters to Pinyin letters.
|
17
|
+
email: chenillen@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- lib/pinyin.rb
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- dict/Mandarin.dat
|
27
|
+
- lib/pinyin.rb
|
28
|
+
- Manifest
|
29
|
+
- Rakefile
|
30
|
+
- README.rdoc
|
31
|
+
- pinyin.gemspec
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/chenillen/uniquify
|
34
|
+
post_install_message:
|
35
|
+
rdoc_options:
|
36
|
+
- --line-numbers
|
37
|
+
- --inline-source
|
38
|
+
- --title
|
39
|
+
- Pinyin
|
40
|
+
- --main
|
41
|
+
- README.rdoc
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "1.2"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project: pinyin
|
59
|
+
rubygems_version: 1.2.0
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Convert Chinese Characters to Pinyin letters.
|
63
|
+
test_files: []
|
64
|
+
|