chenillen-pinyin 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +29 -0
- data/Rakefile +14 -0
- data/dict/Mandarin.dat +25477 -0
- data/lib/pinyin.rb +75 -0
- data/pinyin.gemspec +31 -0
- metadata +64 -0
data/lib/pinyin.rb
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'singleton'
|
|
2
|
+
|
|
3
|
+
# Converts Chinese characters to their pinyin romanization using the
# dictionary file shipped with the gem (dict/Mandarin.dat).
#
#   py = Pinyin.new
#   py.to_pinyin(str)           # plain pinyin
#   py.to_permlink(str)         # pinyin joined with '-'
#   py.to_pinyin_abbr(str)      # first letter of every syllable
#   py.to_pinyin_abbr_else(str) # first syllable in full, initials afterwards
class Pinyin

  # Loads the dictionary into memory.
  #
  # Each dictionary line is split on single whitespace characters; the first
  # field is used as the lookup key and the second as the reading.
  # NOTE(review): keys are presumably upper-case hex code points and readings
  # upper-case pinyin with a trailing tone digit (that is how str_to_pinyin
  # consumes them) -- confirm against dict/Mandarin.dat.
  #
  # Raises the usual Errno::* errors if the dictionary file cannot be read.
  def initialize
    dict_path = File.expand_path('../dict/Mandarin.dat', File.dirname(__FILE__))
    @codes = {}
    # Stream the file line by line instead of slurping every line into an array.
    File.foreach(dict_path) do |line|
      code, reading = line.split(/\s/)
      # Blank or whitespace-led lines carry no usable key.
      next if code.nil? || code.empty?
      @codes[code] = reading
    end
  end

  # Builds a permalink-style slug: syllables joined by +separator+ ('-' by
  # default), whitespace turned into the separator, repeated separators
  # collapsed and a trailing separator removed.
  # Example of the slug shape: Interesting-Ruby-Tidbits-That-Dont-Need-Separate-Posts-17
  #
  # str       - text to convert.
  # separator - split symbol, e.g. to_permlink(str, '~').
  #
  # Returns the slug as a String.
  def to_permlink(str, separator='-')
    str_to_pinyin(str, separator)
  end

  # Builds a short abbreviation from the first letter of each character's
  # pinyin, e.g. "zgyh" stands for "中国银行".
  #
  # Returns the abbreviation as a String.
  def to_pinyin_abbr(str)
    str_to_pinyin(str, '', false, true)
  end

  # Name-style abbreviation: the first character keeps its full pinyin and
  # every following character contributes only its first letter,
  # e.g. "liudh" for "刘德华".
  #
  # Returns the abbreviation as a String.
  def to_pinyin_abbr_else(str)
    str_to_pinyin(str, '', true, nil)
  end

  # General-purpose conversion.
  #
  # str       - text to convert.
  # separator - string placed after each syllable ('' for none).
  # tone      - when truthy, keep the tone digits, e.g. "ni3hao3zhong1guo2".
  #
  # Returns the converted String.
  def to_pinyin(str, separator='', tone=false)
    str_to_pinyin(str, separator, false, false, tone)
  end


  # Private Methods Started...
  private

  # Looks up the dictionary reading for a hex code point key; nil when absent.
  def get_value(code)
    @codes[code]
  end

  # Shared implementation behind all public converters.
  #
  # str       - text to convert (nil is treated as an empty string).
  # separator - string appended after every converted syllable.
  # abbr_else - truthy: abbreviate every syllable except the first one.
  # abbr      - truthy (and abbr_else falsy): abbreviate every syllable.
  # tone      - truthy: keep tone digits in the output.
  #
  # Characters without a dictionary entry are kept only when they are ASCII
  # letters, digits, underscore or whitespace; anything else (full-width
  # punctuation and so on) is dropped.
  #
  # Returns the converted String.
  def str_to_pinyin(str, separator='', abbr_else=false, abbr=false, tone=false)
    res = []
    str.to_s.unpack('U*').each_with_index do |t, idx|
      val = get_value(sprintf('%x', t).upcase)
      # Was a pinyin reading found for this code point?
      if val
        val = val.gsub(/\d/, '') unless tone
        val = val[0..0] if (abbr && !abbr_else) || (abbr_else && idx != 0)
        res << val.downcase + separator
      else
        tmp = [t].pack('U*')
        # Keep the original character, dropping special symbols.
        res << tmp if tmp =~ /\A[_0-9a-zA-Z\s]*\z/
      end
    end

    joined = res.join('')
    return joined if separator == ''

    # Escape the whole separator so multi-character or regex-special
    # separators are matched literally; block replacements keep backslashes
    # in the separator from being read as back-references.
    sep = Regexp.escape(separator)
    joined.gsub(/\s+/) { separator }.
           gsub(/(?:#{sep})+/) { separator }.
           sub(/#{sep}\z/, '')
  end
end
|
|
75
|
+
|
data/pinyin.gemspec
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Gem specification for the pinyin gem (Chinese characters -> pinyin letters).
Gem::Specification.new do |s|
  s.name = %q{pinyin}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Allen Chan"]
  s.date = %q{2009-03-11}
  s.description = %q{Convert Chinese characters to Pinyin letters.}
  s.email = %q{chenillen@gmail.com}
  s.extra_rdoc_files = ["lib/pinyin.rb", "README.rdoc"]
  # NOTE(review): "Manifest" is listed here -- confirm the file is actually shipped.
  s.files = ["dict/Mandarin.dat", "lib/pinyin.rb", "Manifest", "Rakefile", "README.rdoc", "pinyin.gemspec"]
  # has_rdoc= is deprecated in newer RubyGems; only set it where the setter still exists.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  # NOTE(review): homepage names the "uniquify" repository -- confirm this is intended.
  s.homepage = %q{http://github.com/chenillen/uniquify}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Pinyin", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  # rubyforge_project= is deprecated in newer RubyGems; guard it the same way.
  s.rubyforge_project = %q{pinyin} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Convert Chinese characters to Pinyin letters.}

  # The former Gem::RubyGemsVersion check had two empty branches and referenced
  # a constant that current RubyGems no longer defines (Gem::VERSION replaces
  # it), so only the specification_version assignment is kept.
  s.specification_version = 2 if s.respond_to? :specification_version
end
|
metadata
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: chenillen-pinyin
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Allen Chan
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
|
|
12
|
+
date: 2009-03-11 00:00:00 -07:00
|
|
13
|
+
default_executable:
|
|
14
|
+
dependencies: []
|
|
15
|
+
|
|
16
|
+
description: Convert Chinese characters to Pinyin letters.
|
|
17
|
+
email: chenillen@gmail.com
|
|
18
|
+
executables: []
|
|
19
|
+
|
|
20
|
+
extensions: []
|
|
21
|
+
|
|
22
|
+
extra_rdoc_files:
|
|
23
|
+
- lib/pinyin.rb
|
|
24
|
+
- README.rdoc
|
|
25
|
+
files:
|
|
26
|
+
- dict/Mandarin.dat
|
|
27
|
+
- lib/pinyin.rb
|
|
28
|
+
- Manifest
|
|
29
|
+
- Rakefile
|
|
30
|
+
- README.rdoc
|
|
31
|
+
- pinyin.gemspec
|
|
32
|
+
has_rdoc: true
|
|
33
|
+
homepage: http://github.com/chenillen/uniquify
|
|
34
|
+
post_install_message:
|
|
35
|
+
rdoc_options:
|
|
36
|
+
- --line-numbers
|
|
37
|
+
- --inline-source
|
|
38
|
+
- --title
|
|
39
|
+
- Pinyin
|
|
40
|
+
- --main
|
|
41
|
+
- README.rdoc
|
|
42
|
+
require_paths:
|
|
43
|
+
- lib
|
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
45
|
+
requirements:
|
|
46
|
+
- - ">="
|
|
47
|
+
- !ruby/object:Gem::Version
|
|
48
|
+
version: "0"
|
|
49
|
+
version:
|
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ">="
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: "1.2"
|
|
55
|
+
version:
|
|
56
|
+
requirements: []
|
|
57
|
+
|
|
58
|
+
rubyforge_project: pinyin
|
|
59
|
+
rubygems_version: 1.2.0
|
|
60
|
+
signing_key:
|
|
61
|
+
specification_version: 2
|
|
62
|
+
summary: Convert Chinese characters to Pinyin letters.
|
|
63
|
+
test_files: []
|
|
64
|
+
|