ruby-pinyin 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.markdown +2 -0
- data/lib/ruby-pinyin.rb +0 -5
- data/lib/ruby-pinyin/backend/mmseg.rb +11 -1
- data/lib/ruby-pinyin/backend/simple.rb +2 -0
- data/lib/ruby-pinyin/util.rb +9 -5
- data/lib/ruby-pinyin/version.rb +1 -1
- metadata +14 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4a8ba70c1bdc1dfd7a0c38b0c4323a81390b1a6
|
4
|
+
data.tar.gz: 74e4bb55b339297ec8be7d69c775471e80e5640a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 575bbb06b72b13d0b660a729bbc8ea2c84ccecda62bdfef7ab59b1e3a7c63fd0a03cb9ac57a1b8595aa26da25f8d5f5fcf12aa821336cb14f4a4006a77834fb0
|
7
|
+
data.tar.gz: deb6f752765b1dcb3543c94ca7cc3a9b478356fbf08151157e82986c9bd89dd75165f29b9bcefc9f16d3f5dbdb0641c423eac469d68e8442d49d4c976d2dac14
|
data/README.markdown
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# ruby-pinyin: 支持多音字的汉字转拼音工具
|
2
|
+
[![Build Status](https://travis-ci.org/janx/ruby-pinyin.svg?branch=master)](https://travis-ci.org/janx/ruby-pinyin)
|
2
3
|
|
3
4
|
ruby-pinyin: zhī chí duō yīn zì de hàn zì zhuǎn pīn yīn gōng jù
|
4
5
|
|
@@ -89,3 +90,4 @@ ruby-pinyin中的拼音数据由作者整理自互联网,你可以在ruby-piny
|
|
89
90
|
|
90
91
|
* [Martin91](https://github.com/Martin91)
|
91
92
|
* [jaxi](https://github.com/jaxi)
|
93
|
+
* [jiangxin](https://github.com/jiangxin)
|
data/lib/ruby-pinyin/backend/mmseg.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
require 'rmmseg'
|
2
4
|
|
3
5
|
module PinYin
|
@@ -72,7 +74,15 @@ module PinYin
|
|
72
74
|
return pinyin.split(' ') if pinyin
|
73
75
|
|
74
76
|
#如果是个英文单词,直接返回,否则返回与词等长的nil数组
|
75
|
-
word =~ /^[_0-9a-zA-Z\s]*$/
|
77
|
+
if word =~ /^[_0-9a-zA-Z\s]*$/
|
78
|
+
word
|
79
|
+
elsif word.respond_to? :force_encoding
|
80
|
+
# word has been encoded in UTF-8 already
|
81
|
+
[nil] * word.size
|
82
|
+
else
|
83
|
+
# For ruby 1.8, there is no native utf-8 support
|
84
|
+
[nil] * word.unpack('U*').size
|
85
|
+
end
|
76
86
|
end
|
77
87
|
|
78
88
|
def apply(base, patch)
|
data/lib/ruby-pinyin/util.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
1
3
|
module PinYin
|
2
4
|
module Util
|
3
5
|
extend self
|
4
6
|
|
5
7
|
ASCIIMapping = {
|
8
|
+
'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
|
6
9
|
'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['v', 1],
|
7
10
|
'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['v', 2],
|
8
11
|
'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['v', 3],
|
@@ -10,14 +13,15 @@ module PinYin
|
|
10
13
|
}
|
11
14
|
|
12
15
|
def to_ascii(reading, with_tone=true)
|
13
|
-
reading = reading.dup
|
14
|
-
|
15
16
|
ASCIIMapping.each do |char, (ascii, tone)|
|
16
|
-
if reading.
|
17
|
-
|
17
|
+
if reading.include? char
|
18
|
+
if with_tone
|
19
|
+
return reading.sub(char, ascii).concat(tone.to_s)
|
20
|
+
else
|
21
|
+
return reading.sub(char, ascii)
|
22
|
+
end
|
18
23
|
end
|
19
24
|
end
|
20
|
-
|
21
25
|
reading
|
22
26
|
end
|
23
27
|
|
data/lib/ruby-pinyin/version.rb
CHANGED
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-pinyin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Xie
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-04-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rmmseg-cpp
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.2.9
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.2.9
|
27
27
|
description: Pinyin is a romanization system (phonemic notation) of Chinese characters,
|
@@ -32,23 +32,23 @@ executables: []
|
|
32
32
|
extensions: []
|
33
33
|
extra_rdoc_files: []
|
34
34
|
files:
|
35
|
+
- LICENSE
|
36
|
+
- README.markdown
|
37
|
+
- lib/ruby-pinyin.rb
|
35
38
|
- lib/ruby-pinyin/backend.rb
|
36
39
|
- lib/ruby-pinyin/backend/mmseg.rb
|
37
40
|
- lib/ruby-pinyin/backend/simple.rb
|
38
41
|
- lib/ruby-pinyin/data/Mandarin.dat
|
42
|
+
- lib/ruby-pinyin/data/Punctuations.dat
|
39
43
|
- lib/ruby-pinyin/data/words.dat
|
40
44
|
- lib/ruby-pinyin/data/words.dic
|
41
|
-
- lib/ruby-pinyin/data/Punctuations.dat
|
42
|
-
- lib/ruby-pinyin/value.rb
|
43
45
|
- lib/ruby-pinyin/punctuation.rb
|
44
|
-
- lib/ruby-pinyin/version.rb
|
45
46
|
- lib/ruby-pinyin/util.rb
|
46
|
-
- lib/ruby-pinyin.rb
|
47
|
-
-
|
48
|
-
- README.markdown
|
47
|
+
- lib/ruby-pinyin/value.rb
|
48
|
+
- lib/ruby-pinyin/version.rb
|
49
49
|
homepage: https://github.com/janx/ruby-pinyin
|
50
50
|
licenses:
|
51
|
-
-
|
51
|
+
- BSD
|
52
52
|
metadata: {}
|
53
53
|
post_install_message:
|
54
54
|
rdoc_options: []
|
@@ -56,19 +56,18 @@ require_paths:
|
|
56
56
|
- lib
|
57
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
63
|
requirements:
|
64
|
-
- -
|
64
|
+
- - ">="
|
65
65
|
- !ruby/object:Gem::Version
|
66
66
|
version: '0'
|
67
67
|
requirements: []
|
68
68
|
rubyforge_project:
|
69
|
-
rubygems_version: 2.
|
69
|
+
rubygems_version: 2.2.0
|
70
70
|
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: Convert Chinese characters into pinyin.
|
73
73
|
test_files: []
|
74
|
-
has_rdoc:
|