chinese_pinyin 0.7.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/chinese_pinyin.yml +26 -0
- data/CHANGELOG.md +18 -0
- data/README.md +59 -22
- data/Rakefile +1 -3
- data/bin/ch2py +54 -0
- data/chinese_pinyin.gemspec +3 -0
- data/data/pinyin-utf8.dat +1 -0
- data/lib/chinese_pinyin.rb +21 -16
- data/lib/chinese_pinyin/version.rb +2 -1
- data/test/chinese_pinyin_test.rb +11 -0
- metadata +13 -11
- data/.ruby-gemset +0 -1
- data/.ruby-version +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 23e4d70b5131cb483c5a9a6a542b1310ad4cad4535e8d295ff58ba9338f2b930
|
4
|
+
data.tar.gz: 1224699549ee0d9d644e1f96d084a3c17c7754f9236d2a21311a152fafd73b52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d913acf6b8b162685957081afc5ccc6a47337e1a5481bb347a27ae8748c16f87f56c367c0a064fe1b09246d288c7680064bb1f4673dcebce669f64e3c9b458d
|
7
|
+
data.tar.gz: 5974408e11538bc96644ef3b8f7b41c9b5a381719bd495347217eb5165a8771dc34d7d94c810bb0543b66aaa6a79c2d64aa268845187dad167244836cf032950
|
@@ -0,0 +1,26 @@
|
|
1
|
+
name: Chinese Pinyin
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
branches:
|
6
|
+
- 'master'
|
7
|
+
|
8
|
+
push:
|
9
|
+
branches:
|
10
|
+
- 'ruby3-support'
|
11
|
+
- 'master'
|
12
|
+
|
13
|
+
jobs:
|
14
|
+
build:
|
15
|
+
runs-on: ubuntu-latest
|
16
|
+
strategy:
|
17
|
+
matrix:
|
18
|
+
ruby: ['2.1', '2.5', '2.6', '2.7', '3.0']
|
19
|
+
steps:
|
20
|
+
- uses: actions/checkout@v1
|
21
|
+
- uses: ruby/setup-ruby@v1
|
22
|
+
with:
|
23
|
+
ruby-version: ${{ matrix.ruby }}
|
24
|
+
- name: Build and test with Rake
|
25
|
+
run: |
|
26
|
+
rake test
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,42 +1,79 @@
|
|
1
|
-
ChinesePinyin
|
2
|
-
|
1
|
+
# ChinesePinyin
|
2
|
+
|
3
|
+
[![Chinese Pinyin](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml/badge.svg)](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml)
|
3
4
|
|
4
5
|
Translate Chinese hanzi to pinyin.
|
5
6
|
|
6
7
|
The dictionary is borrowed from <http://github.com/fayland/perl-lingua-han/tree/master/Lingua-Han-PinYin/>
|
7
8
|
|
8
|
-
Install
|
9
|
-
|
9
|
+
## Install
|
10
|
+
|
11
|
+
```
|
12
|
+
gem install chinese_pinyin
|
13
|
+
```
|
14
|
+
|
15
|
+
Or add it to your Gemfile:
|
16
|
+
|
17
|
+
```
|
18
|
+
gem 'chinese_pinyin'
|
19
|
+
```
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
By CLI
|
24
|
+
|
25
|
+
|
26
|
+
```
|
27
|
+
$ ch2py -h
|
28
|
+
Usage: ch2py [opts]
|
29
|
+
-c, --camelcase Camelcase of each word
|
30
|
+
-i, --stdin Read from standard input
|
31
|
+
-t, --tone Show tone at end of word
|
32
|
+
-m, --tonemarks Show tone at top of letter, this
|
33
|
+
would cover -t option
|
34
|
+
-s, --splitter <splitter> Splitter of each word, use a space
|
35
|
+
by default
|
36
|
+
-v, --version Show version
|
37
|
+
-h, --help Show this help
|
10
38
|
|
11
|
-
|
39
|
+
$ ch2py 中文
|
40
|
+
zhong wen
|
41
|
+
```
|
12
42
|
|
13
|
-
|
14
|
-
-----
|
43
|
+
By code
|
15
44
|
|
16
|
-
|
17
|
-
|
45
|
+
```
|
46
|
+
require 'chinese_pinyin'
|
18
47
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
48
|
+
Pinyin.t('中国') => "zhong guo"
|
49
|
+
Pinyin.t('你好world') => "ni hao world"
|
50
|
+
Pinyin.t('中国', splitter: '-') => "zhong-guo"
|
51
|
+
Pinyin.t('中国', splitter: '') => "zhongguo"
|
52
|
+
Pinyin.t('中国', tone: true) => "zhong1 guo2"
|
53
|
+
Pinyin.t('中国', tonemarks: true) => "zhōng guó"
|
54
|
+
Pinyin.t('北京') { |letters| letters[0].upcase } => 'BJ'
|
55
|
+
Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } => 'B'
|
56
|
+
```
|
25
57
|
|
26
|
-
Polyphone Issue
|
27
|
-
---------------
|
58
|
+
## Polyphone Issue
|
28
59
|
|
29
60
|
Use a Words.dat file to override the default behavior.
|
30
61
|
|
31
62
|
By default:
|
32
63
|
|
33
|
-
|
64
|
+
```
|
65
|
+
Pinyin.t('广州') => "yan zhou"
|
66
|
+
```
|
34
67
|
|
35
68
|
Add a Words.dat file:
|
36
69
|
|
37
|
-
|
70
|
+
```
|
71
|
+
广州|guang3 zhou1
|
72
|
+
```
|
38
73
|
|
39
|
-
set ENV['WORDS_FILE'] for Words.dat
|
74
|
+
Set `ENV['WORDS_FILE']` to the path of your Words.dat file:
|
40
75
|
|
41
|
-
|
42
|
-
|
76
|
+
```
|
77
|
+
ENV['WORDS_FILE'] = "Words.dat path"
|
78
|
+
Pinyin.t('广州') => "guang zhou"
|
79
|
+
```
|
data/Rakefile
CHANGED
data/bin/ch2py
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require File.expand_path('../../lib/chinese_pinyin', __FILE__)
|
5
|
+
|
6
|
+
opts = {}
|
7
|
+
|
8
|
+
parser = OptParse.new do |opt|
|
9
|
+
opt.banner = "Usage: ch2py [opts]"
|
10
|
+
|
11
|
+
opt.on('-c', '--camelcase', 'Camelcase of each word') do
|
12
|
+
opts[:camelcase] = true
|
13
|
+
end
|
14
|
+
|
15
|
+
opt.on('-i', '--stdin', 'Read from stdard input') do
|
16
|
+
opts[:stdin] = true
|
17
|
+
end
|
18
|
+
|
19
|
+
opt.on('-t', '--tone', 'Show tone at end of word') do
|
20
|
+
opts[:tone] = true
|
21
|
+
end
|
22
|
+
|
23
|
+
opt.on('-m', '--tonemarks', 'Show tone at top of letter, this would cover -t option') do
|
24
|
+
opts[:tonemarks] = true
|
25
|
+
end
|
26
|
+
|
27
|
+
opt.on('-s', '--splitter <splitter>', 'Splitter of each word, use a space by default') do |sp|
|
28
|
+
opts[:splitter] = sp
|
29
|
+
end
|
30
|
+
|
31
|
+
opt.on('-v', '--version', 'Show version') do
|
32
|
+
puts "ch2py: Version #{ChinesePinyin::VERSION}"
|
33
|
+
exit
|
34
|
+
end
|
35
|
+
|
36
|
+
opt.on('-h', '--help', 'Show this help') do
|
37
|
+
puts parser
|
38
|
+
exit
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
parser.parse!
|
43
|
+
|
44
|
+
args = parser.default_argv
|
45
|
+
|
46
|
+
if opts.fetch(:stdin, false)
|
47
|
+
chars = STDIN.readline
|
48
|
+
elsif args.empty?
|
49
|
+
raise OptParse::MissingArgument
|
50
|
+
else
|
51
|
+
chars = args.join('')
|
52
|
+
end
|
53
|
+
|
54
|
+
STDOUT.puts Pinyin.t(chars, opts)
|
data/chinese_pinyin.gemspec
CHANGED
@@ -6,6 +6,7 @@ require "chinese_pinyin/version"
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "chinese_pinyin"
|
8
8
|
s.version = ChinesePinyin::VERSION
|
9
|
+
s.license = "MIT"
|
9
10
|
s.platform = Gem::Platform::RUBY
|
10
11
|
s.authors = ["Richard Huang", "Hong, Liang"]
|
11
12
|
s.email = ["flyerhzm@gmail.com", "hongliang@bamajia.com"]
|
@@ -18,4 +19,6 @@ Gem::Specification.new do |s|
|
|
18
19
|
s.files = `git ls-files`.split("\n")
|
19
20
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
21
|
s.require_paths = ["lib"]
|
22
|
+
|
23
|
+
s.executables = ["ch2py"]
|
21
24
|
end
|
data/data/pinyin-utf8.dat
CHANGED
@@ -25357,3 +25357,4 @@
|
|
25357
25357
|
𧵳 she2
|
25358
25358
|
𧼮 tang1
|
25359
25359
|
𩜇 juan3
|
25360
|
+
嗯 en1
|
data/lib/chinese_pinyin.rb
CHANGED
@@ -6,27 +6,28 @@ require 'chinese_pinyin/version'
|
|
6
6
|
class Pinyin
|
7
7
|
|
8
8
|
TONE_MARK = {
|
9
|
-
a: %w(
|
10
|
-
o: %w(
|
11
|
-
e: %w(
|
12
|
-
i: %w(
|
13
|
-
u: %w(
|
14
|
-
v: %w(
|
9
|
+
a: %w(ā á ǎ à a),
|
10
|
+
o: %w(ō ó ǒ ò o),
|
11
|
+
e: %w(ē é ě è e),
|
12
|
+
i: %w(ī í ǐ ì i),
|
13
|
+
u: %w(ū ú ǔ ù u),
|
14
|
+
v: %w(ǖ ǘ ǚ ǜ ü)
|
15
15
|
}
|
16
16
|
|
17
17
|
class <<self
|
18
18
|
attr_accessor :table
|
19
|
-
attr_accessor :
|
19
|
+
attr_accessor :ruby1
|
20
20
|
|
21
21
|
def init_table
|
22
22
|
return if @table
|
23
23
|
|
24
24
|
# Ruby 2.0以后默认即为UTF-8编码,使用新的码表以提升效率
|
25
|
-
@
|
26
|
-
datfile = @
|
25
|
+
@ruby1 = !!(RUBY_VERSION =~ /^1/)
|
26
|
+
datfile = @ruby1 ? 'Mandarin.dat' : 'pinyin-utf8.dat'
|
27
27
|
@table = {}
|
28
28
|
|
29
|
-
File.
|
29
|
+
file = File.join(File.dirname(__FILE__), "../data/#{datfile}")
|
30
|
+
File.open(file, "r:UTF-8",) do |file|
|
30
31
|
while line = file.gets
|
31
32
|
key, value = line.split(' ', 2)
|
32
33
|
@table[key] = value
|
@@ -50,6 +51,7 @@ class Pinyin
|
|
50
51
|
end
|
51
52
|
|
52
53
|
def translate(chars, options={})
|
54
|
+
chars = chars.encode("UTF-8")
|
53
55
|
splitter = options.fetch(:splitter, ' ')
|
54
56
|
tonemarks = options.fetch(:tonemarks, false)
|
55
57
|
tone = options.fetch(:tone, false || tonemarks)
|
@@ -71,7 +73,7 @@ class Pinyin
|
|
71
73
|
is_english = false
|
72
74
|
|
73
75
|
chars.scan(/./).each do |char|
|
74
|
-
key = @
|
76
|
+
key = @ruby1 ? sprintf("%X", char.unpack("U").first) : char
|
75
77
|
|
76
78
|
if @table[key]
|
77
79
|
results << splitter if is_english
|
@@ -79,19 +81,22 @@ class Pinyin
|
|
79
81
|
is_english = false
|
80
82
|
pinyin = @table[key].chomp.split(' ', 2)[0]
|
81
83
|
|
82
|
-
pinyin.downcase!
|
84
|
+
pinyin.downcase! if @ruby1
|
83
85
|
pinyin.chop! unless tone
|
84
86
|
pinyin.capitalize! if camel
|
85
87
|
if tonemarks
|
86
88
|
tone_index = pinyin[-1].to_i
|
87
89
|
pinyin = pinyin[0...-1]
|
88
90
|
%w(a o e i u v).each { |v|
|
89
|
-
break if pinyin.tr! v, TONE_MARK[v.to_sym][tone_index]
|
91
|
+
break if pinyin.tr! v, TONE_MARK[v.to_sym][tone_index - 1]
|
90
92
|
}
|
91
93
|
end
|
92
|
-
|
93
|
-
|
94
|
-
|
94
|
+
if block_given?
|
95
|
+
results << (yield pinyin, results.size)
|
96
|
+
else
|
97
|
+
results << pinyin
|
98
|
+
results << splitter
|
99
|
+
end
|
95
100
|
else
|
96
101
|
if char =~ /[a-zA-Z0-9]/
|
97
102
|
results << char
|
data/test/chinese_pinyin_test.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
|
2
3
|
require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
|
3
4
|
|
4
5
|
ENV["WORDS_FILE"] = File.dirname(__FILE__) + '/Words.dat'
|
@@ -10,6 +11,11 @@ class PinyinTest < Test::Unit::TestCase
|
|
10
11
|
assert_equal("shang hai very good o ye", Pinyin.t('上海very good哦耶'))
|
11
12
|
end
|
12
13
|
|
14
|
+
def test_t_with_frozen_string
|
15
|
+
assert_equal("zhong guo", Pinyin.t('中国'.freeze))
|
16
|
+
assert_equal("shen zhen", Pinyin.t('深圳'.freeze))
|
17
|
+
end
|
18
|
+
|
13
19
|
def test_t_with_splitter
|
14
20
|
assert_equal("zhong-guo", Pinyin.t('中国', splitter: '-'))
|
15
21
|
assert_equal("huangzhimin", Pinyin.t('黄志敏', splitter: ''))
|
@@ -43,4 +49,9 @@ class PinyinTest < Test::Unit::TestCase
|
|
43
49
|
assert_equal('zhōng guó', Pinyin.t('中国', tonemarks: true))
|
44
50
|
assert_equal('běi jīng', Pinyin.t('北京', tonemarks: true))
|
45
51
|
end
|
52
|
+
|
53
|
+
def test_t_with_custom
|
54
|
+
assert_equal('BJ', Pinyin.t('北京') { |letters| letters[0].upcase } )
|
55
|
+
assert_equal('B', Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } )
|
56
|
+
end
|
46
57
|
end
|
metadata
CHANGED
@@ -1,31 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chinese_pinyin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Huang
|
8
8
|
- Hong, Liang
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-04-18 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: translate chinese hanzi to pinyin.
|
15
15
|
email:
|
16
16
|
- flyerhzm@gmail.com
|
17
17
|
- hongliang@bamajia.com
|
18
|
-
executables:
|
18
|
+
executables:
|
19
|
+
- ch2py
|
19
20
|
extensions: []
|
20
21
|
extra_rdoc_files: []
|
21
22
|
files:
|
23
|
+
- ".github/workflows/chinese_pinyin.yml"
|
22
24
|
- ".gitignore"
|
23
|
-
-
|
24
|
-
- ".ruby-version"
|
25
|
+
- CHANGELOG.md
|
25
26
|
- Gemfile
|
26
27
|
- MIT-LICENSE
|
27
28
|
- README.md
|
28
29
|
- Rakefile
|
30
|
+
- bin/ch2py
|
29
31
|
- chinese_pinyin.gemspec
|
30
32
|
- data/Mandarin.dat
|
31
33
|
- data/pinyin-utf8.dat
|
@@ -35,9 +37,10 @@ files:
|
|
35
37
|
- test/chinese_pinyin_test.rb
|
36
38
|
- test/test_helper.rb
|
37
39
|
homepage: http://github.com/flyerhzm/chinese_pinyin
|
38
|
-
licenses:
|
40
|
+
licenses:
|
41
|
+
- MIT
|
39
42
|
metadata: {}
|
40
|
-
post_install_message:
|
43
|
+
post_install_message:
|
41
44
|
rdoc_options: []
|
42
45
|
require_paths:
|
43
46
|
- lib
|
@@ -52,9 +55,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
55
|
- !ruby/object:Gem::Version
|
53
56
|
version: 1.3.6
|
54
57
|
requirements: []
|
55
|
-
|
56
|
-
|
57
|
-
signing_key:
|
58
|
+
rubygems_version: 3.1.4
|
59
|
+
signing_key:
|
58
60
|
specification_version: 4
|
59
61
|
summary: translate chinese hanzi to pinyin.
|
60
62
|
test_files:
|
data/.ruby-gemset
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
chinese_pinyin
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
ruby-2.2.0
|