chinese_pinyin 0.7.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 536820144ef1284e973430e684d7511a5f4a21b5
4
- data.tar.gz: 235adb7c959a9c0a01e3d2234ac1abf75fefc79a
2
+ SHA256:
3
+ metadata.gz: 23e4d70b5131cb483c5a9a6a542b1310ad4cad4535e8d295ff58ba9338f2b930
4
+ data.tar.gz: 1224699549ee0d9d644e1f96d084a3c17c7754f9236d2a21311a152fafd73b52
5
5
  SHA512:
6
- metadata.gz: fcf06b8a4f831aac76d9ecd772b972f05b855e70fab14fca2d40aa5f6504fdd4045a56bc5a84dc22e1bb0cf857d72b6e6f0ce8133f37d95dae78f2f05e360330
7
- data.tar.gz: 526585ebbd8b4ecc987f31d5c591cbc8352708df9af9601f1a01c8ae8bb22f06da2da73e6017f2fbbd6f8b71e747009e16f2a6c88b0c0d215fe61c4222268b95
6
+ metadata.gz: 4d913acf6b8b162685957081afc5ccc6a47337e1a5481bb347a27ae8748c16f87f56c367c0a064fe1b09246d288c7680064bb1f4673dcebce669f64e3c9b458d
7
+ data.tar.gz: 5974408e11538bc96644ef3b8f7b41c9b5a381719bd495347217eb5165a8771dc34d7d94c810bb0543b66aaa6a79c2d64aa268845187dad167244836cf032950
@@ -0,0 +1,26 @@
1
+ name: Chinese Pinyin
2
+
3
+ on:
4
+ pull_request:
5
+ branches:
6
+ - 'master'
7
+
8
+ push:
9
+ branches:
10
+ - 'ruby3-support'
11
+ - 'master'
12
+
13
+ jobs:
14
+ build:
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ matrix:
18
+ ruby: ['2.1', '2.5', '2.6', '2.7', '3.0']
19
+ steps:
20
+ - uses: actions/checkout@v1
21
+ - uses: ruby/setup-ruby@v1
22
+ with:
23
+ ruby-version: ${{ matrix.ruby }}
24
+ - name: Build and test with Rake
25
+ run: |
26
+ rake test
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Next Release
2
+
3
+ ## 1.1.0 (04/19/2021)
4
+
5
+ * support ruby 3
6
+
7
+ ## 1.0.2 (08/19/2019)
8
+
9
+ * add 嗯 to pinyin-utf8.dat
10
+
11
+ ## 1.0.1
12
+
13
+ * force utf8
14
+
15
+ ## 1.0.0 (06/23/2015)
16
+
17
+ * add executable cmd ch2py
18
+ * fix tone marks
data/README.md CHANGED
@@ -1,42 +1,79 @@
1
- ChinesePinyin
2
- =============
1
+ # ChinesePinyin
2
+
3
+ [![Chinese Pinyin](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml/badge.svg)](https://github.com/flyerhzm/chinese_pinyin/actions/workflows/chinese_pinyin.yml)
3
4
 
4
5
  Translate chinese hanzi to pinyin.
5
6
 
6
7
  The dict is borrowed from <http://github.com/fayland/perl-lingua-han/tree/master/Lingua-Han-PinYin/>
7
8
 
8
- Install
9
- -------
9
+ ## Install
10
+
11
+ ```
12
+ gem install chinese_pinyin
13
+ ```
14
+
15
+ or add it to your Gemfile.
16
+
17
+ ```
18
+ gem 'chinese_pinyin'
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ By CLI
24
+
25
+
26
+ ```
27
+ $ ch2py -h
28
+ Usage: ch2py [opts]
29
+ -c, --camelcase Camelcase of each word
30
+ -i, --stdin Read from standard input
31
+ -t, --tone Show tone at end of word
32
+ -m, --tonemarks Show tone at top of letter, this
33
+ would cover -t option
34
+ -s, --splitter <splitter> Splitter of each word, use a space
35
+ by default
36
+ -v, --version Show version
37
+ -h, --help Show this help
10
38
 
11
- sudo gem install chinese_pinyin
39
+ $ ch2py 中文
40
+ zhong wen
41
+ ```
12
42
 
13
- Usage
14
- -----
43
+ By code
15
44
 
16
- require 'rubygems'
17
- require 'chinese_pinyin'
45
+ ```
46
+ require 'chinese_pinyin'
18
47
 
19
- Pinyin.t('中国') => "zhong guo"
20
- Pinyin.t('你好world') => "ni hao world"
21
- Pinyin.t('中国', splitter: '-') => "zhong-guo"
22
- Pinyin.t('中国', splitter: '') => "zhongguo"
23
- Pinyin.t('中国', tone: true) => "zhong1 guo2"
24
- Pinyin.t('中国', tonemarks: true) => "zhōng guó"
48
+ Pinyin.t('中国') => "zhong guo"
49
+ Pinyin.t('你好world') => "ni hao world"
50
+ Pinyin.t('中国', splitter: '-') => "zhong-guo"
51
+ Pinyin.t('中国', splitter: '') => "zhongguo"
52
+ Pinyin.t('中国', tone: true) => "zhong1 guo2"
53
+ Pinyin.t('中国', tonemarks: true) => "zhōng guó"
54
+ Pinyin.t('北京') { |letters| letters[0].upcase } => 'BJ'
55
+ Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } => 'B'
56
+ ```
25
57
 
26
- Polyphone Issue
27
- ---------------
58
+ ## Polyphone Issue
28
59
 
29
60
  Use Words.dat to override the default behavior.
30
61
 
31
62
  by default
32
63
 
33
- Pinyin.t('广州') => "yan zhou"
64
+ ```
65
+ Pinyin.t('广州') => "yan zhou"
66
+ ```
34
67
 
35
68
  add file Words.dat
36
69
 
37
- 广州|guang3 zhou1
70
+ ```
71
+ 广州|guang3 zhou1
72
+ ```
38
73
 
39
- set ENV['WORDS_FILE'] for Words.dat
74
+ set `ENV['WORDS_FILE']` for Words.dat
40
75
 
41
- ENV['WORDS_FILE'] = "Words.dat path"
42
- Pinyin.t('广州') => "guang zhou"
76
+ ```
77
+ ENV['WORDS_FILE'] = "Words.dat path"
78
+ Pinyin.t('广州') => "guang zhou"
79
+ ```
data/Rakefile CHANGED
@@ -1,8 +1,6 @@
1
1
  $LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
2
- require "bundler"
3
- Bundler.setup
4
2
 
5
- require "rake"
3
+ require 'bundler/setup'
6
4
  require 'rake/testtask'
7
5
  require "rdoc/task"
8
6
 
data/bin/ch2py ADDED
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require File.expand_path('../../lib/chinese_pinyin', __FILE__)
5
+
6
+ opts = {}
7
+
8
+ parser = OptParse.new do |opt|
9
+ opt.banner = "Usage: ch2py [opts]"
10
+
11
+ opt.on('-c', '--camelcase', 'Camelcase of each word') do
12
+ opts[:camelcase] = true
13
+ end
14
+
15
+ opt.on('-i', '--stdin', 'Read from stdard input') do
16
+ opts[:stdin] = true
17
+ end
18
+
19
+ opt.on('-t', '--tone', 'Show tone at end of word') do
20
+ opts[:tone] = true
21
+ end
22
+
23
+ opt.on('-m', '--tonemarks', 'Show tone at top of letter, this would cover -t option') do
24
+ opts[:tonemarks] = true
25
+ end
26
+
27
+ opt.on('-s', '--splitter <splitter>', 'Splitter of each word, use a space by default') do |sp|
28
+ opts[:splitter] = sp
29
+ end
30
+
31
+ opt.on('-v', '--version', 'Show version') do
32
+ puts "ch2py: Version #{ChinesePinyin::VERSION}"
33
+ exit
34
+ end
35
+
36
+ opt.on('-h', '--help', 'Show this help') do
37
+ puts parser
38
+ exit
39
+ end
40
+ end
41
+
42
+ parser.parse!
43
+
44
+ args = parser.default_argv
45
+
46
+ if opts.fetch(:stdin, false)
47
+ chars = STDIN.readline
48
+ elsif args.empty?
49
+ raise OptParse::MissingArgument
50
+ else
51
+ chars = args.join('')
52
+ end
53
+
54
+ STDOUT.puts Pinyin.t(chars, opts)
@@ -6,6 +6,7 @@ require "chinese_pinyin/version"
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "chinese_pinyin"
8
8
  s.version = ChinesePinyin::VERSION
9
+ s.license = "MIT"
9
10
  s.platform = Gem::Platform::RUBY
10
11
  s.authors = ["Richard Huang", "Hong, Liang"]
11
12
  s.email = ["flyerhzm@gmail.com", "hongliang@bamajia.com"]
@@ -18,4 +19,6 @@ Gem::Specification.new do |s|
18
19
  s.files = `git ls-files`.split("\n")
19
20
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
20
21
  s.require_paths = ["lib"]
22
+
23
+ s.executables = ["ch2py"]
21
24
  end
data/data/pinyin-utf8.dat CHANGED
@@ -25357,3 +25357,4 @@
25357
25357
  𧵳 she2
25358
25358
  𧼮 tang1
25359
25359
  𩜇 juan3
25360
+ 嗯 en1
@@ -6,27 +6,28 @@ require 'chinese_pinyin/version'
6
6
  class Pinyin
7
7
 
8
8
  TONE_MARK = {
9
- a: %w(a ā á ǎ à),
10
- o: %w(o ō ó ǒ ò),
11
- e: %w(e ē é ě è),
12
- i: %w(i ī í ǐ ì),
13
- u: %w(u ū ú ǔ ù),
14
- v: %w(ü ǖ ǘ ǚ ǜ)
9
+ a: %w(ā á ǎ à a),
10
+ o: %w(ō ó ǒ ò o),
11
+ e: %w(ē é ě è e),
12
+ i: %w(ī í ǐ ì i),
13
+ u: %w(ū ú ǔ ù u),
14
+ v: %w(ǖ ǘ ǚ ǜ ü)
15
15
  }
16
16
 
17
17
  class <<self
18
18
  attr_accessor :table
19
- attr_accessor :ruby2
19
+ attr_accessor :ruby1
20
20
 
21
21
  def init_table
22
22
  return if @table
23
23
 
24
24
  # Ruby 2.0以后默认即为UTF-8编码,使用新的码表以提升效率
25
- @ruby2 = !!(RUBY_VERSION =~ /^2/)
26
- datfile = @ruby2 ? 'pinyin-utf8.dat' : 'Mandarin.dat'
25
+ @ruby1 = !!(RUBY_VERSION =~ /^1/)
26
+ datfile = @ruby1 ? 'Mandarin.dat' : 'pinyin-utf8.dat'
27
27
  @table = {}
28
28
 
29
- File.open(File.dirname(__FILE__) + "/../data/#{datfile}") do |file|
29
+ file = File.join(File.dirname(__FILE__), "../data/#{datfile}")
30
+ File.open(file, "r:UTF-8",) do |file|
30
31
  while line = file.gets
31
32
  key, value = line.split(' ', 2)
32
33
  @table[key] = value
@@ -50,6 +51,7 @@ class Pinyin
50
51
  end
51
52
 
52
53
  def translate(chars, options={})
54
+ chars = chars.encode("UTF-8")
53
55
  splitter = options.fetch(:splitter, ' ')
54
56
  tonemarks = options.fetch(:tonemarks, false)
55
57
  tone = options.fetch(:tone, false || tonemarks)
@@ -71,7 +73,7 @@ class Pinyin
71
73
  is_english = false
72
74
 
73
75
  chars.scan(/./).each do |char|
74
- key = @ruby2 ? char : sprintf("%X", char.unpack("U").first)
76
+ key = @ruby1 ? sprintf("%X", char.unpack("U").first) : char
75
77
 
76
78
  if @table[key]
77
79
  results << splitter if is_english
@@ -79,19 +81,22 @@ class Pinyin
79
81
  is_english = false
80
82
  pinyin = @table[key].chomp.split(' ', 2)[0]
81
83
 
82
- pinyin.downcase! unless @ruby2
84
+ pinyin.downcase! if @ruby1
83
85
  pinyin.chop! unless tone
84
86
  pinyin.capitalize! if camel
85
87
  if tonemarks
86
88
  tone_index = pinyin[-1].to_i
87
89
  pinyin = pinyin[0...-1]
88
90
  %w(a o e i u v).each { |v|
89
- break if pinyin.tr! v, TONE_MARK[v.to_sym][tone_index]
91
+ break if pinyin.tr! v, TONE_MARK[v.to_sym][tone_index - 1]
90
92
  }
91
93
  end
92
-
93
- results << pinyin
94
- results << splitter
94
+ if block_given?
95
+ results << (yield pinyin, results.size)
96
+ else
97
+ results << pinyin
98
+ results << splitter
99
+ end
95
100
  else
96
101
  if char =~ /[a-zA-Z0-9]/
97
102
  results << char
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+
2
3
  module ChinesePinyin
3
- VERSION = "0.7.0"
4
+ VERSION = "1.1.0"
4
5
  end
@@ -1,4 +1,5 @@
1
1
  # -*- encoding : utf-8 -*-
2
+
2
3
  require File.expand_path(File.join(File.dirname(__FILE__), 'test_helper'))
3
4
 
4
5
  ENV["WORDS_FILE"] = File.dirname(__FILE__) + '/Words.dat'
@@ -10,6 +11,11 @@ class PinyinTest < Test::Unit::TestCase
10
11
  assert_equal("shang hai very good o ye", Pinyin.t('上海very good哦耶'))
11
12
  end
12
13
 
14
+ def test_t_with_frozen_string
15
+ assert_equal("zhong guo", Pinyin.t('中国'.freeze))
16
+ assert_equal("shen zhen", Pinyin.t('深圳'.freeze))
17
+ end
18
+
13
19
  def test_t_with_splitter
14
20
  assert_equal("zhong-guo", Pinyin.t('中国', splitter: '-'))
15
21
  assert_equal("huangzhimin", Pinyin.t('黄志敏', splitter: ''))
@@ -43,4 +49,9 @@ class PinyinTest < Test::Unit::TestCase
43
49
  assert_equal('zhōng guó', Pinyin.t('中国', tonemarks: true))
44
50
  assert_equal('běi jīng', Pinyin.t('北京', tonemarks: true))
45
51
  end
52
+
53
+ def test_t_with_custom
54
+ assert_equal('BJ', Pinyin.t('北京') { |letters| letters[0].upcase } )
55
+ assert_equal('B', Pinyin.t('北京') { |letters, i| letters[0].upcase if i == 0 } )
56
+ end
46
57
  end
metadata CHANGED
@@ -1,31 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chinese_pinyin
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Huang
8
8
  - Hong, Liang
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-03-10 00:00:00.000000000 Z
12
+ date: 2021-04-18 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: translate chinese hanzi to pinyin.
15
15
  email:
16
16
  - flyerhzm@gmail.com
17
17
  - hongliang@bamajia.com
18
- executables: []
18
+ executables:
19
+ - ch2py
19
20
  extensions: []
20
21
  extra_rdoc_files: []
21
22
  files:
23
+ - ".github/workflows/chinese_pinyin.yml"
22
24
  - ".gitignore"
23
- - ".ruby-gemset"
24
- - ".ruby-version"
25
+ - CHANGELOG.md
25
26
  - Gemfile
26
27
  - MIT-LICENSE
27
28
  - README.md
28
29
  - Rakefile
30
+ - bin/ch2py
29
31
  - chinese_pinyin.gemspec
30
32
  - data/Mandarin.dat
31
33
  - data/pinyin-utf8.dat
@@ -35,9 +37,10 @@ files:
35
37
  - test/chinese_pinyin_test.rb
36
38
  - test/test_helper.rb
37
39
  homepage: http://github.com/flyerhzm/chinese_pinyin
38
- licenses: []
40
+ licenses:
41
+ - MIT
39
42
  metadata: {}
40
- post_install_message:
43
+ post_install_message:
41
44
  rdoc_options: []
42
45
  require_paths:
43
46
  - lib
@@ -52,9 +55,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
55
  - !ruby/object:Gem::Version
53
56
  version: 1.3.6
54
57
  requirements: []
55
- rubyforge_project:
56
- rubygems_version: 2.4.5
57
- signing_key:
58
+ rubygems_version: 3.1.4
59
+ signing_key:
58
60
  specification_version: 4
59
61
  summary: translate chinese hanzi to pinyin.
60
62
  test_files:
data/.ruby-gemset DELETED
@@ -1 +0,0 @@
1
- chinese_pinyin
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- ruby-2.2.0