ruby_pinyin 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +22 -0
- data/README.md +35 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/ruby_pinyin.rb +41 -0
- data/lib/ruby_pinyin/backend.rb +6 -0
- data/lib/ruby_pinyin/backend/mmseg.rb +110 -0
- data/lib/ruby_pinyin/backend/simple.rb +72 -0
- data/lib/ruby_pinyin/data/Mandarin.dat +41208 -0
- data/lib/ruby_pinyin/data/Punctuations.dat +14 -0
- data/lib/ruby_pinyin/data/words.dat +175180 -0
- data/lib/ruby_pinyin/data/words.dic +175180 -0
- data/lib/ruby_pinyin/punctuation.rb +46 -0
- data/lib/ruby_pinyin/util.rb +29 -0
- data/lib/ruby_pinyin/value.rb +16 -0
- data/lib/ruby_pinyin/version.rb +3 -0
- data/ruby_pinyin.gemspec +27 -0
- metadata +107 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
module PinYin
  # Punctuation lookup table plus the regular expressions built from it.
  #
  # The table is loaded lazily from data/Punctuations.dat (resolved relative to
  # this file). Each line of that file holds two whitespace-separated fields,
  # stored as key => value.
  module Punctuation

    class << self

      # Regexp matching a run of table punctuation at the end of a line; the
      # run is captured in group 1. Built once and memoized.
      #
      # Every table value is treated as a hex string: it is decoded to raw
      # bytes with pack('H*'), backslash-escaped, and placed in a character
      # class.
      def regexp
        return @regexp if @regexp

        escaped_punctuations = punctuations.values.map { |v| "\\#{[v].pack('H*')}" }.join
        @regexp = Regexp.new "([#{escaped_punctuations}]+)$"
      end

      # Regexp capturing a run of characters in the ranges U+3000..U+303F and
      # U+FF00..U+FFEF. Memoized.
      def chinese_regexp
        @chinese_regexp ||= /([\u3000-\u303F\uFF00-\uFFEF]+)/
      end

      # Returns the table value stored under +code+, or nil when absent.
      def [](code)
        punctuations[code]
      end

      # True when +code+ is a key of the punctuation table.
      def include?(code)
        punctuations.has_key?(code)
      end

      # The punctuation table (Hash of String => String), loaded on first use.
      #
      # The table is only memoized after the data file has been read
      # successfully, so a failed read raises again on the next call instead
      # of leaving an empty table cached forever.
      def punctuations
        return @punctuations if @punctuations

        src = File.expand_path('../data/Punctuations.dat', __FILE__)
        load_from src

        @punctuations
      end

      # Merges the entries found in +file+ into the punctuation table.
      #
      # Each line is split on whitespace into a key and a value. Blank lines
      # and lines with fewer than two fields are skipped, so they cannot
      # introduce nil keys/values (which would make #regexp raise in pack).
      #
      # Raises whatever File.readlines raises when +file+ cannot be read; in
      # that case the table is left untouched. Returns the updated table.
      def load_from(file)
        entries = {}
        File.readlines(file).each do |line|
          from, to = line.split
          next unless from && to

          entries[from] = to
        end

        (@punctuations ||= {}).update(entries)
      end

    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
module PinYin
  # Helpers for converting tone-marked pinyin readings to plain ASCII.
  module Util
    extend self

    # Maps a tone-marked vowel (or a two-character "ü" + marked "e" pair) to
    # [ascii replacement, tone number]. The two-character pairs come first so
    # that they win over the single marked "e" when #to_ascii scans the table
    # in insertion order.
    ASCIIMapping = {
      'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
      'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['v', 1],
      'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['v', 2],
      'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['v', 3],
      'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['v', 4]
    }

    # Replaces the first table entry found in +reading+ with its ASCII form.
    #
    # reading   - String holding one pinyin reading.
    # with_tone - when true, the tone number is appended to the result.
    #
    # Only the first matching table entry (in table order) is replaced, and
    # only its first occurrence. Returns a new String when something was
    # replaced, otherwise +reading+ itself, unchanged.
    def to_ascii(reading, with_tone=true)
      marked, (plain, tone) = ASCIIMapping.find { |char, _| reading.include?(char) }
      return reading unless marked

      ascii_reading = reading.sub(marked, plain)
      with_tone ? ascii_reading.concat(tone.to_s) : ascii_reading
    end

  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module PinYin
  # A String that additionally carries an +english+ flag.
  class Value < String
    attr_accessor :english
    alias :english? :english

    # str     - the text of the value.
    # english - flag stored on the value; defaults to true.
    def initialize(str, english=true)
      super(str)
      self.english = english
    end

    # Same as String#split, but every piece is wrapped in a new instance of
    # the receiver's class carrying the receiver's +english+ flag.
    #
    # Without a block, returns the Array of wrapped pieces. With a block,
    # each wrapped piece is yielded and the receiver is returned, which is
    # the contract of String#split's block form.
    def split(*args)
      # `&nil` stops the caller's block from being forwarded implicitly:
      # on Ruby 2.6+ String#split with a block returns the receiver instead
      # of an Array, which would break the wrapping below.
      parts = super(*args, &nil).map { |str| self.class.new(str, english) }
      return parts unless block_given?

      parts.each { |part| yield part }
      self
    end
  end
end
|
data/ruby_pinyin.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be
# required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "ruby_pinyin/version"

Gem::Specification.new do |gem|
  # Identity
  gem.name          = "ruby_pinyin"
  gem.version       = PinYin::VERSION
  gem.authors       = ["Charles Zhang"]
  gem.email         = ["gis05zc@163.com"]

  # Description
  gem.summary       = "Convert Chinese characters into pinyin."
  gem.description   = "Pinyin is a romanization system (phonemic notation) of Chinese characters, this gem helps you to convert Chinese characters into pinyin form."
  gem.homepage      = "https://github.com/chinazhangchao/ruby-pinyin"
  gem.license       = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or
  # features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the packaged files that live under exe/.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Dependencies
  gem.add_runtime_dependency('rmmseg-cpp-new', ['~> 0.3.1'])
  gem.add_development_dependency "bundler", "~> 1.16"
  gem.add_development_dependency "rake", "~> 10.0"
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby_pinyin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Charles Zhang
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-11-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rmmseg-cpp-new
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.3.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.16'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.16'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '10.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '10.0'
|
55
|
+
description: Pinyin is a romanization system (phonemic notation) of Chinese characters,
|
56
|
+
this gem helps you to convert Chinese characters into pinyin form.
|
57
|
+
email:
|
58
|
+
- gis05zc@163.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- Gemfile
|
65
|
+
- Gemfile.lock
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- bin/console
|
69
|
+
- bin/setup
|
70
|
+
- lib/ruby_pinyin.rb
|
71
|
+
- lib/ruby_pinyin/backend.rb
|
72
|
+
- lib/ruby_pinyin/backend/mmseg.rb
|
73
|
+
- lib/ruby_pinyin/backend/simple.rb
|
74
|
+
- lib/ruby_pinyin/data/Mandarin.dat
|
75
|
+
- lib/ruby_pinyin/data/Punctuations.dat
|
76
|
+
- lib/ruby_pinyin/data/words.dat
|
77
|
+
- lib/ruby_pinyin/data/words.dic
|
78
|
+
- lib/ruby_pinyin/punctuation.rb
|
79
|
+
- lib/ruby_pinyin/util.rb
|
80
|
+
- lib/ruby_pinyin/value.rb
|
81
|
+
- lib/ruby_pinyin/version.rb
|
82
|
+
- ruby_pinyin.gemspec
|
83
|
+
homepage: https://github.com/chinazhangchao/ruby-pinyin
|
84
|
+
licenses:
|
85
|
+
- MIT
|
86
|
+
metadata: {}
|
87
|
+
post_install_message:
|
88
|
+
rdoc_options: []
|
89
|
+
require_paths:
|
90
|
+
- lib
|
91
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 2.6.13
|
104
|
+
signing_key:
|
105
|
+
specification_version: 4
|
106
|
+
summary: Convert Chinese characters into pinyin.
|
107
|
+
test_files: []
|