ruby-pinyin-ez 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
module PinYin
  # Punctuation lookup table plus the regular expressions built from it.
  #
  # The table is loaded lazily from +data/Punctuations.dat+ (resolved relative
  # to this file) and memoized at module level.
  module Punctuation

    class <<self

      # Regexp capturing a run of table punctuation at the end of a line.
      #
      # Every table value is hex-decoded (<tt>pack('H*')</tt>), backslash
      # escaped and placed into a single character class. Built once and
      # memoized.
      #
      # @return [Regexp]
      def regexp
        @regexp ||= begin
          escaped_punctuations = punctuations.values.map { |v| "\\#{[v].pack('H*')}" }.join
          Regexp.new "([#{escaped_punctuations}]+)$"
        end
      end

      # Regexp capturing a run of characters in U+3000..U+303F or
      # U+FF00..U+FFEF.
      #
      # @return [Regexp]
      def chinese_regexp
        @chinese_regexp ||= /([\u3000-\u303F\uFF00-\uFFEF]+)/
      end

      # Looks up the table value stored for +code+.
      #
      # @param code [String] table key (first field of a data line)
      # @return [String, nil] the second field of that line, or nil if absent
      def [](code)
        punctuations[code]
      end

      # @param code [String] table key
      # @return [Boolean] whether the table has an entry for +code+
      def include?(code)
        punctuations.key?(code)
      end

      # The memoized lookup table, loaded from the bundled data file on first
      # use.
      #
      # @return [Hash{String => String}]
      def punctuations
        return @punctuations if @punctuations

        @punctuations = {}
        src = File.expand_path('../data/Punctuations.dat', __FILE__)
        load_from src

        @punctuations
      end

      # Merges the entries of +file+ into the table.
      #
      # Each line is expected to hold two whitespace-separated fields: key and
      # value. Lines that do not provide both (blank lines, a lone key) are
      # skipped; storing them would put nil into the table and make #regexp
      # raise while hex-decoding the values.
      #
      # The table must already be initialised (see #punctuations).
      #
      # @param file [String] path of the data file
      # @return [Hash{String => String}] the updated table
      def load_from(file)
        File.foreach(file) do |line|
          # Argument-less split also ignores leading whitespace, so an
          # indented line cannot yield an empty-string key.
          from, to = line.split
          next unless from && to

          @punctuations[from] = to
        end

        @punctuations
      end

    end

  end
end
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module PinYin
  # Stateless helpers; callable as +PinYin::Util.to_ascii+ or via mixin.
  module Util
    extend self

    # Maps a tone-marked vowel (or "ü" + marked vowel pair) to its ASCII
    # replacement and tone number. The two-character "ü" pairs come first so
    # they are matched before their single-character second halves.
    # Frozen so the shared table cannot be mutated by accident.
    ASCIIMapping = {
      'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
      'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['v', 1],
      'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['v', 2],
      'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['v', 3],
      'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['v', 4]
    }.freeze

    # Replaces the first tone-marked sequence found in +reading+ with its
    # ASCII form.
    #
    # Only the first mapping entry (in table order) that occurs in +reading+
    # is applied. A reading without any mapped character is returned as is.
    #
    # @param reading [String] a reading, possibly carrying a tone mark
    # @param with_tone [Boolean] append the tone number to the result
    # @return [String] e.g. "zhong1" (or "zhong" when +with_tone+ is falsy)
    def to_ascii(reading, with_tone=true)
      marked, (plain, tone) = ASCIIMapping.find { |char, _| reading.include?(char) }
      return reading unless marked

      ascii = reading.sub(marked, plain)
      with_tone ? ascii.concat(tone.to_s) : ascii
    end

  end
end
@@ -0,0 +1,16 @@
1
module PinYin
  # A String that additionally carries an +english+ flag.
  class Value < String
    attr_accessor :english
    alias :english? :english

    # @param str [String] the text content
    # @param english [Boolean] flag stored on the value (defaults to true)
    def initialize(str, english=true)
      super(str)
      self.english = english
    end

    # Like String#split, but every piece is wrapped in the receiver's class
    # and inherits its +english+ flag.
    #
    # When a block is given the wrapped pieces are yielded to it and the
    # result of String#split is returned unchanged. Previously the block form
    # raised NoMethodError, because String#split hands back the receiver in
    # that case and the result was then mapped as if it were an Array.
    #
    # @return [Array<Value>] the wrapped pieces when no block is given
    def split(*args)
      wrap = lambda { |piece| self.class.new(piece, english) }
      return super(*args) { |piece| yield wrap.call(piece) } if block_given?

      super(*args).map(&wrap)
    end
  end
end
@@ -0,0 +1,3 @@
1
module PinYin
  # Version string of the gem; frozen so it cannot be mutated in place.
  VERSION = '0.5.0'.freeze
end
@@ -0,0 +1,41 @@
1
+ require 'ruby-pinyin/util'
2
+ require 'ruby-pinyin/value'
3
+ require 'ruby-pinyin/punctuation'
4
+ require 'ruby-pinyin/backend'
5
+
6
# Module-level facade: every conversion is delegated to the configured
# backend object.
module PinYin
  class <<self

    # The object that performs the conversion; it must respond to
    # +romanize(str, tone, include_punctuations)+.
    attr_accessor :backend

    # Converts +str+ by delegating to the backend.
    #
    # @param str [String] text to convert
    # @param tone [Object, nil] passed through to the backend unchanged
    # @param include_punctuations [Boolean] passed through to the backend
    # @return [Array] whatever the backend returns; the other helpers here
    #   treat it as a list of string-like words responding to +english?+
    def romanize(str, tone=nil, include_punctuations=false)
      backend.romanize(str, tone, include_punctuations)
    end
    alias :of_string :romanize

    # Joins the romanized words of +str+ with +sep+.
    #
    # @param str [String] text to convert
    # @param sep [String] separator placed between words
    # @return [String]
    def permlink(str, sep='-')
      of_string(str).join(sep)
    end

    # Builds an abbreviation from the first character of every word.
    #
    # A word is kept whole instead of being shortened when it is the first
    # word and +except_lead+ is set, or when it reports +english?+ and
    # +except_english+ is set. Empty words contribute nothing (they used to
    # raise TypeError because their first character is nil).
    #
    # @param str [String] text to convert
    # @param except_lead [Boolean] keep the first word unabbreviated
    # @param except_english [Boolean] keep words flagged as English whole
    # @return [String]
    def abbr(str, except_lead=false, except_english=true)
      of_string(str).each_with_index.inject("") do |result, (word, index)|
        keep_whole = (except_lead && index == 0) || (except_english && word.english?)
        # word[0] is nil for an empty word; to_s turns that into "".
        result << (keep_whole ? word : word[0].to_s)
      end
    end

    # Romanizes +str+ with punctuation included and joins the words with a
    # single space.
    #
    # @param str [String] text to convert
    # @param tone [Object, nil] passed through to the backend unchanged
    # @return [String]
    def sentence(str, tone=nil)
      of_string(str, tone, true).join(' ')
    end

    # Replaces the backend with a new instance of the same class, constructed
    # with +files+.
    #
    # NOTE(review): when no backend is set this falls back to
    # PinYin::Backend::MMSeg - confirm that this is the intended default.
    #
    # @param files [Object] argument handed to the backend constructor
    def override_files=(files)
      klass = backend ? backend.class : PinYin::Backend::MMSeg
      self.backend = klass.new files
    end

  end
end
40
+
41
# Install an EZSeg backend instance as the backend when this file is loaded.
+ PinYin.backend = PinYin::Backend::EZSeg.new
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-pinyin-ez
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - hzyhzy
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-02-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rmmseg-cpp-new
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.3.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.3.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.4'
41
+ description: Pinyin is a romanization system (phonemic notation) of Chinese characters,
42
+ this gem helps you to convert Chinese characters into pinyin form.
43
+ email:
44
+ - 18670314023@163.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - LICENSE
50
+ - README.markdown
51
+ - lib/ruby-pinyin.rb
52
+ - lib/ruby-pinyin/backend.rb
53
+ - lib/ruby-pinyin/backend/ezseg.rb
54
+ - lib/ruby-pinyin/backend/mmseg.rb
55
+ - lib/ruby-pinyin/backend/simple.rb
56
+ - lib/ruby-pinyin/data/Mandarin.dat
57
+ - lib/ruby-pinyin/data/Punctuations.dat
58
+ - lib/ruby-pinyin/data/words.dat
59
+ - lib/ruby-pinyin/data/words.dic
60
+ - lib/ruby-pinyin/punctuation.rb
61
+ - lib/ruby-pinyin/util.rb
62
+ - lib/ruby-pinyin/value.rb
63
+ - lib/ruby-pinyin/version.rb
64
+ homepage: https://github.com/hzyhzy/ruby-pinyin
65
+ licenses:
66
+ - BSD
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.0.6
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Convert Chinese characters into pinyin.
87
+ test_files: []