ruby-pinyin-ez 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
module PinYin
  # Punctuation lookup table plus the regular expressions derived from it.
  #
  # The table is loaded lazily from data/Punctuations.dat (resolved relative
  # to this file). Each usable line contributes one entry: the first
  # whitespace-separated field is the key, the second field is the value.
  # Values are decoded as hex strings (pack('H*')) when +regexp+ is built.
  module Punctuation

    class <<self

      # Regexp that captures a run of known punctuation characters located
      # at the end of a line ("$" anchor). Built once and memoized.
      #
      # Every table value is hex-decoded and backslash-escaped before it is
      # placed inside the character class.
      #
      # @return [Regexp]
      def regexp
        @regexp ||= begin
          escaped_punctuations = punctuations.values.map { |hex| "\\#{[hex].pack('H*')}" }.join
          Regexp.new "([#{escaped_punctuations}]+)$"
        end
      end

      # Regexp that captures a run of characters in the ranges
      # U+3000..U+303F and U+FF00..U+FFEF.
      #
      # @return [Regexp]
      def chinese_regexp
        @chinese_regexp ||= /([\u3000-\u303F\uFF00-\uFFEF]+)/
      end

      # Looks up the table value stored for +code+.
      #
      # @param code [String] a key as it appears in the data file
      # @return [String, nil] the mapped value, or nil when unknown
      def [](code)
        punctuations[code]
      end

      # @param code [String] a key as it appears in the data file
      # @return [Boolean] whether the table has an entry for +code+
      def include?(code)
        punctuations.key?(code)
      end

      # The memoized lookup table. On first use it is filled from
      # data/Punctuations.dat. Nothing is memoized when loading raises, so a
      # failed load is retried (and reported again) on the next call instead
      # of leaving an empty table behind.
      #
      # @return [Hash{String => String}]
      def punctuations
        @punctuations || load_from(File.expand_path('../data/Punctuations.dat', __FILE__))
      end

      # Merges the entries found in +file+ into the lookup table.
      #
      # Lines are split on runs of whitespace, ignoring leading whitespace.
      # Lines that do not provide both a key and a value (for example blank
      # lines) are skipped: storing a nil value would later make +regexp+
      # fail inside pack('H*').
      #
      # @param file [String] path of the data file to read
      # @return [Hash{String => String}] the updated lookup table
      # @raise [SystemCallError] when +file+ cannot be read
      def load_from(file)
        # Read and parse first so that an unreadable file leaves the table
        # untouched.
        entries = File.readlines(file).map(&:split).select { |fields| fields.size >= 2 }

        table = (@punctuations ||= {})
        entries.each { |from, to| table[from] = to }
        table
      end

    end

  end
end
@@ -0,0 +1,29 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
module PinYin
  # Helpers for turning tone-marked pinyin readings into plain ASCII.
  module Util
    extend self

    # Tone-marked vowel(s) => [ASCII replacement, tone number].
    #
    # The two-character "ü + marked e" keys are listed first: Hash iteration
    # follows insertion order, so they are tried before the single marked "e"
    # keys they contain. The table and its value pairs are frozen so the
    # shared constant cannot be modified at runtime.
    ASCIIMapping = {
      'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
      'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['v', 1],
      'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['v', 2],
      'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['v', 3],
      'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['v', 4]
    }.each_value(&:freeze).freeze

    # Replaces the first tone-marked vowel found in +reading+ with its ASCII
    # form, optionally appending the tone number.
    #
    # Only the first mapping key contained in +reading+ is replaced, and only
    # its first occurrence.
    #
    # @param reading [String] a pinyin reading
    # @param with_tone [Boolean] append the tone number to the result
    # @return [String] a new string when a mark was replaced; +reading+
    #   itself (the same object) when it contains no known tone mark
    def to_ascii(reading, with_tone=true)
      ASCIIMapping.each do |marked, (plain, tone)|
        next unless reading.include?(marked)

        ascii = reading.sub(marked, plain)
        return with_tone ? ascii.concat(tone.to_s) : ascii
      end
      reading
    end

  end
end
@@ -0,0 +1,16 @@
1
module PinYin
  # A String that carries an extra +english+ flag.
  #
  # NOTE(review): the flag presumably marks text that was passed through
  # rather than romanized (e.g. English words) — confirm against the backends
  # that construct these values.
  class Value < String
    attr_accessor :english
    alias :english? :english

    # @param str [String] the text
    # @param english [Boolean] value of the flag (defaults to true)
    def initialize(str, english=true)
      super(str)
      self.english = english
    end

    # Same contract as String#split, except that every piece is wrapped in
    # the receiver's class and inherits the receiver's +english+ flag.
    #
    # The block form is supported explicitly. Since Ruby 2.6 String#split
    # yields each piece and returns the receiver when given a block, so
    # forwarding the block to super and then mapping the result raised
    # NoMethodError. The block is therefore withheld from super (&nil) and
    # the wrapped pieces are yielded here instead.
    #
    # @return [Array<Value>] the wrapped pieces, when no block is given
    # @return [Value] the receiver, when a block is given
    def split(*args)
      pieces = super(*args, &nil).map { |piece| self.class.new(piece, english) }
      return pieces unless block_given?

      pieces.each { |piece| yield piece }
      self
    end
  end
end
@@ -0,0 +1,3 @@
1
module PinYin
  # Release version of the gem. Frozen so the shared string cannot be
  # modified in place by code that reads it.
  VERSION = '0.5.0'.freeze
end
@@ -0,0 +1,41 @@
1
+ require 'ruby-pinyin/util'
2
+ require 'ruby-pinyin/value'
3
+ require 'ruby-pinyin/punctuation'
4
+ require 'ruby-pinyin/backend'
5
+
6
module PinYin
  class <<self

    # The object that performs the conversion. It must respond to
    # romanize(str, tone, include_punctuations); every method below
    # delegates to it.
    attr_accessor :backend

    # Converts +str+ through the configured backend.
    #
    # @param str [String] text to convert
    # @param tone [Object, nil] passed to the backend unchanged
    # @param include_punctuations [Boolean] passed to the backend unchanged
    # @return [Object] whatever the backend returns; the other methods here
    #   treat it as a list of words
    def romanize(str, tone=nil, include_punctuations=false)
      backend.romanize(str, tone, include_punctuations)
    end
    alias :of_string :romanize

    # Joins the converted words of +str+ with +sep+.
    #
    # @param str [String] text to convert
    # @param sep [String] separator placed between words
    # @return [String]
    def permlink(str, sep='-')
      of_string(str).join(sep)
    end

    # Builds an abbreviation from the first character of each converted word.
    #
    # A word is kept whole instead of being shortened when it is the first
    # word and +except_lead+ is set, or when +except_english+ is set and the
    # word answers true to english?.
    #
    # @param str [String] text to convert
    # @param except_lead [Boolean] keep the first word unabbreviated
    # @param except_english [Boolean] keep words flagged as english whole
    # @return [String]
    def abbr(str, except_lead=false, except_english=true)
      result = ""
      of_string(str).each_with_index do |word, i|
        keep_whole = (except_lead && i == 0) || (except_english && word.english?)
        piece = keep_whole ? word : word[0]
        # word[0] is nil for an empty word; appending nil would raise
        # TypeError, so such words contribute nothing.
        result << piece if piece
      end
      result
    end

    # Converts +str+ with punctuation included and joins the words with a
    # single space.
    #
    # @param str [String] text to convert
    # @param tone [Object, nil] passed to the backend unchanged
    # @return [String]
    def sentence(str, tone=nil)
      of_string(str, tone, true).join(' ')
    end

    # Replaces the backend with a new instance of the current backend's
    # class, constructed with +files+. Falls back to PinYin::Backend::MMSeg
    # when no backend is set.
    #
    # NOTE(review): the library installs PinYin::Backend::EZSeg as the
    # default backend at load time, while this fallback names MMSeg —
    # confirm that the difference is intended.
    #
    # @param files [Object] passed to the backend constructor unchanged
    def override_files=(files)
      klass = backend ? backend.class : PinYin::Backend::MMSeg
      self.backend = klass.new files
    end

  end
end
40
+
41
# Install an EZSeg instance as the backend once the library has been loaded.
+ PinYin.backend = PinYin::Backend::EZSeg.new
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-pinyin-ez
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - hzyhzy
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-02-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rmmseg-cpp-new
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.3.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.3.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.4'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.4'
41
+ description: Pinyin is a romanization system (phonemic notation) of Chinese characters,
42
+ this gem helps you to convert Chinese characters into pinyin form.
43
+ email:
44
+ - 18670314023@163.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - LICENSE
50
+ - README.markdown
51
+ - lib/ruby-pinyin.rb
52
+ - lib/ruby-pinyin/backend.rb
53
+ - lib/ruby-pinyin/backend/ezseg.rb
54
+ - lib/ruby-pinyin/backend/mmseg.rb
55
+ - lib/ruby-pinyin/backend/simple.rb
56
+ - lib/ruby-pinyin/data/Mandarin.dat
57
+ - lib/ruby-pinyin/data/Punctuations.dat
58
+ - lib/ruby-pinyin/data/words.dat
59
+ - lib/ruby-pinyin/data/words.dic
60
+ - lib/ruby-pinyin/punctuation.rb
61
+ - lib/ruby-pinyin/util.rb
62
+ - lib/ruby-pinyin/value.rb
63
+ - lib/ruby-pinyin/version.rb
64
+ homepage: https://github.com/hzyhzy/ruby-pinyin
65
+ licenses:
66
+ - BSD
67
+ metadata: {}
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubygems_version: 3.0.6
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Convert Chinese characters into pinyin.
87
+ test_files: []