igo 0.1.4.3 → 0.1.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.en.md +7 -7
- data/README.ja.md +7 -7
- data/README.md +5 -5
- data/README.zh.md +7 -7
- data/lib/igo/ja.rb +34 -2
- data/lib/igo/version.rb +1 -1
- data/lib/igo/zh.rb +87 -13
- data/lib/igo.rb +14 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8d29ccb85327a2b7ed069ef514f2f93601219bfea47aa793560f7275a9d24cc
|
4
|
+
data.tar.gz: 1ab3272f76a86437873d4272148271972ec3e9d28fa672bd47d96a342af127b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abad6bdf5a657706408f6f2303624523eb4cdfefe31d010a69d09a558bce145a6bef92679266576c9a48a40750a332c3f814738da4b9fe31690a23e8e5436bc2
|
7
|
+
data.tar.gz: baca5c388703bd4788df84a6087e701d630e25df50cb3dc09c5b753304f66bd3bafd7ed0769b695ef4466a48b8bb1c8aaa5129cd24e1518cc1d978e911579a47
|
data/README.en.md
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
Support `zh-CN`, `ja-JP`
|
6
6
|
|
7
7
|
```ruby
|
8
|
-
|
9
|
-
#=> ["
|
8
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
10
10
|
|
11
|
-
|
12
|
-
#=>
|
11
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
|
12
|
+
#=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
|
13
13
|
|
14
14
|
```
|
15
15
|
|
@@ -36,10 +36,10 @@ require 'igo/ja'
|
|
36
36
|
j = Igo::Ja
|
37
37
|
|
38
38
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
39
|
-
#=> ["
|
39
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
40
40
|
|
41
41
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
42
|
-
#=>
|
42
|
+
#=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
|
43
43
|
|
44
44
|
```
|
45
45
|
|
@@ -61,7 +61,7 @@ z = Igo::Zh
|
|
61
61
|
|
62
62
|
```ruby
|
63
63
|
z.pinyin "全世界的无产者,联合起来!"
|
64
|
-
#=> ["quan2", "shi4", "jie4", "
|
64
|
+
#=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
65
65
|
z.pinyin "全世界的无产者,联合起来!", s: 1
|
66
66
|
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
67
67
|
```
|
data/README.ja.md
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
Support `zh-CN`, `ja-JP`
|
6
6
|
|
7
7
|
```ruby
|
8
|
-
|
9
|
-
#=> ["
|
8
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
10
10
|
|
11
|
-
|
12
|
-
#=>
|
11
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
|
12
|
+
#=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
|
13
13
|
|
14
14
|
```
|
15
15
|
|
@@ -35,10 +35,10 @@ require 'igo/ja'
|
|
35
35
|
j = Igo::Ja
|
36
36
|
|
37
37
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
38
|
-
#=> ["
|
38
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
39
39
|
|
40
40
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
41
|
-
#=>
|
41
|
+
#=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
|
42
42
|
|
43
43
|
```
|
44
44
|
|
@@ -60,7 +60,7 @@ z = Igo::Zh
|
|
60
60
|
|
61
61
|
```ruby
|
62
62
|
z.pinyin "全世界的无产者,联合起来!"
|
63
|
-
#=> ["quan2", "shi4", "jie4", "
|
63
|
+
#=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
64
64
|
z.pinyin "全世界的无产者,联合起来!", s: 1
|
65
65
|
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
66
66
|
```
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Support `zh-CN`, `ja-JP`
|
|
6
6
|
|
7
7
|
```ruby
|
8
8
|
ja.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
-
#=>
|
9
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
10
10
|
|
11
11
|
zh.tag "全世界的无产者,联合起来!", s: true
|
12
12
|
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
@@ -36,10 +36,10 @@ require 'igo/ja'
|
|
36
36
|
j = Igo::Ja
|
37
37
|
|
38
38
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
39
|
-
#=> ["
|
39
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
40
40
|
|
41
|
-
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s:
|
42
|
-
#=>
|
41
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
|
42
|
+
#=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
|
43
43
|
|
44
44
|
```
|
45
45
|
|
@@ -61,7 +61,7 @@ z = Igo::Zh
|
|
61
61
|
|
62
62
|
```ruby
|
63
63
|
z.pinyin "全世界的无产者,联合起来!"
|
64
|
-
#=> ["quan2", "shi4", "jie4", "
|
64
|
+
#=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
65
65
|
z.pinyin "全世界的无产者,联合起来!", s: 1
|
66
66
|
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
67
67
|
```
|
data/README.zh.md
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
Support `zh-CN`, `ja-JP`
|
6
6
|
|
7
7
|
```ruby
|
8
|
-
|
9
|
-
#=> ["
|
8
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
10
10
|
|
11
|
-
|
12
|
-
#=>
|
11
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
|
12
|
+
#=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
|
13
13
|
|
14
14
|
```
|
15
15
|
|
@@ -36,10 +36,10 @@ require 'igo/ja'
|
|
36
36
|
j = Igo::Ja
|
37
37
|
|
38
38
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
39
|
-
#=> ["
|
39
|
+
#=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
40
40
|
|
41
41
|
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
42
|
-
#=>
|
42
|
+
#=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
|
43
43
|
|
44
44
|
```
|
45
45
|
|
@@ -61,7 +61,7 @@ z = Igo::Zh
|
|
61
61
|
|
62
62
|
```ruby
|
63
63
|
z.pinyin "全世界的无产者,联合起来!"
|
64
|
-
#=> ["quan2", "shi4", "jie4", "
|
64
|
+
#=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
65
65
|
z.pinyin "全世界的无产者,联合起来!", s: 1
|
66
66
|
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
67
67
|
```
|
data/lib/igo/ja.rb
CHANGED
@@ -3,16 +3,46 @@ require 'open-uri'
|
|
3
3
|
require 'uri'
|
4
4
|
|
5
5
|
module Igo
|
6
|
+
|
7
|
+
# ## 使い方 :: Usage / Ja
|
8
|
+
#
|
9
|
+
# require 'igo'
|
10
|
+
# require 'igo/ja'
|
11
|
+
#
|
12
|
+
# j = Igo::Ja
|
13
|
+
#
|
14
|
+
# cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
15
|
+
# #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
16
|
+
#
|
17
|
+
# cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
18
|
+
# #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
|
19
|
+
#
|
20
|
+
# 下ノ関数は、暫く未完成です、ごめんね:
|
21
|
+
#
|
22
|
+
# `j.romaji`, `j.kana`, `j.tag`。
|
23
|
+
#
|
6
24
|
module Ja
|
7
25
|
|
8
26
|
SEARCH_URL = "https://jisho.org/search/"
|
9
27
|
|
10
28
|
class << self
|
29
|
+
# ## 使い方 :: Usage / Ja
|
30
|
+
#
|
31
|
+
# j = Igo::Ja
|
32
|
+
#
|
33
|
+
# cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
34
|
+
# #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
35
|
+
#
|
36
|
+
# cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
|
37
|
+
# #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
|
38
|
+
#
|
11
39
|
def cut str, s: false
|
12
40
|
str = URI.encode_www_form_component(str)
|
13
41
|
doc = Nokogiri::HTML(URI.open(SEARCH_URL + str).read)
|
14
42
|
cutted = doc.css(".japanese_word__text_wrapper").map{_1.text.strip}
|
15
|
-
s ? cutted.join(
|
43
|
+
# s ? cutted.join(s) : cutted
|
44
|
+
sep = s.is_a?(String) ? s : " "
|
45
|
+
s ? cutted.join(sep) : cutted
|
16
46
|
end
|
17
47
|
# def romaji str
|
18
48
|
# end
|
@@ -21,8 +51,10 @@ module Igo
|
|
21
51
|
# end
|
22
52
|
end
|
23
53
|
|
54
|
+
# TODO: tag word function
|
55
|
+
#
|
24
56
|
def tag str, s: false, ns: 0
|
25
|
-
|
57
|
+
# TODO
|
26
58
|
end
|
27
59
|
|
28
60
|
end
|
data/lib/igo/version.rb
CHANGED
data/lib/igo/zh.rb
CHANGED
@@ -1,25 +1,85 @@
|
|
1
1
|
require 'ruby_pinyin'
|
2
2
|
|
3
|
+
|
4
|
+
|
3
5
|
module Igo
|
6
|
+
# # 用法 :: Usage / Zh
|
7
|
+
|
8
|
+
# Lack __Trad-Zh__ :: 暂不支持「正體中文」
|
9
|
+
#
|
10
|
+
# ```ruby
|
11
|
+
# require 'igo'
|
12
|
+
# require 'igo/zh'
|
13
|
+
# z = Igo::Zh
|
14
|
+
# ```
|
15
|
+
#
|
16
|
+
# ### Pinyin :: 拼音
|
17
|
+
#
|
18
|
+
# ```ruby
|
19
|
+
# z.pinyin "全世界的无产者,联合起来!"
|
20
|
+
# #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
21
|
+
# z.pinyin "全世界的无产者,联合起来!", s: 1
|
22
|
+
# #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
23
|
+
# ```
|
24
|
+
#
|
25
|
+
# ### Separate words :: 分词
|
26
|
+
#
|
27
|
+
# ```ruby
|
28
|
+
# z.cut "全世界的无产者,联合起来!"
|
29
|
+
# #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
|
30
|
+
# z.cut "全世界的无产者,联合起来!", s: true
|
31
|
+
# #=> "全世界 的 无产者 , 联合 起来 !"
|
32
|
+
# ```
|
33
|
+
# ### Tagging words :: 标注词类
|
34
|
+
#
|
35
|
+
# 后续可能添加 `by` 关键词指定函数。
|
36
|
+
#
|
37
|
+
# ```ruby
|
38
|
+
# z.tag "全世界的无产者,联合起来!"
|
39
|
+
# #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
|
40
|
+
# z.tag "全世界的无产者,联合起来!", s: true
|
41
|
+
# #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
42
|
+
# ```
|
43
|
+
# ### 词频统计
|
44
|
+
#
|
45
|
+
# ```ruby
|
46
|
+
# z.freq "全世界的无产者,联合起来!", 5
|
47
|
+
# #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
|
48
|
+
#
|
49
|
+
# # alias 拼音, 分词, 标记, 词频
|
50
|
+
#
|
51
|
+
# ```
|
4
52
|
module Zh
|
5
53
|
|
6
54
|
Tagging = JiebaRb::Tagging.new
|
7
55
|
Segment = JiebaRb::Segment.new mode: :mix, user_dict: "ext/cppjieba/dict/user.dict.utf8"
|
8
56
|
Keyword = JiebaRb::Keyword.new
|
9
57
|
|
10
|
-
# @params chinese: String
|
11
|
-
# @returns pinyin_numeraltone: String
|
12
58
|
class << self
|
13
|
-
|
14
|
-
|
15
|
-
|
59
|
+
# ### Pinyin :: 拼音
|
60
|
+
#
|
61
|
+
# z.pinyin "全世界的无产者,联合起来!"
|
62
|
+
# #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
63
|
+
# z.pinyin "全世界的无产者,联合起来!", s: " "
|
64
|
+
# #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
65
|
+
#
|
66
|
+
# @param str [String] text to convert; also accepts `s:` (false, or a separator) and `ommit:` (a token to drop)
|
67
|
+
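# @return [Array<String>, String] an Array of tokens when `s` is falsy; otherwise the tokens joined by `s` if it is a String, else by a single space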
|
68
|
+
def pinyin str, s: false, ommit: " "
|
69
|
+
# tone 1, 2, 3, 4, 5
|
70
|
+
hanzi_ords = [13312..19903, 19968..40959, 63744..64255, 131072..173791, 173824..177983, 194560..195103]
|
71
|
+
|
72
|
+
res = str.split(/(?=[^A-Z\d])|(?<=[^A-Z\d])/i).map do |ch|
|
73
|
+
if hanzi_ords.map{|range| range.include? ch.ord}.any?
|
74
|
+
py = PinYin.sentence(ch, :ascii)
|
75
|
+
py =~ /\d/ ? py : (py+"5")
|
76
|
+
else ch
|
77
|
+
end
|
78
|
+
end.flatten.select{_1 != s and _1 != ommit}
|
79
|
+
|
80
|
+
sep = s.is_a?(String) ? s : " "
|
81
|
+
s ? res.join(sep) : res
|
16
82
|
|
17
|
-
end
|
18
|
-
def pinyin_tonal_s str
|
19
|
-
PinYin.sentence(token, :ascii)
|
20
|
-
end
|
21
|
-
def pinyin_tonal_a str
|
22
|
-
PinYin.of_string(token, :ascii)
|
23
83
|
end
|
24
84
|
|
25
85
|
def cut str, s: false, tag: false, by: "jieba"
|
@@ -28,8 +88,9 @@ module Igo
|
|
28
88
|
if tag
|
29
89
|
s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
|
30
90
|
else
|
31
|
-
|
32
|
-
|
91
|
+
cutted = Segment.cut(str)
|
92
|
+
sep = s.is_a?(String) ? s : " "
|
93
|
+
s ? cutted.join(sep) : cutted
|
33
94
|
end
|
34
95
|
when /thulac/
|
35
96
|
require_relative 'thulac'
|
@@ -37,6 +98,15 @@ module Igo
|
|
37
98
|
end
|
38
99
|
end
|
39
100
|
|
101
|
+
# ### Tagging words :: 标注词类
|
102
|
+
#
|
103
|
+
# 后续可能添加 `by` 关键词指定函数。
|
104
|
+
#
|
105
|
+
# z.tag "全世界的无产者,联合起来!"
|
106
|
+
# #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
|
107
|
+
# z.tag "全世界的无产者,联合起来!", s: true
|
108
|
+
# #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
109
|
+
#
|
40
110
|
def tag str, s: false, by: 0
|
41
111
|
case by
|
42
112
|
when /thu/
|
@@ -47,6 +117,10 @@ module Igo
|
|
47
117
|
end
|
48
118
|
end
|
49
119
|
|
120
|
+
# ### 词频统计
|
121
|
+
#
|
122
|
+
# z.freq "全世界的无产者,联合起来!", 5
|
123
|
+
# #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
|
50
124
|
def termfreq string, num
|
51
125
|
Keyword.extract string, num
|
52
126
|
end
|
data/lib/igo.rb
CHANGED
@@ -5,6 +5,20 @@ require 'jieba_rb'
|
|
5
5
|
require_relative 'igo/zh'
|
6
6
|
require_relative 'igo/ja'
|
7
7
|
|
8
|
+
|
9
|
+
#
|
10
|
+
# __AUTHOR__: *saisui* saisui.github.io
|
11
|
+
#
|
12
|
+
# ja.cut "あー、合成は結合法則を満たすんでしたね"
|
13
|
+
# #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
|
14
|
+
#
|
15
|
+
# zh.pinyin "床前明月光,疑是地上霜,好了", s: 1
|
16
|
+
# #=> chuang2 qian2 ming2 yue4 guang1, yi2 shi4 di4 shang4 shuang1, hao3 le5.
|
17
|
+
#
|
18
|
+
# zh.tag "全世界的无产者,联合起来!", s: true
|
19
|
+
# #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
20
|
+
#
|
21
|
+
|
8
22
|
module Igo
|
9
23
|
class Error < StandardError; end
|
10
24
|
# Your code goes here...
|