igo 0.1.4.3 → 0.1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb6aa82cbe2554d14aac09e8d583297de352c4e005cf15f27b78ca50fb354f95
4
- data.tar.gz: 8dcf65e8294c2daf4bb0ec830ecccc42cda0cc1687d7be71f4c15494148f4fc5
3
+ metadata.gz: f8d29ccb85327a2b7ed069ef514f2f93601219bfea47aa793560f7275a9d24cc
4
+ data.tar.gz: 1ab3272f76a86437873d4272148271972ec3e9d28fa672bd47d96a342af127b1
5
5
  SHA512:
6
- metadata.gz: 18467db48f62b0b71a020a3f1874997a9dbfeed0ab0100aa18d8b0e622fd14cfddee7ec7d9725e4bbe452c47769af009afe2846f8ceeee8d3d59207081eb0468
7
- data.tar.gz: 64f3908baa695f3538b66ad98a9783656a802fbbc187a4cf23635bcf90833dbbd05c88e5c3f0795ae33caf7154db9a790fd28b309321f48e32596cbbffb07eec
6
+ metadata.gz: abad6bdf5a657706408f6f2303624523eb4cdfefe31d010a69d09a558bce145a6bef92679266576c9a48a40750a332c3f814738da4b9fe31690a23e8e5436bc2
7
+ data.tar.gz: baca5c388703bd4788df84a6087e701d630e25df50cb3dc09c5b753304f66bd3bafd7ed0769b695ef4466a48b8bb1c8aaa5129cd24e1518cc1d978e911579a47
data/README.en.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
40
40
 
41
41
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
42
+ #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/README.ja.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -35,10 +35,10 @@ require 'igo/ja'
35
35
  j = Igo::Ja
36
36
 
37
37
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
38
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
38
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
39
39
 
40
40
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
41
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
41
+ #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
42
42
 
43
43
  ```
44
44
 
@@ -60,7 +60,7 @@ z = Igo::Zh
60
60
 
61
61
  ```ruby
62
62
  z.pinyin "全世界的无产者,联合起来!"
63
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
63
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
64
  z.pinyin "全世界的无产者,联合起来!", s: 1
65
65
  #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
66
66
  ```
data/README.md CHANGED
@@ -6,7 +6,7 @@ Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
8
  ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
9
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
10
10
 
11
11
  zh.tag "全世界的无产者,联合起来!", s: true
12
12
  #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
40
40
 
41
- cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
41
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
42
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/README.zh.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
40
40
 
41
41
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
42
+ #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/lib/igo/ja.rb CHANGED
@@ -3,16 +3,46 @@ require 'open-uri'
3
3
  require 'uri'
4
4
 
5
5
  module Igo
6
+
7
+ # ## 使い方 :: Usage / Ja
8
+ #
9
+ # require 'igo'
10
+ # require 'igo/ja'
11
+ #
12
+ # j = Igo::Ja
13
+ #
14
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
15
+ # #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
16
+ #
17
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
18
+ # #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
19
+ #
20
+ # 下ノ関数は、暫く未完成です、ごめんね:
21
+ #
22
+ # `j.romaji`, `j.kana`, `j.tag`。
23
+ #
6
24
  module Ja
7
25
 
8
26
  SEARCH_URL = "https://jisho.org/search/"
9
27
 
10
28
  class << self
29
+ # ## 使い方 :: Usage / Ja
30
+ #
31
+ # j = Igo::Ja
32
+ #
33
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
34
+ # #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
35
+ #
36
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
37
+ # #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
38
+ #
11
39
  def cut str, s: false
12
40
  str = URI.encode_www_form_component(str)
13
41
  doc = Nokogiri::HTML(URI.open(SEARCH_URL + str).read)
14
42
  cutted = doc.css(".japanese_word__text_wrapper").map{_1.text.strip}
15
- s ? cutted.join(" ") : cutted
43
+ # s ? cutted.join(s) : cutted
44
+ sep = s.is_a?(String) ? s : " "
45
+ s ? cutted.join(sep) : cutted
16
46
  end
17
47
  # def romaji str
18
48
  # end
@@ -21,8 +51,10 @@ module Igo
21
51
  # end
22
52
  end
23
53
 
54
+ # TODO: tag word function
55
+ #
24
56
  def tag str, s: false, ns: 0
25
-
57
+ # TODO
26
58
  end
27
59
 
28
60
  end
data/lib/igo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Igo
4
- VERSION = "0.1.4.3"
4
+ VERSION = "0.1.4.5"
5
5
  end
data/lib/igo/zh.rb CHANGED
@@ -1,25 +1,85 @@
1
1
  require 'ruby_pinyin'
2
2
 
3
+
4
+
3
5
  module Igo
6
+ # # 用法 :: Usage / Zh
7
+
8
+ # Lack __Trad-Zh__ :: 暂不支持「正體中文」
9
+ #
10
+ # ```ruby
11
+ # require 'igo'
12
+ # require 'igo/zh'
13
+ # z = Igo::Zh
14
+ # ```
15
+ #
16
+ # ### Pinyin :: 拼音
17
+ #
18
+ # ```ruby
19
+ # z.pinyin "全世界的无产者,联合起来!"
20
+ # #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
21
+ # z.pinyin "全世界的无产者,联合起来!", s: 1
22
+ # #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
23
+ # ```
24
+ #
25
+ # ### Separate words :: 分词
26
+ #
27
+ # ```ruby
28
+ # z.cut "全世界的无产者,联合起来!"
29
+ # #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
30
+ # z.cut "全世界的无产者,联合起来!", s: true
31
+ # #=> "全世界 的 无产者 , 联合 起来 !"
32
+ # ```
33
+ # ### Tagging words :: 标注词类
34
+ #
35
+ # 后续可能添加 `by` 关键词指定函数。
36
+ #
37
+ # ```ruby
38
+ # z.tag "全世界的无产者,联合起来!"
39
+ # #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
40
+ # z.tag "全世界的无产者,联合起来!", s: true
41
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
42
+ # ```
43
+ # ### 词频统计
44
+ #
45
+ # ```ruby
46
+ # z.freq "全世界的无产者,联合起来!", 5
47
+ # #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
48
+ #
49
+ # # alias 拼音, 分词, 标记, 词频
50
+ #
51
+ # ```
4
52
  module Zh
5
53
 
6
54
  Tagging = JiebaRb::Tagging.new
7
55
  Segment = JiebaRb::Segment.new mode: :mix, user_dict: "ext/cppjieba/dict/user.dict.utf8"
8
56
  Keyword = JiebaRb::Keyword.new
9
57
 
10
- # @params chinese: String
11
- # @returns pinyin_numeraltone: String
12
58
  class << self
13
- def pinyin str, s: false
14
- res = str.each_char.map{PinYin.of_string(_1, :ascii)}.flatten
15
- s ? res.join(" ") : res
59
+ # ### Pinyin :: 拼音
60
+ #
61
+ # z.pinyin "全世界的无产者,联合起来!"
62
+ # #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
63
+ # z.pinyin "全世界的无产者,联合起来!", s: " "
64
+ # #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
65
+ #
66
+ # @params chinese: String
67
+ # @returns pinyin_numeraltone: String
68
+ def pinyin str, s: false, ommit: " "
69
+ # tone 1, 2, 3, 4, 5
70
+ hanzi_ords = [13312..19903, 19968..40959, 63744..64255, 131072..173791, 173824..177983, 194560..195103]
71
+
72
+ res = str.split(/(?=[^A-Z\d])|(?<=[^A-Z\d])/i).map do |ch|
73
+ if hanzi_ords.map{|range| range.include? ch.ord}.any?
74
+ py = PinYin.sentence(ch, :ascii)
75
+ py =~ /\d/ ? py : (py+"5")
76
+ else ch
77
+ end
78
+ end.flatten.select{_1 != s and _1 != ommit}
79
+
80
+ sep = s.is_a?(String) ? s : " "
81
+ s ? res.join(sep) : res
16
82
 
17
- end
18
- def pinyin_tonal_s str
19
- PinYin.sentence(token, :ascii)
20
- end
21
- def pinyin_tonal_a str
22
- PinYin.of_string(token, :ascii)
23
83
  end
24
84
 
25
85
  def cut str, s: false, tag: false, by: "jieba"
@@ -28,8 +88,9 @@ module Igo
28
88
  if tag
29
89
  s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
30
90
  else
31
- res = Segment.cut(str)
32
- s ? res.join(" ") : res
91
+ cutted = Segment.cut(str)
92
+ sep = s.is_a?(String) ? s : " "
93
+ s ? cutted.join(sep) : cutted
33
94
  end
34
95
  when /thulac/
35
96
  require_relative 'thulac'
@@ -37,6 +98,15 @@ module Igo
37
98
  end
38
99
  end
39
100
 
101
+ # ### Tagging words :: 标注词类
102
+ #
103
+ # 后续可能添加 `by` 关键词指定函数。
104
+ #
105
+ # z.tag "全世界的无产者,联合起来!"
106
+ # #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
107
+ # z.tag "全世界的无产者,联合起来!", s: true
108
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
109
+ #
40
110
  def tag str, s: false, by: 0
41
111
  case by
42
112
  when /thu/
@@ -47,6 +117,10 @@ module Igo
47
117
  end
48
118
  end
49
119
 
120
+ # ### 词频统计
121
+ #
122
+ # z.freq "全世界的无产者,联合起来!", 5
123
+ # #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
50
124
  def termfreq string, num
51
125
  Keyword.extract string, num
52
126
  end
data/lib/igo.rb CHANGED
@@ -5,6 +5,20 @@ require 'jieba_rb'
5
5
  require_relative 'igo/zh'
6
6
  require_relative 'igo/ja'
7
7
 
8
+
9
+ #
10
+ # __AUTHOR__: *saisui* saisui.github.io
11
+ #
12
+ # ja.cut "あー、合成は結合法則を満たすんでしたね"
13
+ # #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
14
+ #
15
+ # zh.pinyin "床前明月光,疑是地上霜,好了", s: 1
16
+ # #=> chuang2 qian2 ming2 yue4 guang1, yi2 shi4 di4 shang4 shuang1, hao3 le5.
17
+ #
18
+ # zh.tag "全世界的无产者,联合起来!", s: true
19
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
20
+ #
21
+
8
22
  module Igo
9
23
  class Error < StandardError; end
10
24
  # Your code goes here...
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: igo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4.3
4
+ version: 0.1.4.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - saisui