igo 0.1.4.2 → 0.1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 676a3bfd6e2aa4c63963d67ec65aa18364b1c3b93e4478721453a9ec0cbe37f3
4
- data.tar.gz: b925f5b3fd74c3136cde15388451c8dcafcbd8f8d0bb458c414a01f142bbf124
3
+ metadata.gz: f3bb523eb2fb2de5a99f48c655e4b100cbd37cb21a4052c69ae7f5035415ba72
4
+ data.tar.gz: 5abec0cf2b57f2994a16b0e158ac5d5a36951c50c2bbeec176134aed529c984b
5
5
  SHA512:
6
- metadata.gz: 9cd700bdcd72406257e00a17a87d133b3d6ddb2aa216afc3a9289cf8b013e59b82d4d47a034974d77b22b1c8210ebce18d143886c0934eb0bfd022088873bcd7
7
- data.tar.gz: 25740b6c8ad3ad5e562bcb3c2d1925d9835ae0c0b6cca5d6901051f3dc89081d1963ba950bf594aa4a9ef680200fac7b98a7b4baeccc68f27f230e77d2394640
6
+ metadata.gz: 731af0cf150373bc8af9dc2a6577102d3ac6bffac7f90d4960da8b5af2fbb7f1f8f4b70c1093accc2327561cf1939655a286fdb16e717be5705f1b012e73ea74
7
+ data.tar.gz: 6e11f5dbc4ef39ded02b50e26e16cb836a7724f6b514987b086650edbbdbdf0032ee4a8cd5e802783f67d5c51f589806f58b32505c8cf5c73804b7b843b1824f
data/README.en.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
40
40
 
41
41
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
42
+ #=> "あー合成 結合法則満たす でした "
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/README.ja.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -35,10 +35,10 @@ require 'igo/ja'
35
35
  j = Igo::Ja
36
36
 
37
37
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
38
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
38
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
39
39
 
40
40
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
41
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
41
+ #=> "あー合成 結合法則満たす でした "
42
42
 
43
43
  ```
44
44
 
@@ -60,7 +60,7 @@ z = Igo::Zh
60
60
 
61
61
  ```ruby
62
62
  z.pinyin "全世界的无产者,联合起来!"
63
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
63
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
64
  z.pinyin "全世界的无产者,联合起来!", s: 1
65
65
  #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
66
66
  ```
data/README.md CHANGED
@@ -6,7 +6,7 @@ Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
8
  ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
9
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
10
10
 
11
11
  zh.tag "全世界的无产者,联合起来!", s: true
12
12
  #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
40
40
 
41
- cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
41
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
42
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/README.zh.md CHANGED
@@ -5,11 +5,11 @@
5
5
  Support `zh-CN`, `ja-JP`
6
6
 
7
7
  ```ruby
8
- ja.cut "あー、合成は結合法則を満たすんでしたね"
9
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
8
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
10
10
 
11
- zh.tag "全世界的无产者,联合起来!", s: true
12
- #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
11
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
12
+ #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
13
13
 
14
14
  ```
15
15
 
@@ -36,10 +36,10 @@ require 'igo/ja'
36
36
  j = Igo::Ja
37
37
 
38
38
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
- #=> ["では", "、", "圏論", "", "", "を", "しましょ", "", "", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
39
+ #=> ["あー", "、", "合成", "", "結合法則", "を", "満たす", "", "でした", ""]
40
40
 
41
41
  cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
- #=> "では圏論 しましょ N この 前 は 、圏 について 紹介しました"
42
+ #=> "あー合成 結合法則満たす でした "
43
43
 
44
44
  ```
45
45
 
@@ -61,7 +61,7 @@ z = Igo::Zh
61
61
 
62
62
  ```ruby
63
63
  z.pinyin "全世界的无产者,联合起来!"
64
- #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
64
+ #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
65
  z.pinyin "全世界的无产者,联合起来!", s: 1
66
66
  #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
67
  ```
data/lib/igo/ja.rb CHANGED
@@ -3,16 +3,46 @@ require 'open-uri'
3
3
  require 'uri'
4
4
 
5
5
  module Igo
6
+
7
+ # ## 使い方 :: Usage / Ja
8
+ #
9
+ # require 'igo'
10
+ # require 'igo/ja'
11
+ #
12
+ # j = Igo::Ja
13
+ #
14
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
15
+ # #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
16
+ #
17
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
18
+ # #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
19
+ #
20
+ # 下ノ関数は、暫く未完成です、ごめんね:
21
+ #
22
+ # `j.romaji`, `j.kana`, `j.tag`。
23
+ #
6
24
  module Ja
7
25
 
8
26
  SEARCH_URL = "https://jisho.org/search/"
9
27
 
10
28
  class << self
29
+ # ## 使い方 :: Usage / Ja
30
+ #
31
+ # j = Igo::Ja
32
+ #
33
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
34
+ # #=> ["あー", "、", "合成", "は", "結合法則", "を", "満たす", "ん", "でした", "ね"]
35
+ #
36
+ # cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: "/"
37
+ # #=> "あー/、/合成/は/結合法則/を/満たす/ん/でした/ね"
38
+ #
11
39
  def cut str, s: false
12
40
  str = URI.encode_www_form_component(str)
13
41
  doc = Nokogiri::HTML(URI.open(SEARCH_URL + str).read)
14
42
  cutted = doc.css(".japanese_word__text_wrapper").map{_1.text.strip}
15
- s ? cutted.join(" ") : cutted
43
+ # s ? cutted.join(s) : cutted
44
+ sep = s.is_a?(String) ? s : " "
45
+ s ? cutted.join(sep) : cutted
16
46
  end
17
47
  # def romaji str
18
48
  # end
@@ -21,8 +51,10 @@ module Igo
21
51
  # end
22
52
  end
23
53
 
54
+ # TODO: tag word function
55
+ #
24
56
  def tag str, s: false, ns: 0
25
-
57
+ # TODO
26
58
  end
27
59
 
28
60
  end
data/lib/igo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Igo
4
- VERSION = "0.1.4.2"
4
+ VERSION = "0.1.4.4"
5
5
  end
data/lib/igo/zh.rb CHANGED
@@ -1,26 +1,77 @@
1
1
  require 'ruby_pinyin'
2
2
 
3
+
4
+
3
5
  module Igo
6
+ # # 用法 :: Usage / Zh
7
+
8
+ # Lack __Trad-Zh__ :: 暂不支持「正體中文」
9
+ #
10
+ # ```ruby
11
+ # require 'igo'
12
+ # require 'igo/zh'
13
+ # z = Igo::Zh
14
+ # ```
15
+ #
16
+ # ### Pinyin :: 拼音
17
+ #
18
+ # ```ruby
19
+ # z.pinyin "全世界的无产者,联合起来!"
20
+ # #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
21
+ # z.pinyin "全世界的无产者,联合起来!", s: 1
22
+ # #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
23
+ # ```
24
+ #
25
+ # ### Separate words :: 分词
26
+ #
27
+ # ```ruby
28
+ # z.cut "全世界的无产者,联合起来!"
29
+ # #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
30
+ # z.cut "全世界的无产者,联合起来!", s: true
31
+ # #=> "全世界 的 无产者 , 联合 起来 !"
32
+ # ```
33
+ # ### Tagging words :: 标注词类
34
+ #
35
+ # 后续可能添加 `by` 关键词指定函数。
36
+ #
37
+ # ```ruby
38
+ # z.tag "全世界的无产者,联合起来!"
39
+ # #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], ["!", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
40
+ # z.tag "全世界的无产者,联合起来!", s: true
41
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
42
+ # ```
43
+ # ### 词频统计
44
+ #
45
+ # ```ruby
46
+ # z.freq "全世界的无产者,联合起来!", 5
47
+ # #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
48
+ #
49
+ # # alias 拼音, 分词, 标记, 词频
50
+ #
51
+ # ```
4
52
  module Zh
5
53
 
6
54
  Tagging = JiebaRb::Tagging.new
7
55
  Segment = JiebaRb::Segment.new mode: :mix, user_dict: "ext/cppjieba/dict/user.dict.utf8"
8
56
  Keyword = JiebaRb::Keyword.new
9
57
 
10
- # @params chinese: String
11
- # @returns pinyin_numeraltone: String
12
58
  class << self
13
- def pinyin str, s: false
14
- res = str.each_char.map{PinYin.of_string(_1, :ascii)}.flatten
15
- s ? res.join(" ") : res
59
+ # ### Pinyin :: 拼音
60
+ #
61
+ # z.pinyin "全世界的无产者,联合起来!"
62
+ # #=> ["quan2", "shi4", "jie4", "de5", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
63
+ # z.pinyin "全世界的无产者,联合起来!", s: " "
64
+ # #=> "quan2 shi4 jie4 de5 wu2 chan3 zhe3 lian2 he2 qi3 lai2"
65
+ #
66
+ # @params chinese: String
67
+ # @returns pinyin_numeraltone: String
68
+ def pinyin str, s: false, ommit: " "
69
+ # tone 1, 2, 3, 4, 5
70
+ res = str.split(/(?=[^A-Z\d])|(?<=[^A-Z\d])/i).map{|ch| ch.ord > 0x4e00 ? (py = PinYin.of_string(ch, :ascii)[0]; py =~ /\d/ ? py : (py+"5")) : ch }.flatten.select{_1 != s and _1 != ommit}
71
+ sep = s.is_a?(String) ? s : " "
72
+ s ? res.join(sep) : res
16
73
 
17
74
  end
18
- def pinyin_tonal_s str
19
- PinYin.sentence(token, :ascii)
20
- end
21
- def pinyin_tonal_a str
22
- PinYin.of_string(token, :ascii)
23
- end
24
75
 
25
76
  def cut str, s: false, tag: false, by: "jieba"
26
77
  case by
@@ -28,24 +79,39 @@ module Igo
28
79
  if tag
29
80
  s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
30
81
  else
31
- res = Segment.cut(str)
32
- s ? res.join(" ") : res
82
+ cutted = Segment.cut(str)
83
+ sep = s.is_a?(String) ? s : " "
84
+ s ? cutted.join(sep) : cutted
33
85
  end
34
86
  when /thulac/
87
+ require_relative 'thulac'
35
88
  Thulac.cut(str, text: s)
36
89
  end
37
90
  end
38
91
 
92
+ # ### Tagging words :: 标注词类
93
+ #
94
+ # 后续可能添加 `by` 关键词指定函数。
95
+ #
96
+ # z.tag "全世界的无产者,联合起来!"
97
+ # #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], ["!", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
98
+ # z.tag "全世界的无产者,联合起来!", s: true
99
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
100
+ #
39
101
  def tag str, s: false, by: 0
40
102
  case by
41
103
  when /thu/
42
- require './thulac'
104
+ require_relative 'thulac'
43
105
  Thulac.cut str, text: s
44
106
  else
45
107
  s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
46
108
  end
47
109
  end
48
110
 
111
+ # ### 词频统计
112
+ #
113
+ # z.freq "全世界的无产者,联合起来!", 5
114
+ # #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
49
115
  def termfreq string, num
50
116
  Keyword.extract string, num
51
117
  end
data/lib/igo.rb CHANGED
@@ -2,8 +2,22 @@
2
2
 
3
3
  require_relative "igo/version"
4
4
  require 'jieba_rb'
5
- require 'igo/zh'
6
- require 'igo/ja'
5
+ require_relative 'igo/zh'
6
+ require_relative 'igo/ja'
7
+
8
+
9
+ #
10
+ # __AUTHOR__: *saisui* saisui.github.io
11
+ #
12
+ # ja.cut "あー、合成は結合法則を満たすんでしたね"
13
+ # #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
14
+ #
15
+ # zh.pinyin "床前明月光,疑是地上霜,好了", s: 1
16
+ # #=> chuang2 qian2 ming2 yue4 guang1, yi2 shi4 di4 shang4 shuang1, hao3 le5.
17
+ #
18
+ # zh.tag "全世界的无产者,联合起来!", s: true
19
+ # #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
20
+ #
7
21
 
8
22
  module Igo
9
23
  class Error < StandardError; end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: igo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4.2
4
+ version: 0.1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - saisui