igo 0.1.3 → 0.1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd6b6d4214bdb3825759167d9d17797e0b988b72665cde7ecbd9162d9723deff
4
- data.tar.gz: a1e18a3d1e598b553a2da2e47a1955312d08fb0dc6a56bf4bea35d9eb26defc9
3
+ metadata.gz: e864f1dc86ce2b7164e50d13f42bc10d28b33fa4ae4f271ec912e410d461f697
4
+ data.tar.gz: 6e8ee3d9ac2e39306d2d02b79e9d86291bea4af1559cd7c4faf64bd009d947e3
5
5
  SHA512:
6
- metadata.gz: 4855cc233e4594677d671eb8cfea48ff20fddd51b22a7abda157229652761dc9af4a55b66ea71649a27ade2c9afba3435085d3d78d9ce883c1022030e09c7602
7
- data.tar.gz: 2ba7a4d3ea3749d69aba62f1edbcf057a68a5e39c1db6866d3b178f3ff1676be6f9a494ad7a924b735d8de85dab08391ca208841a452946a6cdca8adbe9679aa
6
+ metadata.gz: ae7bee63027e8de27766e941f99e38b4775be1fc8bf8aaf5ca454d41bf7d0346ea9eb7e9f502f93ce5e0f1f347ab6e0eb9a2f01f8c6f1ce0e7362108a2813997
7
+ data.tar.gz: 97aa666160536500f5910a8d7c072e5652b435d1d9cecf6a5d5b98211ba45e1e4a76f6831d7cd90ed31fb394074f48962507a9475859a23fde355533a0a76330
data/README.en.md ADDED
@@ -0,0 +1,106 @@
1
+ # 言語 :: Igo
2
+
3
+ [🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
4
+
5
+ Support `zh-CN`, `ja-JP`
6
+
7
+ ```ruby
8
+ ja.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
10
+
11
+ zh.tag "全世界的无产者,联合起来!", s: true
12
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
13
+
14
+ ```
15
+
16
+ ## Install :: インストール
17
+
18
+ ```cmd
19
+ gem install igo
20
+ ```
21
+
22
+ To use the __Python__ packages `jisho-api` and `thulac`, you also need to install:
23
+
24
+ ```cmd
25
+ pip install jisho-api
26
+
27
+ pip install thulac
28
+
29
+ ```
30
+
31
+ ## 使い方 :: Usage / Ja
32
+
33
+ ```ruby
34
+ require 'igo'
35
+ require 'igo/ja'
36
+
37
+ j = Igo::Ja
38
+
39
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
40
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
41
+
42
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
43
+ #=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
44
+
45
+ ```
46
+
47
+ 下ノ関数は、暫く未完成です、ごめんね:
48
+
49
+ `j.romaji`, `j.kana`, `j.tag`。
50
+
51
+ ## 用法 :: Usage / Zh
52
+
53
+ Lack __Trad-Zh__ :: 暂不支持「正體中文」
54
+
55
+ ```ruby
56
+ require 'igo'
57
+ require 'igo/zh'
58
+ z = Igo::Zh
59
+ ```
60
+
61
+ ### Pinyin :: 拼音
62
+
63
+ ```ruby
64
+ z.pinyin "全世界的无产者,联合起来!"
65
+ #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
66
+ z.pinyin "全世界的无产者,联合起来!", s: 1
67
+ #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
68
+ ```
69
+
70
+ ### Separate words :: 分词
71
+
72
+ ```ruby
73
+ z.cut "全世界的无产者,联合起来!"
74
+ #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
75
+ z.cut "全世界的无产者,联合起来!", s: true
76
+ #=> "全世界 的 无产者 , 联合 起来 !"
77
+ ```
78
+ ### Tagging words :: 标注词类
79
+
80
+ 后续可能添加 `by` 关键词指定函数。
81
+
82
+ ```ruby
83
+ z.tag "全世界的无产者,联合起来!"
84
+ #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
85
+ z.tag "全世界的无产者,联合起来!", s: true
86
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
87
+ ```
88
+ ### 词频统计
89
+
90
+ ```ruby
91
+ z.freq "全世界的无产者,联合起来!", 5
92
+ #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
93
+
94
+ # alias 拼音, 分词, 标记, 词频
95
+
96
+ ```
97
+
98
+ ## LICENSE :: 协议
99
+
100
+ __MPL 2.0__
101
+
102
+ ## Requires
103
+
104
+ - `jieba-rb`
105
+ - `nokogiri`
106
+ - `open-uri`
data/README.ja.md ADDED
@@ -0,0 +1,105 @@
1
+ # 言語 :: Igo
2
+
3
+ [🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
4
+
5
+ Support `zh-CN`, `ja-JP`
6
+
7
+ ```ruby
8
+ ja.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
10
+
11
+ zh.tag "全世界的无产者,联合起来!", s: true
12
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
13
+
14
+ ```
15
+
16
+ ## Install :: インストール
17
+
18
+ ```cmd
19
+ gem install igo
20
+ ```
21
+
22
+ __Python__ の `jisho-api`・`thulac` を使いたい場合は、以下のパッケージを pip でインストールする必要があります:
23
+
24
+ ```cmd
25
+ pip install jisho-api
26
+
27
+ pip install thulac
28
+ ```
29
+
30
+ ## 使い方 :: Usage / Ja
31
+
32
+ ```ruby
33
+ require 'igo'
34
+ require 'igo/ja'
35
+
36
+ j = Igo::Ja
37
+
38
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
39
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
40
+
41
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
42
+ #=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
43
+
44
+ ```
45
+
46
+ 下ノ関数は、暫く未完成です、ごめんね:
47
+
48
+ `j.romaji`, `j.kana`, `j.tag`。
49
+
50
+ ## 用法 :: Usage / Zh
51
+
52
+ Lack __Trad-Zh__ :: 暂不支持「正體中文」
53
+
54
+ ```ruby
55
+ require 'igo'
56
+ require 'igo/zh'
57
+ z = Igo::Zh
58
+ ```
59
+
60
+ ### Pinyin :: 拼音
61
+
62
+ ```ruby
63
+ z.pinyin "全世界的无产者,联合起来!"
64
+ #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
65
+ z.pinyin "全世界的无产者,联合起来!", s: 1
66
+ #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
67
+ ```
68
+
69
+ ### Separate words :: 分词
70
+
71
+ ```ruby
72
+ z.cut "全世界的无产者,联合起来!"
73
+ #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
74
+ z.cut "全世界的无产者,联合起来!", s: true
75
+ #=> "全世界 的 无产者 , 联合 起来 !"
76
+ ```
77
+ ### Tagging words :: 标注词类
78
+
79
+ 后续可能添加 `by` 关键词指定函数。
80
+
81
+ ```ruby
82
+ z.tag "全世界的无产者,联合起来!"
83
+ #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
84
+ z.tag "全世界的无产者,联合起来!", s: true
85
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
86
+ ```
87
+ ### 词频统计
88
+
89
+ ```ruby
90
+ z.freq "全世界的无产者,联合起来!", 5
91
+ #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
92
+
93
+ # alias 拼音, 分词, 标记, 词频
94
+
95
+ ```
96
+
97
+ ## LICENSE :: 协议
98
+
99
+ __MPL 2.0__
100
+
101
+ ## Requires
102
+
103
+ - `jieba-rb`
104
+ - `nokogiri`
105
+ - `open-uri`
data/README.md CHANGED
@@ -1,12 +1,33 @@
1
1
  # 言語 :: Igo
2
2
 
3
+ [🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
4
+
3
5
  Support `zh-CN`, `ja-JP`
4
6
 
5
- install
7
+ ```ruby
8
+ ja.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
10
+
11
+ zh.tag "全世界的无产者,联合起来!", s: true
12
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
13
+
14
+ ```
15
+
16
+ ## Install :: インストール
17
+
6
18
  ```cmd
7
19
  gem install igo
8
20
  ```
9
21
 
22
+ To use the __Python__ packages `jisho-api` and `thulac`, you also need to install:
23
+
24
+ ```cmd
25
+ pip install jisho-api
26
+
27
+ pip install thulac
28
+
29
+ ```
30
+
10
31
  ## 使い方 :: Usage / Ja
11
32
 
12
33
  ```ruby
data/README.zh.md ADDED
@@ -0,0 +1,106 @@
1
+ # 言語 :: Igo
2
+
3
+ [🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
4
+
5
+ Support `zh-CN`, `ja-JP`
6
+
7
+ ```ruby
8
+ ja.cut "あー、合成は結合法則を満たすんでしたね"
9
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
10
+
11
+ zh.tag "全世界的无产者,联合起来!", s: true
12
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
13
+
14
+ ```
15
+
16
+ ## Install :: 安装
17
+
18
+ ```cmd
19
+ gem install igo
20
+ ```
21
+
22
+ 要使用 __Python__ 库的 `jisho_api`, `Thulac`,你得先安装...:
23
+
24
+ ```cmd
25
+ pip install jisho-api
26
+
27
+ pip install thulac
28
+
29
+ ```
30
+
31
+ ## 使い方 :: Usage / Ja
32
+
33
+ ```ruby
34
+ require 'igo'
35
+ require 'igo/ja'
36
+
37
+ j = Igo::Ja
38
+
39
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
40
+ #=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
41
+
42
+ cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
43
+ #=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
44
+
45
+ ```
46
+
47
+ 下ノ関数は、暫く未完成です、ごめんね:
48
+
49
+ `j.romaji`, `j.kana`, `j.tag`。
50
+
51
+ ## 用法 :: Usage / Zh
52
+
53
+ Lack __Trad-Zh__ :: 暂不支持「正體中文」
54
+
55
+ ```ruby
56
+ require 'igo'
57
+ require 'igo/zh'
58
+ z = Igo::Zh
59
+ ```
60
+
61
+ ### Pinyin :: 拼音
62
+
63
+ ```ruby
64
+ z.pinyin "全世界的无产者,联合起来!"
65
+ #=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
66
+ z.pinyin "全世界的无产者,联合起来!", s: 1
67
+ #=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
68
+ ```
69
+
70
+ ### Separate words :: 分词
71
+
72
+ ```ruby
73
+ z.cut "全世界的无产者,联合起来!"
74
+ #=> ["全世界", "的", "无产者", ",", "联合", "起来", "!"]
75
+ z.cut "全世界的无产者,联合起来!", s: true
76
+ #=> "全世界 的 无产者 , 联合 起来 !"
77
+ ```
78
+ ### Tagging words :: 标注词类
79
+
80
+ 后续可能添加 `by` 关键词指定函数。
81
+
82
+ ```ruby
83
+ z.tag "全世界的无产者,联合起来!"
84
+ #=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], [",", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
85
+ z.tag "全世界的无产者,联合起来!", s: true
86
+ #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
87
+ ```
88
+ ### 词频统计
89
+
90
+ ```ruby
91
+ z.freq "全世界的无产者,联合起来!", 5
92
+ #=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
93
+
94
+ # alias 拼音, 分词, 标记, 词频
95
+
96
+ ```
97
+
98
+ ## LICENSE :: 协议
99
+
100
+ __MPL 2.0__
101
+
102
+ ## Requires
103
+
104
+ - `jieba-rb`
105
+ - `nokogiri`
106
+ - `open-uri`
data/lib/igo/ja.rb CHANGED
@@ -2,9 +2,10 @@ require 'jisho_api'
2
2
  require 'nokogiri'
3
3
  require 'open-uri'
4
4
  require 'uri'
5
+ require 'nagisa'
5
6
 
6
7
  module Igo
7
- module Jisho
8
+ module Ja
8
9
 
9
10
  SEARCH_URL = "https://jisho.org/search/"
10
11
 
@@ -21,9 +22,13 @@ module Igo
21
22
  # def kana str
22
23
  # end
23
24
  end
25
+
26
+ def tag str, s: false, ns: 0
27
+
28
+ end
29
+
24
30
  end
25
31
 
26
- Ja = Jisho
27
32
 
28
33
  class << self
29
34
  def 日本語
data/lib/igo/thulac.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'pycall/import'
2
+
3
+ module Thulac
4
+ PyCall.exec("import thulac")
5
+ PyCall.exec("thulac1 = thulac.thulac()")
6
+ end
7
+ class << Thulac
8
+ def cut str, text: false
9
+ text = text ? "True" : "False"
10
+ PyCall.eval(<<-EOF
11
+ thulac1.cut(#{str.inspect}, text=#{text})
12
+ EOF
13
+ )
14
+ end
15
+ end
data/lib/igo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Igo
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.4.1"
5
5
  end
data/lib/igo/zh.rb CHANGED
@@ -22,17 +22,28 @@ module Igo
22
22
  PinYin.of_string(token, :ascii)
23
23
  end
24
24
 
25
- def cut str, s: false, tag: false
26
- if tag
27
- s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
28
- else
29
- res = Segment.cut(str)
30
- s ? res.join(" ") : res
25
+ def cut str, s: false, tag: false, by: "jieba"
26
+ case by
27
+ when /jieba/
28
+ if tag
29
+ s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
30
+ else
31
+ res = Segment.cut(str)
32
+ s ? res.join(" ") : res
33
+ end
34
+ when /thulac/
35
+ Thulac.cut(str, text: s)
31
36
  end
32
37
  end
33
38
 
34
- def tag str, s: false
35
- s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
39
+ def tag str, s: false, by: 0
40
+ case by
41
+ when /thu/
42
+ require './thulac'
43
+ Thulac.cut str, text: s
44
+ else
45
+ s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
46
+ end
36
47
  end
37
48
 
38
49
  def termfreq string, num
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: igo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - saisui
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-23 00:00:00.000000000 Z
11
+ date: 2023-12-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: It is easy.
14
14
  email:
@@ -22,11 +22,15 @@ files:
22
22
  - CHANGELOG.md
23
23
  - CODE_OF_CONDUCT.md
24
24
  - LICENSE.txt
25
+ - README.en.md
26
+ - README.ja.md
25
27
  - README.md
28
+ - README.zh.md
26
29
  - Rakefile
27
30
  - igo.rb
28
31
  - lib/igo.rb
29
32
  - lib/igo/ja.rb
33
+ - lib/igo/thulac.rb
30
34
  - lib/igo/version.rb
31
35
  - lib/igo/zh.rb
32
36
  - sig/igo.rbs