igo 0.1.3 → 0.1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.en.md +105 -0
- data/README.ja.md +104 -0
- data/README.md +21 -1
- data/README.zh.md +105 -0
- data/lib/igo/ja.rb +6 -3
- data/lib/igo/thulac.rb +15 -0
- data/lib/igo/version.rb +1 -1
- data/lib/igo/zh.rb +19 -8
- data/lib/igo.rb +0 -1
- metadata +6 -4
- data/igo.rb +0 -12
- data/sig/igo.rbs +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 676a3bfd6e2aa4c63963d67ec65aa18364b1c3b93e4478721453a9ec0cbe37f3
|
4
|
+
data.tar.gz: b925f5b3fd74c3136cde15388451c8dcafcbd8f8d0bb458c414a01f142bbf124
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cd700bdcd72406257e00a17a87d133b3d6ddb2aa216afc3a9289cf8b013e59b82d4d47a034974d77b22b1c8210ebce18d143886c0934eb0bfd022088873bcd7
|
7
|
+
data.tar.gz: 25740b6c8ad3ad5e562bcb3c2d1925d9835ae0c0b6cca5d6901051f3dc89081d1963ba950bf594aa4a9ef680200fac7b98a7b4baeccc68f27f230e77d2394640
|
data/README.en.md
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# 言語 :: Igo
|
2
|
+
|
3
|
+
[🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
|
4
|
+
|
5
|
+
Support `zh-CN`, `ja-JP`
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
ja.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
10
|
+
|
11
|
+
zh.tag "全世界的无产者,联合起来!", s: true
|
12
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
13
|
+
|
14
|
+
```
|
15
|
+
|
16
|
+
## Install :: インストール
|
17
|
+
|
18
|
+
```cmd
|
19
|
+
gem install igo
|
20
|
+
```
|
21
|
+
|
22
|
+
To use `Thulac` (a __Python__ library), you need to install it first:
|
23
|
+
|
24
|
+
```cmd
|
25
|
+
|
26
|
+
pip install thulac
|
27
|
+
|
28
|
+
```
|
29
|
+
|
30
|
+
## 使い方 :: Usage / Ja
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'igo'
|
34
|
+
require 'igo/ja'
|
35
|
+
|
36
|
+
j = Igo::Ja
|
37
|
+
|
38
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
39
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
40
|
+
|
41
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
42
|
+
#=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
|
43
|
+
|
44
|
+
```
|
45
|
+
|
46
|
+
下ノ関数は、暫く未完成です、ごめんね:
|
47
|
+
|
48
|
+
`j.romaji`, `j.kana`, `j.tag`。
|
49
|
+
|
50
|
+
## 用法 :: Usage / Zh
|
51
|
+
|
52
|
+
Lack __Trad-Zh__ :: 暂不支持「正體中文」
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
require 'igo'
|
56
|
+
require 'igo/zh'
|
57
|
+
z = Igo::Zh
|
58
|
+
```
|
59
|
+
|
60
|
+
### Pinyin :: 拼音
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
z.pinyin "全世界的无产者,联合起来!"
|
64
|
+
#=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
65
|
+
z.pinyin "全世界的无产者,联合起来!", s: 1
|
66
|
+
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
67
|
+
```
|
68
|
+
|
69
|
+
### Separate words :: 分词
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
z.cut "全世界的无产者,联合起来!"
|
73
|
+
#=> ["全世界", "的", "無產階級", ",", "聯合", "起來", "!"]
|
74
|
+
z.cut "全世界的无产者,联合起来!", s: true
|
75
|
+
#=> "全世界 的 无产者 ! 联合 起来 !"
|
76
|
+
```
|
77
|
+
### Tagging words :: 标注词类
|
78
|
+
|
79
|
+
后续可能添加 `by` 关键词指定函数。
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
z.tag "全世界的无产者,联合起来!"
|
83
|
+
#=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], ["!", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
|
84
|
+
z.tag "全世界的无产者,联合起来!", s: true
|
85
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
86
|
+
```
|
87
|
+
### 词频统计
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
z.freq "全世界的无产者,联合起来!", 5
|
91
|
+
#=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
|
92
|
+
|
93
|
+
# alias 拼音, 分词, 标记, 词频
|
94
|
+
|
95
|
+
```
|
96
|
+
|
97
|
+
## LICENSE :: 协议
|
98
|
+
|
99
|
+
__MPL 2.0__
|
100
|
+
|
101
|
+
## Requires
|
102
|
+
|
103
|
+
- `jieba-rb`
|
104
|
+
- `nokogiri`
|
105
|
+
- `open-uri`
|
data/README.ja.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# 言語 :: Igo
|
2
|
+
|
3
|
+
[🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
|
4
|
+
|
5
|
+
Support `zh-CN`, `ja-JP`
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
ja.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
10
|
+
|
11
|
+
zh.tag "全世界的无产者,联合起来!", s: true
|
12
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
13
|
+
|
14
|
+
```
|
15
|
+
|
16
|
+
## Install :: インストール
|
17
|
+
|
18
|
+
```cmd
|
19
|
+
gem install igo
|
20
|
+
```
|
21
|
+
|
22
|
+
__Python__ の `Thulac` を使いたい場合は、以下の pip パッケージをインストールする必要があります:
|
23
|
+
|
24
|
+
```cmd
|
25
|
+
|
26
|
+
pip install thulac
|
27
|
+
```
|
28
|
+
|
29
|
+
## 使い方 :: Usage / Ja
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
require 'igo'
|
33
|
+
require 'igo/ja'
|
34
|
+
|
35
|
+
j = Igo::Ja
|
36
|
+
|
37
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
38
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
39
|
+
|
40
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
41
|
+
#=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
|
42
|
+
|
43
|
+
```
|
44
|
+
|
45
|
+
下ノ関数は、暫く未完成です、ごめんね:
|
46
|
+
|
47
|
+
`j.romaji`, `j.kana`, `j.tag`。
|
48
|
+
|
49
|
+
## 用法 :: Usage / Zh
|
50
|
+
|
51
|
+
Lack __Trad-Zh__ :: 暂不支持「正體中文」
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
require 'igo'
|
55
|
+
require 'igo/zh'
|
56
|
+
z = Igo::Zh
|
57
|
+
```
|
58
|
+
|
59
|
+
### Pinyin :: 拼音
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
z.pinyin "全世界的无产者,联合起来!"
|
63
|
+
#=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
64
|
+
z.pinyin "全世界的无产者,联合起来!", s: 1
|
65
|
+
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
66
|
+
```
|
67
|
+
|
68
|
+
### Separate words :: 分词
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
z.cut "全世界的无产者,联合起来!"
|
72
|
+
#=> ["全世界", "的", "無產階級", ",", "聯合", "起來", "!"]
|
73
|
+
z.cut "全世界的无产者,联合起来!", s: true
|
74
|
+
#=> "全世界 的 无产者 ! 联合 起来 !"
|
75
|
+
```
|
76
|
+
### Tagging words :: 标注词类
|
77
|
+
|
78
|
+
后续可能添加 `by` 关键词指定函数。
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
z.tag "全世界的无产者,联合起来!"
|
82
|
+
#=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], ["!", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
|
83
|
+
z.tag "全世界的无产者,联合起来!", s: true
|
84
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
85
|
+
```
|
86
|
+
### 词频统计
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
z.freq "全世界的无产者,联合起来!", 5
|
90
|
+
#=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
|
91
|
+
|
92
|
+
# alias 拼音, 分词, 标记, 词频
|
93
|
+
|
94
|
+
```
|
95
|
+
|
96
|
+
## LICENSE :: 协议
|
97
|
+
|
98
|
+
__MPL 2.0__
|
99
|
+
|
100
|
+
## Requires
|
101
|
+
|
102
|
+
- `jieba-rb`
|
103
|
+
- `nokogiri`
|
104
|
+
- `open-uri`
|
data/README.md
CHANGED
@@ -1,12 +1,32 @@
|
|
1
1
|
# 言語 :: Igo
|
2
2
|
|
3
|
+
[🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
|
4
|
+
|
3
5
|
Support `zh-CN`, `ja-JP`
|
4
6
|
|
5
|
-
|
7
|
+
```ruby
|
8
|
+
ja.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
10
|
+
|
11
|
+
zh.tag "全世界的无产者,联合起来!", s: true
|
12
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
13
|
+
|
14
|
+
```
|
15
|
+
|
16
|
+
## Install :: インストール
|
17
|
+
|
6
18
|
```cmd
|
7
19
|
gem install igo
|
8
20
|
```
|
9
21
|
|
22
|
+
To use `Thulac` (a __Python__ library), you need to install it first:
|
23
|
+
|
24
|
+
```cmd
|
25
|
+
|
26
|
+
pip install thulac
|
27
|
+
|
28
|
+
```
|
29
|
+
|
10
30
|
## 使い方 :: Usage / Ja
|
11
31
|
|
12
32
|
```ruby
|
data/README.zh.md
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
# 言語 :: Igo
|
2
|
+
|
3
|
+
[🌏](README.md) | [中文](README.zh.md) | [English](README.en.md) | [日本語](README.ja.md)
|
4
|
+
|
5
|
+
Support `zh-CN`, `ja-JP`
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
ja.cut "あー、合成は結合法則を満たすんでしたね"
|
9
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
10
|
+
|
11
|
+
zh.tag "全世界的无产者,联合起来!", s: true
|
12
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
13
|
+
|
14
|
+
```
|
15
|
+
|
16
|
+
## Install :: 安装
|
17
|
+
|
18
|
+
```cmd
|
19
|
+
gem install igo
|
20
|
+
```
|
21
|
+
|
22
|
+
要使用 __Python__ 库的 `Thulac`,你得先安装...:
|
23
|
+
|
24
|
+
```cmd
|
25
|
+
|
26
|
+
pip install thulac
|
27
|
+
|
28
|
+
```
|
29
|
+
|
30
|
+
## 使い方 :: Usage / Ja
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
require 'igo'
|
34
|
+
require 'igo/ja'
|
35
|
+
|
36
|
+
j = Igo::Ja
|
37
|
+
|
38
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね"
|
39
|
+
#=> ["では", "、", "圏論", "の", "話", "を", "しましょ", "う", "か", "N", "この", "前", "は", "、圏", "について", "紹介 しました"]
|
40
|
+
|
41
|
+
cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
42
|
+
#=> "では 、 圏論 の 話 を しましょ う か N この 前 は 、圏 について 紹介しました"
|
43
|
+
|
44
|
+
```
|
45
|
+
|
46
|
+
下ノ関数は、暫く未完成です、ごめんね:
|
47
|
+
|
48
|
+
`j.romaji`, `j.kana`, `j.tag`。
|
49
|
+
|
50
|
+
## 用法 :: Usage / Zh
|
51
|
+
|
52
|
+
Lack __Trad-Zh__ :: 暂不支持「正體中文」
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
require 'igo'
|
56
|
+
require 'igo/zh'
|
57
|
+
z = Igo::Zh
|
58
|
+
```
|
59
|
+
|
60
|
+
### Pinyin :: 拼音
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
z.pinyin "全世界的无产者,联合起来!"
|
64
|
+
#=> ["quan2", "shi4", "jie4", "de", "wu2", "chan3", "zhe3", "lian2", "he2", "qi3", "lai2"]
|
65
|
+
z.pinyin "全世界的无产者,联合起来!", s: 1
|
66
|
+
#=> "quan2 shi4 jie4 de wu2 chan3 zhe3 lian2 he2 qi3 lai2"
|
67
|
+
```
|
68
|
+
|
69
|
+
### Separate words :: 分词
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
z.cut "全世界的无产者,联合起来!"
|
73
|
+
#=> ["全世界", "的", "無產階級", ",", "聯合", "起來", "!"]
|
74
|
+
z.cut "全世界的无产者,联合起来!", s: true
|
75
|
+
#=> "全世界 的 无产者 ! 联合 起来 !"
|
76
|
+
```
|
77
|
+
### Tagging words :: 标注词类
|
78
|
+
|
79
|
+
后续可能添加 `by` 关键词指定函数。
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
z.tag "全世界的无产者,联合起来!"
|
83
|
+
#=> [["全世界", "n"], ["的", "uj"], ["无产者", "n"], ["!", "x"], ["联合", "v"], ["起来", "v"], ["!", "x"]]
|
84
|
+
z.tag "全世界的无产者,联合起来!", s: true
|
85
|
+
#=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
86
|
+
```
|
87
|
+
### 词频统计
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
z.freq "全世界的无产者,联合起来!", 5
|
91
|
+
#=> [["无产者", 9.96885201925], ["全世界", 6.80147590842], ["联合", 5.64979650728], ["起来", 3.96134044655]]
|
92
|
+
|
93
|
+
# alias 拼音, 分词, 标记, 词频
|
94
|
+
|
95
|
+
```
|
96
|
+
|
97
|
+
## LICENSE :: 协议
|
98
|
+
|
99
|
+
__MPL 2.0__
|
100
|
+
|
101
|
+
## Requires
|
102
|
+
|
103
|
+
- `jieba-rb`
|
104
|
+
- `nokogiri`
|
105
|
+
- `open-uri`
|
data/lib/igo/ja.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
|
-
require 'jisho_api'
|
2
1
|
require 'nokogiri'
|
3
2
|
require 'open-uri'
|
4
3
|
require 'uri'
|
5
4
|
|
6
5
|
module Igo
|
7
|
-
module
|
6
|
+
module Ja
|
8
7
|
|
9
8
|
SEARCH_URL = "https://jisho.org/search/"
|
10
9
|
|
@@ -21,9 +20,13 @@ module Igo
|
|
21
20
|
# def kana str
|
22
21
|
# end
|
23
22
|
end
|
23
|
+
|
24
|
+
def tag str, s: false, ns: 0
|
25
|
+
|
26
|
+
end
|
27
|
+
|
24
28
|
end
|
25
29
|
|
26
|
-
Ja = Jisho
|
27
30
|
|
28
31
|
class << self
|
29
32
|
def 日本語
|
data/lib/igo/thulac.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'pycall/import'
|
2
|
+
|
3
|
+
module Thulac
|
4
|
+
PyCall.exec("import thulac")
|
5
|
+
PyCall.exec("thulac1 = thulac.thulac()")
|
6
|
+
end
|
7
|
+
class << Thulac
|
8
|
+
def cut str, text: false
|
9
|
+
text = text ? "True" : "False"
|
10
|
+
PyCall.eval(<<-EOF
|
11
|
+
thulac1.cut(#{str.inspect}, text=#{text})
|
12
|
+
EOF
|
13
|
+
)
|
14
|
+
end
|
15
|
+
end
|
data/lib/igo/version.rb
CHANGED
data/lib/igo/zh.rb
CHANGED
@@ -22,17 +22,28 @@ module Igo
|
|
22
22
|
PinYin.of_string(token, :ascii)
|
23
23
|
end
|
24
24
|
|
25
|
-
def cut str, s: false, tag: false
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
25
|
+
def cut str, s: false, tag: false, by: "jieba"
|
26
|
+
case by
|
27
|
+
when /jieba/
|
28
|
+
if tag
|
29
|
+
s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
|
30
|
+
else
|
31
|
+
res = Segment.cut(str)
|
32
|
+
s ? res.join(" ") : res
|
33
|
+
end
|
34
|
+
when /thulac/
|
35
|
+
Thulac.cut(str, text: s)
|
31
36
|
end
|
32
37
|
end
|
33
38
|
|
34
|
-
def tag str, s: false
|
35
|
-
|
39
|
+
def tag str, s: false, by: 0
|
40
|
+
case by
|
41
|
+
when /thu/
|
42
|
+
require './thulac'
|
43
|
+
Thulac.cut str, text: s
|
44
|
+
else
|
45
|
+
s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
|
46
|
+
end
|
36
47
|
end
|
37
48
|
|
38
49
|
def termfreq string, num
|
data/lib/igo.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: igo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- saisui
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-
|
11
|
+
date: 2023-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: It is easy.
|
14
14
|
email:
|
@@ -22,14 +22,16 @@ files:
|
|
22
22
|
- CHANGELOG.md
|
23
23
|
- CODE_OF_CONDUCT.md
|
24
24
|
- LICENSE.txt
|
25
|
+
- README.en.md
|
26
|
+
- README.ja.md
|
25
27
|
- README.md
|
28
|
+
- README.zh.md
|
26
29
|
- Rakefile
|
27
|
-
- igo.rb
|
28
30
|
- lib/igo.rb
|
29
31
|
- lib/igo/ja.rb
|
32
|
+
- lib/igo/thulac.rb
|
30
33
|
- lib/igo/version.rb
|
31
34
|
- lib/igo/zh.rb
|
32
|
-
- sig/igo.rbs
|
33
35
|
homepage: https://github.com/saisui/igo-rb
|
34
36
|
licenses:
|
35
37
|
- MPL-2.0
|
data/igo.rb
DELETED
data/sig/igo.rbs
DELETED