igo 0.1.5.1 → 0.1.5.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/igo/ja.rb +25 -19
- data/lib/igo/version.rb +1 -1
- data/lib/igo/zh.rb +7 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2433d8075d9ebf3f1140db29644ad784b30a651f82164882d7da53d091f5b06f
|
4
|
+
data.tar.gz: 254cd75391dab89a4a9ecbae5ee695c5ba1d9545f3eb99125779a8f48620492d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36c79c294645d61b9f3bf26f782708c549d86faa72d4cf967e89c7ed4e2031c3ed28d54c416aad9518b497b3b2befad2e3147502880fe0fbd1702fa23ffc1088
|
7
|
+
data.tar.gz: 91d1be1e4a30c42446970991c2a7a5b972754f0822cbf1ee6e6d1c1026ffe257c63d2fa89d420d4d052b4b5a276b77239a27ea67d0a6906101b51746afef075a
|
data/README.md
CHANGED
data/lib/igo/ja.rb
CHANGED
@@ -3,6 +3,7 @@ require 'open-uri'
|
|
3
3
|
require 'uri'
|
4
4
|
require 'concurrent'
|
5
5
|
require 'timeout'
|
6
|
+
require 'romaji'
|
6
7
|
|
7
8
|
module Igo
|
8
9
|
|
@@ -19,9 +20,6 @@ module Igo
|
|
19
20
|
# cutted = j.cut "あー、合成は結合法則を満たすんでしたね", s: true
|
20
21
|
# #=> "あー 、 合成 は 結合法則 を 満たす ん でした ね"
|
21
22
|
#
|
22
|
-
# 下ノ関数は、暫く未完成です、ごめんね:
|
23
|
-
#
|
24
|
-
# `j.romaji`, `j.kana`, `j.tag`。
|
25
23
|
#
|
26
24
|
module Ja
|
27
25
|
|
@@ -48,9 +46,10 @@ module Igo
|
|
48
46
|
|
49
47
|
tag(str, s: s, kana: false, tag: false)
|
50
48
|
end
|
51
|
-
# def romaji str
|
52
|
-
# end
|
53
49
|
|
50
|
+
private def _romaji str
|
51
|
+
Romaji.kana2romaji str
|
52
|
+
end
|
54
53
|
# def kana str
|
55
54
|
# end
|
56
55
|
def kana str, s: false, lr: "()"
|
@@ -73,7 +72,7 @@ module Igo
|
|
73
72
|
tag(str, s: s, lr: lr, tag: false, kana: true, kana_only: true)
|
74
73
|
end
|
75
74
|
|
76
|
-
def ruby str, s: false, lr: "()"
|
75
|
+
def ruby str, s: false, lr: "()", romaji: false
|
77
76
|
# str = URI.encode_www_form_component(str)
|
78
77
|
# doc = Nokogiri::HTML(URI.open(SEARCH_URL + str).read)
|
79
78
|
|
@@ -94,16 +93,16 @@ module Igo
|
|
94
93
|
# else cutted
|
95
94
|
# end
|
96
95
|
|
97
|
-
tag
|
96
|
+
tag str, s: s, lr: lr, kana: true, tag: false, romaji: romaji
|
98
97
|
|
99
98
|
end
|
100
99
|
# TODO: tag word function
|
101
100
|
#
|
102
101
|
def romaji str, s: false
|
103
|
-
|
102
|
+
tag str, s: s, romaji: true, kana_only: true, tag: false
|
104
103
|
end
|
105
104
|
|
106
|
-
def tag str, s: false, ns: 0, lr: "()", sp:"_", short: false, tag: true, kana: false, timeout: 10, kana_only: false
|
105
|
+
def tag str, s: false, ns: 0, lr: "()", sp:"_", short: false, tag: true, kana: false, timeout: 10, kana_only: false, romaji: false
|
107
106
|
|
108
107
|
def async_query(arr, timeout=0, &block)
|
109
108
|
promises = arr.map do |element|
|
@@ -127,18 +126,25 @@ module Igo
|
|
127
126
|
results
|
128
127
|
end
|
129
128
|
|
130
|
-
def _tag str
|
129
|
+
def _tag str, romaji: false
|
131
130
|
str = URI.encode_www_form_component(str)
|
132
131
|
doc = Nokogiri::HTML(URI.open(SEARCH_URL + str).read)
|
133
132
|
|
134
133
|
cutted = doc.css(".japanese_word")
|
135
|
-
.map do
|
136
|
-
_1
|
137
|
-
_1
|
134
|
+
.map do
|
135
|
+
text = _1.css(".japanese_word__text_wrapper, japanese_word__text_wrapper").text.strip
|
136
|
+
kn = _1&.css(".japanese_word__furigana").text
|
137
|
+
cat = _1.attr("data-pos")
|
138
|
+
|
139
|
+
romj = kn.empty? ? _romaji(text) : _romaji(kn)
|
140
|
+
|
141
|
+
[text, # text
|
142
|
+
romaji ? romj : kn , # romaji || kana || ""
|
143
|
+
cat] # tag || nil
|
138
144
|
end
|
139
145
|
end
|
140
146
|
|
141
|
-
def _stringify cutted, s: "/", lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false
|
147
|
+
def _stringify cutted, s: "/", lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false
|
142
148
|
# cutted.each{ _1[1] = nil } unless vis_kana
|
143
149
|
# cutted.each{ _1[2] = nil } unless vis_tag
|
144
150
|
if kana_only
|
@@ -162,15 +168,15 @@ module Igo
|
|
162
168
|
end
|
163
169
|
end
|
164
170
|
|
165
|
-
def singo_proc str, s: false, ns: 0, lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false
|
166
|
-
cutted = _tag str
|
171
|
+
def singo_proc str, s: false, ns: 0, lr: "()", sp:"_", short: false, vis_tag: true, vis_kana: true, kana_only: false, romaji: false
|
172
|
+
cutted = _tag str, romaji: romaji
|
167
173
|
if short
|
168
174
|
short = short.is_a?(Integer) ? short : 4
|
169
175
|
cutted = cutted.map{[ *_1[0,2], (_1[2][0, short].downcase rescue nil) ]}
|
170
176
|
end
|
171
177
|
|
172
178
|
if s
|
173
|
-
_stringify cutted, s: s, lr: lr, sp: sp, short: short, vis_tag: vis_kana, vis_kana: vis_tag, kana_only: kana_only
|
179
|
+
_stringify cutted, s: s, lr: lr, sp: sp, short: short, vis_tag: vis_kana, vis_kana: vis_tag, kana_only: kana_only, romaji: romaji
|
174
180
|
else
|
175
181
|
cutted = cutted.map{_1.values_at(* [0, vis_kana ? 1 : 0, vis_tag ? 2 : 0].uniq)}
|
176
182
|
cutted[0].size == 1 ? cutted.flatten : cutted
|
@@ -179,10 +185,10 @@ module Igo
|
|
179
185
|
|
180
186
|
case str
|
181
187
|
when String
|
182
|
-
singo_proc str, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only
|
188
|
+
singo_proc str, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only, romaji: romaji
|
183
189
|
when Array
|
184
190
|
async_query str, timeout do
|
185
|
-
singo_proc _1, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only
|
191
|
+
singo_proc _1, s: s, ns: ns, lr: lr, sp: sp, short: short, vis_tag: tag, vis_kana: kana, kana_only: kana_only, romaji: romaji
|
186
192
|
end
|
187
193
|
end
|
188
194
|
# TODO
|
data/lib/igo/version.rb
CHANGED
data/lib/igo/zh.rb
CHANGED
@@ -109,11 +109,16 @@ module Igo
|
|
109
109
|
# z.tag "全世界的无产者,联合起来!", s: true
|
110
110
|
# #=> "全世界_n 的_uj 无产者_n ,_x 联合_v 起来_v !_x"
|
111
111
|
#
|
112
|
-
def tag str, s: false, by: 0
|
112
|
+
def tag str, s: false, by: 0, sp: "_"
|
113
113
|
case by
|
114
114
|
when /thu/
|
115
115
|
require_relative 'thulac'
|
116
|
-
Thulac.cut
|
116
|
+
cutted = Thulac.cut(str).to_a.map(&:to_a)
|
117
|
+
if s
|
118
|
+
s = s.is_a?(String) ? s : " "
|
119
|
+
cutted.map{_1.join(sp)}.join(s)
|
120
|
+
else cutted
|
121
|
+
end
|
117
122
|
else
|
118
123
|
s ? Tagging.tag(str).map{_1.to_a.flatten.join("_")}.join(" ") : Tagging.tag(str).map{_1.to_a.flatten}
|
119
124
|
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: igo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5.
|
4
|
+
version: 0.1.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- saisui
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-12-
|
11
|
+
date: 2023-12-26 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description: A gem for chinese / japanese word cut, pronounciation, hiragana, kana.
|
14
14
|
email:
|
15
15
|
- kozmozenjel@outlook.com
|
16
16
|
executables: []
|