kanji-translator 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +7 -4
- data/lib/kanji/translator/version.rb +1 -1
- data/lib/kanji/translator.rb +50 -4
- metadata +15 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6d693a0727ff440e9630d4ac66f28ff42da8c707a3d222deedfaa2a394304e55
|
|
4
|
+
data.tar.gz: 5c26cc604c58942286badd65577a7b8094f9f304d34ae5e8474489ce18757676
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b3e115cb0d6057dde1642b4fdf8c61acc59b4a8275e471df8a04e4ba01311dfe119677ef4a5fd7d39348687532d5e5d7711c9da2b598529960461eddff3fc140
|
|
7
|
+
data.tar.gz: 279143b96cef0e5ecccb6cd8e35b80409e6ca734ba9ec72ef61b0080aea94c76fb4d93f958155e43d4562a1a4143121f0cee4140d6f36b59fb6918f68823ab5e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [1.0.0] - 2025-09-08
|
|
4
|
+
|
|
5
|
+
- Breaking: `to_slug` のデフォルト挙動を `segmenter: :tiny` に変更(語境界ごとにハイフン区切り)。
|
|
6
|
+
- Added: `segmenter: :space` オプションを追加。
|
|
7
|
+
- Added: 依存に `tiny_segmenter (~> 0.0.6)` を追加。
|
|
8
|
+
- Docs/Tests: READMEとRSpecを更新し新仕様を反映。
|
|
9
|
+
|
|
3
10
|
## [0.1.0] - 2025-09-08
|
|
4
11
|
|
|
5
12
|
- Initial release
|
data/README.md
CHANGED
|
@@ -8,13 +8,11 @@
|
|
|
8
8
|
|
|
9
9
|
Ruby 3.2以上が必要です。
|
|
10
10
|
|
|
11
|
-
Rubygems公開後:
|
|
12
|
-
|
|
13
11
|
```bash
|
|
14
12
|
bundle add kanji-translator
|
|
15
13
|
```
|
|
16
14
|
|
|
17
|
-
|
|
15
|
+
GitHubから使う場合(任意):
|
|
18
16
|
|
|
19
17
|
```ruby
|
|
20
18
|
# Gemfile
|
|
@@ -51,15 +49,20 @@ require "kanji/translator/core_ext/string"
|
|
|
51
49
|
- ひらがな読みをカタカナに変換して返します。
|
|
52
50
|
- `Kanji::Translator.to_roma(text, **opts)`
|
|
53
51
|
- 簡易ヘボン式のローマ字(ASCII、小文字)で返します。拗音/促音(ゃゅょ/っ)に対応。長音記号「ー」は無視します(例: タワー→tawa)。「おう」などの長音はマクロンを使わずそのまま綴ります(例: とうきょう→toukyou)。
|
|
54
|
-
- `Kanji::Translator.to_slug(text, separator: "-", downcase: true, collapse: true, **opts)`
|
|
52
|
+
- `Kanji::Translator.to_slug(text, separator: "-", downcase: true, collapse: true, segmenter: :tiny, **opts)`
|
|
55
53
|
- `to_roma` の結果をスラッグ化します。
|
|
56
54
|
- 非英数字を `separator` に置換、連続区切りを圧縮、前後の区切りをトリムします。
|
|
55
|
+
- `segmenter: :tiny`(デフォルト): TinySegmenter で分かち、語ごとにハイフン区切り(例: "学校案内" → "gakkou-annai")。
|
|
56
|
+
- `segmenter: :space`: 空白でのみ分割(例: "学校 案内" → "gakkou-annai")。
|
|
57
|
+
- `segmenter: nil`: 分かちなし(語の自動区切りなし)。
|
|
57
58
|
|
|
58
59
|
例(オプション):
|
|
59
60
|
|
|
60
61
|
```ruby
|
|
61
62
|
Kanji::Translator.to_hira("漢字", timeout: 3, retries: 1)
|
|
62
63
|
Kanji::Translator.to_slug("東京タワー 2010") #=> "toukyou-tawa-2010"
|
|
64
|
+
Kanji::Translator.to_slug("学校 案内", segmenter: :space) #=> "gakkou-annai"
|
|
65
|
+
Kanji::Translator.to_slug("学校案内", segmenter: nil) #=> "gakkouannai"
|
|
63
66
|
Kanji::Translator.to_slug("Foo Bar", separator: "_") #=> "foo_bar"
|
|
64
67
|
```
|
|
65
68
|
|
data/lib/kanji/translator.rb
CHANGED
|
@@ -31,10 +31,28 @@ module Kanji
|
|
|
31
31
|
hiragana_to_romaji(hira)
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
def self.to_slug(text,
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
34
|
+
def self.to_slug(text, **opts)
|
|
35
|
+
sep = opts.fetch(:separator, "-")
|
|
36
|
+
downcase = opts.fetch(:downcase, true)
|
|
37
|
+
collapse = opts.fetch(:collapse, true)
|
|
38
|
+
segmenter = opts.fetch(:segmenter, :tiny)
|
|
39
|
+
net_opts = slice_opts(opts, :timeout, :retries, :backoff, :user_agent)
|
|
40
|
+
|
|
41
|
+
s = case segmenter
|
|
42
|
+
when :tiny
|
|
43
|
+
tokens = segment_with_tiny(text)
|
|
44
|
+
parts = tokens.filter_map { |tok| normalize_slug_part(tok, net_opts) }
|
|
45
|
+
parts.join(sep)
|
|
46
|
+
when :space
|
|
47
|
+
tokens = segment_with_space(text)
|
|
48
|
+
parts = tokens.filter_map { |tok| normalize_slug_part(tok, net_opts) }
|
|
49
|
+
parts.join(sep)
|
|
50
|
+
else
|
|
51
|
+
roma = to_roma(text, **net_opts)
|
|
52
|
+
roma.dup
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
s = s.downcase if downcase
|
|
38
56
|
# Replace non-alphanumeric with separator
|
|
39
57
|
s = s.gsub(/[^a-z0-9]+/, sep)
|
|
40
58
|
# Collapse duplicate separators
|
|
@@ -193,5 +211,33 @@ module Kanji
|
|
|
193
211
|
jitter = rand * 0.05
|
|
194
212
|
sleep_s + jitter
|
|
195
213
|
end
|
|
214
|
+
|
|
215
|
+
# Splits +text+ into tokens with the TinySegmenter library.
#
# The library is required lazily, on first call, instead of at file load.
#
# @param text [String] text to segment
# @return the token list produced by TinySegmenter#segment for +text+
# @raise [Error] when the tiny_segmenter gem cannot be loaded
def self.segment_with_tiny(text)
  require "tiny_segmenter"
  TinySegmenter.new.segment(text)
rescue LoadError
  # :tiny is the default segmenter, so simply omitting the option would end up
  # here again; name the segmenters that work without the extra gem instead.
  raise Error, "tiny_segmenter gem is not installed. Add `tiny_segmenter` or pass `segmenter: :space` / `segmenter: nil`."
end
|
|
221
|
+
|
|
222
|
+
# Reports whether +tok+ contains at least one Japanese character.
#
# @param tok [String, nil] token to inspect
# @return [Boolean] true when any character of +tok+ falls in the class below
def self.japanese_token?(tok)
  # Kanji, Kana, prolonged sound mark, iteration marks, small kana.
  # Regexp#match? yields a plain boolean (false for nil) without allocating
  # MatchData or touching $~.
  /[一-龯々〆ヵヶぁ-ゖゝゞァ-ヴー]/.match?(tok)
end
|
|
226
|
+
|
|
227
|
+
# Splits +text+ into tokens on runs of whitespace.
#
# @param text [String] text to split
# @return [Array<String>] the non-empty tokens, in their original order
def self.segment_with_space(text)
  # [[:space:]] is Unicode-aware, whereas \s only matches ASCII whitespace and
  # would leave the ideographic space (U+3000) used in Japanese text unsplit.
  # Leading whitespace yields an empty first element from split; drop it.
  text.split(/[[:space:]]+/).reject(&:empty?)
end
|
|
230
|
+
|
|
231
|
+
# Turns one segmented token into its slug fragment.
#
# @param tok [String] a single token
# @param net_opts [Hash] options forwarded as keyword arguments to to_roma
# @return [String, nil] the to_roma result for a token containing Japanese
#   characters, the token itself when it has an ASCII letter or digit, and
#   nil otherwise
def self.normalize_slug_part(tok, net_opts)
  return to_roma(tok, **net_opts) if japanese_token?(tok)

  # Only presence matters here, so a single-character class with match? is
  # enough; tokens made purely of punctuation/symbols fall through to nil.
  tok if /[A-Za-z0-9]/.match?(tok)
end
|
|
238
|
+
|
|
239
|
+
# Builds a new hash holding only the entries of +hash+ whose key is listed
# in +keys+; listed keys that are absent from +hash+ are skipped.
#
# @param hash [Hash] source options
# @param keys [Array] keys to keep
# @return [Hash] the selected entries, ordered as +keys+ lists them
def self.slice_opts(hash, *keys)
  keys.each_with_object({}) do |key, picked|
    picked[key] = hash.fetch(key) if hash.key?(key)
  end
end
|
|
196
242
|
end
|
|
197
243
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kanji-translator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Hiromu Kodani
|
|
@@ -23,6 +23,20 @@ dependencies:
|
|
|
23
23
|
- - "~>"
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '1.16'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: tiny_segmenter
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 0.0.6
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 0.0.6
|
|
26
40
|
description: |-
|
|
27
41
|
Fetches readings for Japanese Kanji from yomikatawa.com and converts them
|
|
28
42
|
to hiragana, katakana, or Hepburn-style romaji. Includes timeout/retry
|