imedic-tools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/push_gem.yml +52 -0
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/LICENSE +24 -0
- data/README.md +55 -0
- data/Rakefile +3 -0
- data/exe/atok2kotoeri +93 -0
- data/exe/atok2msime +103 -0
- data/exe/msime2atok +101 -0
- data/imedic-tools.gemspec +29 -0
- data/lib/imedic/tools/version.rb +7 -0
- data/lib/imedic/tools.rb +3 -0
- data/lib/imedic-tools.rb +3 -0
- metadata +61 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 7670f860570f73ef0fdf31fe278598246eab359159af59e8ebc3c1e5bc35aaef
|
|
4
|
+
data.tar.gz: c472bf447d23344b7f34403f67bbc7408cc1f259662adcf0b2e29803fff382fa
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 3ba08c54418c73016822bb12f24d4d4f47791dcc0d71c52bb0f431828f3b61ef09db7b92972971526a3218f49dba2dfed5f245c9db20e32885a2fbb42c440a14
|
|
7
|
+
data.tar.gz: 57354a763840c018d3c1fec1bf7ec0ea4c4b798cecdde30e2a8e14d3d7033f422b2e9daf09e728e7887752f812add47891b7c8b7f9daaf5d5bf462fcaab23045
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: Publish gem to rubygems.org

on:
  push:
    tags:
      - "v*"

# Workflow-wide default is read-only; the job below widens only what it needs.
permissions:
  contents: read

jobs:
  push:
    # Never publish from forks.
    if: github.repository == 'knu/imedic-tools'
    runs-on: ubuntu-latest

    environment:
      name: rubygems.org
      url: https://rubygems.org/gems/imedic-tools

    permissions:
      contents: write  # needed by `gh release create` in the last step
      id-token: write  # lets the job request an OIDC token for the publish step

    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@ab7a9404c0f3da075243ca237b5fac12c98deaa5 # v2.19.3
        with:
          egress-policy: audit

      - name: Check out repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - name: Set up Ruby
        uses: ruby/setup-ruby@8aeb6ff8030dd539317f8e1769a044873b56ea71 # v1.268.0
        with:
          ruby-version: ruby

      - name: Install dependencies
        run: bundle install --jobs 4 --retry 3

      - name: Publish to RubyGems
        uses: rubygems/release-gem@6317d8d1f7e28c24d28f6eff169ea854948bd9f7 # v1.2.0

      - name: Create GitHub release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          # The workflow only runs on tag pushes, so GITHUB_REF_NAME is the
          # tag that triggered it; no need to re-derive it from git history.
          gh release create "${GITHUB_REF_NAME}" --verify-tag --generate-notes
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Copyright (c) 2026 Akinori Musha
|
|
2
|
+
|
|
3
|
+
Redistribution and use in source and binary forms, with or without
|
|
4
|
+
modification, are permitted provided that the following conditions
|
|
5
|
+
are met:
|
|
6
|
+
|
|
7
|
+
1. Redistributions of source code must retain the above copyright
|
|
8
|
+
notice, this list of conditions and the following disclaimer.
|
|
9
|
+
|
|
10
|
+
2. Redistributions in binary form must reproduce the above copyright
|
|
11
|
+
notice, this list of conditions and the following disclaimer in the
|
|
12
|
+
documentation and/or other materials provided with the distribution.
|
|
13
|
+
|
|
14
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
15
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
16
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
17
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
18
|
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
19
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
20
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
21
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
22
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
23
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
24
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# imedic-tools
|
|
2
|
+
|
|
3
|
+
This gem provides command-line tools for converting Japanese input dictionary word-list files between common formats.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **ATOK support**: Reads and writes ATOK word-list text files
|
|
8
|
+
- **MS-IME support**: Reads and writes modern Microsoft IME word-list files with a Unicode header, UTF-16LE BOM, and CRLF line endings
|
|
9
|
+
- **Kotoeri output**: Generates UTF-8 CSV files for Kotoeri-compatible import workflows
|
|
10
|
+
- **Comment preservation**: Converts comments between ATOK and MS-IME comment syntax
|
|
11
|
+
- **Multiple input files**: Accepts one or more input files and writes a combined dictionary to standard output
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
Install the gem from RubyGems:
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
gem install imedic-tools
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
The scripts under `exe/` can also be used standalone without installing the gem.
|
|
22
|
+
|
|
23
|
+
## Usage
|
|
24
|
+
|
|
25
|
+
This gem includes three tools:
|
|
26
|
+
|
|
27
|
+
- `atok2msime`: Converts ATOK word-list files to Microsoft IME format
|
|
28
|
+
- `msime2atok`: Converts Microsoft IME word-list files to ATOK format
|
|
29
|
+
- `atok2kotoeri`: Converts ATOK word-list files to Kotoeri CSV format
|
|
30
|
+
|
|
31
|
+
Usage is common to all tools:
|
|
32
|
+
|
|
33
|
+
```sh
|
|
34
|
+
atok2msime < input.atok.txt > output.msime.txt
|
|
35
|
+
atok2msime input1.atok.txt input2.atok.txt > output.msime.txt
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Each tool accepts one or more input files, or standard input when no file is given, and writes the converted word list to standard output.
|
|
39
|
+
|
|
40
|
+
## ATOK Input Format
|
|
41
|
+
|
|
42
|
+
Input files for `atok2msime` and `atok2kotoeri` are expected to be ATOK word-list text files:
|
|
43
|
+
|
|
44
|
+
```text
|
|
45
|
+
!!ATOK_TANGO_TEXT_HEADER_1
|
|
46
|
+
!! Optional comment
|
|
47
|
+
|
|
48
|
+
よみ 単語 名詞*
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
The tools read UTF-8 input with or without a BOM, and UTF-16 input that starts with a BOM.
|
|
52
|
+
|
|
53
|
+
## License
|
|
54
|
+
|
|
55
|
+
This project is distributed under the 2-clause BSD license.
|
data/Rakefile
ADDED
data/exe/atok2kotoeri
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Lookup table from ATOK part-of-speech (hinshi) names to the names written
# to the Kotoeri CSV output. Frozen because it is a shared constant that is
# only ever read.
HINSHI_ATOK2KOTOERI = {
  "カ行五段" => "無品詞",
  "ガ行五段" => "無品詞",
  "サ変動詞" => "無品詞",
  "サ行五段" => "無品詞",
  "ザ変動詞" => "無品詞",
  "タ行五段" => "無品詞",
  "ナ行五段" => "無品詞",
  "バ行五段" => "無品詞",
  "マ行五段" => "無品詞",
  "ラ行五段" => "無品詞",
  "ワ行五段" => "無品詞",
  "一段動詞" => "無品詞",
  "冠数詞" => "普通名詞",
  "副詞" => "無品詞",
  "助数詞" => "数字列接尾語",
  "単漢字" => "無品詞",
  "名詞" => "普通名詞",
  "名詞サ変" => "サ変名詞",
  "名詞ザ変" => "サ変名詞",
  "名詞形動" => "形容動詞",
  "固有一般" => "普通名詞",
  "固有人名" => "人名",
  "固有人姓" => "人名",
  "固有商品" => "普通名詞",
  "固有地名" => "地名",
  "固有組織" => "普通名詞",
  "形動タリ" => "形容動詞",
  "形容動詞" => "形容動詞",
  "形容詞" => "形容詞",
  "感動詞" => "無品詞",
  "接尾語" => "無品詞",
  "接続詞" => "無品詞",
  "接頭語" => "無品詞",
  "数詞" => "普通名詞",
  "独立語" => "無品詞",
  "短縮読み" => "無品詞",
  "連体詞" => "無品詞",
  "顔文字" => "無品詞",
}.freeze
|
|
43
|
+
|
|
44
|
+
# Walks an ATOK word-list and yields each entry as
# (reading, word, part of speech).
#
# file - input path, or "-" for standard input (handed to open_input).
#
# Blank lines and lines starting with "!!" are skipped, as are lines with
# fewer than three tab-separated fields. Trailing "*" / "$" characters are
# removed from the part of speech before it is yielded.
def each_atok_entry(file)
  open_input(file) do |input|
    input.each_line(chomp: true) do |row|
      next if row.match?(/^(!!|\s*$)/)

      reading, word, pos = row.split(/\t+/)
      next unless pos

      yield reading, word, pos.sub(/[*$]+\z/, "")
    end
  end
end
|
|
56
|
+
|
|
57
|
+
# Yields a readable stream for +file+ after configuring its encoding.
#
# file - "-" selects standard input; anything else is opened as a path in
#        binary read mode and closed again when the block returns.
def open_input(file)
  case file
  when "-"
    setup_input(STDIN)
    yield STDIN
  else
    File.open(file, "rb") do |stream|
      setup_input(stream)
      yield stream
    end
  end
end
|
|
68
|
+
|
|
69
|
+
# Configures +io+ so that reads produce UTF-8 strings.
#
# The stream is switched to binary mode and checked for a BOM. When one is
# found, its encoding becomes the external encoding with UTF-8 as the
# internal encoding; otherwise the stream is read as UTF-8.
def setup_input(io)
  io.binmode
  detected = io.set_encoding_by_bom
  return io.set_encoding(Encoding::UTF_8) unless detected

  io.set_encoding(detected, Encoding::UTF_8)
end
|
|
77
|
+
|
|
78
|
+
# Wraps +word+ in double quotes for CSV output, doubling any embedded
# double quote.
#
# The result is built by interpolation rather than by appending to a string
# literal, so it also works when string literals are frozen.
def quote(word)
  %("#{word.gsub('"', '""')}")
end
|
|
81
|
+
|
|
82
|
+
$stdout.set_encoding(Encoding::UTF_8)

# With no arguments, read from standard input ("-").
files = ARGV.empty? ? ["-"] : ARGV

# Emit one quoted CSV row (reading, word, part of speech) per entry.
files.each do |file|
  each_atok_entry(file) do |yomi, tango, hinshi|
    # Parts of speech missing from the table fall back to "無品詞".
    newhinshi = HINSHI_ATOK2KOTOERI.fetch(hinshi, "無品詞")
    puts [yomi, tango, newhinshi].map { |word| quote(word) }.join(",")
  rescue EncodingError => e
    # Pass the exception text as an argument instead of splicing it into
    # the format string, so a "%" in the message cannot break formatting.
    warn format("skipped: %s %s [%s]: %s", yomi, tango, hinshi, e)
  end
end
|
data/exe/atok2msime
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Lookup table from ATOK part-of-speech (hinshi) names to the names written
# to the Microsoft IME word list. Frozen because it is a shared constant
# that is only ever read.
HINSHI_ATOK2MSIME = {
  "カ行五段" => "か行五段",
  "ガ行五段" => "が行五段",
  "サ変動詞" => "さ変動詞",
  "サ行五段" => "さ行五段",
  "ザ変動詞" => "ざ変動詞",
  "タ行五段" => "た行五段",
  "ナ行五段" => "な行五段",
  "バ行五段" => "ば行五段",
  "マ行五段" => "ま行五段",
  "ラ行五段" => "ら行五段",
  "ワ行五段" => "あわ行五段",
  "一段動詞" => "一段動詞",
  "冠数詞" => "冠数詞",
  "副詞" => "副詞",
  "助数詞" => "助数詞",
  "単漢字" => "単漢字",
  "名詞" => "名詞",
  "名詞形動" => "形容動詞",
  "名詞サ変" => "さ変名詞",
  "名詞ザ変" => "ざ変名詞",
  "固有一般" => "固有名詞",
  "固有人名" => "名",
  "固有人姓" => "姓",
  "固有商品" => "固有名詞",
  "固有地名" => "地名その他",
  "固有組織" => "社名",
  "形動タリ" => "形容動詞タル",
  "形容動詞" => "形容動詞",
  "形容詞" => "形容詞",
  "感動詞" => "感動詞",
  "接尾語" => "接尾語",
  "接続詞" => "接続詞",
  "接頭語" => "接頭語",
  "数詞" => "数量",
  "独立語" => "独立語",
  "短縮読み" => "短縮よみ",
  "連体詞" => "連体詞",
  "顔文字" => "顔文字",
}.freeze
|
|
43
|
+
|
|
44
|
+
# Converts one ATOK word-list to Microsoft IME word-list lines on standard
# output.
#
# file - input path, or "-" for standard input (handed to open_input).
#
# Blank lines and the ATOK header line are dropped, other "!!" lines are
# re-emitted with a single "!" prefix, and every remaining line is split on
# tabs into reading, word and part of speech. A line that cannot be
# converted is reported on standard error and skipped.
def convert_atok_file(file)
  open_input(file) do |io|
    io.each_line(chomp: true) do |line|
      case line
      when /\A\s*\z|\A!!ATOK_TANGO_TEXT_HEADER_1\z/
        next
      when /\A!!(\s.*\z)?/
        # Only comment text that starts with whitespace is carried over;
        # anything else after "!!" is reduced to a bare "!".
        puts "!#{$1}"
        next
      end

      yomi, tango, hinshi = line.split(/\t+/)
      next if hinshi.nil?

      # Drop trailing "*" / "$" characters before the table lookup.
      hinshi = hinshi.sub(/[*$]+\z/, "")
      newhinshi = HINSHI_ATOK2MSIME.fetch(hinshi) { raise "unsupported hinshi" }

      puts [yomi, tango, newhinshi].join("\t")
    rescue => e
      # Pass the exception text as an argument instead of splicing it into
      # the format string, so a "%" in the message cannot break formatting.
      warn format("skipped: %s %s [%s]: %s", yomi, tango, hinshi, e)
    end
  end
end
|
|
68
|
+
|
|
69
|
+
# Yields a readable stream for +file+ after configuring its encoding.
#
# file - "-" selects standard input; anything else is opened as a path in
#        binary read mode and closed again when the block returns.
def open_input(file)
  case file
  when "-"
    setup_input(STDIN)
    yield STDIN
  else
    File.open(file, "rb") do |stream|
      setup_input(stream)
      yield stream
    end
  end
end
|
|
80
|
+
|
|
81
|
+
# Configures +io+ so that reads produce UTF-8 strings.
#
# The stream is switched to binary mode and checked for a BOM. When one is
# found, its encoding becomes the external encoding with UTF-8 as the
# internal encoding; otherwise the stream is read as UTF-8.
def setup_input(io)
  io.binmode
  detected = io.set_encoding_by_bom
  return io.set_encoding(Encoding::UTF_8) unless detected

  io.set_encoding(detected, Encoding::UTF_8)
end
|
|
89
|
+
|
|
90
|
+
# Output is transcoded from UTF-8 to UTF-16LE with CRLF line endings, and
# starts with a BOM.
$stdout.set_encoding(Encoding::UTF_16LE, Encoding::UTF_8, crlf_newline: true)
$stdout.write("\uFEFF")

# Microsoft IME word-list header, followed by one empty line.
header_lines = [
  "!Microsoft IME Dictionary Tool",
  "!Version:",
  "!Format:WORDLIST",
  "!DateTime: #{Time.now.strftime("%Y年%m月%d日")}",
  "",
]
puts header_lines

# With no arguments, read from standard input ("-").
inputs = ARGV.empty? ? ["-"] : ARGV
inputs.each { |path| convert_atok_file(path) }
|
data/exe/msime2atok
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
# Lookup table from Microsoft IME part-of-speech (hinshi) names to the names
# written to the ATOK word list. Frozen because it is a shared constant that
# is only ever read.
HINSHI_MSIME2ATOK = {
  "か行五段" => "カ行五段",
  "が行五段" => "ガ行五段",
  "さ変動詞" => "サ変動詞",
  "さ行五段" => "サ行五段",
  "ざ変動詞" => "ザ変動詞",
  "た行五段" => "タ行五段",
  "な行五段" => "ナ行五段",
  "ば行五段" => "バ行五段",
  "ま行五段" => "マ行五段",
  "ら行五段" => "ラ行五段",
  "あわ行五段" => "ワ行五段",
  "わ行五段" => "ワ行五段",
  "一段動詞" => "一段動詞",
  "冠数詞" => "冠数詞",
  "副詞" => "副詞",
  "助数詞" => "助数詞",
  "単漢字" => "単漢字",
  "名詞" => "名詞",
  "形容動詞" => "形容動詞",
  "さ変名詞" => "名詞サ変",
  "ざ変名詞" => "名詞ザ変",
  "固有名詞" => "固有一般",
  "名" => "固有人名",
  "姓" => "固有人姓",
  "地名その他" => "固有地名",
  "社名" => "固有組織",
  "形容動詞タル" => "形動タリ",
  "形容詞" => "形容詞",
  "感動詞" => "感動詞",
  "接尾語" => "接尾語",
  "接続詞" => "接続詞",
  "接頭語" => "接頭語",
  "数量" => "数詞",
  "独立語" => "独立語",
  "短縮よみ" => "短縮読み",
  "連体詞" => "連体詞",
  "顔文字" => "顔文字",
}.freeze
|
|
42
|
+
|
|
43
|
+
# Passes every line of +file+, with its line ending removed, to +block+.
#
# file - input path, or "-" for standard input (handed to open_input).
def each_msime_line(file, &block)
  open_input(file) { |stream| stream.each_line(chomp: true, &block) }
end
|
|
48
|
+
|
|
49
|
+
# Yields a readable stream for +file+ after configuring its encoding.
#
# file - "-" selects standard input; anything else is opened as a path in
#        binary read mode and closed again when the block returns.
def open_input(file)
  case file
  when "-"
    setup_input(STDIN)
    yield STDIN
  else
    File.open(file, "rb") do |stream|
      setup_input(stream)
      yield stream
    end
  end
end
|
|
60
|
+
|
|
61
|
+
# Configures +io+ so that reads produce UTF-8 strings.
#
# The stream is switched to binary mode and checked for a BOM. When one is
# found, its encoding becomes the external encoding with UTF-8 as the
# internal encoding; otherwise the stream is read as UTF-8.
def setup_input(io)
  io.binmode
  detected = io.set_encoding_by_bom
  return io.set_encoding(Encoding::UTF_8) unless detected

  io.set_encoding(detected, Encoding::UTF_8)
end
|
|
69
|
+
|
|
70
|
+
# Converts one Microsoft IME word-list to ATOK word-list lines on standard
# output.
#
# file - input path, or "-" for standard input (handed to each_msime_line).
#
# Blank lines and lines where "!" is directly followed by a non-space
# character are dropped, other "!" lines are re-emitted with a "!!" prefix,
# and every remaining line is split on tabs into reading, word and part of
# speech. A line that cannot be converted is reported on standard error and
# skipped.
def convert_msime_file(file)
  each_msime_line(file) do |line|
    case line
    when /\A\s*\z|\A!\S/
      next
    when /\A!(\s.*\z)?/
      puts "!!#{$1}"
      next
    end

    yomi, tango, hinshi = line.split(/\t+/)
    next if hinshi.nil?

    # Drop trailing "*" / "$" characters before the table lookup.
    hinshi = hinshi.sub(/[*$]+\z/, "")
    newhinshi = HINSHI_MSIME2ATOK.fetch(hinshi) { raise "unsupported hinshi" }

    # Each part of speech is written with a trailing "*".
    puts [yomi, tango, "#{newhinshi}*"].join("\t")
  rescue => e
    # Pass the exception text as an argument instead of splicing it into
    # the format string, so a "%" in the message cannot break formatting.
    warn format("skipped: %s %s [%s]: %s", yomi, tango, hinshi, e)
  end
end
|
|
92
|
+
|
|
93
|
+
# Output is UTF-8; CRLF newline conversion is requested.
$stdout.set_encoding(Encoding::UTF_8, Encoding::UTF_8, crlf_newline: true)

# ATOK word-list header line.
puts "!!ATOK_TANGO_TEXT_HEADER_1"

# With no arguments, read from standard input ("-").
inputs = ARGV.empty? ? ["-"] : ARGV
inputs.each { |path| convert_msime_file(path) }
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/imedic/tools/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  spec.name = "imedic-tools"
  spec.version = Imedic::Tools::VERSION
  spec.authors = ["Akinori Musha"]
  spec.email = ["knu@idaemons.org"]

  spec.summary = "Japanese input dictionary word-list conversion tools"
  spec.description = "Command-line tools that convert Japanese input dictionary word-list files between common formats."
  spec.homepage = "https://github.com/knu/imedic-tools"
  spec.license = "BSD-2-Clause"
  spec.required_ruby_version = ">= 3.1"

  spec.metadata = {
    "bug_tracker_uri" => "https://github.com/knu/imedic-tools/issues",
    "changelog_uri" => "https://github.com/knu/imedic-tools/releases",
    "homepage_uri" => spec.homepage,
  }

  # Package the files tracked by git, leaving out repository housekeeping
  # (CI configuration, .gitignore, Gemfile) that users of the gem never need.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |path|
      path.start_with?(".github/", ".gitignore", "Gemfile")
    end
  end
  spec.bindir = "exe"
  spec.executables = ["atok2kotoeri", "atok2msime", "msime2atok"]
  spec.require_paths = ["lib"]
end
|
data/lib/imedic/tools.rb
ADDED
data/lib/imedic-tools.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: imedic-tools
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Akinori Musha
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: Command-line tools that convert Japanese input dictionary word-list files
|
|
13
|
+
between common formats.
|
|
14
|
+
email:
|
|
15
|
+
- knu@idaemons.org
|
|
16
|
+
executables:
|
|
17
|
+
- atok2kotoeri
|
|
18
|
+
- atok2msime
|
|
19
|
+
- msime2atok
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- ".github/dependabot.yml"
|
|
24
|
+
- ".github/workflows/push_gem.yml"
|
|
25
|
+
- ".gitignore"
|
|
26
|
+
- Gemfile
|
|
27
|
+
- LICENSE
|
|
28
|
+
- README.md
|
|
29
|
+
- Rakefile
|
|
30
|
+
- exe/atok2kotoeri
|
|
31
|
+
- exe/atok2msime
|
|
32
|
+
- exe/msime2atok
|
|
33
|
+
- imedic-tools.gemspec
|
|
34
|
+
- lib/imedic-tools.rb
|
|
35
|
+
- lib/imedic/tools.rb
|
|
36
|
+
- lib/imedic/tools/version.rb
|
|
37
|
+
homepage: https://github.com/knu/imedic-tools
|
|
38
|
+
licenses:
|
|
39
|
+
- BSD-2-Clause
|
|
40
|
+
metadata:
|
|
41
|
+
bug_tracker_uri: https://github.com/knu/imedic-tools/issues
|
|
42
|
+
changelog_uri: https://github.com/knu/imedic-tools/releases
|
|
43
|
+
homepage_uri: https://github.com/knu/imedic-tools
|
|
44
|
+
rdoc_options: []
|
|
45
|
+
require_paths:
|
|
46
|
+
- lib
|
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
48
|
+
requirements:
|
|
49
|
+
- - ">="
|
|
50
|
+
- !ruby/object:Gem::Version
|
|
51
|
+
version: '3.1'
|
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
|
+
requirements:
|
|
54
|
+
- - ">="
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: '0'
|
|
57
|
+
requirements: []
|
|
58
|
+
rubygems_version: 3.6.9
|
|
59
|
+
specification_version: 4
|
|
60
|
+
summary: Japanese input dictionary word-list conversion tools
|
|
61
|
+
test_files: []
|