suika 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -1
- data/.travis.yml +8 -2
- data/CHANGELOG.md +37 -0
- data/Gemfile.lock +36 -0
- data/README.md +27 -7
- data/dict/{ipadic.gz → sysdic.gz} +0 -0
- data/lib/suika.rb +1 -0
- data/lib/suika/char_def.rb +18 -17
- data/lib/suika/lattice.rb +8 -10
- data/lib/suika/node.rb +21 -0
- data/lib/suika/tagger.rb +57 -43
- data/lib/suika/version.rb +1 -1
- data/suika.gemspec +2 -3
- metadata +13 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bfb2e78e15c648ee309868bdfd3f386a66b1cff633cb546880132cdb9b8f3806
|
4
|
+
data.tar.gz: d398f4de11a4af80b7c62c4e468fa2e3f9393bbb2211e0f2e317a08ed05c73b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fea777a4229725a174aa0955bcf5f775cc6e319fa73cf6f204385b3a47d84997b74b8798f78269f442aa11dcf151b1319c079c315dfea103dcd7a148cf0b5c5
|
7
|
+
data.tar.gz: 17d2fd7f248c965b6d585542c4ec4cb90e87663f96b0522f06f1c3e94c55a18e59f08e4ab1e7724f9dd48a114c2c0f35a0499100fb8850a89d2822de87927988
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
## 0.2.0
|
2
|
+
|
3
|
+
### Breaking Change
|
4
|
+
- Switch the trie library to dartsclone.
|
5
|
+
|
6
|
+
|
7
|
+
## 0.1.4
|
8
|
+
|
9
|
+
### Bug Fixes
|
10
|
+
- Fix CharDef.char_type to return 'DEFAULT' when an unknown character code is given.
|
11
|
+
|
12
|
+
### Features
|
13
|
+
- Add the character code for the square era name Reiwa (U+32FF).
|
14
|
+
|
15
|
+
## 0.1.3
|
16
|
+
|
17
|
+
### Bug Fixes
|
18
|
+
- Fix unknown word processing.
|
19
|
+
|
20
|
+
### Changes
|
21
|
+
- Remove redundant spaces from output.
|
22
|
+
|
23
|
+
|
24
|
+
## 0.1.2
|
25
|
+
|
26
|
+
### Bug Fixes
|
27
|
+
- Fix local variable typo in Tagger.parse.
|
28
|
+
|
29
|
+
|
30
|
+
## 0.1.1
|
31
|
+
|
32
|
+
### Bug Fixes
|
33
|
+
- Fix specification of class in CharDef.char_type.
|
34
|
+
|
35
|
+
|
36
|
+
## 0.1.0
|
37
|
+
- First release.
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
suika (0.2.0)
|
5
|
+
dartsclone (>= 0.2.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
dartsclone (0.2.0)
|
11
|
+
diff-lcs (1.4.4)
|
12
|
+
rake (12.3.3)
|
13
|
+
rspec (3.9.0)
|
14
|
+
rspec-core (~> 3.9.0)
|
15
|
+
rspec-expectations (~> 3.9.0)
|
16
|
+
rspec-mocks (~> 3.9.0)
|
17
|
+
rspec-core (3.9.2)
|
18
|
+
rspec-support (~> 3.9.3)
|
19
|
+
rspec-expectations (3.9.2)
|
20
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
21
|
+
rspec-support (~> 3.9.0)
|
22
|
+
rspec-mocks (3.9.1)
|
23
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
24
|
+
rspec-support (~> 3.9.0)
|
25
|
+
rspec-support (3.9.3)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
rake (~> 12.0)
|
32
|
+
rspec (~> 3.0)
|
33
|
+
suika!
|
34
|
+
|
35
|
+
BUNDLED WITH
|
36
|
+
2.1.2
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# Suika
|
2
2
|
|
3
|
+
[Build Status](https://travis-ci.org/yoshoku/suika)
|
3
4
|
[Gem Version](https://badge.fury.io/rb/suika)
|
4
5
|
[BSD 3-Clause License](https://github.com/yoshoku/suika/blob/master/LICENSE.txt)
|
5
6
|
[Documentation](https://rubydoc.info/gems/suika)
|
@@ -30,13 +31,13 @@ require 'suika'
|
|
30
31
|
tagger = Suika::Tagger.new
|
31
32
|
tagger.parse('すもももももももものうち').each { |token| puts token }
|
32
33
|
|
33
|
-
# すもも
|
34
|
-
# も
|
35
|
-
# もも
|
36
|
-
# も
|
37
|
-
# もも
|
38
|
-
# の
|
39
|
-
# うち
|
34
|
+
# すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
35
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
36
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
37
|
+
# も 助詞,係助詞,*,*,*,*,も,モ,モ
|
38
|
+
# もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
39
|
+
# の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
40
|
+
# うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
40
41
|
```
|
41
42
|
|
42
43
|
Since the Tagger class loads the binary dictionary at initialization, it is recommended to reuse the instance.
|
@@ -51,6 +52,25 @@ sentences.each do |sentence|
|
|
51
52
|
end
|
52
53
|
```
|
53
54
|
|
55
|
+
## Test
|
56
|
+
Suika was able to parse all sentences in the [Livedoor news corpus](https://www.rondhuit.com/download.html#ldcc)
|
57
|
+
without any error.
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
require 'suika'
|
61
|
+
|
62
|
+
tagger = Suika::Tagger.new
|
63
|
+
|
64
|
+
Dir.glob('ldcc-20140209/text/*/*.txt').each do |filename|
|
65
|
+
File.foreach(filename) do |sentence|
|
66
|
+
sentence.strip!
|
67
|
+
puts tagger.parse(sentence) unless sentence.empty?
|
68
|
+
end
|
69
|
+
end
|
70
|
+
```
|
71
|
+
|
72
|
+

|
73
|
+
|
54
74
|
## Contributing
|
55
75
|
|
56
76
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/suika.
|
Binary file
|
data/lib/suika.rb
CHANGED
data/lib/suika/char_def.rb
CHANGED
@@ -5,10 +5,11 @@ module Suika
|
|
5
5
|
class CharDef
|
6
6
|
# @!visibility private
|
7
7
|
def self.char_type(ch)
|
8
|
-
|
9
|
-
CHAR_TYPES.find do |
|
10
|
-
Object.const_get("CharDef::#{
|
8
|
+
ch_code = ch.unpack1('U*')
|
9
|
+
ch_type = CHAR_TYPES.find do |ct|
|
10
|
+
Object.const_get("::Suika::CharDef::#{ct}").any? { |r| r.include?(ch_code) }
|
11
11
|
end
|
12
|
+
ch_type || 'DEFAULT'
|
12
13
|
end
|
13
14
|
|
14
15
|
# @!visibility private
|
@@ -16,39 +17,41 @@ module Suika
|
|
16
17
|
CHAR_CATEGORY[char_type(ch)]
|
17
18
|
end
|
18
19
|
|
20
|
+
MAX_GROUPING_SIZE = 24
|
21
|
+
|
19
22
|
CHAR_CATEGORY = {
|
20
23
|
'DEFAULT' => {
|
21
|
-
invoke:
|
24
|
+
invoke: false, group: true, length: 0
|
22
25
|
},
|
23
26
|
'SPACE' => {
|
24
|
-
invoke:
|
27
|
+
invoke: false, group: true, length: 0
|
25
28
|
},
|
26
29
|
'KANJI' => {
|
27
|
-
invoke:
|
30
|
+
invoke: false, group: false, length: 2
|
28
31
|
},
|
29
32
|
'SYMBOL' => {
|
30
|
-
invoke:
|
33
|
+
invoke: true, group: true, length: 0
|
31
34
|
},
|
32
35
|
'NUMERIC' => {
|
33
|
-
invoke:
|
36
|
+
invoke: true, group: true, length: 0
|
34
37
|
},
|
35
38
|
'ALPHA' => {
|
36
|
-
invoke:
|
39
|
+
invoke: true, group: true, length: 0
|
37
40
|
},
|
38
41
|
'HIRAGANA' => {
|
39
|
-
invoke:
|
42
|
+
invoke: false, group: true, length: 2
|
40
43
|
},
|
41
44
|
'KATAKANA' => {
|
42
|
-
invoke:
|
45
|
+
invoke: true, group: true, length: 2
|
43
46
|
},
|
44
47
|
'KANJINUMERIC' => {
|
45
|
-
invoke:
|
48
|
+
invoke: true, group: true, length: 0
|
46
49
|
},
|
47
50
|
'GREEK' => {
|
48
|
-
invoke:
|
51
|
+
invoke: true, group: true, length: 0
|
49
52
|
},
|
50
53
|
'CYRILLIC' => {
|
51
|
-
invoke:
|
54
|
+
invoke: true, group: true, length: 0
|
52
55
|
}
|
53
56
|
}.freeze
|
54
57
|
|
@@ -117,6 +120,7 @@ module Suika
|
|
117
120
|
0x2B00..0x2BFF, # Miscellaneous Symbols and Arrows
|
118
121
|
0x2A00..0x2AFF, # Supplemental Mathematical Operators
|
119
122
|
0x3300..0x33FF,
|
123
|
+
0x32FF..0x32FF, # Square era name REIWA
|
120
124
|
0x3200..0x32FE, # Enclosed CJK Letters and Months
|
121
125
|
0x3000..0x303F, # CJK Symbol and Punctuation
|
122
126
|
0xFE30..0xFE4F, # CJK Compatibility Forms
|
@@ -171,8 +175,6 @@ module Suika
|
|
171
175
|
0xF900..0xFA2D,
|
172
176
|
0xFA30..0xFA6A
|
173
177
|
].freeze
|
174
|
-
|
175
|
-
# rubocop:disable Style/AsciiComments
|
176
178
|
# KANJI-NUMERIC (一 二 三 四 五 六 七 八 九 十 百 千 万 億 兆)
|
177
179
|
# 0x4E00 KANJINUMERIC KANJI
|
178
180
|
KANJINUMERIC = [
|
@@ -192,7 +194,6 @@ module Suika
|
|
192
194
|
0x5104..0x5104,
|
193
195
|
0x5146..0x5146
|
194
196
|
].freeze
|
195
|
-
# rubocop:enable Style/AsciiComments
|
196
197
|
|
197
198
|
private_constant :CHAR_CATEGORY, :CHAR_TYPES
|
198
199
|
|
data/lib/suika/lattice.rb
CHANGED
@@ -4,8 +4,6 @@ module Suika
|
|
4
4
|
# @!visibility private
|
5
5
|
class Lattice
|
6
6
|
# @!visibility private
|
7
|
-
Node = Struct.new(:surface, :min_cost, :min_prev, :left_id, :right_id, :cost, :attrs, keyword_init: true)
|
8
|
-
|
9
7
|
attr_reader :begin_nodes, :end_nodes, :length
|
10
8
|
|
11
9
|
# @!visibility private
|
@@ -13,17 +11,17 @@ module Suika
|
|
13
11
|
@length = length
|
14
12
|
@begin_nodes = Array.new(length + 1) { [] }
|
15
13
|
@end_nodes = Array.new(length + 1) { [] }
|
16
|
-
bos = Node.new(surface: 'BOS', left_id: 0, right_id: 0, cost: 0, attrs: [])
|
17
|
-
@end_nodes[0].
|
18
|
-
eos = Node.new(surface: 'EOS', left_id: 0, right_id: 0, cost: 0, attrs: [])
|
19
|
-
@begin_nodes[length].
|
14
|
+
bos = Node.new(surface: 'BOS', unknown: false, left_id: 0, right_id: 0, cost: 0, attrs: [])
|
15
|
+
@end_nodes[0].push(bos)
|
16
|
+
eos = Node.new(surface: 'EOS', unknown: false, left_id: 0, right_id: 0, cost: 0, attrs: [])
|
17
|
+
@begin_nodes[length].push(eos)
|
20
18
|
end
|
21
19
|
|
22
20
|
# @!visibility private
|
23
|
-
def insert(begin_id, end_id, surface, left_id, right_id, cost, attrs)
|
24
|
-
node = Node.new(surface: surface, left_id: left_id, right_id: right_id, cost: cost, attrs: attrs)
|
25
|
-
@begin_nodes[begin_id].
|
26
|
-
@end_nodes[end_id].
|
21
|
+
def insert(begin_id, end_id, surface, unknown, left_id, right_id, cost, attrs)
|
22
|
+
node = Node.new(surface: surface, unknown: unknown, left_id: left_id, right_id: right_id, cost: cost, attrs: attrs)
|
23
|
+
@begin_nodes[begin_id].push(node)
|
24
|
+
@end_nodes[end_id].push(node)
|
27
25
|
end
|
28
26
|
end
|
29
27
|
end
|
data/lib/suika/node.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Suika
|
4
|
+
# @!visibility private
|
5
|
+
class Node
|
6
|
+
# @!visibility private
|
7
|
+
attr_accessor :surface, :unknown, :min_cost, :min_prev, :left_id, :right_id, :cost, :attrs
|
8
|
+
|
9
|
+
# @!visibility private
|
10
|
+
def initialize(surface: '', unknown: false, min_cost: 0, min_prev: nil, left_id: 0, right_id: 0, cost: 0, attrs: [])
|
11
|
+
@surface = surface
|
12
|
+
@unknown = unknown
|
13
|
+
@min_cost = min_cost
|
14
|
+
@min_prev = min_prev
|
15
|
+
@left_id = left_id
|
16
|
+
@right_id = right_id
|
17
|
+
@cost = cost
|
18
|
+
@attrs = attrs
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/suika/tagger.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'dartsclone'
|
4
|
+
require 'rubygems/package'
|
4
5
|
require 'zlib'
|
5
6
|
|
6
7
|
module Suika
|
@@ -12,22 +13,22 @@ module Suika
|
|
12
13
|
# tagger = Suika::Tagger.new
|
13
14
|
# tagger.parse('すもももももももものうち').each { |token| puts token }
|
14
15
|
#
|
15
|
-
# # すもも
|
16
|
-
# # も
|
17
|
-
# # もも
|
18
|
-
# # も
|
19
|
-
# # もも
|
20
|
-
# # の
|
21
|
-
# # うち
|
16
|
+
# # すもも 名詞,一般,*,*,*,*,すもも,スモモ,スモモ
|
17
|
+
# # も 助詞,係助詞,*,*,*,*,も,モ,モ
|
18
|
+
# # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
19
|
+
# # も 助詞,係助詞,*,*,*,*,も,モ,モ
|
20
|
+
# # もも 名詞,一般,*,*,*,*,もも,モモ,モモ
|
21
|
+
# # の 助詞,連体化,*,*,*,*,の,ノ,ノ
|
22
|
+
# # うち 名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
|
22
23
|
#
|
23
24
|
class Tagger
|
24
25
|
# Create a new tagger by loading the built-in binary dictionary.
|
25
26
|
def initialize
|
26
|
-
|
27
|
-
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
27
|
+
raise IOError, 'SHA1 digest of dictionary file does not match.' unless DICTIONARY_KEY == Digest::SHA1.file(DICTIONARY_PATH).to_s
|
28
|
+
|
29
|
+
@sysdic = Marshal.load(Zlib::GzipReader.open(DICTIONARY_PATH, &:read))
|
30
|
+
@trie = DartsClone::DoubleArray.new
|
31
|
+
@trie.set_array(@sysdic[:trie])
|
31
32
|
end
|
32
33
|
|
33
34
|
# Parse the given sentence.
|
@@ -39,44 +40,40 @@ module Suika
|
|
39
40
|
terminal = sentence.length
|
40
41
|
|
41
42
|
while start < terminal
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
43
|
+
step = terminal - start
|
44
|
+
|
45
|
+
query = sentence[start..-1]
|
46
|
+
result = trie.common_prefix_search(query)
|
47
|
+
unless result.empty?
|
48
|
+
words, indices = result
|
49
|
+
words.each_with_index do |word, i|
|
50
|
+
features[indices[i]].each do |el|
|
51
|
+
lattice.insert(start, start + word.length, word, false,
|
52
|
+
el[0].to_i, el[1].to_i, el[2].to_i, el[3..-1])
|
51
53
|
end
|
52
|
-
is_unknown = false
|
53
54
|
end
|
54
|
-
|
55
|
-
word = sentence[start..pos]
|
56
|
-
end
|
57
|
-
|
58
|
-
unless is_unknown
|
59
|
-
start += 1
|
60
|
-
next
|
55
|
+
step = words.map(&:size).min
|
61
56
|
end
|
62
57
|
|
63
58
|
word = sentence[start]
|
64
|
-
char_type = CharDef.char_type(sentence[start])
|
65
59
|
char_cate = CharDef.char_category(sentence[start])
|
66
|
-
|
67
|
-
|
60
|
+
char_type = CharDef.char_type(sentence[start])
|
61
|
+
if char_cate[:invoke]
|
62
|
+
char_length = char_cate[:group] ? CharDef::MAX_GROUPING_SIZE : char_cate[:length]
|
63
|
+
unk_terminal = [start + char_length, terminal].min
|
68
64
|
pos = start + 1
|
69
|
-
while pos < unk_terminal && char_type == CharDef.char_type(
|
70
|
-
word <<
|
65
|
+
while pos < unk_terminal && char_type == CharDef.char_type(sentence[pos])
|
66
|
+
word << sentence[pos]
|
71
67
|
pos += 1
|
72
68
|
end
|
73
69
|
end
|
74
|
-
|
75
|
-
lattice.insert(start, start + word.length,
|
76
|
-
|
77
|
-
el[3..-1])
|
70
|
+
unknowns[char_type].each do |el|
|
71
|
+
lattice.insert(start, start + word.length, word, true,
|
72
|
+
el[0].to_i, el[1].to_i, el[2].to_i, el[3..-1])
|
78
73
|
end
|
79
|
-
|
74
|
+
step = [step, word.length].min
|
75
|
+
|
76
|
+
start += step
|
80
77
|
end
|
81
78
|
|
82
79
|
viterbi(lattice)
|
@@ -84,9 +81,25 @@ module Suika
|
|
84
81
|
|
85
82
|
private
|
86
83
|
|
84
|
+
DICTIONARY_PATH = "#{__dir__}/../../dict/sysdic.gz"
|
85
|
+
DICTIONARY_KEY = '562e53853b8a5b9f4857536b0748847a0878ebf0'
|
87
86
|
INT_MAX = 2**(([42].pack('i').size * 16) - 2) - 1
|
88
87
|
|
89
|
-
private_constant :INT_MAX
|
88
|
+
private_constant :DICTIONARY_PATH, :DICTIONARY_KEY, :INT_MAX
|
89
|
+
|
90
|
+
attr_reader :trie
|
91
|
+
|
92
|
+
def features
|
93
|
+
@sysdic[:dictionary]
|
94
|
+
end
|
95
|
+
|
96
|
+
def unknowns
|
97
|
+
@sysdic[:unknown_dictionary]
|
98
|
+
end
|
99
|
+
|
100
|
+
def costmat
|
101
|
+
@sysdic[:cost_matrix]
|
102
|
+
end
|
90
103
|
|
91
104
|
def viterbi(lattice)
|
92
105
|
bos = lattice.end_nodes[0].first
|
@@ -98,7 +111,7 @@ module Suika
|
|
98
111
|
rnode.min_cost = INT_MAX
|
99
112
|
rnode.min_prev = nil
|
100
113
|
lattice.end_nodes[n].each do |lnode|
|
101
|
-
cost = lnode.min_cost +
|
114
|
+
cost = lnode.min_cost + costmat[lnode.right_id][rnode.left_id] + rnode.cost
|
102
115
|
if cost < rnode.min_cost
|
103
116
|
rnode.min_cost = cost
|
104
117
|
rnode.min_prev = lnode
|
@@ -111,9 +124,10 @@ module Suika
|
|
111
124
|
prev_node = eos.min_prev
|
112
125
|
res = []
|
113
126
|
until prev_node.nil?
|
114
|
-
res.
|
127
|
+
res.push("#{prev_node.surface}\t#{prev_node.attrs.join(',')}") if prev_node.surface != 'BOS' && prev_node.surface != 'EOS'
|
115
128
|
prev_node = prev_node.min_prev
|
116
129
|
end
|
130
|
+
|
117
131
|
res.reverse
|
118
132
|
end
|
119
133
|
end
|
data/lib/suika/version.rb
CHANGED
data/suika.gemspec
CHANGED
@@ -12,11 +12,10 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = 'Suika is a Japanese morphological analyzer written in pure Ruby.'
|
13
13
|
spec.homepage = 'https://github.com/yoshoku/suika'
|
14
14
|
spec.license = 'BSD-3-Clause'
|
15
|
-
spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')
|
16
15
|
|
17
16
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
17
|
spec.metadata['source_code_uri'] = spec.homepage
|
19
|
-
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/
|
18
|
+
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/suika/blob/master/CHANGELOG.md'
|
20
19
|
spec.metadata['documentation_uri'] = 'https://rubydoc.info/gems/suika'
|
21
20
|
|
22
21
|
# Specify which files should be added to the gem when it is released.
|
@@ -28,5 +27,5 @@ Gem::Specification.new do |spec|
|
|
28
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
29
28
|
spec.require_paths = ['lib']
|
30
29
|
|
31
|
-
spec.add_runtime_dependency '
|
30
|
+
spec.add_runtime_dependency 'dartsclone', '>= 0.2.0'
|
32
31
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: dartsclone
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 0.2.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: 0.2.0
|
27
27
|
description: Suika is a Japanese morphological analyzer written in pure Ruby.
|
28
28
|
email:
|
29
29
|
- yoshoku@outlook.com
|
@@ -35,18 +35,21 @@ files:
|
|
35
35
|
- ".rspec"
|
36
36
|
- ".rubocop.yml"
|
37
37
|
- ".travis.yml"
|
38
|
+
- CHANGELOG.md
|
38
39
|
- CODE_OF_CONDUCT.md
|
39
40
|
- Gemfile
|
41
|
+
- Gemfile.lock
|
40
42
|
- LICENSE.txt
|
41
43
|
- NOTICE.txt
|
42
44
|
- README.md
|
43
45
|
- Rakefile
|
44
46
|
- bin/console
|
45
47
|
- bin/setup
|
46
|
-
- dict/
|
48
|
+
- dict/sysdic.gz
|
47
49
|
- lib/suika.rb
|
48
50
|
- lib/suika/char_def.rb
|
49
51
|
- lib/suika/lattice.rb
|
52
|
+
- lib/suika/node.rb
|
50
53
|
- lib/suika/tagger.rb
|
51
54
|
- lib/suika/version.rb
|
52
55
|
- suika.gemspec
|
@@ -56,7 +59,7 @@ licenses:
|
|
56
59
|
metadata:
|
57
60
|
homepage_uri: https://github.com/yoshoku/suika
|
58
61
|
source_code_uri: https://github.com/yoshoku/suika
|
59
|
-
changelog_uri: https://github.com/yoshoku/
|
62
|
+
changelog_uri: https://github.com/yoshoku/suika/blob/master/CHANGELOG.md
|
60
63
|
documentation_uri: https://rubydoc.info/gems/suika
|
61
64
|
post_install_message:
|
62
65
|
rdoc_options: []
|
@@ -66,7 +69,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
66
69
|
requirements:
|
67
70
|
- - ">="
|
68
71
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
72
|
+
version: '0'
|
70
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
74
|
requirements:
|
72
75
|
- - ">="
|