ikku 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/ikku.gemspec +23 -0
- data/lib/ikku.rb +296 -0
- data/lib/ikku/version.rb +3 -0
- metadata +109 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4916cf8fe3220837e015717ad3360e206115a738
|
4
|
+
data.tar.gz: ef486b20ffa6bd0f791702fa57b1ebe00a1f31e7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4ce3e1436b314357b8290c5cd8f147b209029ea77d9fcf4166e14e3f0f47abfb82f5592bc0478a01c122b669a18050822ba68ba8dffeb11a08737d594a3a063f
|
7
|
+
data.tar.gz: c27ce9f7d9d00005b2b16d9ccb48af8d4ee2ca18f48b30606aebd1346d37b1f514566a142e5d5a922248ff5ab0cbd869718d5ed87fe8d0ecea5e3a13c482ffc0
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Ryo Nakamura
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Ikku
|
2
|
+
Discover haiku from text.
|
3
|
+
|
4
|
+
## Requirements
|
5
|
+
- Ruby 2.0.0+
|
6
|
+
- MeCab with IPADIC (e.g. `brew install mecab mecab-ipadic`)
|
7
|
+
|
8
|
+
## Example
|
9
|
+
```rb
|
10
|
+
# Ikku::Reviewer class is the main interface for this library.
|
11
|
+
require "ikku"
|
12
|
+
reviewer = Ikku::Reviewer.new
|
13
|
+
|
14
|
+
# Judge if given text is haiku or not.
|
15
|
+
reviewer.judge("古池や蛙飛び込む水の音") #=> true
|
16
|
+
reviewer.judge("ああ古池や蛙飛び込む水の音ああ") #=> false
|
17
|
+
|
18
|
+
# Find one available haiku from given text.
|
19
|
+
reviewer.find("ああ古池や蛙飛び込む水の音ああ")
|
20
|
+
#=> [["古池", "や"], ["蛙", "飛び込む"], ["水", "の", "音"]]
|
21
|
+
|
22
|
+
# Search all available haikus from given text.
|
23
|
+
reviewer.search("ああ古池や蛙飛び込む水の音ああ天秤や京江戸かけて千代の春ああ")
|
24
|
+
#=> [
|
25
|
+
# [["古池", "や"], ["蛙", "飛び込む"], ["水", "の", "音"]],
|
26
|
+
# [["天秤", "や"], ["京", "江戸", "かけ", "て"], ["千代", "の", "春"]]
|
27
|
+
# ]
|
28
|
+
#
|
29
|
+
```
|
data/Rakefile
ADDED
data/ikku.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Put this gem's lib/ directory on the load path so that the version constant
# can be required below without the gem being installed.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)
require "ikku/version"

Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name = "ikku"
  gem.version = Ikku::VERSION
  gem.authors = ["Ryo Nakamura"]
  gem.email = ["r7kamura@gmail.com"]
  gem.summary = "Discover haiku from text."
  gem.homepage = "https://github.com/r7kamura/ikku"
  gem.license = "MIT"

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files
  gem.executables = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies.
  gem.add_runtime_dependency "natto"
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "rake", "~> 10.0"
end
|
data/lib/ikku.rb
ADDED
@@ -0,0 +1,296 @@
|
|
1
|
+
require "csv"
|
2
|
+
require "natto"
|
3
|
+
require "ikku/version"
|
4
|
+
|
5
|
+
module Ikku
|
6
|
+
class Reviewer
|
7
|
+
# Find one available haiku from given text.
|
8
|
+
# @return [Array<Array>]
|
9
|
+
def find(text)
|
10
|
+
nodes = parser.parse(text)
|
11
|
+
nodes.length.times.find do |index|
|
12
|
+
if (phrases = Scanner.new(nodes[index..-1]).scan)
|
13
|
+
break phrases
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
# Judge if given text is haiku or not.
|
19
|
+
# @return [true, false]
|
20
|
+
def judge(text)
|
21
|
+
!Scanner.new(parser.parse(text), exactly: true).scan.nil?
|
22
|
+
end
|
23
|
+
|
24
|
+
# Search all available haikus from given text.
|
25
|
+
# @return [Array<Array>]
|
26
|
+
def search(text)
|
27
|
+
nodes = parser.parse(text)
|
28
|
+
nodes.length.times.map do |index|
|
29
|
+
Scanner.new(nodes[index..-1]).scan
|
30
|
+
end.compact
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def parser
|
36
|
+
@parser ||= Parser.new
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Find one haiku that starts from the 1st node of given nodes.
|
41
|
+
class Scanner
|
42
|
+
RULE = [5, 7, 5]
|
43
|
+
|
44
|
+
attr_writer :count
|
45
|
+
|
46
|
+
def initialize(nodes, exactly: false)
|
47
|
+
@exactly = exactly
|
48
|
+
@nodes = nodes
|
49
|
+
end
|
50
|
+
|
51
|
+
def scan
|
52
|
+
if has_valid_first_node? && has_valid_last_node?
|
53
|
+
@nodes.each_with_index do |node, index|
|
54
|
+
if consume(node)
|
55
|
+
if has_full_count?
|
56
|
+
return phrases unless @exactly
|
57
|
+
end
|
58
|
+
else
|
59
|
+
return
|
60
|
+
end
|
61
|
+
end
|
62
|
+
phrases if has_full_count?
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
def consume(node)
|
69
|
+
case
|
70
|
+
when node.pronounciation_length > max_consumable_length
|
71
|
+
false
|
72
|
+
when first_of_phrase? && !node.first_of_phrase?
|
73
|
+
false
|
74
|
+
else
|
75
|
+
phrases[phrase_index] ||= []
|
76
|
+
phrases[phrase_index] << node
|
77
|
+
self.count += node.pronounciation_length
|
78
|
+
true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# @note Pronounciation count
|
83
|
+
def count
|
84
|
+
@count ||= 0
|
85
|
+
end
|
86
|
+
|
87
|
+
def first_of_phrase?
|
88
|
+
RULE.inject([]) do |array, length|
|
89
|
+
array << array.last.to_i + length
|
90
|
+
end.include?(count)
|
91
|
+
end
|
92
|
+
|
93
|
+
def has_full_count?
|
94
|
+
count == RULE.inject(0, :+)
|
95
|
+
end
|
96
|
+
|
97
|
+
def has_valid_first_node?
|
98
|
+
@nodes.first.first_of_ikku?
|
99
|
+
end
|
100
|
+
|
101
|
+
def has_valid_last_node?
|
102
|
+
@nodes.last.last_of_ikku?
|
103
|
+
end
|
104
|
+
|
105
|
+
def max_consumable_length
|
106
|
+
RULE[0..phrase_index].inject(0, :+) - count
|
107
|
+
end
|
108
|
+
|
109
|
+
def phrase_index
|
110
|
+
RULE.length.times.find do |index|
|
111
|
+
count < RULE[0..index].inject(0, :+)
|
112
|
+
end || RULE.length - 1
|
113
|
+
end
|
114
|
+
|
115
|
+
def phrases
|
116
|
+
@phrases ||= []
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class Parser
|
121
|
+
def parse(text)
|
122
|
+
mecab.enum_parse(text).map do |mecab_node|
|
123
|
+
Node.new(mecab_node)
|
124
|
+
end.select(&:analyzable?)
|
125
|
+
end
|
126
|
+
|
127
|
+
private
|
128
|
+
|
129
|
+
def mecab
|
130
|
+
@mecab ||= Natto::MeCab.new
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
class Node
|
135
|
+
STAT_ID_FOR_NORMAL = 0
|
136
|
+
STAT_ID_FOR_UNKNOWN = 1
|
137
|
+
STAT_ID_FOR_BOS = 2
|
138
|
+
STAT_ID_FOR_EOS = 3
|
139
|
+
|
140
|
+
# @param node [Natto::MeCabNode]
|
141
|
+
def initialize(node)
|
142
|
+
@node = node
|
143
|
+
end
|
144
|
+
|
145
|
+
def analyzable?
|
146
|
+
!bos? && !eos?
|
147
|
+
end
|
148
|
+
|
149
|
+
def auxiliary_verb?
|
150
|
+
type == "助動詞"
|
151
|
+
end
|
152
|
+
|
153
|
+
def bos?
|
154
|
+
stat == STAT_ID_FOR_BOS
|
155
|
+
end
|
156
|
+
|
157
|
+
def dependent?
|
158
|
+
subtype1 == "非自立"
|
159
|
+
end
|
160
|
+
|
161
|
+
def element_of_ikku?
|
162
|
+
normal?
|
163
|
+
end
|
164
|
+
|
165
|
+
def last_of_ikku?
|
166
|
+
case
|
167
|
+
when type == "連体詞"
|
168
|
+
false
|
169
|
+
when ["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続"].include?(type)
|
170
|
+
false
|
171
|
+
when auxiliary_verb? && root_form == "だ"
|
172
|
+
false
|
173
|
+
else
|
174
|
+
true
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def eos?
|
179
|
+
stat == STAT_ID_FOR_EOS
|
180
|
+
end
|
181
|
+
|
182
|
+
def feature
|
183
|
+
@feature ||= CSV.parse(@node.feature)[0]
|
184
|
+
end
|
185
|
+
|
186
|
+
def filler?
|
187
|
+
type == "フィラー"
|
188
|
+
end
|
189
|
+
|
190
|
+
def first_of_ikku?
|
191
|
+
case
|
192
|
+
when !first_of_phrase?
|
193
|
+
false
|
194
|
+
# when filler?
|
195
|
+
# false
|
196
|
+
when ["、", "・", " ", " "].include?(surface)
|
197
|
+
false
|
198
|
+
else
|
199
|
+
true
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
def first_of_phrase?
|
204
|
+
case
|
205
|
+
when particle?
|
206
|
+
false
|
207
|
+
when auxiliary_verb?
|
208
|
+
false
|
209
|
+
when independent?
|
210
|
+
false
|
211
|
+
when postfix?
|
212
|
+
false
|
213
|
+
when dependent? && ["する", "できる"].include?(root_form)
|
214
|
+
false
|
215
|
+
else
|
216
|
+
true
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
def independent?
|
221
|
+
subtype1 == "自立"
|
222
|
+
end
|
223
|
+
|
224
|
+
def inspect
|
225
|
+
to_s.inspect
|
226
|
+
end
|
227
|
+
|
228
|
+
def normal?
|
229
|
+
stat == STAT_ID_FOR_NORMAL
|
230
|
+
end
|
231
|
+
|
232
|
+
def particle?
|
233
|
+
type == "助詞"
|
234
|
+
end
|
235
|
+
|
236
|
+
def postfix?
|
237
|
+
subtype1 == "接尾"
|
238
|
+
end
|
239
|
+
|
240
|
+
def pronounciation
|
241
|
+
feature[8]
|
242
|
+
end
|
243
|
+
|
244
|
+
def pronounciation_length
|
245
|
+
@pronounciation_length ||= begin
|
246
|
+
if pronounciation
|
247
|
+
pronounciation_mora.length
|
248
|
+
else
|
249
|
+
0
|
250
|
+
end
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
def pronounciation_mora
|
255
|
+
if pronounciation
|
256
|
+
pronounciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
def root_form
|
261
|
+
feature[6]
|
262
|
+
end
|
263
|
+
|
264
|
+
def stat
|
265
|
+
@node.stat
|
266
|
+
end
|
267
|
+
|
268
|
+
def subtype1
|
269
|
+
feature[1]
|
270
|
+
end
|
271
|
+
|
272
|
+
def subtype2
|
273
|
+
feature[2]
|
274
|
+
end
|
275
|
+
|
276
|
+
def subtype3
|
277
|
+
feature[3]
|
278
|
+
end
|
279
|
+
|
280
|
+
def surface
|
281
|
+
@node.surface
|
282
|
+
end
|
283
|
+
|
284
|
+
def symbol?
|
285
|
+
type == "記号"
|
286
|
+
end
|
287
|
+
|
288
|
+
def to_s
|
289
|
+
surface
|
290
|
+
end
|
291
|
+
|
292
|
+
def type
|
293
|
+
feature[0]
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
data/lib/ikku/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ikku
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ryo Nakamura
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: natto
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.7'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.0'
|
69
|
+
description:
|
70
|
+
email:
|
71
|
+
- r7kamura@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- Gemfile
|
78
|
+
- LICENSE.txt
|
79
|
+
- README.md
|
80
|
+
- Rakefile
|
81
|
+
- ikku.gemspec
|
82
|
+
- lib/ikku.rb
|
83
|
+
- lib/ikku/version.rb
|
84
|
+
homepage: https://github.com/r7kamura/ikku
|
85
|
+
licenses:
|
86
|
+
- MIT
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 2.4.5
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: Discover haiku from text.
|
108
|
+
test_files: []
|
109
|
+
has_rdoc:
|