ikku 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4916cf8fe3220837e015717ad3360e206115a738
4
+ data.tar.gz: ef486b20ffa6bd0f791702fa57b1ebe00a1f31e7
5
+ SHA512:
6
+ metadata.gz: 4ce3e1436b314357b8290c5cd8f147b209029ea77d9fcf4166e14e3f0f47abfb82f5592bc0478a01c122b669a18050822ba68ba8dffeb11a08737d594a3a063f
7
+ data.tar.gz: c27ce9f7d9d00005b2b16d9ccb48af8d4ee2ca18f48b30606aebd1346d37b1f514566a142e5d5a922248ff5ab0cbd869718d5ed87fe8d0ecea5e3a13c482ffc0
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ikku.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Ryo Nakamura
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Ikku
2
+ Discover haiku from text.
3
+
4
+ ## Requirements
5
+ - Ruby 2.0.0+
6
+ - MeCab with IPADIC (e.g. `brew install mecab mecab-ipadic`)
7
+
8
+ ## Example
9
+ ```rb
10
+ # Ikku::Reviewer class is the main interface for this library.
11
+ require "ikku"
12
+ reviewer = Ikku::Reviewer.new
13
+
14
+ # Judge if given text is haiku or not.
15
+ reviewer.judge("古池や蛙飛び込む水の音") #=> true
16
+ reviewer.judge("ああ古池や蛙飛び込む水の音ああ") #=> false
17
+
18
+ # Find one available haiku from given text.
19
+ reviewer.find("ああ古池や蛙飛び込む水の音ああ")
20
+ #=> [["古池", "や"], ["蛙", "飛び込む"], ["水", "の", "音"]]
21
+
22
+ # Search all available haikus from given text.
23
+ reviewer.search("ああ古池や蛙飛び込む水の音ああ天秤や京江戸かけて千代の春ああ")
24
+ #=> [
25
+ # [["古池", "や"], ["蛙", "飛び込む"], ["水", "の", "音"]],
26
+ # [["天秤", "や"], ["京", "江戸", "かけ", "て"], ["千代", "の", "春"]]
27
+ # ]
28
+ #
29
+ ```
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/ikku.gemspec ADDED
@@ -0,0 +1,23 @@
1
# Make lib/ loadable so the version constant can be read while the spec is built.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "ikku/version"

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "ikku"
  gem.version  = Ikku::VERSION
  gem.authors  = ["Ryo Nakamura"]
  gem.email    = ["r7kamura@gmail.com"]
  gem.summary  = "Discover haiku from text."
  gem.homepage = "https://github.com/r7kamura/ikku"
  gem.license  = "MIT"

  # Packaged files: everything tracked by git (NUL-separated listing).
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies
  gem.add_runtime_dependency "natto"
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "pry"
  gem.add_development_dependency "rake", "~> 10.0"
end
data/lib/ikku.rb ADDED
@@ -0,0 +1,296 @@
1
+ require "csv"
2
+ require "natto"
3
+ require "ikku/version"
4
+
5
+ module Ikku
6
# Main entry point of the library: finds and judges haiku in free text.
class Reviewer
  # Find one available haiku from given text.
  #
  # Scans from every node position in turn and stops at the first position
  # where the scanner reports a haiku.
  #
  # @param text [String]
  # @return [Array<Array>, nil] the phrases of the first haiku found, or nil when there is none
  def find(text)
    nodes = parser.parse(text)
    nodes.each_index do |index|
      phrases = Scanner.new(nodes[index..-1]).scan
      return phrases if phrases
    end
    nil
  end

  # Judge if given text is haiku or not.
  #
  # @param text [String]
  # @return [true, false] true only when the whole text forms one haiku
  def judge(text)
    nodes = parser.parse(text)
    # Text that yields no nodes cannot be a haiku; answer directly instead of
    # handing the scanner an empty list.
    return false if nodes.empty?
    !Scanner.new(nodes, exactly: true).scan.nil?
  end

  # Search all available haikus from given text.
  #
  # @param text [String]
  # @return [Array<Array>] one entry per start position at which a haiku was found
  def search(text)
    nodes = parser.parse(text)
    nodes.each_index.map do |index|
      Scanner.new(nodes[index..-1]).scan
    end.compact
  end

  private

  # Lazily built parser, reused across calls.
  def parser
    @parser ||= Parser.new
  end
end
39
+
40
# Find one haiku that starts from the 1st node of given nodes.
#
# Nodes are consumed in order and grouped into three phrases whose
# pronunciation lengths must add up to exactly 5, 7 and 5.
class Scanner
  # Required pronunciation length of each phrase.
  RULE = [5, 7, 5].freeze

  attr_writer :count

  # @param nodes [Array] objects responding to #pronounciation_length,
  #   #first_of_phrase?, #first_of_ikku? and #last_of_ikku?
  # @param exactly [true, false] when true, every given node must be part of
  #   the haiku; when false, scanning stops as soon as a haiku is complete
  def initialize(nodes, exactly: false)
    @exactly = exactly
    @nodes = nodes
  end

  # @return [Array<Array>, nil] the three phrases (arrays of nodes), or nil
  #   when no haiku starts at the first node
  def scan
    # Nothing to scan, or the opening node cannot start a haiku.
    return if @nodes.empty? || !has_valid_first_node?

    @nodes.each do |node|
      return unless consume(node)
      return completed_phrases if has_full_count? && !@exactly
    end
    completed_phrases if has_full_count?
  end

  private

  # The collected phrases, provided the node that closes the haiku is allowed
  # to close one. The check is made on the haiku's own last node rather than
  # on the last node of the input, so text trailing after the haiku does not
  # influence the result. In exact mode every node has been consumed, so both
  # are the same node.
  def completed_phrases
    phrases if phrases.last.last.last_of_ikku?
  end

  # Try to append the node to the current phrase.
  # @return [true, false] false when the node would overflow the current
  #   phrase or is not allowed to open a new phrase
  def consume(node)
    return false if node.pronounciation_length > max_consumable_length
    return false if first_of_phrase? && !node.first_of_phrase?

    phrases[phrase_index] ||= []
    phrases[phrase_index] << node
    self.count += node.pronounciation_length
    true
  end

  # @note Pronunciation count consumed so far
  def count
    @count ||= 0
  end

  # True when the running count sits exactly on a phrase boundary (5, 12 or
  # 17 for the default RULE). The very first node (count 0) is validated
  # separately through has_valid_first_node?.
  def first_of_phrase?
    phrase_boundaries.include?(count)
  end

  def has_full_count?
    count == RULE.inject(0, :+)
  end

  def has_valid_first_node?
    @nodes.first.first_of_ikku?
  end

  # How much pronunciation length still fits into the current phrase.
  def max_consumable_length
    phrase_boundaries[phrase_index] - count
  end

  # Index of the phrase being filled; stays on the last phrase once every
  # boundary has been reached.
  def phrase_index
    phrase_boundaries.index { |boundary| count < boundary } || RULE.length - 1
  end

  # Cumulative sums of RULE, i.e. the count at which each phrase ends.
  def phrase_boundaries
    total = 0
    RULE.map { |length| total += length }
  end

  def phrases
    @phrases ||= []
  end
end
119
+
120
# Converts text into a list of Node objects using MeCab (through Natto).
class Parser
  # @param text [String]
  # @return [Array<Node>] the wrapped MeCab nodes for which Node#analyzable? holds
  def parse(text)
    wrapped = mecab.enum_parse(text).map { |raw_node| Node.new(raw_node) }
    wrapped.select { |node| node.analyzable? }
  end

  private

  # MeCab handle, created on first use and kept for later calls.
  def mecab
    @mecab ||= Natto::MeCab.new
  end
end
133
+
134
# Wraps one MeCab node and answers the questions the scanner asks about it.
#
# The node's feature string is read as one CSV row; fields are addressed by
# position (0: type, 1-3: subtypes, 6: root form, 8: pronunciation).
class Node
  STAT_ID_FOR_NORMAL = 0
  STAT_ID_FOR_UNKNOWN = 1
  STAT_ID_FOR_BOS = 2
  STAT_ID_FOR_EOS = 3

  # Values of #type that may not close a haiku.
  # NOTE(review): several of these look like second-field (subtype1) values
  # under IPADIC rather than first-field values - confirm whether
  # #last_of_ikku? should test #subtype1 instead. Behaviour is kept as is.
  TYPES_INVALID_AS_LAST = ["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続"].freeze

  # Surfaces that may not open a haiku: separators and spaces.
  # NOTE(review): the list previously held the ASCII space twice; the second
  # entry is presumably the full-width space (U+3000) - confirm.
  SURFACES_INVALID_AS_FIRST = ["、", "・", " ", "\u3000"].freeze

  # Root forms that keep a dependent word attached to the preceding phrase.
  ROOT_FORMS_ATTACHED_WHEN_DEPENDENT = ["する", "できる"].freeze

  # @param node [Natto::MeCabNode]
  def initialize(node)
    @node = node
  end

  # @return [true, false] false for the BOS/EOS marker nodes
  def analyzable?
    !bos? && !eos?
  end

  def auxiliary_verb?
    type == "助動詞"
  end

  def bos?
    stat == STAT_ID_FOR_BOS
  end

  def dependent?
    subtype1 == "非自立"
  end

  def element_of_ikku?
    normal?
  end

  # @return [true, false] whether this node may be the final node of a haiku
  def last_of_ikku?
    return false if type == "連体詞"
    return false if TYPES_INVALID_AS_LAST.include?(type)
    return false if auxiliary_verb? && root_form == "だ"
    true
  end

  def eos?
    stat == STAT_ID_FOR_EOS
  end

  # @return [Array<String, nil>] fields of the feature string; empty when the
  #   feature string is nil or empty, so the field readers return nil instead
  #   of raising
  def feature
    @feature ||= (CSV.parse_line(@node.feature.to_s) || [])
  end

  def filler?
    type == "フィラー"
  end

  # @return [true, false] whether this node may be the first node of a haiku
  def first_of_ikku?
    return false unless first_of_phrase?
    return false if SURFACES_INVALID_AS_FIRST.include?(surface)
    true
  end

  # @return [true, false] whether this node may open a phrase
  def first_of_phrase?
    return false if particle? || auxiliary_verb? || independent? || postfix?
    return false if dependent? && ROOT_FORMS_ATTACHED_WHEN_DEPENDENT.include?(root_form)
    true
  end

  def independent?
    subtype1 == "自立"
  end

  def inspect
    to_s.inspect
  end

  def normal?
    stat == STAT_ID_FOR_NORMAL
  end

  def particle?
    type == "助詞"
  end

  def postfix?
    subtype1 == "接尾"
  end

  # @return [String, nil] nil when the feature string has no field 8
  def pronounciation
    feature[8]
  end

  # @return [Integer] length of #pronounciation_mora, or 0 without a pronunciation
  def pronounciation_length
    @pronounciation_length ||= pronounciation ? pronounciation_mora.length : 0
  end

  # Pronunciation converted to katakana with every character outside the
  # listed katakana removed (small ャ/ュ/ョ, for instance, are dropped), so
  # that the remaining characters can simply be counted.
  # @return [String, nil]
  def pronounciation_mora
    return unless pronounciation
    pronounciation.tr("ぁ-ゔ", "ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
  end

  def root_form
    feature[6]
  end

  def stat
    @node.stat
  end

  def subtype1
    feature[1]
  end

  def subtype2
    feature[2]
  end

  def subtype3
    feature[3]
  end

  def surface
    @node.surface
  end

  def symbol?
    type == "記号"
  end

  def to_s
    surface
  end

  def type
    feature[0]
  end
end
296
+ end
data/lib/ikku/version.rb ADDED
@@ -0,0 +1,3 @@
1
module Ikku
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ikku
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ryo Nakamura
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: natto
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description:
70
+ email:
71
+ - r7kamura@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - ikku.gemspec
82
+ - lib/ikku.rb
83
+ - lib/ikku/version.rb
84
+ homepage: https://github.com/r7kamura/ikku
85
+ licenses:
86
+ - MIT
87
+ metadata: {}
88
+ post_install_message:
89
+ rdoc_options: []
90
+ require_paths:
91
+ - lib
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ required_rubygems_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ requirements: []
103
+ rubyforge_project:
104
+ rubygems_version: 2.4.5
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: Discover haiku from text.
108
+ test_files: []
109
+ has_rdoc: