ikku 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a70c684dd795a3e2b7f41e2e48045ad85ce25771
4
- data.tar.gz: 18cf28254326d74bfc6b7594530bbde28b224c53
3
+ metadata.gz: 4186a5706c4014ef2dc6ac0738e93ef39e548f3d
4
+ data.tar.gz: 91da30ecd5430d0a3cef04c40fb8df78a6d6c08a
5
5
  SHA512:
6
- metadata.gz: 70e986f93c25c2b17ee3c9bfea3804491641d408bc8aeb2502a5614f4c1a43d2d994628f85df3574c124c3cd8996701794044f370206da4639d5069a600c79b3
7
- data.tar.gz: eb527ec36dcc1570296e3026d34492c26571f4501e757f38a46181bff2165e35decbf5151ea8e4d01242504eef925bdc45f68c69814b53096e2f08e1ff319e87
6
+ metadata.gz: d8926c58122478c30e3226b888a51494fc9a552d8fdabe49f7f310a9bedeefc16dc275d76e4e1212b752f39f14f056866abcc027be7d11b2aabb474563d67dd0
7
+ data.tar.gz: 46213e3d2fae5cd31467d01236bb9efbe1bb513d4be587f249f0ae8f41d1ea2cd45a005138c72cee3d152df834015a540bd472beb370dbed0d6788bb49f38094
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.0.5
2
+ - Improve pattern about 自立・非自立・助動詞
3
+
1
4
  ## 0.0.4
2
5
  - Fix bug of Ikku::Scanner#consume
3
6
 
data/ikku.gemspec CHANGED
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "bundler", "~> 1.7"
21
21
  spec.add_development_dependency "pry"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec", "3.2.0"
23
24
  end
data/lib/ikku.rb CHANGED
@@ -1,305 +1,2 @@
1
- require "csv"
2
- require "natto"
1
+ require "ikku/reviewer"
3
2
  require "ikku/version"
4
-
5
- module Ikku
6
- class Reviewer
7
- def initialize(rule: nil)
8
- @rule = rule
9
- end
10
-
11
- # Find one available haiku from given text.
12
- # @return [Array<Array>]
13
- def find(text)
14
- nodes = parser.parse(text)
15
- nodes.length.times.find do |index|
16
- if (phrases = Scanner.new(nodes[index..-1], rule: @rule).scan)
17
- break phrases
18
- end
19
- end
20
- end
21
-
22
- # Judge if given text is haiku or not.
23
- # @return [true, false]
24
- def judge(text)
25
- !Scanner.new(parser.parse(text), exactly: true, rule: @rule).scan.nil?
26
- end
27
-
28
- # Search all available haikus from given text.
29
- # @return [Array<Array>]
30
- def search(text)
31
- nodes = parser.parse(text)
32
- nodes.length.times.map do |index|
33
- Scanner.new(nodes[index..-1], rule: @rule).scan
34
- end.compact
35
- end
36
-
37
- private
38
-
39
- def parser
40
- @parser ||= Parser.new
41
- end
42
- end
43
-
44
- # Find one haiku that starts from the 1st node of given nodes.
45
- class Scanner
46
- DEFAULT_RULE = [5, 7, 5]
47
-
48
- attr_writer :count
49
-
50
- def initialize(nodes, exactly: false, rule: nil)
51
- @exactly = exactly
52
- @nodes = nodes
53
- @rule = rule
54
- end
55
-
56
- # @note Pronounciation count
57
- def count
58
- @count ||= 0
59
- end
60
-
61
- def scan
62
- if has_valid_first_node? && has_valid_last_node?
63
- @nodes.each_with_index do |node, index|
64
- if consume(node)
65
- if has_full_count?
66
- return phrases unless @exactly
67
- end
68
- else
69
- return
70
- end
71
- end
72
- phrases if has_full_count?
73
- end
74
- end
75
-
76
- private
77
-
78
- def consume(node)
79
- case
80
- when node.pronounciation_length > max_consumable_length
81
- false
82
- when first_of_phrase? && !node.first_of_phrase?
83
- false
84
- else
85
- phrases[phrase_index] ||= []
86
- phrases[phrase_index] << node
87
- self.count += node.pronounciation_length
88
- true
89
- end
90
- end
91
-
92
- def first_of_phrase?
93
- rule.inject([]) do |array, length|
94
- array << array.last.to_i + length
95
- end.include?(count)
96
- end
97
-
98
- def has_full_count?
99
- count == rule.inject(0, :+)
100
- end
101
-
102
- def has_valid_first_node?
103
- @nodes.first.first_of_ikku?
104
- end
105
-
106
- def has_valid_last_node?
107
- @nodes.last.last_of_ikku?
108
- end
109
-
110
- def max_consumable_length
111
- rule[0..phrase_index].inject(0, :+) - count
112
- end
113
-
114
- def phrase_index
115
- rule.length.times.find do |index|
116
- count < rule[0..index].inject(0, :+)
117
- end || rule.length - 1
118
- end
119
-
120
- def phrases
121
- @phrases ||= []
122
- end
123
-
124
- def rule
125
- @rule || DEFAULT_RULE
126
- end
127
- end
128
-
129
- class Parser
130
- def parse(text)
131
- mecab.enum_parse(text).map do |mecab_node|
132
- Node.new(mecab_node)
133
- end.select(&:analyzable?)
134
- end
135
-
136
- private
137
-
138
- def mecab
139
- @mecab ||= Natto::MeCab.new
140
- end
141
- end
142
-
143
- class Node
144
- STAT_ID_FOR_NORMAL = 0
145
- STAT_ID_FOR_UNKNOWN = 1
146
- STAT_ID_FOR_BOS = 2
147
- STAT_ID_FOR_EOS = 3
148
-
149
- # @param node [Natto::MeCabNode]
150
- def initialize(node)
151
- @node = node
152
- end
153
-
154
- def analyzable?
155
- !bos? && !eos?
156
- end
157
-
158
- def auxiliary_verb?
159
- type == "助動詞"
160
- end
161
-
162
- def bos?
163
- stat == STAT_ID_FOR_BOS
164
- end
165
-
166
- def dependent?
167
- subtype1 == "非自立"
168
- end
169
-
170
- def element_of_ikku?
171
- normal?
172
- end
173
-
174
- def last_of_ikku?
175
- case
176
- when type == "連体詞"
177
- false
178
- when ["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続"].include?(type)
179
- false
180
- when auxiliary_verb? && root_form == "だ"
181
- false
182
- else
183
- true
184
- end
185
- end
186
-
187
- def eos?
188
- stat == STAT_ID_FOR_EOS
189
- end
190
-
191
- def feature
192
- @feature ||= CSV.parse(@node.feature)[0]
193
- end
194
-
195
- def filler?
196
- type == "フィラー"
197
- end
198
-
199
- def first_of_ikku?
200
- case
201
- when !first_of_phrase?
202
- false
203
- # when filler?
204
- # false
205
- when ["、", "・", " ", " "].include?(surface)
206
- false
207
- else
208
- true
209
- end
210
- end
211
-
212
- def first_of_phrase?
213
- case
214
- when particle?
215
- false
216
- when auxiliary_verb?
217
- false
218
- when independent?
219
- false
220
- when postfix?
221
- false
222
- when dependent? && ["する", "できる"].include?(root_form)
223
- false
224
- else
225
- true
226
- end
227
- end
228
-
229
- def independent?
230
- subtype1 == "自立"
231
- end
232
-
233
- def inspect
234
- to_s.inspect
235
- end
236
-
237
- def normal?
238
- stat == STAT_ID_FOR_NORMAL
239
- end
240
-
241
- def particle?
242
- type == "助詞"
243
- end
244
-
245
- def postfix?
246
- subtype1 == "接尾"
247
- end
248
-
249
- def pronounciation
250
- feature[8]
251
- end
252
-
253
- def pronounciation_length
254
- @pronounciation_length ||= begin
255
- if pronounciation
256
- pronounciation_mora.length
257
- else
258
- 0
259
- end
260
- end
261
- end
262
-
263
- def pronounciation_mora
264
- if pronounciation
265
- pronounciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
266
- end
267
- end
268
-
269
- def root_form
270
- feature[6]
271
- end
272
-
273
- def stat
274
- @node.stat
275
- end
276
-
277
- def subtype1
278
- feature[1]
279
- end
280
-
281
- def subtype2
282
- feature[2]
283
- end
284
-
285
- def subtype3
286
- feature[3]
287
- end
288
-
289
- def surface
290
- @node.surface
291
- end
292
-
293
- def symbol?
294
- type == "記号"
295
- end
296
-
297
- def to_s
298
- surface
299
- end
300
-
301
- def type
302
- feature[0]
303
- end
304
- end
305
- end
data/lib/ikku/node.rb ADDED
@@ -0,0 +1,119 @@
1
+ require "csv"
2
+
3
+ module Ikku
4
+ class Node
5
+ STAT_ID_FOR_NORMAL = 0
6
+ STAT_ID_FOR_UNKNOWN = 1
7
+ STAT_ID_FOR_BOS = 2
8
+ STAT_ID_FOR_EOS = 3
9
+
10
+ # @param node [Natto::MeCabNode]
11
+ def initialize(node)
12
+ @node = node
13
+ end
14
+
15
+ def analyzable?
16
+ !bos? && !eos?
17
+ end
18
+
19
+ def bos?
20
+ stat == STAT_ID_FOR_BOS
21
+ end
22
+
23
+ def eos?
24
+ stat == STAT_ID_FOR_EOS
25
+ end
26
+
27
+ def feature
28
+ @feature ||= CSV.parse(@node.feature)[0]
29
+ end
30
+
31
+ def first_of_ikku?
32
+ case
33
+ when !first_of_phrase?
34
+ false
35
+ when ["、", "・", " ", " "].include?(surface)
36
+ false
37
+ else
38
+ true
39
+ end
40
+ end
41
+
42
+ def first_of_phrase?
43
+ case
44
+ when ["助詞", "助動詞"].include?(type)
45
+ false
46
+ when ["非自立", "接尾"].include?(subtype1)
47
+ false
48
+ when subtype1 == "自立" && ["する", "できる"].include?(root_form)
49
+ false
50
+ else
51
+ true
52
+ end
53
+ end
54
+
55
+ def inspect
56
+ to_s.inspect
57
+ end
58
+
59
+ def last_of_ikku?
60
+ !["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続", "連体詞"].include?(type)
61
+ end
62
+
63
+ def normal?
64
+ stat == STAT_ID_FOR_NORMAL
65
+ end
66
+
67
+ def pronounciation
68
+ feature[8]
69
+ end
70
+
71
+ def pronounciation_length
72
+ @pronounciation_length ||= begin
73
+ if pronounciation
74
+ pronounciation_mora.length
75
+ else
76
+ 0
77
+ end
78
+ end
79
+ end
80
+
81
+ def pronounciation_mora
82
+ if pronounciation
83
+ pronounciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
84
+ end
85
+ end
86
+
87
+ def root_form
88
+ feature[6]
89
+ end
90
+
91
+ def stat
92
+ @node.stat
93
+ end
94
+
95
+ def subtype1
96
+ feature[1]
97
+ end
98
+
99
+ def subtype2
100
+ feature[2]
101
+ end
102
+
103
+ def subtype3
104
+ feature[3]
105
+ end
106
+
107
+ def surface
108
+ @node.surface
109
+ end
110
+
111
+ def to_s
112
+ surface
113
+ end
114
+
115
+ def type
116
+ feature[0]
117
+ end
118
+ end
119
+ end
data/lib/ikku/parser.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "natto"
2
+ require "ikku/node"
3
+
4
+ module Ikku
5
+ class Parser
6
+ def parse(text)
7
+ mecab.enum_parse(text).map do |mecab_node|
8
+ Node.new(mecab_node)
9
+ end.select(&:analyzable?)
10
+ end
11
+
12
+ private
13
+
14
+ def mecab
15
+ @mecab ||= Natto::MeCab.new
16
+ end
17
+ end
18
+ end
data/lib/ikku/reviewer.rb ADDED
@@ -0,0 +1,42 @@
1
+ require "ikku/parser"
2
+ require "ikku/scanner"
3
+
4
+ module Ikku
5
+ class Reviewer
6
+ def initialize(rule: nil)
7
+ @rule = rule
8
+ end
9
+
10
+ # Find one available haiku from given text.
11
+ # @return [Array<Array>]
12
+ def find(text)
13
+ nodes = parser.parse(text)
14
+ nodes.length.times.find do |index|
15
+ if (phrases = Scanner.new(nodes[index..-1], rule: @rule).scan)
16
+ break phrases
17
+ end
18
+ end
19
+ end
20
+
21
+ # Judge if given text is haiku or not.
22
+ # @return [true, false]
23
+ def judge(text)
24
+ !Scanner.new(parser.parse(text), exactly: true, rule: @rule).scan.nil?
25
+ end
26
+
27
+ # Search all available haikus from given text.
28
+ # @return [Array<Array>]
29
+ def search(text)
30
+ nodes = parser.parse(text)
31
+ nodes.length.times.map do |index|
32
+ Scanner.new(nodes[index..-1], rule: @rule).scan
33
+ end.compact
34
+ end
35
+
36
+ private
37
+
38
+ def parser
39
+ @parser ||= Parser.new
40
+ end
41
+ end
42
+ end
data/lib/ikku/scanner.rb ADDED
@@ -0,0 +1,86 @@
1
+ module Ikku
2
+ # Find one haiku that starts from the 1st node of given nodes.
3
+ class Scanner
4
+ DEFAULT_RULE = [5, 7, 5]
5
+
6
+ attr_writer :count
7
+
8
+ def initialize(nodes, exactly: false, rule: nil)
9
+ @exactly = exactly
10
+ @nodes = nodes
11
+ @rule = rule
12
+ end
13
+
14
+ # @note Pronounciation count
15
+ def count
16
+ @count ||= 0
17
+ end
18
+
19
+ def scan
20
+ if has_valid_first_node? && has_valid_last_node?
21
+ @nodes.each_with_index do |node, index|
22
+ if consume(node)
23
+ if has_full_count?
24
+ return phrases unless @exactly
25
+ end
26
+ else
27
+ return
28
+ end
29
+ end
30
+ phrases if has_full_count?
31
+ end
32
+ end
33
+
34
+ private
35
+
36
+ def consume(node)
37
+ case
38
+ when node.pronounciation_length > max_consumable_length
39
+ false
40
+ when first_of_phrase? && !node.first_of_phrase?
41
+ false
42
+ else
43
+ phrases[phrase_index] ||= []
44
+ phrases[phrase_index] << node
45
+ self.count += node.pronounciation_length
46
+ true
47
+ end
48
+ end
49
+
50
+ def first_of_phrase?
51
+ rule.inject([]) do |array, length|
52
+ array << array.last.to_i + length
53
+ end.include?(count)
54
+ end
55
+
56
+ def has_full_count?
57
+ count == rule.inject(0, :+)
58
+ end
59
+
60
+ def has_valid_first_node?
61
+ @nodes.first.first_of_ikku?
62
+ end
63
+
64
+ def has_valid_last_node?
65
+ @nodes.last.last_of_ikku?
66
+ end
67
+
68
+ def max_consumable_length
69
+ rule[0..phrase_index].inject(0, :+) - count
70
+ end
71
+
72
+ def phrase_index
73
+ rule.length.times.find do |index|
74
+ count < rule[0..index].inject(0, :+)
75
+ end || rule.length - 1
76
+ end
77
+
78
+ def phrases
79
+ @phrases ||= []
80
+ end
81
+
82
+ def rule
83
+ @rule || DEFAULT_RULE
84
+ end
85
+ end
86
+ end
data/lib/ikku/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Ikku
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/spec/ikku/reviewer_spec.rb ADDED
@@ -0,0 +1,113 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe Ikku::Reviewer do
4
+ let(:instance) do
5
+ described_class.new(rule: rule)
6
+ end
7
+
8
+ let(:rule) do
9
+ nil
10
+ end
11
+
12
+ let(:text) do
13
+ "古池や蛙飛び込む水の音"
14
+ end
15
+
16
+ describe "#find" do
17
+ subject do
18
+ instance.find(text)
19
+ end
20
+
21
+ context "without ikku" do
22
+ let(:text) do
23
+ "test"
24
+ end
25
+
26
+ it { is_expected.to be_nil }
27
+ end
28
+
29
+ context "with valid ikku" do
30
+ it { is_expected.to be_a Array }
31
+ end
32
+
33
+ context "with text including ikku" do
34
+ let(:text) do
35
+ "ああ#{super()}ああ"
36
+ end
37
+
38
+ it { is_expected.to be_a Array }
39
+ end
40
+ end
41
+
42
+ describe "#judge" do
43
+ subject do
44
+ instance.judge(text)
45
+ end
46
+
47
+ context "with valid ikku" do
48
+ it { is_expected.to be true }
49
+ end
50
+
51
+ context "with invalid ikku" do
52
+ let(:text) do
53
+ "#{super()}ああ"
54
+ end
55
+
56
+ it { is_expected.to be false }
57
+ end
58
+
59
+ context "with rule option and valid ikku" do
60
+ let(:rule) do
61
+ [4, 3, 5]
62
+ end
63
+
64
+ let(:text) do
65
+ "すもももももももものうち"
66
+ end
67
+
68
+ it { is_expected.to be true }
69
+ end
70
+
71
+ context "with rule option and invalid ikku" do
72
+ let(:rule) do
73
+ [4, 3, 5]
74
+ end
75
+
76
+ it { is_expected.to be false }
77
+ end
78
+
79
+ context "with phrase starting with independent verb (歩く)" do
80
+ let(:text) do
81
+ "なぜ鳩は頭を振って歩くのか"
82
+ end
83
+
84
+ it { is_expected.to be true }
85
+ end
86
+ end
87
+
88
+ describe "#search" do
89
+ subject do
90
+ instance.search(text)
91
+ end
92
+
93
+ context "without ikku" do
94
+ let(:text) do
95
+ "test"
96
+ end
97
+
98
+ it { is_expected.to be_a Array }
99
+ end
100
+
101
+ context "with valid ikku" do
102
+ it { is_expected.to be_a Array }
103
+ end
104
+
105
+ context "with text including ikku" do
106
+ let(:text) do
107
+ "ああ#{super()}ああ"
108
+ end
109
+
110
+ it { is_expected.to be_a Array }
111
+ end
112
+ end
113
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
2
+ require "ikku"
3
+
4
+ RSpec.configure do |config|
5
+ config.expect_with :rspec do |expectations|
6
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
7
+ end
8
+
9
+ config.mock_with :rspec do |mocks|
10
+ mocks.verify_partial_doubles = true
11
+ end
12
+
13
+ config.filter_run :focus
14
+ config.run_all_when_everything_filtered = true
15
+ config.disable_monkey_patching!
16
+ config.warnings = true
17
+ config.default_formatter = "doc" if config.files_to_run.one?
18
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ikku
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-14 00:00:00.000000000 Z
11
+ date: 2015-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: natto
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 3.2.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 3.2.0
69
83
  description:
70
84
  email:
71
85
  - r7kamura@gmail.com
@@ -74,6 +88,7 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".rspec"
77
92
  - CHANGELOG.md
78
93
  - Gemfile
79
94
  - LICENSE.txt
@@ -81,7 +96,13 @@ files:
81
96
  - Rakefile
82
97
  - ikku.gemspec
83
98
  - lib/ikku.rb
99
+ - lib/ikku/node.rb
100
+ - lib/ikku/parser.rb
101
+ - lib/ikku/reviewer.rb
102
+ - lib/ikku/scanner.rb
84
103
  - lib/ikku/version.rb
104
+ - spec/ikku/reviewer_spec.rb
105
+ - spec/spec_helper.rb
85
106
  homepage: https://github.com/r7kamura/ikku
86
107
  licenses:
87
108
  - MIT
@@ -106,5 +127,7 @@ rubygems_version: 2.4.5
106
127
  signing_key:
107
128
  specification_version: 4
108
129
  summary: Discover haiku from text.
109
- test_files: []
130
+ test_files:
131
+ - spec/ikku/reviewer_spec.rb
132
+ - spec/spec_helper.rb
110
133
  has_rdoc: