ikku 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a70c684dd795a3e2b7f41e2e48045ad85ce25771
4
- data.tar.gz: 18cf28254326d74bfc6b7594530bbde28b224c53
3
+ metadata.gz: 4186a5706c4014ef2dc6ac0738e93ef39e548f3d
4
+ data.tar.gz: 91da30ecd5430d0a3cef04c40fb8df78a6d6c08a
5
5
  SHA512:
6
- metadata.gz: 70e986f93c25c2b17ee3c9bfea3804491641d408bc8aeb2502a5614f4c1a43d2d994628f85df3574c124c3cd8996701794044f370206da4639d5069a600c79b3
7
- data.tar.gz: eb527ec36dcc1570296e3026d34492c26571f4501e757f38a46181bff2165e35decbf5151ea8e4d01242504eef925bdc45f68c69814b53096e2f08e1ff319e87
6
+ metadata.gz: d8926c58122478c30e3226b888a51494fc9a552d8fdabe49f7f310a9bedeefc16dc275d76e4e1212b752f39f14f056866abcc027be7d11b2aabb474563d67dd0
7
+ data.tar.gz: 46213e3d2fae5cd31467d01236bb9efbe1bb513d4be587f249f0ae8f41d1ea2cd45a005138c72cee3d152df834015a540bd472beb370dbed0d6788bb49f38094
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 0.0.5
2
+ - Improve pattern about 自立・非自立・助動詞
3
+
1
4
  ## 0.0.4
2
5
  - Fix bug of Ikku::Scanner#consume
3
6
 
data/ikku.gemspec CHANGED
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency "bundler", "~> 1.7"
21
21
  spec.add_development_dependency "pry"
22
22
  spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rspec", "3.2.0"
23
24
  end
data/lib/ikku.rb CHANGED
@@ -1,305 +1,2 @@
1
- require "csv"
2
- require "natto"
1
+ require "ikku/reviewer"
3
2
  require "ikku/version"
4
-
5
- module Ikku
6
- class Reviewer
7
- def initialize(rule: nil)
8
- @rule = rule
9
- end
10
-
11
- # Find one available haiku from given text.
12
- # @return [Array<Array>]
13
- def find(text)
14
- nodes = parser.parse(text)
15
- nodes.length.times.find do |index|
16
- if (phrases = Scanner.new(nodes[index..-1], rule: @rule).scan)
17
- break phrases
18
- end
19
- end
20
- end
21
-
22
- # Judge if given text is haiku or not.
23
- # @return [true, false]
24
- def judge(text)
25
- !Scanner.new(parser.parse(text), exactly: true, rule: @rule).scan.nil?
26
- end
27
-
28
- # Search all available haikus from given text.
29
- # @return [Array<Array>]
30
- def search(text)
31
- nodes = parser.parse(text)
32
- nodes.length.times.map do |index|
33
- Scanner.new(nodes[index..-1], rule: @rule).scan
34
- end.compact
35
- end
36
-
37
- private
38
-
39
- def parser
40
- @parser ||= Parser.new
41
- end
42
- end
43
-
44
- # Find one haiku that starts from the 1st node of given nodes.
45
- class Scanner
46
- DEFAULT_RULE = [5, 7, 5]
47
-
48
- attr_writer :count
49
-
50
- def initialize(nodes, exactly: false, rule: nil)
51
- @exactly = exactly
52
- @nodes = nodes
53
- @rule = rule
54
- end
55
-
56
- # @note Pronounciation count
57
- def count
58
- @count ||= 0
59
- end
60
-
61
- def scan
62
- if has_valid_first_node? && has_valid_last_node?
63
- @nodes.each_with_index do |node, index|
64
- if consume(node)
65
- if has_full_count?
66
- return phrases unless @exactly
67
- end
68
- else
69
- return
70
- end
71
- end
72
- phrases if has_full_count?
73
- end
74
- end
75
-
76
- private
77
-
78
- def consume(node)
79
- case
80
- when node.pronounciation_length > max_consumable_length
81
- false
82
- when first_of_phrase? && !node.first_of_phrase?
83
- false
84
- else
85
- phrases[phrase_index] ||= []
86
- phrases[phrase_index] << node
87
- self.count += node.pronounciation_length
88
- true
89
- end
90
- end
91
-
92
- def first_of_phrase?
93
- rule.inject([]) do |array, length|
94
- array << array.last.to_i + length
95
- end.include?(count)
96
- end
97
-
98
- def has_full_count?
99
- count == rule.inject(0, :+)
100
- end
101
-
102
- def has_valid_first_node?
103
- @nodes.first.first_of_ikku?
104
- end
105
-
106
- def has_valid_last_node?
107
- @nodes.last.last_of_ikku?
108
- end
109
-
110
- def max_consumable_length
111
- rule[0..phrase_index].inject(0, :+) - count
112
- end
113
-
114
- def phrase_index
115
- rule.length.times.find do |index|
116
- count < rule[0..index].inject(0, :+)
117
- end || rule.length - 1
118
- end
119
-
120
- def phrases
121
- @phrases ||= []
122
- end
123
-
124
- def rule
125
- @rule || DEFAULT_RULE
126
- end
127
- end
128
-
129
- class Parser
130
- def parse(text)
131
- mecab.enum_parse(text).map do |mecab_node|
132
- Node.new(mecab_node)
133
- end.select(&:analyzable?)
134
- end
135
-
136
- private
137
-
138
- def mecab
139
- @mecab ||= Natto::MeCab.new
140
- end
141
- end
142
-
143
- class Node
144
- STAT_ID_FOR_NORMAL = 0
145
- STAT_ID_FOR_UNKNOWN = 1
146
- STAT_ID_FOR_BOS = 2
147
- STAT_ID_FOR_EOS = 3
148
-
149
- # @param node [Natto::MeCabNode]
150
- def initialize(node)
151
- @node = node
152
- end
153
-
154
- def analyzable?
155
- !bos? && !eos?
156
- end
157
-
158
- def auxiliary_verb?
159
- type == "助動詞"
160
- end
161
-
162
- def bos?
163
- stat == STAT_ID_FOR_BOS
164
- end
165
-
166
- def dependent?
167
- subtype1 == "非自立"
168
- end
169
-
170
- def element_of_ikku?
171
- normal?
172
- end
173
-
174
- def last_of_ikku?
175
- case
176
- when type == "連体詞"
177
- false
178
- when ["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続"].include?(type)
179
- false
180
- when auxiliary_verb? && root_form == "だ"
181
- false
182
- else
183
- true
184
- end
185
- end
186
-
187
- def eos?
188
- stat == STAT_ID_FOR_EOS
189
- end
190
-
191
- def feature
192
- @feature ||= CSV.parse(@node.feature)[0]
193
- end
194
-
195
- def filler?
196
- type == "フィラー"
197
- end
198
-
199
- def first_of_ikku?
200
- case
201
- when !first_of_phrase?
202
- false
203
- # when filler?
204
- # false
205
- when ["、", "・", " ", " "].include?(surface)
206
- false
207
- else
208
- true
209
- end
210
- end
211
-
212
- def first_of_phrase?
213
- case
214
- when particle?
215
- false
216
- when auxiliary_verb?
217
- false
218
- when independent?
219
- false
220
- when postfix?
221
- false
222
- when dependent? && ["する", "できる"].include?(root_form)
223
- false
224
- else
225
- true
226
- end
227
- end
228
-
229
- def independent?
230
- subtype1 == "自立"
231
- end
232
-
233
- def inspect
234
- to_s.inspect
235
- end
236
-
237
- def normal?
238
- stat == STAT_ID_FOR_NORMAL
239
- end
240
-
241
- def particle?
242
- type == "助詞"
243
- end
244
-
245
- def postfix?
246
- subtype1 == "接尾"
247
- end
248
-
249
- def pronounciation
250
- feature[8]
251
- end
252
-
253
- def pronounciation_length
254
- @pronounciation_length ||= begin
255
- if pronounciation
256
- pronounciation_mora.length
257
- else
258
- 0
259
- end
260
- end
261
- end
262
-
263
- def pronounciation_mora
264
- if pronounciation
265
- pronounciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
266
- end
267
- end
268
-
269
- def root_form
270
- feature[6]
271
- end
272
-
273
- def stat
274
- @node.stat
275
- end
276
-
277
- def subtype1
278
- feature[1]
279
- end
280
-
281
- def subtype2
282
- feature[2]
283
- end
284
-
285
- def subtype3
286
- feature[3]
287
- end
288
-
289
- def surface
290
- @node.surface
291
- end
292
-
293
- def symbol?
294
- type == "記号"
295
- end
296
-
297
- def to_s
298
- surface
299
- end
300
-
301
- def type
302
- feature[0]
303
- end
304
- end
305
- end
data/lib/ikku/node.rb ADDED
@@ -0,0 +1,119 @@
1
+ require "csv"
2
+
3
+ module Ikku
4
+ class Node
5
+ STAT_ID_FOR_NORMAL = 0
6
+ STAT_ID_FOR_UNKNOWN = 1
7
+ STAT_ID_FOR_BOS = 2
8
+ STAT_ID_FOR_EOS = 3
9
+
10
+ # @param node [Natto::MeCabNode]
11
+ def initialize(node)
12
+ @node = node
13
+ end
14
+
15
+ def analyzable?
16
+ !bos? && !eos?
17
+ end
18
+
19
+ def bos?
20
+ stat == STAT_ID_FOR_BOS
21
+ end
22
+
23
+ def eos?
24
+ stat == STAT_ID_FOR_EOS
25
+ end
26
+
27
+ def feature
28
+ @feature ||= CSV.parse(@node.feature)[0]
29
+ end
30
+
31
+ def first_of_ikku?
32
+ case
33
+ when !first_of_phrase?
34
+ false
35
+ when ["、", "・", " ", " "].include?(surface)
36
+ false
37
+ else
38
+ true
39
+ end
40
+ end
41
+
42
+ def first_of_phrase?
43
+ case
44
+ when ["助詞", "助動詞"].include?(type)
45
+ false
46
+ when ["非自立", "接尾"].include?(subtype1)
47
+ false
48
+ when subtype1 == "自立" && ["する", "できる"].include?(root_form)
49
+ false
50
+ else
51
+ true
52
+ end
53
+ end
54
+
55
+ def inspect
56
+ to_s.inspect
57
+ end
58
+
59
+ def last_of_ikku?
60
+ !["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続", "連体詞"].include?(type)
61
+ end
62
+
63
+ def normal?
64
+ stat == STAT_ID_FOR_NORMAL
65
+ end
66
+
67
+ def pronounciation
68
+ feature[8]
69
+ end
70
+
71
+ def pronounciation_length
72
+ @pronounciation_length ||= begin
73
+ if pronounciation
74
+ pronounciation_mora.length
75
+ else
76
+ 0
77
+ end
78
+ end
79
+ end
80
+
81
+ def pronounciation_mora
82
+ if pronounciation
83
+ pronounciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "")
84
+ end
85
+ end
86
+
87
+ def root_form
88
+ feature[6]
89
+ end
90
+
91
+ def stat
92
+ @node.stat
93
+ end
94
+
95
+ def subtype1
96
+ feature[1]
97
+ end
98
+
99
+ def subtype2
100
+ feature[2]
101
+ end
102
+
103
+ def subtype3
104
+ feature[3]
105
+ end
106
+
107
+ def surface
108
+ @node.surface
109
+ end
110
+
111
+ def to_s
112
+ surface
113
+ end
114
+
115
+ def type
116
+ feature[0]
117
+ end
118
+ end
119
+ end
data/lib/ikku/parser.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "natto"
2
+ require "ikku/node"
3
+
4
+ module Ikku
5
+ class Parser
6
+ def parse(text)
7
+ mecab.enum_parse(text).map do |mecab_node|
8
+ Node.new(mecab_node)
9
+ end.select(&:analyzable?)
10
+ end
11
+
12
+ private
13
+
14
+ def mecab
15
+ @mecab ||= Natto::MeCab.new
16
+ end
17
+ end
18
+ end
data/lib/ikku/reviewer.rb ADDED
@@ -0,0 +1,42 @@
1
+ require "ikku/parser"
2
+ require "ikku/scanner"
3
+
4
+ module Ikku
5
+ class Reviewer
6
+ def initialize(rule: nil)
7
+ @rule = rule
8
+ end
9
+
10
+ # Find one available haiku from given text.
11
+ # @return [Array<Array>]
12
+ def find(text)
13
+ nodes = parser.parse(text)
14
+ nodes.length.times.find do |index|
15
+ if (phrases = Scanner.new(nodes[index..-1], rule: @rule).scan)
16
+ break phrases
17
+ end
18
+ end
19
+ end
20
+
21
+ # Judge if given text is haiku or not.
22
+ # @return [true, false]
23
+ def judge(text)
24
+ !Scanner.new(parser.parse(text), exactly: true, rule: @rule).scan.nil?
25
+ end
26
+
27
+ # Search all available haikus from given text.
28
+ # @return [Array<Array>]
29
+ def search(text)
30
+ nodes = parser.parse(text)
31
+ nodes.length.times.map do |index|
32
+ Scanner.new(nodes[index..-1], rule: @rule).scan
33
+ end.compact
34
+ end
35
+
36
+ private
37
+
38
+ def parser
39
+ @parser ||= Parser.new
40
+ end
41
+ end
42
+ end
data/lib/ikku/scanner.rb ADDED
@@ -0,0 +1,86 @@
1
+ module Ikku
2
+ # Find one haiku that starts from the 1st node of given nodes.
3
+ class Scanner
4
+ DEFAULT_RULE = [5, 7, 5]
5
+
6
+ attr_writer :count
7
+
8
+ def initialize(nodes, exactly: false, rule: nil)
9
+ @exactly = exactly
10
+ @nodes = nodes
11
+ @rule = rule
12
+ end
13
+
14
+ # @note Pronounciation count
15
+ def count
16
+ @count ||= 0
17
+ end
18
+
19
+ def scan
20
+ if has_valid_first_node? && has_valid_last_node?
21
+ @nodes.each_with_index do |node, index|
22
+ if consume(node)
23
+ if has_full_count?
24
+ return phrases unless @exactly
25
+ end
26
+ else
27
+ return
28
+ end
29
+ end
30
+ phrases if has_full_count?
31
+ end
32
+ end
33
+
34
+ private
35
+
36
+ def consume(node)
37
+ case
38
+ when node.pronounciation_length > max_consumable_length
39
+ false
40
+ when first_of_phrase? && !node.first_of_phrase?
41
+ false
42
+ else
43
+ phrases[phrase_index] ||= []
44
+ phrases[phrase_index] << node
45
+ self.count += node.pronounciation_length
46
+ true
47
+ end
48
+ end
49
+
50
+ def first_of_phrase?
51
+ rule.inject([]) do |array, length|
52
+ array << array.last.to_i + length
53
+ end.include?(count)
54
+ end
55
+
56
+ def has_full_count?
57
+ count == rule.inject(0, :+)
58
+ end
59
+
60
+ def has_valid_first_node?
61
+ @nodes.first.first_of_ikku?
62
+ end
63
+
64
+ def has_valid_last_node?
65
+ @nodes.last.last_of_ikku?
66
+ end
67
+
68
+ def max_consumable_length
69
+ rule[0..phrase_index].inject(0, :+) - count
70
+ end
71
+
72
+ def phrase_index
73
+ rule.length.times.find do |index|
74
+ count < rule[0..index].inject(0, :+)
75
+ end || rule.length - 1
76
+ end
77
+
78
+ def phrases
79
+ @phrases ||= []
80
+ end
81
+
82
+ def rule
83
+ @rule || DEFAULT_RULE
84
+ end
85
+ end
86
+ end
data/lib/ikku/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Ikku
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
data/spec/ikku/reviewer_spec.rb ADDED
@@ -0,0 +1,113 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe Ikku::Reviewer do
4
+ let(:instance) do
5
+ described_class.new(rule: rule)
6
+ end
7
+
8
+ let(:rule) do
9
+ nil
10
+ end
11
+
12
+ let(:text) do
13
+ "古池や蛙飛び込む水の音"
14
+ end
15
+
16
+ describe "#find" do
17
+ subject do
18
+ instance.find(text)
19
+ end
20
+
21
+ context "without ikku" do
22
+ let(:text) do
23
+ "test"
24
+ end
25
+
26
+ it { is_expected.to be_nil }
27
+ end
28
+
29
+ context "with valid ikku" do
30
+ it { is_expected.to be_a Array }
31
+ end
32
+
33
+ context "with text including ikku" do
34
+ let(:text) do
35
+ "ああ#{super()}ああ"
36
+ end
37
+
38
+ it { is_expected.to be_a Array }
39
+ end
40
+ end
41
+
42
+ describe "#judge" do
43
+ subject do
44
+ instance.judge(text)
45
+ end
46
+
47
+ context "with valid ikku" do
48
+ it { is_expected.to be true }
49
+ end
50
+
51
+ context "with invalid ikku" do
52
+ let(:text) do
53
+ "#{super()}ああ"
54
+ end
55
+
56
+ it { is_expected.to be false }
57
+ end
58
+
59
+ context "with rule option and valid ikku" do
60
+ let(:rule) do
61
+ [4, 3, 5]
62
+ end
63
+
64
+ let(:text) do
65
+ "すもももももももものうち"
66
+ end
67
+
68
+ it { is_expected.to be true }
69
+ end
70
+
71
+ context "with rule option and invalid ikku" do
72
+ let(:rule) do
73
+ [4, 3, 5]
74
+ end
75
+
76
+ it { is_expected.to be false }
77
+ end
78
+
79
+ context "with phrase starting with independent verb (歩く)" do
80
+ let(:text) do
81
+ "なぜ鳩は頭を振って歩くのか"
82
+ end
83
+
84
+ it { is_expected.to be true }
85
+ end
86
+ end
87
+
88
+ describe "#search" do
89
+ subject do
90
+ instance.search(text)
91
+ end
92
+
93
+ context "without ikku" do
94
+ let(:text) do
95
+ "test"
96
+ end
97
+
98
+ it { is_expected.to be_a Array }
99
+ end
100
+
101
+ context "with valid ikku" do
102
+ it { is_expected.to be_a Array }
103
+ end
104
+
105
+ context "with text including ikku" do
106
+ let(:text) do
107
+ "ああ#{super()}ああ"
108
+ end
109
+
110
+ it { is_expected.to be_a Array }
111
+ end
112
+ end
113
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__)
2
+ require "ikku"
3
+
4
+ RSpec.configure do |config|
5
+ config.expect_with :rspec do |expectations|
6
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
7
+ end
8
+
9
+ config.mock_with :rspec do |mocks|
10
+ mocks.verify_partial_doubles = true
11
+ end
12
+
13
+ config.filter_run :focus
14
+ config.run_all_when_everything_filtered = true
15
+ config.disable_monkey_patching!
16
+ config.warnings = true
17
+ config.default_formatter = "doc" if config.files_to_run.one?
18
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ikku
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryo Nakamura
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-14 00:00:00.000000000 Z
11
+ date: 2015-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: natto
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 3.2.0
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 3.2.0
69
83
  description:
70
84
  email:
71
85
  - r7kamura@gmail.com
@@ -74,6 +88,7 @@ extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".rspec"
77
92
  - CHANGELOG.md
78
93
  - Gemfile
79
94
  - LICENSE.txt
@@ -81,7 +96,13 @@ files:
81
96
  - Rakefile
82
97
  - ikku.gemspec
83
98
  - lib/ikku.rb
99
+ - lib/ikku/node.rb
100
+ - lib/ikku/parser.rb
101
+ - lib/ikku/reviewer.rb
102
+ - lib/ikku/scanner.rb
84
103
  - lib/ikku/version.rb
104
+ - spec/ikku/reviewer_spec.rb
105
+ - spec/spec_helper.rb
85
106
  homepage: https://github.com/r7kamura/ikku
86
107
  licenses:
87
108
  - MIT
@@ -106,5 +127,7 @@ rubygems_version: 2.4.5
106
127
  signing_key:
107
128
  specification_version: 4
108
129
  summary: Discover haiku from text.
109
- test_files: []
130
+ test_files:
131
+ - spec/ikku/reviewer_spec.rb
132
+ - spec/spec_helper.rb
110
133
  has_rdoc: