kampyo 0.2.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0d96499aeb214750eddaa4eecdede3a629b05dcd05755c9a3ca798be84f400e3
4
- data.tar.gz: '094e4f87fe8c86726c46deb081c4d2fd4f7e356c62d1dfbd15384c9bb3eca6f0'
3
+ metadata.gz: 9d4b1b3f48eabf8c4eed7a00c7eeb169f6c173abad1cc0f9565aeffa4b542cd8
4
+ data.tar.gz: c84b909159d69b99a28b1bf544c846207346427e967d75515e9d1147c85855b1
5
5
  SHA512:
6
- metadata.gz: cd8aab55671360c9acb592050383f717fe1a93c4efd3233943b2bd3b7ffb7585c90090561b11c0dde13010c44743ab2e89842b4f317d1428d37b58a1438a0a3a
7
- data.tar.gz: 3d85e4fed2888ab5c3322da3685bdedafbfc8fb8c79c6b9f17eb578c4fe1ec793b9c85a34b803b3f1d58e722be3c109a935bf1a5b4f7078bbb005e341ae285cb
6
+ metadata.gz: 9f40d14b2e64af5e050b9b94ed20b228cb810100a97f0bd95a44c21aad47195f9aecdb556779808d445812de0c12ed6f48613dbccc30c7e5a68fccc74ca21bb5
7
+ data.tar.gz: fb88beb751215b65773c77c36f35af1fccdf2a93bda94bd93d294db3beb52fa06b1d22bf951bbd5ec2bbbdb3fd429d9a1d72d5516168d33cdb4bc476bfa3963f
data/README.base.md CHANGED
@@ -20,8 +20,8 @@ Unique features already implemented are subject and predicate, and stylistic inf
20
20
  Basic usage with Cabocha.
21
21
 
22
22
  ```
23
- text = Kampyo::Text.new
24
- text.cabocha_parser("今日は雨です")
23
+ cabocha = Kampyo::Cabocha.new
24
+ cabocha.parser("今日は雨です")
25
25
  ```
26
26
 
27
27
  You will get the following result.
@@ -38,8 +38,8 @@ You will get the following result.
38
38
  Guess the subject, predicate and sentence system.
39
39
 
40
40
  ```
41
- text = Kampyo::Text.new
42
- text.analysis(text.cabocha_parser("今日は雨です"))
41
+ cabocha = Kampyo::Cabocha.new
42
+ cabocha.analysis("今日は雨です")
43
43
  ```
44
44
 
45
45
  You will get the following result.
data/README.ja.md CHANGED
@@ -15,8 +15,8 @@ Unique features already implemented are subject and predicate, and stylistic inf
15
15
  Basic usage with Cabocha.
16
16
 
17
17
  ```
18
- text = Kampyo::Text.new
19
- text.cabocha_parser("今日は雨です")
18
+ cabocha = Kampyo::Cabocha.new
19
+ cabocha.parser("今日は雨です")
20
20
  ```
21
21
 
22
22
  You will get the following result.
@@ -33,8 +33,8 @@ You will get the following result.
33
33
  Guess the subject, predicate and sentence system.
34
34
 
35
35
  ```
36
- text = Kampyo::Text.new
37
- text.analysis(text.cabocha_parser("今日は雨です"))
36
+ cabocha = Kampyo::Cabocha.new
37
+ cabocha.analysis("今日は雨です")
38
38
  ```
39
39
 
40
40
  You will get the following result.
data/README.md CHANGED
@@ -15,8 +15,8 @@ Unique features already implemented are subject and predicate, and stylistic inf
15
15
  Basic usage with Cabocha.
16
16
 
17
17
  ```
18
- text = Kampyo::Text.new
19
- text.cabocha_parser("今日は雨です")
18
+ cabocha = Kampyo::Cabocha.new
19
+ cabocha.parser("今日は雨です")
20
20
  ```
21
21
 
22
22
  You will get the following result.
@@ -33,8 +33,8 @@ You will get the following result.
33
33
  Guess the subject, predicate and sentence system.
34
34
 
35
35
  ```
36
- text = Kampyo::Text.new
37
- text.analysis(text.cabocha_parser("今日は雨です"))
36
+ cabocha = Kampyo::Cabocha.new
37
+ cabocha.analysis("今日は雨です")
38
38
  ```
39
39
 
40
40
  You will get the following result.
data/lib/kampyo/cabocha.rb ADDED
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kampyo'
4
+ require 'kampyo/text'
5
+ require 'cabocha'
6
+
7
+ # Kampyo
8
+ module Kampyo
9
+ # Text
10
+ class Cabocha < Text # rubocop:disable Metrics/ClassLength
11
+ # rubocop:todo Metrics/MethodLength
12
+ def parser(input) # rubocop:todo Metrics/AbcSize, Metrics/MethodLength
13
+ # <sentence>
14
+ # <chunk id="0" link="1" rel="D" score="0.000000" head="0" func="1">
15
+ # <tok id="0" feature="名詞,副詞可能,*,*,*,*,今日,キョウ,キョー" ne="B-DATE">今日</tok>
16
+ # <tok id="1" feature="助詞,係助詞,*,*,*,*,は,ハ,ワ" ne="O">は</tok>
17
+ # </chunk>
18
+ # <chunk id="1" link="-1" rel="D" score="0.000000" head="2" func="3">
19
+ # <tok id="2" feature="名詞,一般,*,*,*,*,雨,アメ,アメ" ne="O">雨</tok>
20
+ # <tok id="3" feature="助動詞,*,*,*,特殊・デス,基本形,です,デス,デス" ne="O">です</tok>
21
+ # </chunk>
22
+ # </sentence>
23
+
24
+ parser = CaboCha::Parser.new
25
+ tree = parser.parse(input)
26
+
27
+ chunks = []
28
+ tokens = []
29
+ token_position = 0
30
+ (0..tree.chunk_size - 1).each do |i| # rubocop:todo Metrics/BlockLength
31
+ chunk = tree.chunk(i)
32
+ token_size = chunk.token_size
33
+
34
+ chunks << {
35
+ id: chunks.size + 1,
36
+ link: chunk.link >= 0 ? chunk.link + 1 : -1,
37
+ score: chunk.score
38
+ }
39
+
40
+ (token_position..token_position + token_size - 1).each do |j|
41
+ token = tree.token(j)
42
+
43
+ surface = token.surface.force_encoding('UTF-8')
44
+ feature0 = token.feature_list(0).force_encoding('UTF-8')
45
+ feature1 = token.feature_list(1).force_encoding('UTF-8')
46
+ feature6 = token.feature_list(6).force_encoding('UTF-8')
47
+ feature7 = token.feature_list(7).force_encoding('UTF-8')
48
+
49
+ tokens << {
50
+ id: tokens.size + 1,
51
+ chunk: i + 1,
52
+ surface: surface,
53
+ feature1: feature0,
54
+ feature2: feature1,
55
+ baseform: feature6,
56
+ reading: feature7,
57
+ ext_reading: ext_reading(feature7)
58
+ }
59
+ end
60
+
61
+ token_position += token_size
62
+ end
63
+
64
+ { chunks: chunks, tokens: tokens }
65
+ end
66
+ # rubocop:enable Metrics/MethodLength
67
+
68
+ def analysis(input) # rubocop:todo Metrics/CyclomaticComplexity, Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
69
+ result = parser(input)
70
+ chunks = result[:chunks]
71
+ tokens = result[:tokens]
72
+
73
+ subject_token = nil
74
+ predicate_token = nil
75
+ tod = '*'
76
+
77
+ # 述語の候補
78
+ predicate_chunk = chunks.find { |item| item[:link] == -1 }
79
+ predicate_tokens = tokens.select { |item| item[:chunk] == predicate_chunk[:id] }
80
+
81
+ # 主語の候補
82
+ subject_chunk = chunks.find do |item|
83
+ item[:link] == predicate_chunk[:id]
84
+ end
85
+ unless subject_chunk.nil?
86
+ subject_tokens = tokens.select do |item|
87
+ item[:chunk] == subject_chunk[:id] &&
88
+ item[:feature1] == '名詞' &&
89
+ %w[一般 固有名詞 サ変接続 接尾 数 副詞可能].include?(item[:feature2])
90
+ end
91
+
92
+ subject_tokens.each do |token|
93
+ next_token = tokens.find do |item|
94
+ item[:id] == token[:id] + 1 &&
95
+ item[:chunk] == token[:chunk] &&
96
+ %w[は って も が].include?(item[:baseform])
97
+ end
98
+
99
+ next if next_token.nil?
100
+
101
+ # 主語として確定する
102
+ subject_token = token
103
+ next
104
+ end
105
+ end
106
+
107
+ predicate_tokens.each do |token| # rubocop:todo Metrics/BlockLength
108
+ if %w[形容詞 動詞 名詞].include?(token[:feature1]) &&
109
+ %w[一般 自立 サ変接続 接尾].include?(token[:feature2]) &&
110
+ token[:baseform] != '*'
111
+ # 述語として確定する
112
+
113
+ predicate_token = token
114
+ end
115
+
116
+ next unless %w[動詞 終助詞 助動詞 助詞].include?(token[:feature1])
117
+
118
+ # 文体系を確定する
119
+ tods = {
120
+ 'れる' => '受身・尊敬・可能・自発',
121
+ 'られる' => '受身・尊敬・可能・自発',
122
+ 'せる' => '使役',
123
+ 'させる' => '使役',
124
+ 'ない' => '打消',
125
+ 'ぬ' => '打消',
126
+ 'ん' => '打消',
127
+ 'う' => '推量・意志・勧誘',
128
+ 'よう' => '推量・意志・勧誘',
129
+ 'まい' => '打消推量・打消意志',
130
+ 'たい' => '希望',
131
+ 'たがる' => '希望',
132
+ 'た' => '過去・完了・存在・確認',
133
+ 'ます' => '丁寧',
134
+ 'そうだ' => '様態・伝聞',
135
+ 'らしい' => '推定',
136
+ 'ようだ' => '比況・例示・推定',
137
+ 'だ' => '断定',
138
+ 'です' => '断定',
139
+ 'な' => '禁止・感動',
140
+ 'か' => '疑問・反語・感動',
141
+ 'の' => '断定・質問',
142
+ 'よ' => '強意・呼びかけ',
143
+ 'ぞ' => '強意',
144
+ 'も' => '確信',
145
+ 'ね' => '感動・念押し',
146
+ 'わ' => '感動・強意',
147
+ 'さ' => '断定'
148
+ }
149
+
150
+ tod = tods[token[:baseform]]
151
+ end
152
+
153
+ last_token = predicate_tokens.last
154
+
155
+ if predicate_token.nil? && ['助動詞'].include?(last_token[:feature1])
156
+ # 述語が確定していないとき最後の助動詞を述語として確定する
157
+ predicate_token = last_token
158
+ end
159
+
160
+ if ['?', '?'].include?(last_token[:surface])
161
+ # 述語の最後の形態素が?のとき
162
+ tod = '疑問・反語・感動'
163
+ end
164
+
165
+ {
166
+ subject: subject_token,
167
+ predicate: predicate_token,
168
+ tod: tod
169
+ }
170
+ end
171
+ end
172
+ end
data/lib/kampyo/mecab.rb ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kampyo'
4
+ require 'mecab'
5
+
6
+ # Kampyo
7
+ module Kampyo
8
+ # Text
9
+ class Mecab < Text
10
+ # rubocop:todo Metrics/MethodLength
11
+ def parser(input) # rubocop:todo Metrics/AbcSize, Metrics/MethodLength
12
+ result = []
13
+ parser = MeCab::Tagger.new
14
+ node = parser.parseToNode(input)
15
+ while node
16
+ features = node.feature.split(',')
17
+ if features[0] != 'BOS/EOS'
18
+ result << {
19
+ id: result.size + 1,
20
+ chunk: 0,
21
+ surface: node.surface,
22
+ feature1: features[0],
23
+ feature2: features[1],
24
+ baseform: features[6],
25
+ reading: features[7],
26
+ ext_reading: ext_reading(features[7]),
27
+ cost: node.cost,
28
+ wcost: node.wcost,
29
+ right_context: node.rcAttr,
30
+ left_context: node.lcAttr
31
+ }
32
+ end
33
+ node = node.next
34
+ end
35
+
36
+ result
37
+ end
38
+ # rubocop:enable Metrics/MethodLength
39
+ end
40
+ end
data/lib/kampyo/text.rb CHANGED
@@ -1,202 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'kampyo'
4
- require 'kampyo/string'
5
- require 'cabocha'
6
- require 'mecab'
7
4
 
5
+ # Kampyo
8
6
  module Kampyo
7
+ # Text
9
8
  class Text
10
9
  def initialize; end
11
10
 
12
- def cabocha_parser(input)
13
- # <sentence>
14
- # <chunk id="0" link="1" rel="D" score="0.000000" head="0" func="1">
15
- # <tok id="0" feature="名詞,副詞可能,*,*,*,*,今日,キョウ,キョー" ne="B-DATE">今日</tok>
16
- # <tok id="1" feature="助詞,係助詞,*,*,*,*,は,ハ,ワ" ne="O">は</tok>
17
- # </chunk>
18
- # <chunk id="1" link="-1" rel="D" score="0.000000" head="2" func="3">
19
- # <tok id="2" feature="名詞,一般,*,*,*,*,雨,アメ,アメ" ne="O">雨</tok>
20
- # <tok id="3" feature="助動詞,*,*,*,特殊・デス,基本形,です,デス,デス" ne="O">です</tok>
21
- # </chunk>
22
- # </sentence>
23
-
24
- parser = CaboCha::Parser.new
25
- tree = parser.parse(input)
26
-
27
- chunks = []
28
- tokens = []
29
- token_position = 0
30
- (0..tree.chunk_size - 1).each do |i|
31
- chunk = tree.chunk(i)
32
- token_size = chunk.token_size
33
-
34
- chunks << {
35
- id: chunks.size + 1,
36
- link: chunk.link >= 0 ? chunk.link + 1 : -1,
37
- score: chunk.score
38
- }
39
-
40
- (token_position..token_position + token_size - 1).each do |j|
41
- token = tree.token(j)
42
-
43
- surface = token.surface.to_utf8
44
- feature0 = token.feature_list(0).to_utf8
45
- feature1 = token.feature_list(1).to_utf8
46
- feature6 = token.feature_list(6).to_utf8
47
- feature7 = token.feature_list(7).to_utf8
48
-
49
- tokens << {
50
- id: tokens.size + 1,
51
- chunk: i + 1,
52
- surface: surface,
53
- feature1: feature0,
54
- feature2: feature1,
55
- baseform: feature6,
56
- reading: feature7,
57
- ext_reading: ext_reading(feature7)
58
- }
59
- end
60
-
61
- token_position += token_size
62
- end
63
-
64
- { chunks: chunks, tokens: tokens }
65
- end
66
-
67
- def mecab_parser(input)
68
- result = []
69
- parser = MeCab::Tagger.new
70
- node = parser.parseToNode(input)
71
- while node
72
- features = node.feature.split(',')
73
- if features[0] != 'BOS/EOS'
74
- result << {
75
- id: result.size + 1,
76
- chunk: 0,
77
- surface: node.surface,
78
- feature1: features[0],
79
- feature2: features[1],
80
- baseform: features[6],
81
- reading: features[7],
82
- ext_reading: ext_reading(features[7]),
83
- cost: node.cost,
84
- wcost: node.wcost,
85
- right_context: node.rcAttr,
86
- left_context: node.lcAttr
87
- }
88
- end
89
- node = node.next
90
- end
91
-
92
- result
93
- end
94
-
95
11
  def ext_reading(feature)
96
12
  (feature =~ /\A[\p{katakana}|ー]+\z/).nil? ? feature : nil
97
13
  end
98
-
99
- def analysis(cabocha)
100
- chunks = cabocha[:chunks]
101
- tokens = cabocha[:tokens]
102
-
103
- subject_token = nil
104
- predicate_token = nil
105
- tod = '*'
106
-
107
- # 述語の候補
108
- predicate_chunk = chunks.find { |item| item[:link] == -1 }
109
- predicate_tokens = tokens.select { |item| item[:chunk] == predicate_chunk[:id] }
110
-
111
- # 主語の候補
112
- subject_chunk = chunks.find do |item|
113
- item[:link] == predicate_chunk[:id]
114
- end
115
- unless subject_chunk.nil?
116
- subject_tokens = tokens.select do |item|
117
- item[:chunk] == subject_chunk[:id] &&
118
- item[:feature1] == '名詞' &&
119
- %w[一般 固有名詞 サ変接続 接尾 数 副詞可能].include?(item[:feature2])
120
- end
121
-
122
- subject_tokens.each do |token|
123
- next_token = tokens.find do |item|
124
- item[:id] == token[:id] + 1 &&
125
- item[:chunk] == token[:chunk] &&
126
- %w[は って も が].include?(item[:baseform])
127
- end
128
-
129
- next if next_token.nil?
130
-
131
- # 主語として確定する
132
- subject_token = token
133
- next
134
- end
135
- end
136
-
137
- predicate_tokens.each do |token|
138
- if %w[形容詞 動詞 名詞].include?(token[:feature1]) &&
139
- %w[一般 自立 サ変接続 接尾].include?(token[:feature2]) &&
140
- token[:baseform] != '*'
141
- # 述語として確定する
142
-
143
- predicate_token = token
144
- end
145
-
146
- next unless %w[動詞 終助詞 助動詞 助詞].include?(token[:feature1])
147
-
148
- # 文体系を確定する
149
- tods = {
150
- 'れる' => '受身・尊敬・可能・自発',
151
- 'られる' => '受身・尊敬・可能・自発',
152
- 'せる' => '使役',
153
- 'させる' => '使役',
154
- 'ない' => '打消',
155
- 'ぬ' => '打消',
156
- 'ん' => '打消',
157
- 'う' => '推量・意志・勧誘',
158
- 'よう' => '推量・意志・勧誘',
159
- 'まい' => '打消推量・打消意志',
160
- 'たい' => '希望',
161
- 'たがる' => '希望',
162
- 'た' => '過去・完了・存在・確認',
163
- 'ます' => '丁寧',
164
- 'そうだ' => '様態・伝聞',
165
- 'らしい' => '推定',
166
- 'ようだ' => '比況・例示・推定',
167
- 'だ' => '断定',
168
- 'です' => '断定',
169
- 'な' => '禁止・感動',
170
- 'か' => '疑問・反語・感動',
171
- 'の' => '断定・質問',
172
- 'よ' => '強意・呼びかけ',
173
- 'ぞ' => '強意',
174
- 'も' => '確信',
175
- 'ね' => '感動・念押し',
176
- 'わ' => '感動・強意',
177
- 'さ' => '断定'
178
- }
179
-
180
- tod = tods[token[:baseform]]
181
- end
182
-
183
- last_token = predicate_tokens.last
184
-
185
- if predicate_token.nil? && ['助動詞'].include?(last_token[:feature1])
186
- # 述語が確定していないとき最後の助動詞を述語として確定する
187
- predicate_token = last_token
188
- end
189
-
190
- if ['?', '?'].include?(last_token[:surface])
191
- # 述語の最後の形態素が?のとき
192
- tod = '疑問・反語・感動'
193
- end
194
-
195
- {
196
- subject: subject_token,
197
- predicate: predicate_token,
198
- tod: tod
199
- }
200
- end
201
14
  end
202
15
  end
data/lib/kampyo/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kampyo
4
- VERSION = '0.2.0'
4
+ VERSION = '1.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kampyo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - arthur87
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2025-03-12 00:00:00.000000000 Z
10
+ date: 2026-04-27 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rubocop
@@ -66,7 +65,8 @@ files:
66
65
  - README.md
67
66
  - Rakefile
68
67
  - lib/kampyo.rb
69
- - lib/kampyo/string.rb
68
+ - lib/kampyo/cabocha.rb
69
+ - lib/kampyo/mecab.rb
70
70
  - lib/kampyo/text.rb
71
71
  - lib/kampyo/version.rb
72
72
  - sig/kampyo.rbs
@@ -76,7 +76,6 @@ licenses:
76
76
  metadata:
77
77
  homepage_uri: https://github.com/arthur87/kampyo
78
78
  source_code_uri: https://github.com/arthur87/kampyo
79
- post_install_message:
80
79
  rdoc_options: []
81
80
  require_paths:
82
81
  - lib
@@ -91,8 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
91
90
  - !ruby/object:Gem::Version
92
91
  version: '0'
93
92
  requirements: []
94
- rubygems_version: 3.3.3
95
- signing_key:
93
+ rubygems_version: 3.6.3
96
94
  specification_version: 4
97
95
  summary: kampyo is a library for conveniently manipulating Cabocha and Mecab.
98
96
  test_files: []
data/lib/kampyo/string.rb DELETED
@@ -1,7 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class String
4
- def to_utf8
5
- force_encoding('UTF-8')
6
- end
7
- end