kokugo_tagger 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: b609be62c8aea30eda82811044d1d7ecc6eeb1b4
4
+ data.tar.gz: 733d6a86cea778492416df55f6e79de5c072a4c3
5
+ SHA512:
6
+ metadata.gz: 02a682b673b2103320ced6836cb3784c8b9543628840d9aec1033de7a509869449818be930c6c2f06b22ccbfeb4748a2172916a2e849db836dd51b74837ad46d
7
+ data.tar.gz: b2ba4fe4e4aeeeb83d9a709243479c70298426e86ab771a19eaa28420ddd8dee6b18dcafcedf3b4c5e648a513b7b06cbbeff97352b13b1a0c7b09cb0361eafec
data/.gitignore ADDED
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
15
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kokugo_tagger.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Mizuho IMADA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # KokugoTagger
2
+
3
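+ KokugoTagger reads CaboCha-format parse output (chunk header lines, tab-separated token lines, `EOS`) and echoes it with added `#! SEGMENT_S bccwj-kok:Bnst` segment lines and `#! ATTR bccwj-kok:pred` / `bccwj-kok:conj` attribute lines for each chunk.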
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'kokugo_tagger'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install kokugo_tagger
20
+
21
+ ## Usage
22
+
23
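+ Run the `kokugo_tagger` command on CaboCha-format input, given either as file names or on standard input; the annotated result is written to standard output, e.g. `$ kokugo_tagger parsed.cabocha > annotated.cabocha`. From Ruby, call `KokugoTagger.annotate(io)` with any object that responds to `each_line`.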
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/[my-github-username]/kokugo_tagger/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/kokugo_tagger ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# Command-line entry point for the kokugo_tagger gem.
#
# Hands ARGF (the files named on the command line, or standard input when
# none are given) to KokugoTagger.annotate, which prints its result with puts.

require 'kokugo_tagger'

# Read the input as UTF-8 regardless of the locale's default encoding.
Encoding.default_external = Encoding::UTF_8
KokugoTagger.annotate(ARGF)
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'kokugo_tagger/version'
5
+
6
# Gem specification for kokugo_tagger.
#
# The summary and description previously held the Bundler template
# placeholders; they now describe what the library actually emits.
Gem::Specification.new do |spec|
  spec.name          = "kokugo_tagger"
  spec.version       = KokugoTagger::VERSION
  spec.authors       = ["Mizuho IMADA"]
  spec.email         = ["imadamizuho@gmail.com"]
  spec.summary       = %q{Adds bccwj-kok segment and pred/conj attribute annotations to CaboCha-format parses.}
  spec.description   = %q{Reads CaboCha-format parse output line by line and echoes it, inserting a "#! SEGMENT_S bccwj-kok:Bnst" line plus "#! ATTR bccwj-kok:pred" / "bccwj-kok:conj" lines for every chunk before each EOS.}
  # NOTE(review): no project URL is known from the sources; fill this in.
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package exactly the files tracked by git (NUL-separated to survive odd names).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'csv'
3
+
4
# Line-oriented parser for CaboCha-style parse output.
#
# Every method takes one raw input line (a trailing newline is allowed) and
# returns a Hash whose :type key names the kind of line, or nil when the line
# carries nothing to report.
module CabochaParser
  # Classifies +line+ and delegates to the matching parse_* method.
  #
  # @param line [String] one line of input
  # @return [Hash, nil] parsed record; nil for blank lines and for "#" lines
  #   whose second field is not a known extended-annotation type
  def parse(line)
    case line.chomp
    when /\A[#*]\t/
      # A token whose surface form is "#" or "*" is followed by a tab; chunk
      # headers and "#" annotation lines are not. Without this check such
      # tokens were sent to parse_excab (dropped) or parse_chunk (crash).
      parse_token(line)
    when /^#/
      parse_excab(line)
    when /^\*/
      parse_chunk(line)
    when 'EOS'
      {type: 'EOS'}
    when ''
      nil
    else
      parse_token(line)
    end
  end

  # Parses a "#"-prefixed annotation line made of space-separated, optionally
  # double-quoted fields.
  #
  # @param line [String]
  # @return [Hash, nil] a SEGMENT/SEGMENT_S/LINK/LINK_S, GROUP/GROUP_S or ATTR
  #   record; nil when the second field is any other value
  # @raise [CSV::MalformedCSVError] when the quoting on the line is invalid
  def parse_excab(line)
    _marker, type, *data = CSV.parse_line(line.chomp, col_sep: ' ')
    case type
    when 'SEGMENT', 'SEGMENT_S', 'LINK', 'LINK_S'
      {type: type, name: data[0], start: data[1].to_i, end: data[2].to_i, comment: data[3]}
    when 'GROUP', 'GROUP_S'
      # Everything between the name and the final field is a member.
      {type: type, name: data[0], member: data[1..-2], comment: data[-1]}
    when 'ATTR'
      {type: type, name: data[0], value: data[1]}
    end
  end

  # Parses a chunk header such as "* 0 1D 0/1 1.5".
  #
  # The third field is split into everything but its last character (:link)
  # and that last character (:dep); the fourth is split on "/" into :head and
  # :func. All values are kept as Strings.
  #
  # @param line [String]
  # @return [Hash] record with :type => 'CHUNK'
  def parse_chunk(line)
    _marker, id, rel, part, score = line.chomp.split(' ')
    link = rel[0..-2]
    dep = rel[-1]
    head, func = part.split('/')
    {type: 'CHUNK', id: id, link: link, dep: dep, head: head, func: func, score: score}
  end

  # Parses a token line: surface text, comma-separated features and an
  # optional third column, separated by tabs.
  #
  # :pos joins the non-empty values among the first four features with "-";
  # the fifth and sixth features become :ctype and :cform.
  #
  # @param line [String]
  # @return [Hash] record with :type => 'TOKEN'
  def parse_token(line)
    text, features, ne = line.chomp.split("\t")
    # A line without a feature column yields no features instead of crashing.
    attrs = (features && CSV.parse_line(features, col_sep: ',')) || []
    # CSV returns nil (not "") for unquoted empty fields, so drop both.
    pos = attrs.first(4).reject { |item| item.nil? || item.empty? }.join('-')
    {type: 'TOKEN', text: text, ne: ne, pos: pos, ctype: attrs[4], cform: attrs[5]}
  end

  module_function :parse, :parse_excab, :parse_chunk, :parse_token
end
@@ -0,0 +1,114 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'csv'
3
+
4
# Annotates parsed CaboCha-style input with one segment line per chunk plus
# predicate ("pred") and "conj" attribute lines.
#
# State for the sentence being read is kept in module-level instance
# variables (@chunks, @chunk, @lpos) and cleared at every EOS, so the module
# processes one input stream at a time.
module KokugoTagger
  module_function

  # Reads +file+ line by line, feeds each parsed record to the handler named
  # after its type (chunk, token, eos, ...) and echoes the line.
  #
  # The handler runs before the line is printed, so the annotations produced
  # by #eos appear just before the "EOS" line. Lines for which
  # CabochaParser.parse returns nil are neither handled nor echoed.
  #
  # @param file [#each_line] input source, e.g. ARGF or an IO
  # @return [Object] whatever +file.each_line+ returns
  def annotate(file)
    file.each_line do |line|
      data = CabochaParser.parse(line)
      next unless data

      handler = data[:type].downcase.to_sym
      # respond_to? avoids building the full method list for every line.
      public_send(handler, data) if respond_to?(handler)
      puts line
    end
  end

  # Starts a new chunk: registers +data+ and gives it an empty text span
  # beginning at the current character offset.
  #
  # @param data [Hash] CHUNK record; updated in place
  # @return [Hash] the updated record
  def chunk(data)
    @chunks ||= []
    @lpos ||= 0
    @chunk = data
    @chunks << @chunk
    @chunk.update start: @lpos, end: @lpos, text: '', pos: nil, pred: nil, conj: nil
  end

  # Appends a token to the current chunk and lets its part of speech and
  # conjugation form update the chunk's attributes.
  #
  # A token seen before any chunk header is ignored; previously it raised
  # NoMethodError on nil.
  #
  # @param data [Hash] TOKEN record with :text, :pos and :cform
  def token(data)
    return unless @chunk

    @lpos += data[:text].size
    @chunk[:end] = @lpos
    @chunk[:text] += data[:text]
    pos data
    cform data
  end

  # SEGMENT_S records already present in the input need no processing.
  def segment_s(data)
  end

  # ATTR records already present in the input need no processing.
  def attr(data)
  end

  # Finishes the sentence: completes missing attributes, prints the
  # annotation lines for every chunk and resets the per-sentence state.
  #
  # A sentence without chunks prints nothing; previously it raised
  # NoMethodError because @chunks was nil.
  #
  # @param data [Hash] EOS record (unused)
  # @return [nil]
  def eos(data)
    if @chunks
      before_eos
      @chunks.each do |chunk|
        puts '#! SEGMENT_S bccwj-kok:Bnst %d %d "%s"' % [chunk[:start], chunk[:end], chunk[:text]]
        puts '#! ATTR bccwj-kok:pred "%s述語"' % chunk[:pos] if chunk[:pred]
        puts '#! ATTR bccwj-kok:conj "%s"' % chunk[:conj] if chunk[:conj]
      end
    end
    @chunks = @chunk = @lpos = @segments = nil
  end

  # Updates the current chunk from the token's part of speech. Tokens are
  # applied in order, so a later token overrides what an earlier one set.
  #
  # @param token [Hash] TOKEN record
  def pos(token)
    case token[:pos]
    when /^(名詞|代名詞|接尾辞-名詞的)/
      @chunk.update pos: '名詞', pred: nil, conj: nil
    when /^(形状詞|接尾辞-形状詞的)/
      @chunk.update pos: '形状詞', pred: nil, conj: nil
    when /^連体詞/
      @chunk.update pos: '連体詞', pred: nil, conj: '修飾(連体)'
    when /^副詞/
      @chunk.update pos: '副詞', pred: nil, conj: '修飾(連用)'
    when /^接続詞/
      @chunk.update pos: '接続詞', pred: nil, conj: '接続'
    when /^感動詞/
      @chunk.update pos: '感動詞', pred: nil, conj: '独立'
    when /^(動詞|接尾辞-動詞的)/
      @chunk.update pos: '動詞', pred: true, conj: nil
    when /^(形容詞|接尾辞-形容詞的)/
      @chunk.update pos: '形容詞', pred: true, conj: nil
    when /^助動詞/
      @chunk.update pred: true, conj: nil
    when /^助詞-格助詞/
      case token[:text]
      when 'が'
        @chunk.update conj: '主語'
      when 'の', 'との', 'という', 'といった'
        @chunk.update conj: '修飾(連体)'
      else
        @chunk.update conj: '補語'
      end
    when /^(助詞-副助詞|助詞-係助詞)/
      @chunk.update conj: '修飾(連用)'
    when /^助詞-接続助?詞/
      # The conjunctive-particle category is written 助詞-接続助詞; the former
      # pattern 助詞-接続詞 could not match it. The old spelling is still
      # accepted for backward compatibility.
      @chunk.update pred: true, conj: '接続'
    when /^助詞-終助詞/
      @chunk.update pred: true, conj: nil
    when /^助詞-準体助詞/
      @chunk.update conj: nil
    end
  end

  # Updates the current chunk's :conj from the token's conjugation form.
  #
  # @param token [Hash] TOKEN record
  def cform(token)
    case token[:cform]
    when /^語幹/
      # A stem form leaves the chunk unchanged.
    when /^(未然形|連用形|仮定形|已然形)/
      @chunk.update conj: '接続'
    when /^(意志推量形|連体形)/
      @chunk.update conj: '修飾(連体)'
    when /^(終止形|命令形)/
      @chunk.update conj: nil
    end
  end

  # For chunks that could not be given attributes from their own tokens,
  # fills the attributes in from the dependency links between chunks.
  def before_eos
    # A chunk that is the link target of a subject, complement, adverbial
    # modifier or connective chunk is treated as a predicate.
    @chunks.each do |chunk|
      chunk[:pred] ||= @chunks.any? do |other|
        other[:link] == chunk[:id] && other[:conj] =~ /^(主語|補語|修飾\(連用\)|接続)$/
      end
    end
    # A chunk without :conj that links to a predicate is treated as 修飾(連用).
    @chunks.each do |chunk|
      next unless chunk[:conj].nil?

      chunk[:conj] = '修飾(連用)' if @chunks.any? { |other| other[:id] == chunk[:link] && other[:pred] }
    end
    # Chunks carrying predicate-argument information (:arg) become subjects or
    # complements. NOTE(review): nothing visible here sets :arg or :passive,
    # so this pass only acts if the caller adds those keys.
    @chunks.each do |chunk|
      next if chunk[:link] == '-1' || chunk[:arg].nil?
      next unless chunk[:conj].nil? || chunk[:conj] == '修飾(連用)'

      # Looks the target up by position; assumes chunk ids equal their index.
      pred = @chunks[chunk[:link].to_i]
      next if pred.nil?

      if chunk[:arg] == 'Ga' && pred[:passive].nil?
        chunk[:conj] = '主語'
      elsif chunk[:arg] == 'O' && pred[:passive] == '直接'
        chunk[:conj] = '主語'
      else
        chunk[:conj] = '補語'
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the kokugo_tagger gem.
module KokugoTagger
  # Release number of the gem; the gemspec assigns it to spec.version.
  VERSION = '0.0.2'
end
@@ -0,0 +1,7 @@
1
+ require "kokugo_tagger/version"
2
+ require "kokugo_tagger/parser"
3
+ require "kokugo_tagger/tagger"
4
+
5
# Top-level namespace of the gem. It is intentionally empty here: the version
# constant, the parser and the tagger come from the files required above.
module KokugoTagger
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kokugo_tagger
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Mizuho IMADA
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Write a longer description. Optional.
42
+ email:
43
+ - imadamizuho@gmail.com
44
+ executables:
45
+ - kokugo_tagger
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - Gemfile
51
+ - LICENSE.txt
52
+ - README.md
53
+ - Rakefile
54
+ - bin/kokugo_tagger
55
+ - kokugo_tagger.gemspec
56
+ - lib/kokugo_tagger.rb
57
+ - lib/kokugo_tagger/parser.rb
58
+ - lib/kokugo_tagger/tagger.rb
59
+ - lib/kokugo_tagger/version.rb
60
+ homepage: ''
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.4.3
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Write a short summary. Required.
84
+ test_files: []