kokugo_tagger 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kokugo_tagger/parser.rb +3 -3
- data/lib/kokugo_tagger/tagger.rb +38 -2
- data/lib/kokugo_tagger/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38c7ab0d9feb7ce099075914af22947354187b40
|
4
|
+
data.tar.gz: b240e3e012dce85475716a16ed6b1ff738d65327
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28c3d0f941e530245655e9704d675343fdabb9f94123bf8065aad4044492b9de0b50e2cead2cd99b1ceecceb8a6d89a2417a8d52566d39a8aed3250aec50d93f
|
7
|
+
data.tar.gz: d2a488f3efdfeaabcaf72d610052db4327ba8af401b9e974f2e191e0579efe730d80dc5d9086a9db1a3eeaf4a4a5f89ab1b6e5136ced2cdfa75a66bda4a25797
|
data/lib/kokugo_tagger/parser.rb
CHANGED
@@ -29,10 +29,10 @@ module CabochaParser
|
|
29
29
|
return excab
|
30
30
|
end
|
31
31
|
def parse_chunk(line)
|
32
|
-
null, id,
|
33
|
-
link,
|
32
|
+
null, id, dep, part, score = line.chomp.split("\s")
|
33
|
+
link, rel = dep[0..-2], dep[-1]
|
34
34
|
head, func = part.split('/')
|
35
|
-
chunk = {type: 'CHUNK', id: id, link: link,
|
35
|
+
chunk = {type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score}
|
36
36
|
return chunk
|
37
37
|
end
|
38
38
|
def parse_token(line)
|
data/lib/kokugo_tagger/tagger.rb
CHANGED
@@ -25,8 +25,18 @@ module KokugoTagger
|
|
25
25
|
cform data
|
26
26
|
end
|
27
27
|
def segment_s(data)
|
28
|
+
@segments ||= []
|
29
|
+
@segments << data
|
30
|
+
@last_item = data
|
31
|
+
end
|
32
|
+
def group_s(data)
|
33
|
+
@groups ||= []
|
34
|
+
@groups << data
|
35
|
+
@last_item = data
|
28
36
|
end
|
29
37
|
def attr(data)
|
38
|
+
@last_item[:attributes] ||= []
|
39
|
+
@last_item[:attributes] << data
|
30
40
|
end
|
31
41
|
def eos(data)
|
32
42
|
before_eos
|
@@ -35,7 +45,7 @@ module KokugoTagger
|
|
35
45
|
puts '#! ATTR bccwj-kok:pred "%s述語"' % chunk[:pos] if chunk[:pred]
|
36
46
|
puts '#! ATTR bccwj-kok:conj "%s"' % chunk[:conj] if chunk[:conj]
|
37
47
|
end
|
38
|
-
@chunks, @chunk, @lpos, @segments = nil
|
48
|
+
@chunks, @chunk, @lpos, @segments, @groups = nil
|
39
49
|
end
|
40
50
|
def pos(token)
|
41
51
|
case token[:pos]
|
@@ -61,10 +71,12 @@ module KokugoTagger
|
|
61
71
|
case token[:text]
|
62
72
|
when 'が'
|
63
73
|
@chunk.update conj:'主語'
|
74
|
+
when 'を', 'に'
|
75
|
+
@chunk.update conj:'補語'
|
64
76
|
when 'の', 'との', 'という', 'といった'
|
65
77
|
@chunk.update conj:'修飾(連体)'
|
66
78
|
else
|
67
|
-
@chunk.update conj:'
|
79
|
+
@chunk.update conj:'修飾(連用)'
|
68
80
|
end
|
69
81
|
when /^(助詞-副助詞|助詞-係助詞)/
|
70
82
|
@chunk.update conj:'修飾(連用)'
|
@@ -88,6 +100,30 @@ module KokugoTagger
|
|
88
100
|
end
|
89
101
|
end
|
90
102
|
def before_eos
|
103
|
+
# BCCWJ-DepPara
|
104
|
+
@chunks.each do |chunk|
|
105
|
+
chunk[:conj] = [chunk[:conj], '断片'].compact.join(':') if chunk[:rel] == 'F'
|
106
|
+
chunk[:conj] = [chunk[:conj], '文節内'].compact.join(':') if chunk[:rel] == 'B'
|
107
|
+
chunk[:conj] = '文末' if chunk[:rel] == 'Z'
|
108
|
+
end
|
109
|
+
# 並列・同格関係
|
110
|
+
@groups ||= []
|
111
|
+
@segments ||= []
|
112
|
+
@groups.each do |group|
|
113
|
+
next unless group[:name] =~ /^(Parallel|Apposition)$/
|
114
|
+
members = group[:member].map{|n| n.to_i}
|
115
|
+
members = @segments.values_at(*members)
|
116
|
+
chunk_ids = members.map do |segment|
|
117
|
+
_end = segment[:end].to_i
|
118
|
+
chunk = @chunks.find{|c| c[:start] < _end and c[:end] >= _end}
|
119
|
+
chunk[:id].to_i if chunk
|
120
|
+
end
|
121
|
+
chunk_ids = chunk_ids.compact.uniq.sort
|
122
|
+
if chunk_ids.size > 1
|
123
|
+
conj = {'Parallel' => '並立', 'Apposition' => '同格'}[group[:name]]
|
124
|
+
chunk_ids[0..-2].each{|cid| @chunks[cid][:conj] = conj}
|
125
|
+
end
|
126
|
+
end
|
91
127
|
# 属性を付与できなかった文節に対して、係り受けを利用して属性を補完
|
92
128
|
# 連用成分を受ける文節を述語とみなす
|
93
129
|
@chunks.each do |chunk|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kokugo_tagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mizuho IMADA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
77
|
version: '0'
|
78
78
|
requirements: []
|
79
79
|
rubyforge_project:
|
80
|
-
rubygems_version: 2.
|
80
|
+
rubygems_version: 2.2.2
|
81
81
|
signing_key:
|
82
82
|
specification_version: 4
|
83
83
|
summary: Write a short summary. Required.
|