kokugo_tagger 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b609be62c8aea30eda82811044d1d7ecc6eeb1b4
4
- data.tar.gz: 733d6a86cea778492416df55f6e79de5c072a4c3
3
+ metadata.gz: 38c7ab0d9feb7ce099075914af22947354187b40
4
+ data.tar.gz: b240e3e012dce85475716a16ed6b1ff738d65327
5
5
  SHA512:
6
- metadata.gz: 02a682b673b2103320ced6836cb3784c8b9543628840d9aec1033de7a509869449818be930c6c2f06b22ccbfeb4748a2172916a2e849db836dd51b74837ad46d
7
- data.tar.gz: b2ba4fe4e4aeeeb83d9a709243479c70298426e86ab771a19eaa28420ddd8dee6b18dcafcedf3b4c5e648a513b7b06cbbeff97352b13b1a0c7b09cb0361eafec
6
+ metadata.gz: 28c3d0f941e530245655e9704d675343fdabb9f94123bf8065aad4044492b9de0b50e2cead2cd99b1ceecceb8a6d89a2417a8d52566d39a8aed3250aec50d93f
7
+ data.tar.gz: d2a488f3efdfeaabcaf72d610052db4327ba8af401b9e974f2e191e0579efe730d80dc5d9086a9db1a3eeaf4a4a5f89ab1b6e5136ced2cdfa75a66bda4a25797
@@ -29,10 +29,10 @@ module CabochaParser
29
29
  return excab
30
30
  end
31
31
  def parse_chunk(line)
32
- null, id, rel, part, score = line.chomp.split("\s")
33
- link, dep = rel[0..-2], rel[-1]
32
+ null, id, dep, part, score = line.chomp.split("\s")
33
+ link, rel = dep[0..-2], dep[-1]
34
34
  head, func = part.split('/')
35
- chunk = {type: 'CHUNK', id: id, link: link, dep: dep, head: head, func: func, score: score}
35
+ chunk = {type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score}
36
36
  return chunk
37
37
  end
38
38
  def parse_token(line)
@@ -25,8 +25,18 @@ module KokugoTagger
25
25
  cform data
26
26
  end
27
27
  def segment_s(data)
28
+ @segments ||= []
29
+ @segments << data
30
+ @last_item = data
31
+ end
32
+ def group_s(data)
33
+ @groups ||= []
34
+ @groups << data
35
+ @last_item = data
28
36
  end
29
37
  def attr(data)
38
+ @last_item[:attributes] ||= []
39
+ @last_item[:attributes] << data
30
40
  end
31
41
  def eos(data)
32
42
  before_eos
@@ -35,7 +45,7 @@ module KokugoTagger
35
45
  puts '#! ATTR bccwj-kok:pred "%s述語"' % chunk[:pos] if chunk[:pred]
36
46
  puts '#! ATTR bccwj-kok:conj "%s"' % chunk[:conj] if chunk[:conj]
37
47
  end
38
- @chunks, @chunk, @lpos, @segments = nil
48
+ @chunks, @chunk, @lpos, @segments, @groups = nil
39
49
  end
40
50
  def pos(token)
41
51
  case token[:pos]
@@ -61,10 +71,12 @@ module KokugoTagger
61
71
  case token[:text]
62
72
  when 'が'
63
73
  @chunk.update conj:'主語'
74
+ when 'を', 'に'
75
+ @chunk.update conj:'補語'
64
76
  when 'の', 'との', 'という', 'といった'
65
77
  @chunk.update conj:'修飾(連体)'
66
78
  else
67
- @chunk.update conj:'補語'
79
+ @chunk.update conj:'修飾(連用)'
68
80
  end
69
81
  when /^(助詞-副助詞|助詞-係助詞)/
70
82
  @chunk.update conj:'修飾(連用)'
@@ -88,6 +100,30 @@ module KokugoTagger
88
100
  end
89
101
  end
90
102
  def before_eos
103
+ # BCCWJ-DepPara
104
+ @chunks.each do |chunk|
105
+ chunk[:conj] = [chunk[:conj], '断片'].compact.join(':') if chunk[:rel] == 'F'
106
+ chunk[:conj] = [chunk[:conj], '文節内'].compact.join(':') if chunk[:rel] == 'B'
107
+ chunk[:conj] = '文末' if chunk[:rel] == 'Z'
108
+ end
109
+ # 並列・同格関係
110
+ @groups ||= []
111
+ @segments ||= []
112
+ @groups.each do |group|
113
+ next unless group[:name] =~ /^(Parallel|Apposition)$/
114
+ members = group[:member].map{|n| n.to_i}
115
+ members = @segments.values_at(*members)
116
+ chunk_ids = members.map do |segment|
117
+ _end = segment[:end].to_i
118
+ chunk = @chunks.find{|c| c[:start] < _end and c[:end] >= _end}
119
+ chunk[:id].to_i if chunk
120
+ end
121
+ chunk_ids = chunk_ids.compact.uniq.sort
122
+ if chunk_ids.size > 1
123
+ conj = {'Parallel' => '並立', 'Apposition' => '同格'}[group[:name]]
124
+ chunk_ids[0..-2].each{|cid| @chunks[cid][:conj] = conj}
125
+ end
126
+ end
91
127
  # 属性を付与できなかった文節に対して、係り受けを利用して属性を補完
92
128
  # 連用成分を受ける文節を述語とみなす
93
129
  @chunks.each do |chunk|
@@ -1,3 +1,3 @@
1
1
  module KokugoTagger
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kokugo_tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mizuho IMADA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-17 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
77
  version: '0'
78
78
  requirements: []
79
79
  rubyforge_project:
80
- rubygems_version: 2.4.3
80
+ rubygems_version: 2.2.2
81
81
  signing_key:
82
82
  specification_version: 4
83
83
  summary: Write a short summary. Required.