kokugo_tagger 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b609be62c8aea30eda82811044d1d7ecc6eeb1b4
4
- data.tar.gz: 733d6a86cea778492416df55f6e79de5c072a4c3
3
+ metadata.gz: 38c7ab0d9feb7ce099075914af22947354187b40
4
+ data.tar.gz: b240e3e012dce85475716a16ed6b1ff738d65327
5
5
  SHA512:
6
- metadata.gz: 02a682b673b2103320ced6836cb3784c8b9543628840d9aec1033de7a509869449818be930c6c2f06b22ccbfeb4748a2172916a2e849db836dd51b74837ad46d
7
- data.tar.gz: b2ba4fe4e4aeeeb83d9a709243479c70298426e86ab771a19eaa28420ddd8dee6b18dcafcedf3b4c5e648a513b7b06cbbeff97352b13b1a0c7b09cb0361eafec
6
+ metadata.gz: 28c3d0f941e530245655e9704d675343fdabb9f94123bf8065aad4044492b9de0b50e2cead2cd99b1ceecceb8a6d89a2417a8d52566d39a8aed3250aec50d93f
7
+ data.tar.gz: d2a488f3efdfeaabcaf72d610052db4327ba8af401b9e974f2e191e0579efe730d80dc5d9086a9db1a3eeaf4a4a5f89ab1b6e5136ced2cdfa75a66bda4a25797
@@ -29,10 +29,10 @@ module CabochaParser
29
29
  return excab
30
30
  end
31
31
  def parse_chunk(line)
32
- null, id, rel, part, score = line.chomp.split("\s")
33
- link, dep = rel[0..-2], rel[-1]
32
+ null, id, dep, part, score = line.chomp.split("\s")
33
+ link, rel = dep[0..-2], dep[-1]
34
34
  head, func = part.split('/')
35
- chunk = {type: 'CHUNK', id: id, link: link, dep: dep, head: head, func: func, score: score}
35
+ chunk = {type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score}
36
36
  return chunk
37
37
  end
38
38
  def parse_token(line)
@@ -25,8 +25,18 @@ module KokugoTagger
25
25
  cform data
26
26
  end
27
27
  def segment_s(data)
28
+ @segments ||= []
29
+ @segments << data
30
+ @last_item = data
31
+ end
32
+ def group_s(data)
33
+ @groups ||= []
34
+ @groups << data
35
+ @last_item = data
28
36
  end
29
37
  def attr(data)
38
+ @last_item[:attributes] ||= []
39
+ @last_item[:attributes] << data
30
40
  end
31
41
  def eos(data)
32
42
  before_eos
@@ -35,7 +45,7 @@ module KokugoTagger
35
45
  puts '#! ATTR bccwj-kok:pred "%s述語"' % chunk[:pos] if chunk[:pred]
36
46
  puts '#! ATTR bccwj-kok:conj "%s"' % chunk[:conj] if chunk[:conj]
37
47
  end
38
- @chunks, @chunk, @lpos, @segments = nil
48
+ @chunks, @chunk, @lpos, @segments, @groups = nil
39
49
  end
40
50
  def pos(token)
41
51
  case token[:pos]
@@ -61,10 +71,12 @@ module KokugoTagger
61
71
  case token[:text]
62
72
  when 'が'
63
73
  @chunk.update conj:'主語'
74
+ when 'を', 'に'
75
+ @chunk.update conj:'補語'
64
76
  when 'の', 'との', 'という', 'といった'
65
77
  @chunk.update conj:'修飾(連体)'
66
78
  else
67
- @chunk.update conj:'補語'
79
+ @chunk.update conj:'修飾(連用)'
68
80
  end
69
81
  when /^(助詞-副助詞|助詞-係助詞)/
70
82
  @chunk.update conj:'修飾(連用)'
@@ -88,6 +100,30 @@ module KokugoTagger
88
100
  end
89
101
  end
90
102
  def before_eos
103
+ # BCCWJ-DepPara
104
+ @chunks.each do |chunk|
105
+ chunk[:conj] = [chunk[:conj], '断片'].compact.join(':') if chunk[:rel] == 'F'
106
+ chunk[:conj] = [chunk[:conj], '文節内'].compact.join(':') if chunk[:rel] == 'B'
107
+ chunk[:conj] = '文末' if chunk[:rel] == 'Z'
108
+ end
109
+ # 並列・同格関係
110
+ @groups ||= []
111
+ @segments ||= []
112
+ @groups.each do |group|
113
+ next unless group[:name] =~ /^(Parallel|Apposition)$/
114
+ members = group[:member].map{|n| n.to_i}
115
+ members = @segments.values_at(*members)
116
+ chunk_ids = members.map do |segment|
117
+ _end = segment[:end].to_i
118
+ chunk = @chunks.find{|c| c[:start] < _end and c[:end] >= _end}
119
+ chunk[:id].to_i if chunk
120
+ end
121
+ chunk_ids = chunk_ids.compact.uniq.sort
122
+ if chunk_ids.size > 1
123
+ conj = {'Parallel' => '並立', 'Apposition' => '同格'}[group[:name]]
124
+ chunk_ids[0..-2].each{|cid| @chunks[cid][:conj] = conj}
125
+ end
126
+ end
91
127
  # 属性を付与できなかった文節に対して、係り受けを利用して属性を補完
92
128
  # 連用成分を受ける文節を述語とみなす
93
129
  @chunks.each do |chunk|
@@ -1,3 +1,3 @@
1
1
  module KokugoTagger
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kokugo_tagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mizuho IMADA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-17 00:00:00.000000000 Z
11
+ date: 2015-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
77
  version: '0'
78
78
  requirements: []
79
79
  rubyforge_project:
80
- rubygems_version: 2.4.3
80
+ rubygems_version: 2.2.2
81
81
  signing_key:
82
82
  specification_version: 4
83
83
  summary: Write a short summary. Required.