kokugo_tagger 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kokugo_tagger/parser.rb +3 -3
- data/lib/kokugo_tagger/tagger.rb +38 -2
- data/lib/kokugo_tagger/version.rb +1 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 38c7ab0d9feb7ce099075914af22947354187b40
         | 
| 4 | 
            +
              data.tar.gz: b240e3e012dce85475716a16ed6b1ff738d65327
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 28c3d0f941e530245655e9704d675343fdabb9f94123bf8065aad4044492b9de0b50e2cead2cd99b1ceecceb8a6d89a2417a8d52566d39a8aed3250aec50d93f
         | 
| 7 | 
            +
              data.tar.gz: d2a488f3efdfeaabcaf72d610052db4327ba8af401b9e974f2e191e0579efe730d80dc5d9086a9db1a3eeaf4a4a5f89ab1b6e5136ced2cdfa75a66bda4a25797
         | 
    
        data/lib/kokugo_tagger/parser.rb
    CHANGED
    
    | @@ -29,10 +29,10 @@ module CabochaParser | |
| 29 29 | 
             
            		return excab
         | 
| 30 30 | 
             
            	end
         | 
| 31 31 | 
             
            	def parse_chunk(line)
         | 
| 32 | 
            -
            		null, id,  | 
| 33 | 
            -
            		link,  | 
| 32 | 
            +
            		null, id, dep, part, score = line.chomp.split("\s")
         | 
| 33 | 
            +
            		link, rel = dep[0..-2], dep[-1]
         | 
| 34 34 | 
             
            		head, func = part.split('/')
         | 
| 35 | 
            -
            		chunk = {type: 'CHUNK', id: id, link: link,  | 
| 35 | 
            +
            		chunk = {type: 'CHUNK', id: id, link: link, rel: rel, head: head, func: func, score: score}
         | 
| 36 36 | 
             
            		return chunk
         | 
| 37 37 | 
             
            	end
         | 
| 38 38 | 
             
            	def parse_token(line)
         | 
    
        data/lib/kokugo_tagger/tagger.rb
    CHANGED
    
    | @@ -25,8 +25,18 @@ module KokugoTagger | |
| 25 25 | 
             
            		cform data
         | 
| 26 26 | 
             
            	end
         | 
| 27 27 | 
             
            	def segment_s(data)
         | 
| 28 | 
            +
            		@segments ||= []
         | 
| 29 | 
            +
            		@segments << data
         | 
| 30 | 
            +
            		@last_item = data
         | 
| 31 | 
            +
            	end
         | 
| 32 | 
            +
            	def group_s(data)
         | 
| 33 | 
            +
            		@groups ||= []
         | 
| 34 | 
            +
            		@groups << data
         | 
| 35 | 
            +
            		@last_item = data
         | 
| 28 36 | 
             
            	end
         | 
| 29 37 | 
             
            	def attr(data)
         | 
| 38 | 
            +
            		@last_item[:attributes] ||= []
         | 
| 39 | 
            +
            		@last_item[:attributes] << data
         | 
| 30 40 | 
             
            	end
         | 
| 31 41 | 
             
            	def eos(data)
         | 
| 32 42 | 
             
            		before_eos
         | 
| @@ -35,7 +45,7 @@ module KokugoTagger | |
| 35 45 | 
             
            			puts '#! ATTR bccwj-kok:pred "%s述語"' % chunk[:pos] if chunk[:pred]
         | 
| 36 46 | 
             
            			puts '#! ATTR bccwj-kok:conj "%s"' % chunk[:conj] if chunk[:conj]
         | 
| 37 47 | 
             
            		end
         | 
| 38 | 
            -
            		@chunks, @chunk, @lpos, @segments = nil
         | 
| 48 | 
            +
            		@chunks, @chunk, @lpos, @segments, @groups = nil
         | 
| 39 49 | 
             
            	end
         | 
| 40 50 | 
             
            	def pos(token)
         | 
| 41 51 | 
             
            		case token[:pos]
         | 
| @@ -61,10 +71,12 @@ module KokugoTagger | |
| 61 71 | 
             
            			case token[:text]
         | 
| 62 72 | 
             
            			when 'が'
         | 
| 63 73 | 
             
            				@chunk.update conj:'主語'
         | 
| 74 | 
            +
            			when 'を', 'に'
         | 
| 75 | 
            +
            				@chunk.update conj:'補語'
         | 
| 64 76 | 
             
            			when 'の', 'との', 'という', 'といった'
         | 
| 65 77 | 
             
            				@chunk.update conj:'修飾(連体)'
         | 
| 66 78 | 
             
            			else
         | 
| 67 | 
            -
            				@chunk.update conj:' | 
| 79 | 
            +
            				@chunk.update conj:'修飾(連用)'
         | 
| 68 80 | 
             
            			end
         | 
| 69 81 | 
             
            		when /^(助詞-副助詞|助詞-係助詞)/
         | 
| 70 82 | 
             
            			@chunk.update conj:'修飾(連用)'
         | 
| @@ -88,6 +100,30 @@ module KokugoTagger | |
| 88 100 | 
             
            		end			
         | 
| 89 101 | 
             
            	end
         | 
| 90 102 | 
             
            	def before_eos
         | 
| 103 | 
            +
            		# BCCWJ-DepPara
         | 
| 104 | 
            +
            		@chunks.each do |chunk|
         | 
| 105 | 
            +
            			chunk[:conj] = [chunk[:conj], '断片'].compact.join(':') if chunk[:rel] == 'F'
         | 
| 106 | 
            +
            			chunk[:conj] = [chunk[:conj], '文節内'].compact.join(':') if chunk[:rel] == 'B'
         | 
| 107 | 
            +
            			chunk[:conj] = '文末' if chunk[:rel] == 'Z'
         | 
| 108 | 
            +
            		end
         | 
| 109 | 
            +
            		# 並列・同格関係
         | 
| 110 | 
            +
            		@groups ||= []
         | 
| 111 | 
            +
            		@segments ||= []
         | 
| 112 | 
            +
            		@groups.each do |group|
         | 
| 113 | 
            +
            			next unless group[:name] =~ /^(Parallel|Apposition)$/
         | 
| 114 | 
            +
            			members = group[:member].map{|n| n.to_i}
         | 
| 115 | 
            +
            			members = @segments.values_at(*members)
         | 
| 116 | 
            +
            			chunk_ids = members.map do |segment|
         | 
| 117 | 
            +
            				_end = segment[:end].to_i
         | 
| 118 | 
            +
            				chunk = @chunks.find{|c| c[:start] < _end and c[:end] >= _end}
         | 
| 119 | 
            +
            				chunk[:id].to_i if chunk
         | 
| 120 | 
            +
            			end
         | 
| 121 | 
            +
            			chunk_ids = chunk_ids.compact.uniq.sort
         | 
| 122 | 
            +
            			if chunk_ids.size > 1
         | 
| 123 | 
            +
            				conj = {'Parallel' => '並立', 'Apposition' => '同格'}[group[:name]]
         | 
| 124 | 
            +
            				chunk_ids[0..-2].each{|cid| @chunks[cid][:conj] = conj}
         | 
| 125 | 
            +
            			end
         | 
| 126 | 
            +
            		end
         | 
| 91 127 | 
             
            		# 属性を付与できなかった文節に対して、係り受けを利用して属性を補完
         | 
| 92 128 | 
             
            		# 連用成分を受ける文節を述語とみなす
         | 
| 93 129 | 
             
            		@chunks.each do |chunk|
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: kokugo_tagger
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.3
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Mizuho IMADA
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2015-06-12 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -77,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 77 77 | 
             
                  version: '0'
         | 
| 78 78 | 
             
            requirements: []
         | 
| 79 79 | 
             
            rubyforge_project: 
         | 
| 80 | 
            -
            rubygems_version: 2. | 
| 80 | 
            +
            rubygems_version: 2.2.2
         | 
| 81 81 | 
             
            signing_key: 
         | 
| 82 82 | 
             
            specification_version: 4
         | 
| 83 83 | 
             
            summary: Write a short summary. Required.
         |