langue-japanese 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +6 -0
- data/LICENSE +22 -0
- data/README.md +53 -0
- data/Rakefile +2 -0
- data/langue-japanese.gemspec +22 -0
- data/lib/langue/japanese/language.rb +36 -0
- data/lib/langue/japanese/logging.rb +21 -0
- data/lib/langue/japanese/parser.rb +77 -0
- data/lib/langue/japanese/shaper.rb +70 -0
- data/lib/langue/japanese/structurer.rb +74 -0
- data/lib/langue/japanese/version.rb +5 -0
- data/lib/langue/japanese/words/adjective.rb +67 -0
- data/lib/langue/japanese/words/adjective_noun.rb +76 -0
- data/lib/langue/japanese/words/attribute.rb +100 -0
- data/lib/langue/japanese/words/classifier.rb +107 -0
- data/lib/langue/japanese/words/morpheme_filter.rb +26 -0
- data/lib/langue/japanese/words/noun.rb +61 -0
- data/lib/langue/japanese/words/period.rb +55 -0
- data/lib/langue/japanese/words/prefix.rb +19 -0
- data/lib/langue/japanese/words/pronoun.rb +16 -0
- data/lib/langue/japanese/words/verb.rb +100 -0
- data/lib/langue/japanese.rb +2 -0
- data/lib/langue-japanese.rb +1 -0
- data/spec/langue/japanese/data.yaml +169 -0
- data/spec/langue/japanese/language_spec.rb +120 -0
- data/spec/langue/japanese/parser_spec.rb +147 -0
- data/spec/langue/japanese/shaper_spec.rb +34 -0
- data/spec/langue/japanese/structurer_spec.rb +116 -0
- data/spec/langue/japanese/words/adjective_noun_spec.rb +76 -0
- data/spec/langue/japanese/words/adjective_spec.rb +123 -0
- data/spec/langue/japanese/words/noun_spec.rb +79 -0
- data/spec/langue/japanese/words/period_spec.rb +69 -0
- data/spec/langue/japanese/words/pronoun_spec.rb +24 -0
- data/spec/langue/japanese/words/verb_spec.rb +242 -0
- data/spec/langue/japanese_spec.rb +7 -0
- data/spec/spec_helper.rb +75 -0
- metadata +131 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Takahiro Kondo
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
What is langue-japanese
|
2
|
+
=======================
|
3
|
+
|
4
|
+
It provides operations for working with Japanese text.
|
5
|
+
|
6
|
+
Installation
|
7
|
+
------------
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'langue'
|
12
|
+
gem 'langue-japanese'
|
13
|
+
|
14
|
+
# When doing morphological analysis
|
15
|
+
gem 'mecab-ruby', :git => 'path to mecab-ruby repository'
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install langue
|
24
|
+
$ gem install langue-japanese
|
25
|
+
|
26
|
+
The langue-japanese gem runs on top of the langue gem, so it depends on langue.
|
27
|
+
|
28
|
+
It also uses MeCab for morphological analysis, so if you use that feature this
|
29
|
+
gem additionally depends on the mecab-ruby gem.
|
30
|
+
|
31
|
+
Usage
|
32
|
+
-----
|
33
|
+
|
34
|
+
# coding: utf-8
|
35
|
+
require 'langue-japanese'
|
36
|
+
|
37
|
+
# Get a language class
|
38
|
+
language = Langue['japanese'].new
|
39
|
+
|
40
|
+
# Split a text into morphemes
|
41
|
+
morphemes = language.parse('今日は妹と一緒にお買い物してきたよ。楽しかった〜')
|
42
|
+
|
43
|
+
# Create a structured text from the morphemes
|
44
|
+
text = language.structure(morphemes)
|
45
|
+
|
46
|
+
Contributing
|
47
|
+
------------
|
48
|
+
|
49
|
+
1. Fork it
|
50
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
51
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
52
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
53
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/langue/japanese/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for langue-japanese.
Gem::Specification.new do |spec|
  # Identity
  spec.name = "langue-japanese"
  spec.version = Langue::Japanese::VERSION
  spec.authors = ["Takahiro Kondo"]
  spec.email = ["kondo@atedesign.net"]
  spec.summary = %q{The foundation for Japanese}
  spec.description = %q{It provides the operations to Japanese.}
  spec.homepage = ""

  # Packaged files are whatever git currently tracks.
  spec.files = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_runtime_dependency 'langue'
  spec.add_runtime_dependency 'activesupport'

  # Development dependencies
  spec.add_development_dependency 'rspec'
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'langue'
|
2
|
+
|
3
|
+
module Langue
  module Japanese
    # Entry point for Japanese support: exposes parsing, person-name shaping
    # and structuring by delegating to lazily built collaborator objects.
    class Language < Langue::Language
      # Returns the Parser for this language, building it from @options on
      # first use.
      def parser
        @parser || (@parser = Parser.new(@options))
      end
      depend_to :parser, 'langue/japanese/parser'

      # Returns the Shaper for this language, building it from @options on
      # first use.
      def shaper
        @shaper || (@shaper = Shaper.new(@options))
      end
      depend_to :shaper, 'langue/japanese/shaper'

      # Returns the Structurer for this language, building it from @options
      # on first use.
      def structurer
        @structurer || (@structurer = Structurer.new(@options))
      end
      depend_to :structurer, 'langue/japanese/structurer'

      # Delegates to Parser#parse.
      def parse(text)
        parser.parse(text)
      end

      # Delegates to Shaper#shape_person_name.
      def shape_person_name(morphemes, person_name)
        shaper.shape_person_name(morphemes, person_name)
      end

      # Delegates to Structurer#structure.
      def structure(morphemes)
        structurer.structure(morphemes)
      end
    end
  end

  support(Japanese::Language)
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Langue
  module Japanese
    # Mixin that supplies a do-nothing logger for components that were not
    # given one explicitly.
    module Logging
      # Returns a logger that discards everything. When Fluent::Logger is
      # loaded its own null logger is used; otherwise the local NullLogger
      # stand-in is returned.
      def null_logger
        fluent_logger_loaded =
          Object.const_defined?(:Fluent) && Fluent.const_defined?(:Logger)

        if fluent_logger_loaded
          Fluent::Logger::NullLogger.open
        else
          NullLogger.new
        end
      end

      # Minimal logger exposing post / post_with_time; every call is a no-op.
      class NullLogger
        # Discards the event. Always returns false.
        def post(tag, map)
          post_with_time(tag, map, nil)
        end

        # Discards the event. Always returns false.
        def post_with_time(tag, map, time)
          false
        end
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'MeCab'
|
2
|
+
|
3
|
+
require 'langue/morpheme'
|
4
|
+
require 'langue/morphemes'
|
5
|
+
require 'langue/japanese/logging'
|
6
|
+
|
7
|
+
module Langue
  module Japanese
    # Splits text into morphemes by running it through MeCab.
    class Parser
      include Logging

      # options - Hash of settings:
      #   :mecab_options - Hash of MeCab settings. Only :sysdic (passed as
      #                    -d) and :userdic (passed as -u) are recognised;
      #                    any other key is reported to the logger and
      #                    ignored.
      #   :logger        - object responding to post(tag, map); defaults to
      #                    a null logger.
      def initialize(options = {})
        @mecab_options = options[:mecab_options] || {}
        @logger = options[:logger] || null_logger
        @taggers = {}
        @taggers_lock = Mutex.new
      end

      # NOTE: a tagger already built for a thread keeps the options it was
      # created with; changing this afterwards only affects new taggers.
      attr_accessor :mecab_options

      # Parses +text+ and returns a Morphemes collection with one Morpheme
      # per MeCab node that has a non-empty surface.
      def parse(text)
        morphemes = Morphemes.new
        node = tagger.parseToNode(text)

        while node
          surface = node.surface.force_encoding('utf-8')

          unless surface.empty?
            feature = node.feature.force_encoding('utf-8')
            morphemes << create_morpheme(surface, feature)
          end

          node = node.next
        end

        morphemes
      end

      private

      # Returns the MeCab tagger for the current thread, creating it on
      # first use. Access is serialised, and entries belonging to threads
      # that have finished are dropped so their taggers can be collected.
      def tagger
        @taggers_lock.synchronize do
          @taggers.delete_if { |thread, _| !thread.alive? }
          @taggers[Thread.current] ||= MeCab::Tagger.new(mecab_options_as_string)
        end
      end

      # Builds the MeCab command-line style argument string from
      # @mecab_options, logging a warning for every unsupported key.
      def mecab_options_as_string
        arguments = []

        @mecab_options.each do |name, value|
          key = name.to_sym

          case key
          when :sysdic
            arguments << '-d' << value
          when :userdic
            arguments << '-u' << value
          else
            map = {
              :level => 'warn',
              :message => "'#{key}' option is unsupported",
              :key => key
            }

            @logger.post('langue.japanese.parser', map)
          end
        end

        arguments.join(' ')
      end

      # Builds a Morpheme from a surface string and MeCab's comma-separated
      # feature string. '*' fields become nil, and fields 1..3 are collapsed
      # into a single list that stops at the first nil.
      def create_morpheme(surface, feature)
        values = feature.split(',').map { |v| v == '*' ? nil : v }
        # values[1..3] is nil when the feature string is empty.
        categories = (values[1..3] || []).take_while { |value| !value.nil? }
        values[1..3] = [categories]
        values.unshift(surface.downcase)
        Morpheme.new(Hash[Morpheme::KEYS.zip(values)])
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'langue/morpheme'
|
3
|
+
require 'langue/morphemes'
|
4
|
+
require 'langue/japanese/logging'
|
5
|
+
|
6
|
+
module Langue
  module Japanese
    # Post-processes parsed morphemes, e.g. merging the pieces of a known
    # person name back into a single morpheme.
    class Shaper
      include Logging

      # options - Hash of settings:
      #   :logger - object responding to post(tag, map); defaults to a null
      #             logger.
      def initialize(options = {})
        @logger = options[:logger] || null_logger
      end

      # Replaces every run of consecutive morphemes whose texts concatenate
      # exactly to +person_name+ with a single person-name morpheme.
      #
      # morphemes   - enumerable of morphemes responding to #text
      # person_name - String to look for
      #
      # Returns a Morphemes collection. Morphemes that are not part of a
      # complete match are passed through unchanged and in order.
      def shape_person_name(morphemes, person_name)
        shaped = Morphemes.new
        pending = []   # morphemes of a match that is still in progress
        offset = 0     # how much of person_name the pending run covers
        name_size = person_name.size

        morphemes.each do |morpheme|
          text = morpheme.text

          # The run in progress is broken: emit it untouched, then fall
          # through so this morpheme can still start a new match.
          if !pending.empty? && person_name.index(text, offset) != offset
            pending.each { |m| shaped << m }
            pending.clear
            offset = 0
          end

          if person_name.index(text, offset) == offset
            pending << morpheme
            offset += text.size

            if offset == name_size
              shaped << join_as_person_name(pending)
              pending.clear
              offset = 0
            end
          else
            shaped << morpheme
          end
        end

        # A partial match still pending at the end of input is not a name;
        # emit its morphemes instead of dropping them.
        pending.each { |m| shaped << m }
        shaped
      end

      private

      # Builds one proper-noun (person name) morpheme out of +morphemes+ by
      # concatenating their text, yomi and pronunciation.
      #
      # A morpheme without yomi or pronunciation (e.g. the long-vowel mark
      # 'ー') contributes its surface text instead.
      def join_as_person_name(morphemes)
        text = morphemes.map(&:text).join
        yomi = morphemes.map { |m| m.yomi || m.text }.join
        pronunciation = morphemes.map { |m| m.pronunciation || m.text }.join

        Morpheme.new(
          :text => text,
          :part_of_speech => '名詞',
          :categories => %w(固有名詞 人名),
          :root_form => text,
          :yomi => yomi,
          :pronunciation => pronunciation
        )
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'active_support/core_ext/string/inflections'
|
2
|
+
|
3
|
+
require 'langue/text'
|
4
|
+
require 'langue/sentence'
|
5
|
+
require 'langue/word'
|
6
|
+
require 'langue/japanese/logging'
|
7
|
+
|
8
|
+
module Langue
  module Japanese
    # Groups a flat list of morphemes into words, and words into sentences.
    class Structurer
      include Logging

      # Word classes tried at each position, in priority order. Each class
      # is loaded from langue/japanese/words/<name> when this file loads.
      WORD_CLASSES = %w(
        period
        verb
        adjective
        adjective_noun
        pronoun
        noun
      ).map do |word_name|
        require "langue/japanese/words/#{word_name}"
        Langue::Japanese.const_get(word_name.camelize)
      end

      # options - Hash of settings:
      #   :logger - object responding to post(tag, map); defaults to a null
      #             logger.
      def initialize(options = {})
        @logger = options[:logger] || null_logger
      end

      # Builds a Text from +morphemes+.
      #
      # Words are recognised greedily from left to right. A sentence ends
      # after a run of one or more Period words, i.e. it is closed when the
      # first non-period word following a period is seen.
      def structure(morphemes)
        sentences = []
        words = []
        period_seen = false
        index = 0
        length = morphemes.length

        while index < length
          word_class, size = take_word(morphemes, index)
          word = word_class.new(morphemes[index, size])
          period = word.instance_of?(Period)

          if period_seen && !period
            sentences << Sentence.new(words)
            # Start a fresh array rather than clearing the one just handed
            # to Sentence.new, so the finished sentence cannot be affected.
            words = []
            period_seen = false
          elsif period
            period_seen = true
          end

          words << word
          index += size
        end

        sentences << Sentence.new(words) unless words.empty?
        Text.new(sentences)
      end

      private

      # Returns [word_class, size] for the word starting at +index+: the
      # first entry of WORD_CLASSES that takes at least one morpheme, or
      # a generic single-morpheme Word when none does.
      def take_word(morphemes, index)
        WORD_CLASSES.each do |candidate|
          taken = candidate.take(morphemes, index)
          return [candidate, taken] if taken > 0
        end

        [Word, 1]
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'langue/word'
|
2
|
+
require 'langue/japanese/words/prefix'
|
3
|
+
require 'langue/japanese/words/attribute'
|
4
|
+
require 'langue/japanese/words/classifier'
|
5
|
+
|
6
|
+
module Langue
  module Japanese
    # A word made of an adjective, optional leading prefixes and trailing
    # auxiliary verbs.
    class Adjective < Word
      include Prefix
      include Attribute

      has :negative, :perfective

      class << self
        include Classifier

        # Returns how many morphemes starting at +index+ form an adjective
        # word (with or without prefix), or 0 when none does.
        def take(morphemes, index)
          if first_adjective?(morphemes, index)
            take_adjective(morphemes, index)
          elsif adjective_prefix?(morphemes, index)
            take_adjective_with_prefix(morphemes, index)
          else
            0
          end
        end

        # Returns the size of the adjective starting exactly at +index+:
        # the first adjective, any following adjectives (optionally joined
        # by a conjunctive particle), then any auxiliary verbs. 0 when
        # +index+ is not the start of an adjective.
        def take_adjective(morphemes, index)
          return 0 unless first_adjective?(morphemes, index)
          size = 1
          # A conjunctive particle is only consumed when an adjective
          # follows it directly.
          size += 1 while following_adjective?(morphemes, index + size) ||
                          (conjunctive_particle?(morphemes, index + size) &&
                           following_adjective?(morphemes, index + size + 1))
          size += 1 while auxiliary_verb?(morphemes, index + size)
          size
        end

        # Returns the size of "prefix(es) + adjective" starting at +index+,
        # or 0 when there is no prefix or no adjective after the prefixes.
        def take_adjective_with_prefix(morphemes, index)
          size = 0
          size += 1 while adjective_prefix?(morphemes, index + size)
          return 0 unless size > 0
          next_size = take_adjective(morphemes, index + size)
          next_size > 0 ? size + next_size : 0
        end
      end

      # Returns the last morpheme of this word that is a body adjective, or
      # nil when the word is empty or contains none. The result is cached,
      # including a nil result.
      def key_morpheme
        unless instance_variable_defined?(:@key_morpheme)
          index = size - 1
          # Stop at the front of the word: scanning into negative indexes
          # could otherwise continue without ever finding a match.
          index -= 1 while index >= 0 && !self.class.body_adjective?(morphemes, index)
          @key_morpheme = index < 0 ? nil : self[index]
        end

        @key_morpheme
      end

      # Returns the leading run of adjective-prefix morphemes (cached).
      def prefix_morphemes
        @prefix_morphemes ||= begin
          size = 0
          size += 1 while self.class.adjective_prefix?(morphemes, size)
          morphemes[0, size]
        end
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'langue/japanese/words/noun'
|
2
|
+
require 'langue/japanese/words/prefix'
|
3
|
+
require 'langue/japanese/words/classifier'
|
4
|
+
|
5
|
+
module Langue
  module Japanese
    # A noun that behaves as an adjective stem, optionally preceded by noun
    # prefixes or built from a noun plus an adjective-stem suffix.
    class AdjectiveNoun < Noun
      include Prefix

      class << self
        include Classifier

        # Returns how many morphemes starting at +index+ form an adjective
        # noun, or 0 when none does.
        def take(morphemes, index)
          return take_adjective_stem_noun(morphemes, index) if adjective_stem_noun?(morphemes, index)
          return take_noun_with_suffix(morphemes, index) if first_noun?(morphemes, index)
          return take_noun_with_prefix(morphemes, index) if noun_prefix?(morphemes, index)

          0
        end

        # Counts the run of adjective-stem nouns at +index+. The run is
        # rejected (0) when it is directly followed by a noun that is not
        # an adjective-stem suffix.
        def take_adjective_stem_noun(morphemes, index)
          count = 0
          count += 1 while adjective_stem_noun?(morphemes, index + count)
          return 0 if count <= 0

          tail = index + count
          return count if adjective_stem_suffix?(morphemes, tail)

          following_noun?(morphemes, tail) ? 0 : count
        end

        # Counts "noun(s) + adjective-stem suffix(es)" at +index+. Returns 0
        # when there is no leading noun, no suffix, or another noun follows
        # the suffixes.
        def take_noun_with_suffix(morphemes, index)
          return 0 unless first_noun?(morphemes, index)

          count = 1
          while following_noun?(morphemes, index + count) && !adjective_stem_suffix?(morphemes, index + count)
            count += 1
          end
          return 0 unless adjective_stem_suffix?(morphemes, index + count)

          count += 1 while adjective_stem_suffix?(morphemes, index + count)

          following_noun?(morphemes, index + count) ? 0 : count
        end

        # Counts "prefix(es) + adjective noun" at +index+, or 0 when there
        # is no prefix or nothing valid follows the prefixes.
        def take_noun_with_prefix(morphemes, index)
          prefix_count = 0
          prefix_count += 1 while noun_prefix?(morphemes, index + prefix_count)
          return 0 unless prefix_count > 0

          rest = take(morphemes, index + prefix_count)
          rest > 0 ? prefix_count + rest : 0
        end
      end

      # Returns the leading run of noun-prefix morphemes (cached).
      def prefix_morphemes
        @prefix_morphemes ||= begin
          count = 0
          count += 1 while self.class.noun_prefix?(morphemes, count)
          morphemes[0, count]
        end
      end

      # Returns the concatenated text of the body morphemes, or nil when
      # there are none. The result is cached, including a nil result.
      def body
        unless instance_variable_defined?(:@body)
          @body = if body_morphemes.empty?
            nil
          else
            body_morphemes.map(&:text).join
          end
        end

        @body
      end
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'langue/japanese/words/morpheme_filter'
|
3
|
+
|
4
|
+
module Langue
  module Japanese
    # Mixin for inflecting words. It adds a class-level `has` macro that
    # defines cached predicate methods (e.g. `negative?`) backed by the
    # private `include_<attr>?` checks below, plus a `body` reader.
    module Attribute
      def self.included(object)
        object.class_eval do
          include MorphemeFilter

          # Keep the morphemes up to and including the key morpheme. When
          # the word is empty or the key morpheme cannot be located, the
          # morphemes are passed through unchanged.
          filter do |word, morphemes|
            key_index = word.empty? ? nil : morphemes.index(word.key_morpheme)
            key_index ? morphemes[0..key_index] : morphemes
          end

          # Defines `<attr>?` for every given attribute name. Each predicate
          # calls the matching private `include_<attr>?` once and caches the
          # result as a strict boolean.
          def self.has(*attrs)
            attrs.each do |attr|
              define_method("#{attr}?") do
                @attrs ||= {}
                @attrs[attr] = !!__send__("include_#{attr}?") unless @attrs.key?(attr)
                @attrs[attr]
              end
            end
          end
        end
      end

      # Returns the body text: the text of every body morpheme except the
      # last, followed by the root form of the last one. nil when there are
      # no body morphemes. The result is cached, including a nil result.
      def body
        unless instance_variable_defined?(:@body)
          @body = if body_morphemes.empty?
            nil
          else
            morphemes = body_morphemes.dup
            last_morpheme = morphemes.pop
            morphemes.map(&:text).join + last_morpheme.root_form
          end
        end

        @body
      end

      # Ruby < 1.9: provide the block form of #index used by the helpers
      # below.
      if RUBY_VERSION.to_f < 1.9
        def index(value = nil)
          if value
            super
          else
            each_with_index { |morpheme, index| return index if yield morpheme }
            nil
          end
        end
      end

      private

      # Truthy when the word contains a progressive marker: one of the
      # contracted dependent verbs, or いる checked against the morpheme
      # before it (て / で conjunctive particle).
      def include_progressive?
        if noncategorematic_verb_index(%w(てる でる とる どる))
          true
        elsif position = noncategorematic_verb_index(['いる'])
          morphemes.at(position - 1) { |m| m.classified?('助詞', '接続助詞') && %w(て で).include?(m.root_form) }
        end
      end

      # Truthy when the word contains the verb suffix れる / られる.
      def include_passive?
        verb_suffix_index(%w(れる られる))
      end

      # Truthy when the word contains an auxiliary verb inflected as
      # 特殊・タイ.
      def include_aggressive?
        auxiliary_verb_index('特殊・タイ')
      end

      # Truthy when the word contains an auxiliary verb inflected as
      # 特殊・ナイ, or one inflected as 特殊・ヌ checked against the morpheme
      # before it (未然形).
      def include_negative?
        if auxiliary_verb_index('特殊・ナイ')
          true
        elsif position = auxiliary_verb_index('特殊・ヌ')
          morphemes.at(position - 1) { |m| m.inflection_type == '未然形' }
        end
      end

      # Truthy when the word contains an auxiliary verb inflected as
      # 特殊・タ, or the auxiliary verb ぬ checked against the morpheme
      # before it (連用形).
      def include_perfective?
        if auxiliary_verb_index('特殊・タ')
          true
        elsif position = index { |m| m.classified?('助動詞') && m.root_form == 'ぬ' }
          morphemes.at(position - 1) { |m| m.inflection_type == '連用形' }
        end
      end

      # Truthy when the last morpheme's inflection type starts with 命令.
      # An empty word is never imperative.
      def include_imperative?
        last_morpheme = self[-1]
        return false if last_morpheme.nil?

        last_morpheme.inflection_type =~ /^命令/
      end

      # Index of the first dependent verb (動詞 / 非自立) whose root form is
      # in +root_forms+, or nil.
      def noncategorematic_verb_index(root_forms)
        index { |m| m.classified?('動詞', '非自立') && root_forms.include?(m.root_form) }
      end

      # Index of the first auxiliary verb (助動詞) with the given
      # inflection, or nil.
      def auxiliary_verb_index(inflection)
        index { |m| m.classified?('助動詞') && m.inflected?(inflection) }
      end

      # Index of the first verb suffix (動詞 / 接尾) whose root form is in
      # +root_forms+, or nil.
      def verb_suffix_index(root_forms)
        index { |m| m.classified?('動詞', '接尾') && root_forms.include?(m.root_form) }
      end
    end
  end
end
|