langue 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/langue/morpheme.rb +5 -0
- data/lib/langue/morphemes.rb +42 -6
- data/lib/langue/sentence.rb +5 -0
- data/lib/langue/text.rb +6 -1
- data/lib/langue/version.rb +1 -1
- data/lib/langue/word.rb +11 -0
- data/spec/langue/morpheme_spec.rb +31 -0
- data/spec/langue/morphemes_spec.rb +69 -4
- data/spec/langue/sentence_spec.rb +16 -0
- data/spec/langue/text_spec.rb +24 -0
- data/spec/langue/word_spec.rb +31 -5
- metadata +3 -3
data/lib/langue/morpheme.rb
CHANGED
@@ -17,6 +17,11 @@ module Langue
|
|
17
17
|
|
18
18
|
attr_reader(*KEYS)
|
19
19
|
|
20
|
+
# Value equality for morphemes.
#
# Two morphemes are equal when +other_morpheme+ is an instance of this
# object's class (or a subclass of it) and every attribute listed in KEYS
# compares equal between the two objects.
#
# other_morpheme - the object to compare against.
#
# Returns true or false.
def ==(other_morpheme)
  if other_morpheme.is_a?(self.class)
    # Bail out on the first attribute that differs.
    KEYS.each do |key|
      return false unless __send__(key) == other_morpheme.__send__(key)
    end
    true
  else
    false
  end
end
|
24
|
+
|
20
25
|
def classified?(part_of_speech, *categories)
|
21
26
|
got = [@part_of_speech] + @categories
|
22
27
|
expected = [part_of_speech] + categories
|
data/lib/langue/morphemes.rb
CHANGED
@@ -11,12 +11,48 @@ module Langue
|
|
11
11
|
morpheme.nil? || !block_given? ? morpheme : yield(morpheme)
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
# Returns a new Morphemes in which the whitespace found in +text+ between
# (and after) this collection's morphemes is represented by padding
# morphemes built with create_padding.
#
# The morphemes are matched against +text+ from left to right. Whenever the
# remaining text does not start with the next morpheme's text, the run of
# whitespace (TAB, LF, VT, CR or SPACE) at the very head of the remaining
# text is consumed and recorded as ONE padding morpheme holding exactly the
# characters that were consumed. Whitespace left over after the last
# morpheme is recorded the same way.
#
# text - the String these morphemes are expected to cover.
#
# Raises ArgumentError ('maybe different from the original text') when
# +text+ cannot be rebuilt from the morphemes plus whitespace.
def pad(text)
  # \A rather than ^: in Ruby ^ also matches after any newline, which would
  # let whitespace further down the text match and silently drop the
  # characters in front of it. The quantifier is part of the match so the
  # whole run (not just its last character) is returned.
  leading_whitespace = /\A[\x09\x0A\x0B\x0D\x20]+/
  mismatch = 'maybe different from the original text'

  padded = Morphemes.new
  rest = text

  size.times do |index|
    morpheme = self[index]
    morpheme_text = morpheme.text

    # Skip (and record) whitespace until the morpheme lines up with the text.
    until rest.start_with?(morpheme_text)
      whitespace = rest[leading_whitespace]
      raise ArgumentError, mismatch if whitespace.nil?

      padded << create_padding(whitespace)
      rest = rest[whitespace.size..-1]
    end

    padded << morpheme
    rest = rest[morpheme_text.size..-1]
  end

  # Anything left must consist of whitespace only, from start to end.
  unless rest.empty?
    raise ArgumentError, mismatch unless rest[leading_whitespace] == rest

    padded << create_padding(rest)
  end

  padded
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
# Builds the morpheme that stands in for a stretch of whitespace.
#
# whitespace - the whitespace String; it is used both as the text and as
#              the root form of the new morpheme.
#
# Returns a Morpheme whose part of speech is 'padding' and whose category
# list is empty.
def create_padding(whitespace)
  attributes = {
    :text           => whitespace,
    :part_of_speech => 'padding',
    :categories     => [],
    :root_form      => whitespace
  }

  Morpheme.new(attributes)
end
|
21
57
|
end
|
22
58
|
end
|
data/lib/langue/sentence.rb
CHANGED
data/lib/langue/text.rb
CHANGED
@@ -12,11 +12,16 @@ module Langue
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def words
|
15
|
-
@words ||= inject(&:+)
|
15
|
+
@words ||= inject([], &:+)
|
16
16
|
end
|
17
17
|
|
18
18
|
def morphemes
|
19
19
|
@morphemes ||= Morphemes.new(flatten)
|
20
20
|
end
|
21
|
+
|
22
|
+
# Returns the text of every element joined into one String, or nil when the
# collection is empty.
#
# The result is computed on the first call only and cached in @text; the
# instance_variable_defined? check (rather than ||=) keeps a cached nil from
# being recomputed on later calls.
def text
  return @text if instance_variable_defined?(:@text)

  @text = (map(&:text).join unless empty?)
end
|
21
26
|
end
|
22
27
|
end
|
data/lib/langue/version.rb
CHANGED
data/lib/langue/word.rb
CHANGED
@@ -61,4 +61,15 @@ module Langue
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
|
+
|
65
|
+
class Noun < Word; end
|
66
|
+
class Pronoun < Word; end
|
67
|
+
class Adjective < Word; end
|
68
|
+
class Verb < Word; end
|
69
|
+
class Adverb < Word; end
|
70
|
+
class Particle < Word; end
|
71
|
+
class Conjunction < Word; end
|
72
|
+
class Determiner < Word; end
|
73
|
+
class Interjection < Word; end
|
74
|
+
class Period < Word; end
|
64
75
|
end
|
data/spec/langue/morpheme_spec.rb
CHANGED
@@ -47,6 +47,37 @@ describe Langue::Morpheme, ' accessors' do
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
describe Langue::Morpheme, '#==' do
|
51
|
+
before do
|
52
|
+
@attrs = {
|
53
|
+
:text => 'text',
|
54
|
+
:part_of_speech => 'part_of_speech',
|
55
|
+
:categories => %w(category1 category2),
|
56
|
+
:inflection => 'inflection',
|
57
|
+
:inflection_type => 'inflection_type',
|
58
|
+
:root_form => 'root_form',
|
59
|
+
:yomi => 'yomi',
|
60
|
+
:pronunciation => 'pronunciation'
|
61
|
+
}
|
62
|
+
|
63
|
+
@morpheme = described_class.new(@attrs)
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'returns true if the morpheme equals to other morpheme' do
|
67
|
+
other_morpheme = described_class.new(@attrs)
|
68
|
+
@morpheme.should == other_morpheme
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'returns false if the morpheme does not equal to other morpheme' do
|
72
|
+
other_morpheme = described_class.new(@attrs.merge(:pronunciation => 'other_pronunciation'))
|
73
|
+
@morpheme.should_not == other_morpheme
|
74
|
+
end
|
75
|
+
|
76
|
+
it "returns false if other morpheme is not an instance of #{described_class}" do
|
77
|
+
@morpheme.should_not == 'other_morpheme'
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
50
81
|
describe Langue::Morpheme, '#classified?' do
|
51
82
|
before do
|
52
83
|
@morpheme = described_class.new(
|
data/spec/langue/morphemes_spec.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
require 'langue/morphemes'
|
2
3
|
|
3
4
|
describe Langue::Morphemes, '#valid?' do
|
@@ -66,8 +67,72 @@ describe Langue::Morphemes, '#at' do
|
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
69
|
-
|
70
|
-
|
70
|
+
describe Langue::Morphemes, '#pad' do
|
71
|
+
before do
|
72
|
+
@morphemes = morphemes = described_class.new([
|
73
|
+
Langue::Morpheme.new(:text => 'text1'),
|
74
|
+
Langue::Morpheme.new(:text => 'text2'),
|
75
|
+
Langue::Morpheme.new(:text => 'text3')
|
76
|
+
])
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'pads \\x09' do
|
80
|
+
padded_morphemes = @morphemes.pad("text1\x09text2text3\x09")
|
81
|
+
padded_morphemes.should have(5).items
|
82
|
+
morpheme = padded_morphemes[1]
|
83
|
+
morpheme.text.should == "\x09"
|
84
|
+
morpheme.root_form.should == "\x09"
|
85
|
+
morpheme = padded_morphemes[4]
|
86
|
+
morpheme.text.should == "\x09"
|
87
|
+
morpheme.root_form.should == "\x09"
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'pads \\x0A' do
|
91
|
+
padded_morphemes = @morphemes.pad("text1\x0Atext2text3\x0A")
|
92
|
+
padded_morphemes.should have(5).items
|
93
|
+
morpheme = padded_morphemes[1]
|
94
|
+
morpheme.text.should == "\x0A"
|
95
|
+
morpheme.root_form.should == "\x0A"
|
96
|
+
morpheme = padded_morphemes[4]
|
97
|
+
morpheme.text.should == "\x0A"
|
98
|
+
morpheme.root_form.should == "\x0A"
|
99
|
+
end
|
71
100
|
|
72
|
-
|
73
|
-
|
101
|
+
it 'pads \\x0B' do
|
102
|
+
padded_morphemes = @morphemes.pad("text1\x0Btext2text3\x0B")
|
103
|
+
padded_morphemes.should have(5).items
|
104
|
+
morpheme = padded_morphemes[1]
|
105
|
+
morpheme.text.should == "\x0B"
|
106
|
+
morpheme.root_form.should == "\x0B"
|
107
|
+
morpheme = padded_morphemes[4]
|
108
|
+
morpheme.text.should == "\x0B"
|
109
|
+
morpheme.root_form.should == "\x0B"
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'pads \\x0D' do
|
113
|
+
padded_morphemes = @morphemes.pad("text1\x0Dtext2text3\x0D")
|
114
|
+
padded_morphemes.should have(5).items
|
115
|
+
morpheme = padded_morphemes[1]
|
116
|
+
morpheme.text.should == "\x0D"
|
117
|
+
morpheme.root_form.should == "\x0D"
|
118
|
+
morpheme = padded_morphemes[4]
|
119
|
+
morpheme.text.should == "\x0D"
|
120
|
+
morpheme.root_form.should == "\x0D"
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'pads \\x20' do
|
124
|
+
padded_morphemes = @morphemes.pad("text1\x20text2text3\x20")
|
125
|
+
padded_morphemes.should have(5).items
|
126
|
+
morpheme = padded_morphemes[1]
|
127
|
+
morpheme.text.should == "\x20"
|
128
|
+
morpheme.root_form.should == "\x20"
|
129
|
+
morpheme = padded_morphemes[4]
|
130
|
+
morpheme.text.should == "\x20"
|
131
|
+
morpheme.root_form.should == "\x20"
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'raises ArgumentError if there is a difference between the text and the morphemes' do
|
135
|
+
lambda { @morphemes.pad("text1\x20text3text4") }.should raise_error(ArgumentError, 'maybe different from the original text')
|
136
|
+
lambda { @morphemes.pad('text1text2text3text4text5') }.should raise_error(ArgumentError, 'maybe different from the original text')
|
137
|
+
end
|
138
|
+
end
|
data/spec/langue/sentence_spec.rb
CHANGED
@@ -55,3 +55,19 @@ describe Langue::Sentence, '#morphemes' do
|
|
55
55
|
@morphemes.should == [1, 2, 3, 4]
|
56
56
|
end
|
57
57
|
end
|
58
|
+
|
59
|
+
describe Langue::Sentence, '#text' do
|
60
|
+
it 'returns a concatenated string of the text of the words' do
|
61
|
+
sentence = described_class.new([
|
62
|
+
stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word1') },
|
63
|
+
stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word2') }
|
64
|
+
])
|
65
|
+
|
66
|
+
sentence.text.should == 'word1word2'
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'returns nil if the sentence is empty' do
|
70
|
+
sentence = described_class.new
|
71
|
+
sentence.text.should be_nil
|
72
|
+
end
|
73
|
+
end
|
data/spec/langue/text_spec.rb
CHANGED
@@ -54,6 +54,14 @@ describe Langue::Text, '#words' do
|
|
54
54
|
it 'returns the words in the sentences' do
|
55
55
|
@words.should == [[1, 2], [3, 4], [5, 6], [7, 8]]
|
56
56
|
end
|
57
|
+
|
58
|
+
context 'with an empty text' do
|
59
|
+
it 'returns an empty array' do
|
60
|
+
text = described_class.new([])
|
61
|
+
text.words.should be_an(Array)
|
62
|
+
text.words.should be_empty
|
63
|
+
end
|
64
|
+
end
|
57
65
|
end
|
58
66
|
|
59
67
|
describe Langue::Text, '#morphemes' do
|
@@ -70,3 +78,19 @@ describe Langue::Text, '#morphemes' do
|
|
70
78
|
@morphemes.should == [1, 2, 3, 4, 5, 6, 7, 8]
|
71
79
|
end
|
72
80
|
end
|
81
|
+
|
82
|
+
describe Langue::Text, '#text' do
|
83
|
+
it 'returns a concatenated string of the text of the sentences' do
|
84
|
+
text = described_class.new([
|
85
|
+
stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence1') },
|
86
|
+
stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence2') }
|
87
|
+
])
|
88
|
+
|
89
|
+
text.text.should == 'sentence1sentence2'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'returns nil if the text is empty' do
|
93
|
+
text = described_class.new
|
94
|
+
text.text.should be_nil
|
95
|
+
end
|
96
|
+
end
|
data/spec/langue/word_spec.rb
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
require 'langue/word'
|
2
2
|
|
3
|
+
describe Langue do
|
4
|
+
%w(
|
5
|
+
Noun
|
6
|
+
Pronoun
|
7
|
+
Adjective
|
8
|
+
Verb
|
9
|
+
Adverb
|
10
|
+
Particle
|
11
|
+
Conjunction
|
12
|
+
Determiner
|
13
|
+
Interjection
|
14
|
+
Period
|
15
|
+
).each do |part_of_speech|
|
16
|
+
it "has #{described_class}::#{part_of_speech}" do
|
17
|
+
Langue.should be_const_defined(part_of_speech)
|
18
|
+
end
|
19
|
+
|
20
|
+
describe part_of_speech do
|
21
|
+
it 'inherits Langue::Word' do
|
22
|
+
Langue.const_get(part_of_speech).superclass.should == Langue::Word
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
3
28
|
describe Langue::Word, '#valid?' do
|
4
29
|
before do
|
5
30
|
@word = described_class.new([
|
@@ -36,7 +61,7 @@ describe Langue::Word, '#morphemes' do
|
|
36
61
|
@morphemes.should be_a Langue::Morphemes
|
37
62
|
end
|
38
63
|
|
39
|
-
it 'returns ' do
|
64
|
+
it 'returns own morphemes' do
|
40
65
|
@morphemes.should == [1, 2, 3]
|
41
66
|
end
|
42
67
|
end
|
@@ -64,18 +89,19 @@ describe Langue::Word, '#key_morpheme' do
|
|
64
89
|
end
|
65
90
|
|
66
91
|
describe Langue::Word, '#text' do
|
67
|
-
|
92
|
+
it 'returns a concatenated string of the text of the morphemes' do
|
68
93
|
word = described_class.new([
|
69
94
|
stub.tap { |s| s.stub!(:text).and_return('text1') },
|
70
95
|
stub.tap { |s| s.stub!(:text).and_return('text2') },
|
71
96
|
stub.tap { |s| s.stub!(:text).and_return('text3') }
|
72
97
|
])
|
73
98
|
|
74
|
-
|
99
|
+
word.text.should == 'text1text2text3'
|
75
100
|
end
|
76
101
|
|
77
|
-
it 'returns
|
78
|
-
|
102
|
+
it 'returns nil if the word is empty' do
|
103
|
+
word = described_class.new
|
104
|
+
word.text.should be_nil
|
79
105
|
end
|
80
106
|
end
|
81
107
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langue
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project:
|
96
|
-
rubygems_version: 1.8.
|
96
|
+
rubygems_version: 1.8.24
|
97
97
|
signing_key:
|
98
98
|
specification_version: 3
|
99
99
|
summary: The foundation for the natural languages
|