langue 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/langue/morpheme.rb +5 -0
- data/lib/langue/morphemes.rb +42 -6
- data/lib/langue/sentence.rb +5 -0
- data/lib/langue/text.rb +6 -1
- data/lib/langue/version.rb +1 -1
- data/lib/langue/word.rb +11 -0
- data/spec/langue/morpheme_spec.rb +31 -0
- data/spec/langue/morphemes_spec.rb +69 -4
- data/spec/langue/sentence_spec.rb +16 -0
- data/spec/langue/text_spec.rb +24 -0
- data/spec/langue/word_spec.rb +31 -5
- metadata +3 -3
data/lib/langue/morpheme.rb
CHANGED
@@ -17,6 +17,11 @@ module Langue
|
|
17
17
|
|
18
18
|
attr_reader(*KEYS)
|
19
19
|
|
20
|
+
def ==(other_morpheme)
|
21
|
+
return false unless other_morpheme.is_a?(self.class)
|
22
|
+
KEYS.all? { |key| self.__send__(key) == other_morpheme.__send__(key) }
|
23
|
+
end
|
24
|
+
|
20
25
|
def classified?(part_of_speech, *categories)
|
21
26
|
got = [@part_of_speech] + @categories
|
22
27
|
expected = [part_of_speech] + categories
|
data/lib/langue/morphemes.rb
CHANGED
@@ -11,12 +11,48 @@ module Langue
|
|
11
11
|
morpheme.nil? || !block_given? ? morpheme : yield(morpheme)
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
def pad(text)
|
15
|
+
whitespace_regexp = /^([\x09\x0A\x0B\x0D\x20])+/
|
16
|
+
differece_error = ArgumentError.new('maybe different from the original text')
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
morphemes = Morphemes.new
|
19
|
+
index = 0
|
20
|
+
|
21
|
+
while index < size
|
22
|
+
morpheme = self[index]
|
23
|
+
morpheme_text = morpheme.text
|
24
|
+
|
25
|
+
if text[0, morpheme_text.size] == morpheme_text
|
26
|
+
text = text[morpheme_text.size..-1]
|
27
|
+
morphemes << morpheme
|
28
|
+
index += 1
|
29
|
+
elsif text =~ whitespace_regexp
|
30
|
+
whitespace = $1
|
31
|
+
text = text[whitespace.size..-1]
|
32
|
+
morphemes << create_padding(whitespace)
|
33
|
+
else
|
34
|
+
raise differece_error
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
if text =~ /#{whitespace_regexp}$/
|
39
|
+
morphemes << create_padding(text)
|
40
|
+
elsif !text.empty?
|
41
|
+
raise differece_error
|
42
|
+
end
|
43
|
+
|
44
|
+
morphemes
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def create_padding(whitespace)
|
50
|
+
Morpheme.new(
|
51
|
+
:text => whitespace,
|
52
|
+
:part_of_speech => 'padding',
|
53
|
+
:categories => [],
|
54
|
+
:root_form => whitespace
|
55
|
+
)
|
56
|
+
end
|
21
57
|
end
|
22
58
|
end
|
data/lib/langue/sentence.rb
CHANGED
data/lib/langue/text.rb
CHANGED
@@ -12,11 +12,16 @@ module Langue
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def words
|
15
|
-
@words ||= inject(&:+)
|
15
|
+
@words ||= inject([], &:+)
|
16
16
|
end
|
17
17
|
|
18
18
|
def morphemes
|
19
19
|
@morphemes ||= Morphemes.new(flatten)
|
20
20
|
end
|
21
|
+
|
22
|
+
def text
|
23
|
+
@text = empty? ? nil : map(&:text).join unless instance_variable_defined?(:@text)
|
24
|
+
@text
|
25
|
+
end
|
21
26
|
end
|
22
27
|
end
|
data/lib/langue/version.rb
CHANGED
data/lib/langue/word.rb
CHANGED
@@ -61,4 +61,15 @@ module Langue
|
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
64
|
+
|
65
|
+
class Noun < Word; end
|
66
|
+
class Pronoun < Word; end
|
67
|
+
class Adjective < Word; end
|
68
|
+
class Verb < Word; end
|
69
|
+
class Adverb < Word; end
|
70
|
+
class Particle < Word; end
|
71
|
+
class Conjunction < Word; end
|
72
|
+
class Determiner < Word; end
|
73
|
+
class Interjection < Word; end
|
74
|
+
class Period < Word; end
|
64
75
|
end
|
data/spec/langue/morpheme_spec.rb
CHANGED
@@ -47,6 +47,37 @@ describe Langue::Morpheme, ' accessors' do
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
describe Langue::Morpheme, '#==' do
|
51
|
+
before do
|
52
|
+
@attrs = {
|
53
|
+
:text => 'text',
|
54
|
+
:part_of_speech => 'part_of_speech',
|
55
|
+
:categories => %w(category1 category2),
|
56
|
+
:inflection => 'inflection',
|
57
|
+
:inflection_type => 'inflection_type',
|
58
|
+
:root_form => 'root_form',
|
59
|
+
:yomi => 'yomi',
|
60
|
+
:pronunciation => 'pronunciation'
|
61
|
+
}
|
62
|
+
|
63
|
+
@morpheme = described_class.new(@attrs)
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'returns true if the morpheme equals to other morpheme' do
|
67
|
+
other_morpheme = described_class.new(@attrs)
|
68
|
+
@morpheme.should == other_morpheme
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'returns false if the morpheme does not equal to other morpheme' do
|
72
|
+
other_morpheme = described_class.new(@attrs.merge(:pronunciation => 'other_pronunciation'))
|
73
|
+
@morpheme.should_not == other_morpheme
|
74
|
+
end
|
75
|
+
|
76
|
+
it "returns false if other morpheme is not an instance of #{described_class}" do
|
77
|
+
@morpheme.should_not == 'other_morpheme'
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
50
81
|
describe Langue::Morpheme, '#classified?' do
|
51
82
|
before do
|
52
83
|
@morpheme = described_class.new(
|
data/spec/langue/morphemes_spec.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
require 'langue/morphemes'
|
2
3
|
|
3
4
|
describe Langue::Morphemes, '#valid?' do
|
@@ -66,8 +67,72 @@ describe Langue::Morphemes, '#at' do
|
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
69
|
-
|
70
|
-
|
70
|
+
describe Langue::Morphemes, '#pad' do
|
71
|
+
before do
|
72
|
+
@morphemes = morphemes = described_class.new([
|
73
|
+
Langue::Morpheme.new(:text => 'text1'),
|
74
|
+
Langue::Morpheme.new(:text => 'text2'),
|
75
|
+
Langue::Morpheme.new(:text => 'text3')
|
76
|
+
])
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'pads \\x09' do
|
80
|
+
padded_morphemes = @morphemes.pad("text1\x09text2text3\x09")
|
81
|
+
padded_morphemes.should have(5).items
|
82
|
+
morpheme = padded_morphemes[1]
|
83
|
+
morpheme.text.should == "\x09"
|
84
|
+
morpheme.root_form.should == "\x09"
|
85
|
+
morpheme = padded_morphemes[4]
|
86
|
+
morpheme.text.should == "\x09"
|
87
|
+
morpheme.root_form.should == "\x09"
|
88
|
+
end
|
89
|
+
|
90
|
+
it 'pads \\x0A' do
|
91
|
+
padded_morphemes = @morphemes.pad("text1\x0Atext2text3\x0A")
|
92
|
+
padded_morphemes.should have(5).items
|
93
|
+
morpheme = padded_morphemes[1]
|
94
|
+
morpheme.text.should == "\x0A"
|
95
|
+
morpheme.root_form.should == "\x0A"
|
96
|
+
morpheme = padded_morphemes[4]
|
97
|
+
morpheme.text.should == "\x0A"
|
98
|
+
morpheme.root_form.should == "\x0A"
|
99
|
+
end
|
71
100
|
|
72
|
-
|
73
|
-
|
101
|
+
it 'pads \\x0B' do
|
102
|
+
padded_morphemes = @morphemes.pad("text1\x0Btext2text3\x0B")
|
103
|
+
padded_morphemes.should have(5).items
|
104
|
+
morpheme = padded_morphemes[1]
|
105
|
+
morpheme.text.should == "\x0B"
|
106
|
+
morpheme.root_form.should == "\x0B"
|
107
|
+
morpheme = padded_morphemes[4]
|
108
|
+
morpheme.text.should == "\x0B"
|
109
|
+
morpheme.root_form.should == "\x0B"
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'pads \\x0D' do
|
113
|
+
padded_morphemes = @morphemes.pad("text1\x0Dtext2text3\x0D")
|
114
|
+
padded_morphemes.should have(5).items
|
115
|
+
morpheme = padded_morphemes[1]
|
116
|
+
morpheme.text.should == "\x0D"
|
117
|
+
morpheme.root_form.should == "\x0D"
|
118
|
+
morpheme = padded_morphemes[4]
|
119
|
+
morpheme.text.should == "\x0D"
|
120
|
+
morpheme.root_form.should == "\x0D"
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'pads \\x20' do
|
124
|
+
padded_morphemes = @morphemes.pad("text1\x20text2text3\x20")
|
125
|
+
padded_morphemes.should have(5).items
|
126
|
+
morpheme = padded_morphemes[1]
|
127
|
+
morpheme.text.should == "\x20"
|
128
|
+
morpheme.root_form.should == "\x20"
|
129
|
+
morpheme = padded_morphemes[4]
|
130
|
+
morpheme.text.should == "\x20"
|
131
|
+
morpheme.root_form.should == "\x20"
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'raises ArgumentError if there is a difference between the text and the morphemes' do
|
135
|
+
lambda { @morphemes.pad("text1\x20text3text4") }.should raise_error(ArgumentError, 'maybe different from the original text')
|
136
|
+
lambda { @morphemes.pad('text1text2text3text4text5') }.should raise_error(ArgumentError, 'maybe different from the original text')
|
137
|
+
end
|
138
|
+
end
|
data/spec/langue/sentence_spec.rb
CHANGED
@@ -55,3 +55,19 @@ describe Langue::Sentence, '#morphemes' do
|
|
55
55
|
@morphemes.should == [1, 2, 3, 4]
|
56
56
|
end
|
57
57
|
end
|
58
|
+
|
59
|
+
describe Langue::Sentence, '#text' do
|
60
|
+
it 'returns a concatenated string of the text of the words' do
|
61
|
+
sentence = described_class.new([
|
62
|
+
stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word1') },
|
63
|
+
stub(Langue::Word).tap { |s| s.stub!(:text).and_return('word2') }
|
64
|
+
])
|
65
|
+
|
66
|
+
sentence.text.should == 'word1word2'
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'returns nil if the sentence is empty' do
|
70
|
+
sentence = described_class.new
|
71
|
+
sentence.text.should be_nil
|
72
|
+
end
|
73
|
+
end
|
data/spec/langue/text_spec.rb
CHANGED
@@ -54,6 +54,14 @@ describe Langue::Text, '#words' do
|
|
54
54
|
it 'returns the words in the sentences' do
|
55
55
|
@words.should == [[1, 2], [3, 4], [5, 6], [7, 8]]
|
56
56
|
end
|
57
|
+
|
58
|
+
context 'with an empty text' do
|
59
|
+
it 'returns an empty array' do
|
60
|
+
text = described_class.new([])
|
61
|
+
text.words.should be_an(Array)
|
62
|
+
text.words.should be_empty
|
63
|
+
end
|
64
|
+
end
|
57
65
|
end
|
58
66
|
|
59
67
|
describe Langue::Text, '#morphemes' do
|
@@ -70,3 +78,19 @@ describe Langue::Text, '#morphemes' do
|
|
70
78
|
@morphemes.should == [1, 2, 3, 4, 5, 6, 7, 8]
|
71
79
|
end
|
72
80
|
end
|
81
|
+
|
82
|
+
describe Langue::Text, '#text' do
|
83
|
+
it 'returns a concatenated string of the text of the sentences' do
|
84
|
+
text = described_class.new([
|
85
|
+
stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence1') },
|
86
|
+
stub(Langue::Sentence).tap { |s| s.stub!(:text).and_return('sentence2') }
|
87
|
+
])
|
88
|
+
|
89
|
+
text.text.should == 'sentence1sentence2'
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'returns nil if the text is empty' do
|
93
|
+
text = described_class.new
|
94
|
+
text.text.should be_nil
|
95
|
+
end
|
96
|
+
end
|
data/spec/langue/word_spec.rb
CHANGED
@@ -1,5 +1,30 @@
|
|
1
1
|
require 'langue/word'
|
2
2
|
|
3
|
+
describe Langue do
|
4
|
+
%w(
|
5
|
+
Noun
|
6
|
+
Pronoun
|
7
|
+
Adjective
|
8
|
+
Verb
|
9
|
+
Adverb
|
10
|
+
Particle
|
11
|
+
Conjunction
|
12
|
+
Determiner
|
13
|
+
Interjection
|
14
|
+
Period
|
15
|
+
).each do |part_of_speech|
|
16
|
+
it "has #{described_class}::#{part_of_speech}" do
|
17
|
+
Langue.should be_const_defined(part_of_speech)
|
18
|
+
end
|
19
|
+
|
20
|
+
describe part_of_speech do
|
21
|
+
it 'inherits Langue::Word' do
|
22
|
+
Langue.const_get(part_of_speech).superclass.should == Langue::Word
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
3
28
|
describe Langue::Word, '#valid?' do
|
4
29
|
before do
|
5
30
|
@word = described_class.new([
|
@@ -36,7 +61,7 @@ describe Langue::Word, '#morphemes' do
|
|
36
61
|
@morphemes.should be_a Langue::Morphemes
|
37
62
|
end
|
38
63
|
|
39
|
-
it 'returns ' do
|
64
|
+
it 'returns own morphemes' do
|
40
65
|
@morphemes.should == [1, 2, 3]
|
41
66
|
end
|
42
67
|
end
|
@@ -64,18 +89,19 @@ describe Langue::Word, '#key_morpheme' do
|
|
64
89
|
end
|
65
90
|
|
66
91
|
describe Langue::Word, '#text' do
|
67
|
-
|
92
|
+
it 'returns a concatenated string of the text of the morphemes' do
|
68
93
|
word = described_class.new([
|
69
94
|
stub.tap { |s| s.stub!(:text).and_return('text1') },
|
70
95
|
stub.tap { |s| s.stub!(:text).and_return('text2') },
|
71
96
|
stub.tap { |s| s.stub!(:text).and_return('text3') }
|
72
97
|
])
|
73
98
|
|
74
|
-
|
99
|
+
word.text.should == 'text1text2text3'
|
75
100
|
end
|
76
101
|
|
77
|
-
it 'returns
|
78
|
-
|
102
|
+
it 'returns nil if the word is empty' do
|
103
|
+
word = described_class.new
|
104
|
+
word.text.should be_nil
|
79
105
|
end
|
80
106
|
end
|
81
107
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langue
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project:
|
96
|
-
rubygems_version: 1.8.
|
96
|
+
rubygems_version: 1.8.24
|
97
97
|
signing_key:
|
98
98
|
specification_version: 3
|
99
99
|
summary: The foundation for the natural languages
|