langue 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,6 +17,11 @@ module Langue
17
17
 
18
18
  attr_reader(*KEYS)
19
19
 
20
# Attribute-wise equality.
#
# The other object must be an instance of this object's class (or of a
# subclass); after that, every reader named in KEYS has to return an equal
# value on both sides.
def ==(other_morpheme)
  other_morpheme.is_a?(self.class) &&
    KEYS.all? { |attribute| __send__(attribute) == other_morpheme.__send__(attribute) }
end
24
+
20
25
  def classified?(part_of_speech, *categories)
21
26
  got = [@part_of_speech] + @categories
22
27
  expected = [part_of_speech] + categories
@@ -11,12 +11,48 @@ module Langue
11
11
  morpheme.nil? || !block_given? ? morpheme : yield(morpheme)
12
12
  end
13
13
 
14
- # def match?(index, text)
15
- # at(index) {|morpheme| morpheme.text == text}
16
- # end
14
# Returns a new Morphemes in which the whitespace found in +text+ between
# (and after) this collection's morphemes is represented by padding
# morphemes, so that joining the texts of the result reproduces +text+.
#
# text - the String the morphemes are expected to spell out, in order,
#        optionally separated by runs of \x09, \x0A, \x0B, \x0D or \x20.
#
# Each contiguous run of whitespace becomes a single padding morpheme,
# which is how trailing whitespace is handled as well.
#
# Raises ArgumentError when +text+ contains anything other than the
# morphemes' texts and whitespace.
def pad(text)
  # \A and \z anchor to the string itself. ^ and $ would anchor to any
  # line, letting whitespace further down a multi-line text be matched
  # while characters are stripped from the front.
  leading_whitespace = /\A[\x09\x0A\x0B\x0D\x20]+/
  only_whitespace = /\A[\x09\x0A\x0B\x0D\x20]+\z/
  difference_message = 'maybe different from the original text'

  morphemes = Morphemes.new
  index = 0

  while index < size
    morpheme = self[index]
    morpheme_text = morpheme.text

    if text[0, morpheme_text.size] == morpheme_text
      text = text[morpheme_text.size..-1]
      morphemes << morpheme
      index += 1
    elsif (whitespace = text[leading_whitespace])
      # The whole run is taken (not just its last character), so the
      # padding text is exactly what gets removed from +text+.
      text = text[whitespace.size..-1]
      morphemes << create_padding(whitespace)
    else
      raise ArgumentError, difference_message
    end
  end

  # Whatever remains after the last morpheme must be pure whitespace.
  if text =~ only_whitespace
    morphemes << create_padding(text)
  elsif !text.empty?
    raise ArgumentError, difference_message
  end

  morphemes
end
46
+
47
+ private
48
+
49
# Builds the morpheme that stands in for a piece of whitespace: its text and
# root form are both the whitespace itself, its part of speech is 'padding'
# and it carries no categories.
def create_padding(whitespace)
  padding_attributes = {
    :text => whitespace,
    :part_of_speech => 'padding',
    :categories => [],
    :root_form => whitespace
  }

  Morpheme.new(padding_attributes)
end
21
57
  end
22
58
  end
@@ -14,5 +14,10 @@ module Langue
14
14
  def morphemes
15
15
  @morphemes ||= Morphemes.new(flatten)
16
16
  end
17
+
18
# Concatenation of the +text+ of every element, or nil when the collection
# is empty.
#
# The result is cached. The cache is detected with
# instance_variable_defined? rather than truthiness, so a nil result is
# remembered as well.
def text
  return @text if instance_variable_defined?(:@text)

  @text = empty? ? nil : map { |element| element.text }.join
end
17
22
  end
18
23
  end
@@ -12,11 +12,16 @@ module Langue
12
12
  end
13
13
 
14
14
  def words
15
- @words ||= inject(&:+)
15
+ @words ||= inject([], &:+)
16
16
  end
17
17
 
18
18
  def morphemes
19
19
  @morphemes ||= Morphemes.new(flatten)
20
20
  end
21
+
22
# Concatenation of the +text+ of every element, or nil when the collection
# is empty.
#
# The result is cached. The cache is detected with
# instance_variable_defined? rather than truthiness, so a nil result is
# remembered as well.
def text
  return @text if instance_variable_defined?(:@text)

  @text = empty? ? nil : map { |element| element.text }.join
end
21
26
  end
22
27
  end
@@ -1,3 +1,3 @@
1
1
  module Langue
2
- VERSION = '0.0.3'
2
+ VERSION = '0.0.4'
3
3
  end
@@ -61,4 +61,15 @@ module Langue
61
61
  end
62
62
  end
63
63
  end
64
+
65
# Part-of-speech specific words. Each class only tags a Word with its part
# of speech; none of them adds behaviour of its own here.
class Noun < Word
end

class Pronoun < Word
end

class Adjective < Word
end

class Verb < Word
end

class Adverb < Word
end

class Particle < Word
end

class Conjunction < Word
end

class Determiner < Word
end

class Interjection < Word
end

class Period < Word
end
64
75
  end
@@ -47,6 +47,37 @@ describe Langue::Morpheme, ' accessors' do
47
47
  end
48
48
  end
49
49
 
50
# Equality of morphemes is decided attribute by attribute.
describe Langue::Morpheme, '#==' do
  # A full set of attributes shared by the examples below.
  let(:attrs) do
    {
      :text => 'text',
      :part_of_speech => 'part_of_speech',
      :categories => %w(category1 category2),
      :inflection => 'inflection',
      :inflection_type => 'inflection_type',
      :root_form => 'root_form',
      :yomi => 'yomi',
      :pronunciation => 'pronunciation'
    }
  end

  let(:morpheme) { described_class.new(attrs) }

  it 'returns true if the morpheme equals to other morpheme' do
    morpheme.should == described_class.new(attrs)
  end

  it 'returns false if the morpheme does not equal to other morpheme' do
    changed_attrs = attrs.merge(:pronunciation => 'other_pronunciation')
    morpheme.should_not == described_class.new(changed_attrs)
  end

  it "returns false if other morpheme is not an instance of #{described_class}" do
    morpheme.should_not == 'other_morpheme'
  end
end
80
+
50
81
  describe Langue::Morpheme, '#classified?' do
51
82
  before do
52
83
  @morpheme = described_class.new(
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'langue/morphemes'
2
3
 
3
4
  describe Langue::Morphemes, '#valid?' do
@@ -66,8 +67,72 @@ describe Langue::Morphemes, '#at' do
66
67
  end
67
68
  end
68
69
 
69
- # describe Langue::Morphemes, '#match?' do
70
- # end
70
# Padding re-inserts the whitespace of the original text between morphemes.
describe Langue::Morphemes, '#pad' do
  before do
    @morphemes = described_class.new([
      Langue::Morpheme.new(:text => 'text1'),
      Langue::Morpheme.new(:text => 'text2'),
      Langue::Morpheme.new(:text => 'text3')
    ])
  end

  # One example per supported whitespace character: the hex code used in the
  # example name, paired with the character itself.
  [
    ['09', "\x09"],
    ['0A', "\x0A"],
    ['0B', "\x0B"],
    ['0D', "\x0D"],
    ['20', "\x20"]
  ].each do |hex_code, whitespace|
    it "pads \\x#{hex_code}" do
      padded_morphemes = @morphemes.pad("text1#{whitespace}text2text3#{whitespace}")
      padded_morphemes.should have(5).items

      # Padding is expected after the first morpheme and at the very end.
      [1, 4].each do |position|
        padding = padded_morphemes[position]
        padding.text.should == whitespace
        padding.root_form.should == whitespace
      end
    end
  end

  it 'raises ArgumentError if there is a difference between the text and the morphemes' do
    message = 'maybe different from the original text'

    lambda { @morphemes.pad("text1\x20text3text4") }.should raise_error(ArgumentError, message)
    lambda { @morphemes.pad('text1text2text3text4text5') }.should raise_error(ArgumentError, message)
  end
end
@@ -55,3 +55,19 @@ describe Langue::Sentence, '#morphemes' do
55
55
  @morphemes.should == [1, 2, 3, 4]
56
56
  end
57
57
  end
58
+
59
# A sentence's text is the texts of its words joined together.
describe Langue::Sentence, '#text' do
  it 'returns a concatenated string of the text of the words' do
    words = %w(word1 word2).map do |word_text|
      stub(Langue::Word).tap { |word| word.stub!(:text).and_return(word_text) }
    end

    described_class.new(words).text.should == 'word1word2'
  end

  it 'returns nil if the sentence is empty' do
    described_class.new.text.should be_nil
  end
end
@@ -54,6 +54,14 @@ describe Langue::Text, '#words' do
54
54
  it 'returns the words in the sentences' do
55
55
  @words.should == [[1, 2], [3, 4], [5, 6], [7, 8]]
56
56
  end
57
+
58
# With nothing to concatenate, #words must still hand back an array.
context 'with an empty text' do
  it 'returns an empty array' do
    words = described_class.new([]).words
    words.should be_an(Array)
    words.should be_empty
  end
end
57
65
  end
58
66
 
59
67
  describe Langue::Text, '#morphemes' do
@@ -70,3 +78,19 @@ describe Langue::Text, '#morphemes' do
70
78
  @morphemes.should == [1, 2, 3, 4, 5, 6, 7, 8]
71
79
  end
72
80
  end
81
+
82
# A text's text is the texts of its sentences joined together.
describe Langue::Text, '#text' do
  it 'returns a concatenated string of the text of the sentences' do
    sentences = %w(sentence1 sentence2).map do |sentence_text|
      stub(Langue::Sentence).tap { |sentence| sentence.stub!(:text).and_return(sentence_text) }
    end

    described_class.new(sentences).text.should == 'sentence1sentence2'
  end

  it 'returns nil if the text is empty' do
    described_class.new.text.should be_nil
  end
end
@@ -1,5 +1,30 @@
1
1
  require 'langue/word'
2
2
 
3
# Every part of speech must exist as a constant under Langue and be a direct
# subclass of Langue::Word.
describe Langue do
  parts_of_speech = %w(
    Noun Pronoun Adjective Verb Adverb
    Particle Conjunction Determiner Interjection Period
  )

  parts_of_speech.each do |part_of_speech|
    it "has #{described_class}::#{part_of_speech}" do
      Langue.should be_const_defined(part_of_speech)
    end

    describe part_of_speech do
      it 'inherits Langue::Word' do
        word_class = Langue.const_get(part_of_speech)
        word_class.superclass.should == Langue::Word
      end
    end
  end
end
27
+
3
28
  describe Langue::Word, '#valid?' do
4
29
  before do
5
30
  @word = described_class.new([
@@ -36,7 +61,7 @@ describe Langue::Word, '#morphemes' do
36
61
  @morphemes.should be_a Langue::Morphemes
37
62
  end
38
63
 
39
it 'returns own morphemes' do
  # The morphemes given to the word come back unchanged and in order.
  expected_morphemes = [1, 2, 3]
  @morphemes.should == expected_morphemes
end
42
67
  end
@@ -64,18 +89,19 @@ describe Langue::Word, '#key_morpheme' do
64
89
  end
65
90
 
66
91
  describe Langue::Word, '#text' do
67
- before do
92
+ it 'returns a concatenated string of the text of the morphemes' do
68
93
  word = described_class.new([
69
94
  stub.tap { |s| s.stub!(:text).and_return('text1') },
70
95
  stub.tap { |s| s.stub!(:text).and_return('text2') },
71
96
  stub.tap { |s| s.stub!(:text).and_return('text3') }
72
97
  ])
73
98
 
74
- @text = word.text
99
+ word.text.should == 'text1text2text3'
75
100
  end
76
101
 
77
- it 'returns a concatenated string of the text of the morphemes' do
78
- @text.should == 'text1text2text3'
102
+ it 'returns nil if the word is empty' do
103
+ word = described_class.new
104
+ word.text.should be_nil
79
105
  end
80
106
  end
81
107
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langue
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-13 00:00:00.000000000 Z
12
+ date: 2012-09-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project:
96
- rubygems_version: 1.8.21
96
+ rubygems_version: 1.8.24
97
97
  signing_key:
98
98
  specification_version: 3
99
99
  summary: The foundation for the natural languages