myaso 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,84 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of Myaso::Lexicon: word and (word, tag) counters, the lazily
# built tag summary, iteration and equality.
#
# Plain assert_*/refute_* calls are used instead of expectations invoked
# directly on the value (`value.must_equal`), because minitest deprecates
# those global expectations.
describe Myaso::Lexicon do
  subject { Myaso::Lexicon.new }

  describe '#new' do
    it 'should be empty' do
      assert_empty subject.table
    end

    it 'should not initialize @tags' do
      assert_equal false, subject.instance_variable_defined?(:@tags)
    end
  end

  describe '#[]' do
    it 'should treat unknown words as zeroes' do
      assert_equal 0, subject['lopata']
    end

    it 'should treat unknown words and tags as zeroes' do
      assert_equal 0, subject['lopata', 'dno']
    end

    it 'should modify a word' do
      subject['lopata'] = 1
      assert_equal 1, subject['lopata']
    end

    it 'should modify a word with tag' do
      subject['lopata', 'dno'] = 2
      assert_equal 2, subject['lopata', 'dno']
    end
  end

  describe '#tags' do
    it 'should perform lazy initialization' do
      # @tags must not exist until #tags is called for the first time.
      assert_equal false, subject.instance_variable_defined?(:@tags)
      subject.tags
      refute_nil subject.instance_variable_get(:@tags)
    end

    it 'should collect global tag counts' do
      subject['lopata', 'dno'] = 1
      subject['lopata', 'bydlow'] = 2
      assert_equal({ 'dno' => 1, 'bydlow' => 2 }, subject.tags)
    end

    it 'should be invalidated after the value assignment' do
      # Reading #tags first, then assigning, must not leave a stale summary.
      assert_empty subject.tags
      subject['lopata', 'dno'] = 1
      assert_equal({ 'dno' => 1 }, subject.tags)
    end

    it 'should return tags of the given word' do
      assert_empty subject.tags('lopata')
      subject['lopata', 'dno'] = 1
      assert_equal %w(dno), subject.tags('lopata')
    end
  end

  describe '#each' do
    it 'should iterate over the internal table' do
      assert_equal subject.table.to_a, subject.each.to_a
    end
  end

  describe '#==' do
    let(:other) { Myaso::Lexicon.new }

    it 'should be equal to a new instance when not modified' do
      assert_equal other, subject
    end

    it 'should check equality by internal tables' do
      subject['lopata', 'dno'] = 1
      refute_equal other, subject
      other['lopata', 'dno'] = 1
      assert_equal other, subject
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of Myaso::Mystem: analysis of dictionary and
# out-of-dictionary ("bastard") words, form enumeration and inflection.
#
# Plain assert_* calls are used instead of expectations invoked directly on
# the value, because minitest deprecates those global expectations.
describe Myaso::Mystem do
  describe 'analysis of dictionary words' do
    subject { Myaso::Mystem.analyze('СТАЛИ') }

    it 'is ambiguous' do
      assert_equal 2, subject.length
    end

    it 'is a dictionary word' do
      subject.each { |s| assert_equal :dictionary, s.quality }
    end

    it 'lemmatizes' do
      assert_equal %w(сталь становиться), subject.map(&:lemma).sort
    end

    it 'normalizes' do
      subject.each { |s| assert_equal 'стали', s.form }
    end

    it 'analyzes' do
      assert_equal %w(noun verb), subject.map { |s| s.msd.pos.to_s }.sort
    end
  end

  describe 'analysis of bastard words' do
    subject { Myaso::Mystem.analyze('дОлБоЯщЕрА') }

    it 'is unambiguous' do
      assert_equal 1, subject.length
    end

    # The description used to read 'is really a dictionary word', which
    # contradicted the :bastard quality asserted below.
    it 'is a bastard word' do
      assert_equal :bastard, subject.first.quality
    end

    it 'lemmatizes' do
      assert_equal 'долбоящер', subject.first.lemma
    end

    it 'normalizes' do
      assert_equal 'долбоящера', subject.first.form
    end

    it 'analyzes' do
      assert_equal :noun, subject.first.msd.pos
    end
  end

  describe 'form enumeration' do
    let(:lemma) { Myaso::Mystem.analyze('человеком').first }

    subject { Myaso::Mystem.forms('человеком', 3890) }

    it 'enumerates forms' do
      assert_equal 14, subject.length
    end

    it 'works for lemmas' do
      assert_equal lemma.forms, subject
    end
  end

  describe 'inflection' do
    let(:lemma) { Myaso::Mystem.analyze('людьми').first }

    subject { lemma.inflect(:number => :plural, :case => :dative) }

    it 'is ambiguous' do
      assert_equal 2, subject.length
    end

    it 'inflects' do
      # Non-destructive map/sort: the memoised subject is left untouched.
      assert_equal %w(людям человекам), subject.map(&:form).sort
    end
  end
end
@@ -0,0 +1,97 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of Myaso::Ngrams: unigram, bigram and trigram counters,
# equality and iteration.
#
# Plain assert_*/refute_* calls are used instead of expectations invoked
# directly on the value, because minitest deprecates those global
# expectations.
describe Myaso::Ngrams do
  let(:tags) { %w(D V N) }
  let(:unigrams) { tags }
  let(:bigrams) { tags + [nil] }
  let(:trigrams) { tags + [nil] }

  subject { Myaso::Ngrams.new }

  describe '#new' do
    it 'should be full of zeroes' do
      # Every (unigram, bigram, trigram) combination, in nested-loop order.
      unigrams.product(bigrams, trigrams).each do |u, b, t|
        assert_equal 0, subject[u, b, t]
      end
    end

    it 'should be empty' do
      assert_equal 0, subject.unigrams_count
    end
  end

  describe '#[]' do
    it 'should treat unset unigrams as zeroes' do
      assert_equal 0, subject['D']
    end

    it 'should treat unset bigrams as zeroes' do
      assert_equal 0, subject['D', 'V']
    end

    it 'should treat unset trigrams as zeroes' do
      assert_equal 0, subject['D', 'V', 'N']
    end

    it 'should modify an unigram' do
      subject['D'] = 1
      assert_equal 1, subject['D']
      assert_equal 1, subject.unigrams_count
    end

    it 'should modify a bigram' do
      subject['D', 'N'] = 2
      assert_equal 2, subject['D', 'N']
    end

    it 'should modify a trigram' do
      subject['D', 'N', 'V'] = 3
      assert_equal 3, subject['D', 'N', 'V']
    end
  end

  describe '#==' do
    let(:other) { Myaso::Ngrams.new }

    it 'should be equal to a new instance when not modified' do
      assert_equal other, subject
    end

    it 'should check equality by internal tables' do
      subject['D', 'N', 'V'] = 1
      refute_equal other, subject
      other['D', 'N', 'V'] = 1
      assert_equal other, subject
    end
  end

  describe '#each' do
    before do
      subject['D'] = 1
      subject['N'] = 2
      subject['D', 'N'] = 3
      subject['V', 'D'] = 4
      subject['D', 'N', 'V'] = 5
      subject['N', 'V', 'D'] = 6
    end

    it 'should iterate over the internal table' do
      expected = [
        ['D', { nil => { nil => 1 }, 'N' => { nil => 3, 'V' => 5 } }],
        ['N', { nil => { nil => 2 }, 'V' => { 'D' => 6 } }],
        ['V', { 'D' => { nil => 4 } }]
      ]

      assert_equal expected, subject.each.to_a
    end

    it 'should enumerate over trigrams' do
      # A distinct local name avoids shadowing the `trigrams` let above.
      collected = []
      subject.each_trigram { |trigram| collected << trigram }

      assert_equal [[%w(D N V), 5], [%w(N V D), 6]], collected
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of Myaso::PiTable: default cell values, tuple access and
# iteration.
#
# Plain assert_* calls are used instead of expectations invoked directly on
# the value, because minitest deprecates those global expectations.
describe Myaso::PiTable do
  let(:length) { 10 }
  let(:tags) { %w(D V N) }

  subject { Myaso::PiTable.new }

  describe '#new' do
    it 'should be full of nils' do
      # All (index, tag, tag) cells, visited in nested-loop order.
      (0...length).to_a.product(tags, tags).each do |i, u, v|
        assert_nil subject[i, u, v]
      end
    end

    describe 'with default value' do
      let(:default) { 0 }

      subject { Myaso::PiTable.new(default) }

      it 'should be full of zeros' do
        (0...length).to_a.product(tags, tags).each do |i, u, v|
          assert_equal 0, subject[i, u, v]
        end
      end
    end
  end

  describe '#[]' do
    it 'should get and set a tuple' do
      subject[0, 'D', 'N'] = 1
      subject[0, 'D', 'V'] = 2
      assert_equal 1, subject[0, 'D', 'N']
      assert_equal 2, subject[0, 'D', 'V']
    end
  end

  describe '#each' do
    it 'iterates over an internal table' do
      assert_equal subject.table.to_a, subject.each.to_a
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
# Test bootstrap: activates minitest, puts the gem's lib directory at the
# front of the load path, loads myaso and then every file under spec/support.
require 'rubygems'

gem 'minitest'
require 'minitest/autorun'
require 'minitest/hell'

# Directory that contains this helper (the spec root).
spec_root = File.dirname(__FILE__)

$LOAD_PATH.unshift(File.expand_path('../lib', spec_root))
require 'myaso'

Dir.glob(File.expand_path('support/**/*.rb', spec_root)).each do |support_file|
  require support_file
end
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
+ require 'yaml'
4
+
5
# When this file is loaded, every YAML file in the fixtures directory is
# parsed and stored as a constant of the Myaso::Fixtures module. The constant
# is named after the fixture file: its base name without the extension,
# upper-cased.
#
# For example, file <tt>prefixes.yml</tt>:
#
#   - id: 1
#     prefix: sub
#   - id: 2
#     prefix: bi
#
# This fixture would be made available like so:
#
#   Myaso::Fixtures::PREFIXES
#   => [{"id"=>1, "prefix"=>"sub"}, {"id"=>2, "prefix"=>"bi"}]
#
# You can find out all available fixtures by calling
#
#   Myaso::Fixtures.constants
#   => [ :BUCKETS ]
#
module Myaso::Fixtures
end

# The fixtures directory is resolved relative to this file, so the loader
# does not depend on the current working directory.
fixtures_path = File.expand_path('../../fixtures', __FILE__)

Dir.glob(File.join(fixtures_path, '*.yml')).each do |path|
  fixture_name = File.basename(path, '.*').upcase
  fixture_data = YAML.load_file(path)

  Myaso::Fixtures.const_set(fixture_name, fixture_data)
end
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+
3
+ require 'open3'
4
+
5
+ # http://dota2.ru/guides/880-invokirkhakha-sanstrajk-ni-azhydal-da/
6
+ #
7
# Test helpers for running the bin/myaso executable from within test cases.
#
# The class is reopened under its canonical name, Minitest::Test. The
# historical MiniTest alias is only provided by minitest's compatibility
# layer, which recent releases load on request only.
class Minitest::Test
  # Returns the per-test-instance memo of previous invocations, keyed by the
  # full argument list that was passed to #invoke.
  def invoke_cache
    @invoke_cache ||= {}
  end

  # Runs bin/myaso with the given command-line arguments and returns its
  # standard output as an array of lines without trailing line separators.
  #
  # A trailing Hash argument is treated as options rather than as a
  # command-line argument. The only option read is :stdin, whose value is
  # written to the process's standard input.
  #
  # Results are memoised in #invoke_cache, so repeating an identical call on
  # the same test instance does not spawn the process again.
  def invoke(*argv)
    return invoke_cache[argv] if invoke_cache.key?(argv)

    arguments = argv.dup
    options = arguments.last.is_a?(Hash) ? arguments.pop : {}
    executable = File.expand_path('../../../bin/myaso', __FILE__)

    Open3.popen3(executable, *arguments) do |stdin, stdout, *_|
      stdin.puts options[:stdin] if options[:stdin]
      stdin.close

      # String#chomp! returns nil when there is nothing to strip, which
      # turned a final line without a trailing newline into nil. The
      # non-destructive #chomp always returns the line itself.
      invoke_cache[argv] = stdout.readlines.map(&:chomp)
    end
  end
end
+ end
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of Myaso::Tagger#annotate, using a TnT model built from the
# test n-gram and lexicon files under spec/data.
#
# Plain assert_equal calls are used instead of expectations invoked directly
# on the value, because minitest deprecates those global expectations.
describe Myaso::Tagger do
  let(:ngrams) { File.expand_path('../data/test.123', __FILE__) }
  let(:lexicon) { File.expand_path('../data/test.lex', __FILE__) }
  let(:model) { Myaso::Tagger::TnT.new(ngrams, lexicon) }

  subject { Myaso::Tagger.new(model) }

  describe 'annotate(sentence)' do
    it 'should annotate one word sentences' do
      assert_equal %w(e), subject.annotate(%w(братишка))
    end

    it 'should annotate sentences with tags' do
      sentence = %w(братишка я тебе покушать принес)

      assert_equal %w(a b b d e), subject.annotate(sentence)
    end

    it 'should handle unknown words' do
      sentence = %w(мир прекрасен , как никогда)

      assert_equal %w(d d d d d), subject.annotate(sentence)
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
# Specification of the tagger model interface, exercised through a
# Myaso::Tagger::TnT instance built from the test n-gram and lexicon files
# under spec/data.
#
# Plain assert_* calls are used instead of expectations invoked directly on
# the value, because minitest deprecates those global expectations.
describe Myaso::Tagger::Model do
  let(:ngrams) { File.expand_path('../data/test.123', __FILE__) }
  let(:lexicon) { File.expand_path('../data/test.lex', __FILE__) }

  subject { Myaso::Tagger::TnT.new(ngrams, lexicon) }

  describe '#q(t1,t2,t3)' do
    it 'counts the quotient between trigram and bigram counts otherwise' do
      assert_in_delta 0.224, subject.q('a', 'a', 'a'), 0.001
      assert_in_delta 0.287, subject.q('b', 'a', 'b'), 0.001
    end
  end

  describe '#e(w|t)' do
    it 'returns 0 if there is no such bunch word => tag' do
      assert_equal 0, subject.e('братишка', 'b')
      assert_equal 0, subject.e('проголодался', 'c')
    end

    it 'counts the quotient between count(word, tag) and ngrams(tag)' do
      assert_equal 1 / 26.0, subject.e('братишка', 'a')
      assert_equal 2 / 6.0, subject.e('принес', 'e')
    end
  end

  describe '#learn!' do
    it 'should have the same ngrams as in the gold standard' do
      assert_equal Myaso::Fixtures::NGRAMS, subject.ngrams
    end

    it 'should have the same lexicon as in the gold standard' do
      assert_equal Myaso::Fixtures::LEXICON, subject.lexicon
    end

    it 'should have the same interpolations as in the gold standard' do
      assert_equal Myaso::Fixtures::INTERPOLATIONS, subject.interpolations
    end
  end

  describe '#start_symbol' do
    it 'should be SENT' do
      assert_equal 'SENT', subject.start_symbol
    end
  end

  describe '#stop_symbol' do
    it 'should be SENT' do
      assert_equal 'SENT', subject.stop_symbol
    end
  end

  describe '#conditional' do
    it 'should compute p(0|0) as 0' do
      assert_equal 0.0, subject.conditional(0, 0)
    end

    it 'should compute p(1|0) as 0' do
      assert_equal 0.0, subject.conditional(1, 0)
    end

    it 'should compute p(0|1) as 0' do
      assert_equal 0.0, subject.conditional(0, 1)
    end

    it 'should compute p(3|2) as 1.5' do
      assert_equal 1.5, subject.conditional(3, 2)
    end
  end
end