myasorubka 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../spec_helper'
4
+ require 'csv'
5
+
6
class Myasorubka::MSD
  # Feeds entries from the russian.tsv fixture stored next to this spec into
  # MSD.new together with the Russian language module and expects every
  # resulting descriptor to be valid.
  describe Russian do
    # Opens the tab-separated fixture and consumes its first row into @header
    # (presumably the column titles), so iteration starts at the second row.
    before do
      fixture_path = File.expand_path('russian.tsv', File.dirname(__FILE__))
      @tsv = CSV.open(fixture_path, 'rb', :col_sep => "\t")
      @header = @tsv.shift
    end

    # Releases the fixture file handle opened in the `before` hook.
    after do
      @tsv.close
    end

    it 'should be parsed' do
      @tsv.each do |row|
        # NOTE(review): `first[0]` indexes into the first cell, so on Ruby 1.9+
        # only its leading character is handed to MSD.new. Confirm whether the
        # whole cell was meant to be validated.
        msd_line = row.first[0]
        Myasorubka::MSD.new(Russian, msd_line).must_be :valid?
      end
    end
  end
end
@@ -0,0 +1,145 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
module Myasorubka
  # Behavioural specification of MSD: construction, attribute access,
  # MSD line generation and MSD line parsing.
  describe MSD do
    describe 'Initializer' do
      # Language stub that exposes a CATEGORIES constant (an empty list).
      module ValidFoo
        CATEGORIES = []
      end

      # Language stub that deliberately lacks a CATEGORIES constant.
      module InvalidFoo
      end

      it 'should work when appropriate Language is given' do
        descriptor = MSD.new(ValidFoo)
        descriptor.must_be :valid?
      end

      it 'should not work when Language has not CATEGORIES' do
        construction = lambda { MSD.new(InvalidFoo) }
        construction.must_raise ArgumentError
      end

      it 'should work when Language is given with empty MSD line' do
        from_nil = MSD.new(ValidFoo, nil)
        from_nil.must_be :valid?

        from_blank = MSD.new(ValidFoo, '')
        from_blank.must_be :valid?
      end

      it 'should have empty grammemes, virgin POS and defined language' do
        descriptor = MSD.new(ValidFoo)

        descriptor[:pos].must_be_nil
        descriptor.grammemes.must_equal({})
        descriptor.language.must_equal ValidFoo
      end
    end

    describe 'Attributes' do
      # Every example starts from a fresh descriptor bound to the Russian language.
      before do
        @msd = MSD.new(MSD::Russian)
      end

      it 'should change POS over []=' do
        @msd[:pos] = :residual
        @msd[:pos].must_equal :residual
      end

      it 'should change grammemes over []=' do
        @msd[:pos] = :verb

        # The grammeme is unset until it is assigned explicitly.
        @msd[:tense].must_be_nil
        @msd[:tense] = :past
        @msd[:tense].must_equal :past
      end

      it 'should have reader for POS' do
        @msd[:pos] = :residual
        @msd.pos.must_equal @msd[:pos]
      end

      it 'can merge attributes hash into itself' do
        attributes = {
          :pos => :conjunction,
          :type => :coordinating,
          :formation => :simple
        }
        @msd.merge! attributes

        # Once :pos is removed, the remaining pairs are the expected grammemes.
        expected_pos = attributes.delete :pos

        @msd.pos.must_equal expected_pos
        @msd.grammemes.must_equal attributes
      end

      it 'can self-validate' do
        @msd[:pos] = :interjection
        @msd.must_be :valid?
      end

      it 'should break the validation when descriptors are invalid' do
        @msd[:pos] = :zalupa
        @msd.wont_be :valid?
      end

      it 'can generate regexp based on POS and grammemes' do
        @msd[:pos] = :verb
        @msd[:type] = :main

        pattern = @msd.to_regexp

        matching_offset = 'Vmp' =~ pattern
        matching_offset.must_equal 0

        foreign_offset = 'Nc-pl' =~ pattern
        foreign_offset.must_be_nil
      end
    end

    describe 'Generator' do
      # Every example starts from a fresh descriptor bound to the Russian language.
      before do
        @msd = MSD.new(MSD::Russian)
      end

      it 'should not generate anything without POS tag' do
        @msd[:number] = :singular
        @msd.to_s.must_equal ''
      end

      it 'should raise InvalidDescriptor when POS tag is invalid' do
        @msd[:pos] = :zalupa

        rendering = lambda { @msd.to_s }
        rendering.must_raise MSD::InvalidDescriptor
      end

      it 'should generate valid MSD lines when POS/grammemes are valid too' do
        # The descriptor is mutated step by step and the rendered line is
        # checked after each change.
        @msd[:pos] = :noun
        @msd.to_s.must_equal 'N'

        @msd[:animate] = :yes
        @msd.to_s.must_equal 'N----y'

        @msd[:number] = :singular
        @msd.to_s.must_equal 'N--s-y'

        @msd[:animate] = nil
        @msd.to_s.must_equal 'N--s'

        @msd[:type] = :common
        @msd.to_s.must_equal 'Nc-s'
      end
    end

    describe 'Parser' do
      it 'should parse correctly composed MSD lines' do
        noun = MSD.new(MSD::Russian, 'Ncmsnn')
        noun.pos.must_equal :noun
        noun.grammemes.must_equal({
          :type => :common,
          :gender => :masculine,
          :number => :singular,
          :case => :nominative,
          :animate => :no
        })

        verb = MSD.new(MSD::Russian, 'Vm--1p---p')
        verb.pos.must_equal :verb
        verb.grammemes.must_equal({
          :type => :main,
          :person => :first,
          :number => :plural,
          :aspect => :progressive
        })
      end

      it 'should parse MSD lines generated by itself' do
        generated = MSD.new(MSD::Russian)
        generated[:pos] = :pronoun
        generated[:person] = :third
        generated[:gender] = :masculine
        generated[:number] = :singular
        generated[:case] = :instrumental

        # Round trip: render the descriptor to a line and parse that line back.
        reparsed = MSD.new(generated.language, generated.to_s)
        reparsed.pos.must_equal generated.pos
        reparsed.grammemes.must_equal generated.grammemes
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+
5
+ $:.unshift File.expand_path('../../lib', __FILE__)
6
+
7
# RUBY_VERSION holds the full interpreter version string (for example
# '1.8.7'), so an equality check against '1.8' can never match. Comparing
# against '1.9' makes the guard fire on every 1.8.x interpreter, where the
# minitest gem is activated explicitly before 'minitest/autorun' is required.
if RUBY_VERSION < '1.9'
  gem 'minitest'
end
10
+
11
+ require 'minitest/autorun'
12
+
13
+ require 'myasorubka'
14
+ require 'myasorubka/aot'
15
+ require 'myasorubka/msd/russian'
16
+
17
+ Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |f| require f }
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: myasorubka
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Dmitry Ustalov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '2.11'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '2.11'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: unicode_utils
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ description: Myasorubka is a morphological data processor.
70
+ email:
71
+ - dmitry@eveel.ru
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - .travis.yml
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - aot-russian
83
+ - lib/myasorubka.rb
84
+ - lib/myasorubka/aot.rb
85
+ - lib/myasorubka/aot/dictionary.rb
86
+ - lib/myasorubka/aot/gramtab.rb
87
+ - lib/myasorubka/aot/tags.rb
88
+ - lib/myasorubka/msd.rb
89
+ - lib/myasorubka/msd/english.rb
90
+ - lib/myasorubka/msd/russian.rb
91
+ - lib/myasorubka/version.rb
92
+ - myasorubka.gemspec
93
+ - spec/msd/russian.tsv
94
+ - spec/msd/russian_spec.rb
95
+ - spec/msd_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: https://github.com/ustalov/myasorubka
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.0.3
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Myasorubka is a morphological data processor that supports AOT and MULTEXT-East
121
+ notations.
122
+ test_files:
123
+ - spec/msd/russian.tsv
124
+ - spec/msd/russian_spec.rb
125
+ - spec/msd_spec.rb
126
+ - spec/spec_helper.rb