myasorubka 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../spec_helper'
4
+ require 'csv'
5
+
6
+ class Myasorubka::MSD
7
+ describe Russian do
8
+ before do
9
+ table_filename = File.expand_path('../russian.tsv', __FILE__)
10
+ @tsv = CSV.open(table_filename, 'rb', :col_sep => "\t")
11
+ @header = @tsv.shift
12
+ end
13
+
14
+ after do
15
+ @tsv.close
16
+ end
17
+
18
+ it 'should be parsed' do
19
+ until @tsv.eof?
20
+ Myasorubka::MSD.new(Russian, @tsv.shift.first[0]).must_be :valid?
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,145 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'spec_helper'
4
+
5
+ module Myasorubka
6
+ describe MSD do
7
+ describe 'Initializer' do
8
+ module ValidFoo; CATEGORIES = []; end
9
+ module InvalidFoo; end
10
+
11
+ it 'should work when appropriate Language is given' do
12
+ MSD.new(ValidFoo).must_be :valid?
13
+ end
14
+
15
+ it 'should not work when Language has not CATEGORIES' do
16
+ lambda { MSD.new(InvalidFoo) }.must_raise ArgumentError
17
+ end
18
+
19
+ it 'should work when Language is given with empty MSD line' do
20
+ MSD.new(ValidFoo, nil).must_be :valid?
21
+ MSD.new(ValidFoo, '').must_be :valid?
22
+ end
23
+
24
+ it 'should have empty grammemes, virgin POS and defined language' do
25
+ msd = MSD.new(ValidFoo)
26
+ msd[:pos].must_be_nil
27
+ msd.grammemes.must_equal({})
28
+ msd.language.must_equal ValidFoo
29
+ end
30
+ end
31
+
32
+ describe 'Attributes' do
33
+ before { @msd = MSD.new(MSD::Russian) }
34
+
35
+ it 'should change POS over []=' do
36
+ @msd[:pos] = :residual
37
+ @msd[:pos].must_equal :residual
38
+ end
39
+
40
+ it 'should change grammemes over []=' do
41
+ @msd[:pos] = :verb
42
+
43
+ @msd[:tense].must_be_nil
44
+ @msd[:tense] = :past
45
+ @msd[:tense].must_equal :past
46
+ end
47
+
48
+ it 'should have reader for POS' do
49
+ @msd[:pos] = :residual
50
+ @msd.pos.must_equal @msd[:pos]
51
+ end
52
+
53
+ it 'can merge attributes hash into itself' do
54
+ attrs = { :pos => :conjunction, :type => :coordinating,
55
+ :formation => :simple }
56
+ @msd.merge! attrs
57
+ pos = attrs.delete :pos
58
+
59
+ @msd.pos.must_equal pos
60
+ @msd.grammemes.must_equal attrs
61
+ end
62
+
63
+ it 'can self-validate' do
64
+ @msd[:pos] = :interjection
65
+ @msd.must_be :valid?
66
+ end
67
+
68
+ it 'should break the validation when descriptors are invalid' do
69
+ @msd[:pos] = :zalupa
70
+ @msd.wont_be :valid?
71
+ end
72
+
73
+ it 'can generate regexp based on POS and grammemes' do
74
+ @msd[:pos] = :verb
75
+ @msd[:type] = :main
76
+
77
+ re = @msd.to_regexp
78
+ ('Vmp' =~ re).must_equal 0
79
+ ('Nc-pl' =~ re).must_be_nil
80
+ end
81
+ end
82
+
83
+ describe 'Generator' do
84
+ before { @msd = MSD.new(MSD::Russian) }
85
+
86
+ it 'should not generate anything without POS tag' do
87
+ @msd[:number] = :singular
88
+ @msd.to_s.must_equal ''
89
+ end
90
+
91
+ it 'should raise InvalidDescriptor when POS tag is invalid' do
92
+ @msd[:pos] = :zalupa
93
+ lambda { @msd.to_s }.must_raise MSD::InvalidDescriptor
94
+ end
95
+
96
+ it 'should generate valid MSD lines when POS/grammemes are valid too' do
97
+ @msd[:pos] = :noun
98
+ @msd.to_s.must_equal 'N'
99
+
100
+ @msd[:animate] = :yes
101
+ @msd.to_s.must_equal 'N----y'
102
+
103
+ @msd[:number] = :singular
104
+ @msd.to_s.must_equal 'N--s-y'
105
+
106
+ @msd[:animate] = nil
107
+ @msd.to_s.must_equal 'N--s'
108
+
109
+ @msd[:type] = :common
110
+ @msd.to_s.must_equal 'Nc-s'
111
+ end
112
+ end
113
+
114
+ describe 'Parser' do
115
+ it 'should parse correctly composed MSD lines' do
116
+ msd = MSD.new(MSD::Russian, 'Ncmsnn')
117
+ msd.pos.must_equal :noun
118
+ msd.grammemes.must_equal({
119
+ :type => :common, :gender => :masculine, :number => :singular,
120
+ :case => :nominative, :animate => :no
121
+ })
122
+
123
+ msd = MSD.new(MSD::Russian, 'Vm--1p---p')
124
+ msd.pos.must_equal :verb
125
+ msd.grammemes.must_equal({
126
+ :type => :main, :person => :first, :number => :plural,
127
+ :aspect => :progressive
128
+ })
129
+ end
130
+
131
+ it 'should parse MSD lines generated by itself' do
132
+ gen = MSD.new(MSD::Russian)
133
+ gen[:pos] = :pronoun
134
+ gen[:person] = :third
135
+ gen[:gender] = :masculine
136
+ gen[:number] = :singular
137
+ gen[:case] = :instrumental
138
+
139
+ msd = MSD.new(gen.language, gen.to_s)
140
+ msd.pos.must_equal gen.pos
141
+ msd.grammemes.must_equal gen.grammemes
142
+ end
143
+ end
144
+ end
145
+ end
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+
5
+ $:.unshift File.expand_path('../../lib', __FILE__)
6
+
7
+ if RUBY_VERSION == '1.8'
8
+ gem 'minitest'
9
+ end
10
+
11
+ require 'minitest/autorun'
12
+
13
+ require 'myasorubka'
14
+ require 'myasorubka/aot'
15
+ require 'myasorubka/msd/russian'
16
+
17
+ Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |f| require f }
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: myasorubka
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Dmitry Ustalov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '2.11'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '2.11'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: unicode_utils
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.4'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.4'
69
+ description: Myasorubka is a morphological data processor.
70
+ email:
71
+ - dmitry@eveel.ru
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - .gitignore
77
+ - .travis.yml
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - aot-russian
83
+ - lib/myasorubka.rb
84
+ - lib/myasorubka/aot.rb
85
+ - lib/myasorubka/aot/dictionary.rb
86
+ - lib/myasorubka/aot/gramtab.rb
87
+ - lib/myasorubka/aot/tags.rb
88
+ - lib/myasorubka/msd.rb
89
+ - lib/myasorubka/msd/english.rb
90
+ - lib/myasorubka/msd/russian.rb
91
+ - lib/myasorubka/version.rb
92
+ - myasorubka.gemspec
93
+ - spec/msd/russian.tsv
94
+ - spec/msd/russian_spec.rb
95
+ - spec/msd_spec.rb
96
+ - spec/spec_helper.rb
97
+ homepage: https://github.com/ustalov/myasorubka
98
+ licenses:
99
+ - MIT
100
+ metadata: {}
101
+ post_install_message:
102
+ rdoc_options: []
103
+ require_paths:
104
+ - lib
105
+ required_ruby_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ requirements: []
116
+ rubyforge_project:
117
+ rubygems_version: 2.0.3
118
+ signing_key:
119
+ specification_version: 4
120
+ summary: Myasorubka is a morphological data processor that supports AOT and MULTEXT-East
121
+ notations.
122
+ test_files:
123
+ - spec/msd/russian.tsv
124
+ - spec/msd/russian_spec.rb
125
+ - spec/msd_spec.rb
126
+ - spec/spec_helper.rb