myasorubka 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +45 -0
- data/.travis.yml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +23 -0
- data/Rakefile +11 -0
- data/aot-russian +7 -0
- data/lib/myasorubka.rb +4 -0
- data/lib/myasorubka/aot.rb +8 -0
- data/lib/myasorubka/aot/dictionary.rb +125 -0
- data/lib/myasorubka/aot/gramtab.rb +32 -0
- data/lib/myasorubka/aot/tags.rb +326 -0
- data/lib/myasorubka/msd.rb +218 -0
- data/lib/myasorubka/msd/english.rb +263 -0
- data/lib/myasorubka/msd/russian.rb +454 -0
- data/lib/myasorubka/version.rb +9 -0
- data/myasorubka.gemspec +28 -0
- data/spec/msd/russian.tsv +717 -0
- data/spec/msd/russian_spec.rb +24 -0
- data/spec/msd_spec.rb +145 -0
- data/spec/spec_helper.rb +17 -0
- metadata +126 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require_relative '../spec_helper'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
class Myasorubka::MSD
|
7
|
+
describe Russian do
|
8
|
+
before do
|
9
|
+
table_filename = File.expand_path('../russian.tsv', __FILE__)
|
10
|
+
@tsv = CSV.open(table_filename, 'rb', :col_sep => "\t")
|
11
|
+
@header = @tsv.shift
|
12
|
+
end
|
13
|
+
|
14
|
+
after do
|
15
|
+
@tsv.close
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should be parsed' do
|
19
|
+
until @tsv.eof?
|
20
|
+
Myasorubka::MSD.new(Russian, @tsv.shift.first[0]).must_be :valid?
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/spec/msd_spec.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require_relative 'spec_helper'
|
4
|
+
|
5
|
+
module Myasorubka
|
6
|
+
describe MSD do
|
7
|
+
describe 'Initializer' do
|
8
|
+
module ValidFoo; CATEGORIES = []; end
|
9
|
+
module InvalidFoo; end
|
10
|
+
|
11
|
+
it 'should work when appropriate Language is given' do
|
12
|
+
MSD.new(ValidFoo).must_be :valid?
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should not work when Language has not CATEGORIES' do
|
16
|
+
lambda { MSD.new(InvalidFoo) }.must_raise ArgumentError
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should work when Language is given with empty MSD line' do
|
20
|
+
MSD.new(ValidFoo, nil).must_be :valid?
|
21
|
+
MSD.new(ValidFoo, '').must_be :valid?
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'should have empty grammemes, virgin POS and defined language' do
|
25
|
+
msd = MSD.new(ValidFoo)
|
26
|
+
msd[:pos].must_be_nil
|
27
|
+
msd.grammemes.must_equal({})
|
28
|
+
msd.language.must_equal ValidFoo
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe 'Attributes' do
|
33
|
+
before { @msd = MSD.new(MSD::Russian) }
|
34
|
+
|
35
|
+
it 'should change POS over []=' do
|
36
|
+
@msd[:pos] = :residual
|
37
|
+
@msd[:pos].must_equal :residual
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should change grammemes over []=' do
|
41
|
+
@msd[:pos] = :verb
|
42
|
+
|
43
|
+
@msd[:tense].must_be_nil
|
44
|
+
@msd[:tense] = :past
|
45
|
+
@msd[:tense].must_equal :past
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'should have reader for POS' do
|
49
|
+
@msd[:pos] = :residual
|
50
|
+
@msd.pos.must_equal @msd[:pos]
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'can merge attributes hash into itself' do
|
54
|
+
attrs = { :pos => :conjunction, :type => :coordinating,
|
55
|
+
:formation => :simple }
|
56
|
+
@msd.merge! attrs
|
57
|
+
pos = attrs.delete :pos
|
58
|
+
|
59
|
+
@msd.pos.must_equal pos
|
60
|
+
@msd.grammemes.must_equal attrs
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'can self-validate' do
|
64
|
+
@msd[:pos] = :interjection
|
65
|
+
@msd.must_be :valid?
|
66
|
+
end
|
67
|
+
|
68
|
+
it 'should break the validation when descriptors are invalid' do
|
69
|
+
@msd[:pos] = :zalupa
|
70
|
+
@msd.wont_be :valid?
|
71
|
+
end
|
72
|
+
|
73
|
+
it 'can generate regexp based on POS and grammemes' do
|
74
|
+
@msd[:pos] = :verb
|
75
|
+
@msd[:type] = :main
|
76
|
+
|
77
|
+
re = @msd.to_regexp
|
78
|
+
('Vmp' =~ re).must_equal 0
|
79
|
+
('Nc-pl' =~ re).must_be_nil
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe 'Generator' do
|
84
|
+
before { @msd = MSD.new(MSD::Russian) }
|
85
|
+
|
86
|
+
it 'should not generate anything without POS tag' do
|
87
|
+
@msd[:number] = :singular
|
88
|
+
@msd.to_s.must_equal ''
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'should raise InvalidDescriptor when POS tag is invalid' do
|
92
|
+
@msd[:pos] = :zalupa
|
93
|
+
lambda { @msd.to_s }.must_raise MSD::InvalidDescriptor
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'should generate valid MSD lines when POS/grammemes are valid too' do
|
97
|
+
@msd[:pos] = :noun
|
98
|
+
@msd.to_s.must_equal 'N'
|
99
|
+
|
100
|
+
@msd[:animate] = :yes
|
101
|
+
@msd.to_s.must_equal 'N----y'
|
102
|
+
|
103
|
+
@msd[:number] = :singular
|
104
|
+
@msd.to_s.must_equal 'N--s-y'
|
105
|
+
|
106
|
+
@msd[:animate] = nil
|
107
|
+
@msd.to_s.must_equal 'N--s'
|
108
|
+
|
109
|
+
@msd[:type] = :common
|
110
|
+
@msd.to_s.must_equal 'Nc-s'
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
describe 'Parser' do
|
115
|
+
it 'should parse correctly composed MSD lines' do
|
116
|
+
msd = MSD.new(MSD::Russian, 'Ncmsnn')
|
117
|
+
msd.pos.must_equal :noun
|
118
|
+
msd.grammemes.must_equal({
|
119
|
+
:type => :common, :gender => :masculine, :number => :singular,
|
120
|
+
:case => :nominative, :animate => :no
|
121
|
+
})
|
122
|
+
|
123
|
+
msd = MSD.new(MSD::Russian, 'Vm--1p---p')
|
124
|
+
msd.pos.must_equal :verb
|
125
|
+
msd.grammemes.must_equal({
|
126
|
+
:type => :main, :person => :first, :number => :plural,
|
127
|
+
:aspect => :progressive
|
128
|
+
})
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'should parse MSD lines generated by itself' do
|
132
|
+
gen = MSD.new(MSD::Russian)
|
133
|
+
gen[:pos] = :pronoun
|
134
|
+
gen[:person] = :third
|
135
|
+
gen[:gender] = :masculine
|
136
|
+
gen[:number] = :singular
|
137
|
+
gen[:case] = :instrumental
|
138
|
+
|
139
|
+
msd = MSD.new(gen.language, gen.to_s)
|
140
|
+
msd.pos.must_equal gen.pos
|
141
|
+
msd.grammemes.must_equal gen.grammemes
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
$:.unshift File.expand_path('../../lib', __FILE__)
|
6
|
+
|
7
|
+
# Activate the minitest gem explicitly on Ruby 1.8.x (presumably because that
# series does not bundle minitest -- confirm against the supported Rubies).
# RUBY_VERSION is a full version string such as "1.8.7", so comparing it for
# equality with '1.8' never matches; test the version prefix instead.
if RUBY_VERSION =~ /\A1\.8(?:\.|\z)/
  gem 'minitest'
end
|
10
|
+
|
11
|
+
require 'minitest/autorun'
|
12
|
+
|
13
|
+
require 'myasorubka'
|
14
|
+
require 'myasorubka/aot'
|
15
|
+
require 'myasorubka/msd/russian'
|
16
|
+
|
17
|
+
Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |f| require f }
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: myasorubka
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dmitry Ustalov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.11'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.11'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: unicode_utils
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
description: Myasorubka is a morphological data processor.
|
70
|
+
email:
|
71
|
+
- dmitry@eveel.ru
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- .gitignore
|
77
|
+
- .travis.yml
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- aot-russian
|
83
|
+
- lib/myasorubka.rb
|
84
|
+
- lib/myasorubka/aot.rb
|
85
|
+
- lib/myasorubka/aot/dictionary.rb
|
86
|
+
- lib/myasorubka/aot/gramtab.rb
|
87
|
+
- lib/myasorubka/aot/tags.rb
|
88
|
+
- lib/myasorubka/msd.rb
|
89
|
+
- lib/myasorubka/msd/english.rb
|
90
|
+
- lib/myasorubka/msd/russian.rb
|
91
|
+
- lib/myasorubka/version.rb
|
92
|
+
- myasorubka.gemspec
|
93
|
+
- spec/msd/russian.tsv
|
94
|
+
- spec/msd/russian_spec.rb
|
95
|
+
- spec/msd_spec.rb
|
96
|
+
- spec/spec_helper.rb
|
97
|
+
homepage: https://github.com/ustalov/myasorubka
|
98
|
+
licenses:
|
99
|
+
- MIT
|
100
|
+
metadata: {}
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - '>='
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 2.0.3
|
118
|
+
signing_key:
|
119
|
+
specification_version: 4
|
120
|
+
summary: Myasorubka is a morphological data processor that supports AOT and MULTEXT-East
|
121
|
+
notations.
|
122
|
+
test_files:
|
123
|
+
- spec/msd/russian.tsv
|
124
|
+
- spec/msd/russian_spec.rb
|
125
|
+
- spec/msd_spec.rb
|
126
|
+
- spec/spec_helper.rb
|