myasorubka 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +45 -0
- data/.travis.yml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +23 -0
- data/Rakefile +11 -0
- data/aot-russian +7 -0
- data/lib/myasorubka.rb +4 -0
- data/lib/myasorubka/aot.rb +8 -0
- data/lib/myasorubka/aot/dictionary.rb +125 -0
- data/lib/myasorubka/aot/gramtab.rb +32 -0
- data/lib/myasorubka/aot/tags.rb +326 -0
- data/lib/myasorubka/msd.rb +218 -0
- data/lib/myasorubka/msd/english.rb +263 -0
- data/lib/myasorubka/msd/russian.rb +454 -0
- data/lib/myasorubka/version.rb +9 -0
- data/myasorubka.gemspec +28 -0
- data/spec/msd/russian.tsv +717 -0
- data/spec/msd/russian_spec.rb +24 -0
- data/spec/msd_spec.rb +145 -0
- data/spec/spec_helper.rb +17 -0
- metadata +126 -0
@@ -0,0 +1,24 @@
# encoding: utf-8

require_relative '../spec_helper'
require 'csv'

# Smoke test that feeds the rows of the russian.tsv fixture (stored next to
# this spec) to the MSD parser together with the Russian language definition.
class Myasorubka::MSD
  describe Russian do
    # Open the tab-separated fixture and move its first row into @header, so
    # the example below only sees the rows that follow it.
    before do
      fixture_path = File.expand_path('../russian.tsv', __FILE__)
      @tsv = CSV.open(fixture_path, 'rb', :col_sep => "\t")
      @header = @tsv.shift
    end

    # Release the file handle opened by the `before` hook.
    after do
      @tsv.close
    end

    it 'should be parsed' do
      until @tsv.eof?
        row = @tsv.shift
        # NOTE(review): `row.first[0]` indexes into the first column's string,
        # so only its leading element is handed to the parser rather than the
        # whole cell -- confirm whether that is intended.
        descriptor = row.first[0]
        Myasorubka::MSD.new(Russian, descriptor).must_be :valid?
      end
    end
  end
end
data/spec/msd_spec.rb
ADDED
@@ -0,0 +1,145 @@
# encoding: utf-8

require_relative 'spec_helper'

# Specs for Myasorubka::MSD: construction, attribute access, string
# generation and parsing of descriptor lines.
module Myasorubka
  describe MSD do
    describe 'Initializer' do
      # Minimal stand-in language that defines the CATEGORIES constant.
      module ValidFoo
        CATEGORIES = []
      end

      # Stand-in language that lacks the CATEGORIES constant.
      module InvalidFoo
      end

      it 'should work when appropriate Language is given' do
        MSD.new(ValidFoo).must_be :valid?
      end

      it 'should not work when Language has not CATEGORIES' do
        lambda { MSD.new(InvalidFoo) }.must_raise ArgumentError
      end

      it 'should work when Language is given with empty MSD line' do
        # Both a missing line and an empty string must be accepted.
        [nil, ''].each do |empty_line|
          MSD.new(ValidFoo, empty_line).must_be :valid?
        end
      end

      it 'should have empty grammemes, virgin POS and defined language' do
        fresh = MSD.new(ValidFoo)

        fresh[:pos].must_be_nil
        fresh.grammemes.must_equal({})
        fresh.language.must_equal ValidFoo
      end
    end

    describe 'Attributes' do
      # Every example starts from a blank descriptor for the Russian language.
      before do
        @msd = MSD.new(MSD::Russian)
      end

      it 'should change POS over []=' do
        @msd[:pos] = :residual
        @msd[:pos].must_equal :residual
      end

      it 'should change grammemes over []=' do
        @msd[:pos] = :verb

        # The grammeme is unset until it is explicitly assigned.
        @msd[:tense].must_be_nil
        @msd[:tense] = :past
        @msd[:tense].must_equal :past
      end

      it 'should have reader for POS' do
        @msd[:pos] = :residual
        @msd.pos.must_equal @msd[:pos]
      end

      it 'can merge attributes hash into itself' do
        attributes = {
          :pos => :conjunction,
          :type => :coordinating,
          :formation => :simple
        }
        @msd.merge! attributes

        # After removing :pos, the remainder of the hash is what the
        # descriptor is expected to report as its grammemes.
        expected_pos = attributes.delete :pos

        @msd.pos.must_equal expected_pos
        @msd.grammemes.must_equal attributes
      end

      it 'can self-validate' do
        @msd[:pos] = :interjection
        @msd.must_be :valid?
      end

      it 'should break the validation when descriptors are invalid' do
        @msd[:pos] = :zalupa
        @msd.wont_be :valid?
      end

      it 'can generate regexp based on POS and grammemes' do
        @msd[:pos] = :verb
        @msd[:type] = :main

        pattern = @msd.to_regexp

        # A main-verb line matches from its first character; a noun line
        # does not match at all.
        ('Vmp' =~ pattern).must_equal 0
        ('Nc-pl' =~ pattern).must_be_nil
      end
    end

    describe 'Generator' do
      # Every example starts from a blank descriptor for the Russian language.
      before do
        @msd = MSD.new(MSD::Russian)
      end

      it 'should not generate anything without POS tag' do
        @msd[:number] = :singular
        @msd.to_s.must_equal ''
      end

      it 'should raise InvalidDescriptor when POS tag is invalid' do
        @msd[:pos] = :zalupa
        lambda { @msd.to_s }.must_raise MSD::InvalidDescriptor
      end

      it 'should generate valid MSD lines when POS/grammemes are valid too' do
        # The expectations below are checked after each successive mutation
        # of the same descriptor, so their order matters.
        @msd[:pos] = :noun
        @msd.to_s.must_equal 'N'

        @msd[:animate] = :yes
        @msd.to_s.must_equal 'N----y'

        @msd[:number] = :singular
        @msd.to_s.must_equal 'N--s-y'

        # Clearing the last set grammeme shortens the generated line.
        @msd[:animate] = nil
        @msd.to_s.must_equal 'N--s'

        @msd[:type] = :common
        @msd.to_s.must_equal 'Nc-s'
      end
    end

    describe 'Parser' do
      it 'should parse correctly composed MSD lines' do
        noun = MSD.new(MSD::Russian, 'Ncmsnn')
        noun.pos.must_equal :noun
        noun.grammemes.must_equal({
          :type => :common,
          :gender => :masculine,
          :number => :singular,
          :case => :nominative,
          :animate => :no
        })

        verb = MSD.new(MSD::Russian, 'Vm--1p---p')
        verb.pos.must_equal :verb
        verb.grammemes.must_equal({
          :type => :main,
          :person => :first,
          :number => :plural,
          :aspect => :progressive
        })
      end

      it 'should parse MSD lines generated by itself' do
        source = MSD.new(MSD::Russian)
        source[:pos] = :pronoun
        source[:person] = :third
        source[:gender] = :masculine
        source[:number] = :singular
        source[:case] = :instrumental

        # Round trip: serialise the descriptor, parse the resulting line and
        # compare the parsed copy with the source.
        reparsed = MSD.new(source.language, source.to_s)
        reparsed.pos.must_equal source.pos
        reparsed.grammemes.must_equal source.grammemes
      end
    end
  end
end
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
# encoding: utf-8

# Shared bootstrap for the spec suite: puts the library on the load path,
# loads minitest and the parts of the library the specs exercise.

require 'rubygems'

# Make the gem's lib/ directory loadable straight from the source tree.
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

# RUBY_VERSION always includes a teeny component (e.g. "1.8.7"), so comparing
# it for equality with '1.8' can never succeed. Match the "1.8." prefix so the
# minitest gem is really activated on Ruby 1.8.
if RUBY_VERSION =~ /\A1\.8\./
  gem 'minitest'
end

require 'minitest/autorun'

require 'myasorubka'
require 'myasorubka/aot'
require 'myasorubka/msd/russian'

# Load every Ruby file found under spec/support (at any depth).
Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |f| require f }
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: myasorubka
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dmitry Ustalov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.11'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.11'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: unicode_utils
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.4'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.4'
|
69
|
+
description: Myasorubka is a morphological data processor.
|
70
|
+
email:
|
71
|
+
- dmitry@eveel.ru
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- .gitignore
|
77
|
+
- .travis.yml
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- aot-russian
|
83
|
+
- lib/myasorubka.rb
|
84
|
+
- lib/myasorubka/aot.rb
|
85
|
+
- lib/myasorubka/aot/dictionary.rb
|
86
|
+
- lib/myasorubka/aot/gramtab.rb
|
87
|
+
- lib/myasorubka/aot/tags.rb
|
88
|
+
- lib/myasorubka/msd.rb
|
89
|
+
- lib/myasorubka/msd/english.rb
|
90
|
+
- lib/myasorubka/msd/russian.rb
|
91
|
+
- lib/myasorubka/version.rb
|
92
|
+
- myasorubka.gemspec
|
93
|
+
- spec/msd/russian.tsv
|
94
|
+
- spec/msd/russian_spec.rb
|
95
|
+
- spec/msd_spec.rb
|
96
|
+
- spec/spec_helper.rb
|
97
|
+
homepage: https://github.com/ustalov/myasorubka
|
98
|
+
licenses:
|
99
|
+
- MIT
|
100
|
+
metadata: {}
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - '>='
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 2.0.3
|
118
|
+
signing_key:
|
119
|
+
specification_version: 4
|
120
|
+
summary: Myasorubka is a morphological data processor that supports AOT and MULTEXT-East
|
121
|
+
notations.
|
122
|
+
test_files:
|
123
|
+
- spec/msd/russian.tsv
|
124
|
+
- spec/msd/russian_spec.rb
|
125
|
+
- spec/msd_spec.rb
|
126
|
+
- spec/spec_helper.rb
|