myasorubka 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +45 -0
- data/.travis.yml +7 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +23 -0
- data/Rakefile +11 -0
- data/aot-russian +7 -0
- data/lib/myasorubka.rb +4 -0
- data/lib/myasorubka/aot.rb +8 -0
- data/lib/myasorubka/aot/dictionary.rb +125 -0
- data/lib/myasorubka/aot/gramtab.rb +32 -0
- data/lib/myasorubka/aot/tags.rb +326 -0
- data/lib/myasorubka/msd.rb +218 -0
- data/lib/myasorubka/msd/english.rb +263 -0
- data/lib/myasorubka/msd/russian.rb +454 -0
- data/lib/myasorubka/version.rb +9 -0
- data/myasorubka.gemspec +28 -0
- data/spec/msd/russian.tsv +717 -0
- data/spec/msd/russian_spec.rb +24 -0
- data/spec/msd_spec.rb +145 -0
- data/spec/spec_helper.rb +17 -0
- metadata +126 -0
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative '../spec_helper'
         | 
| 4 | 
            +
            require 'csv'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            class Myasorubka::MSD
         | 
| 7 | 
            +
              describe Russian do
         | 
| 8 | 
            +
                before do
         | 
| 9 | 
            +
                  table_filename = File.expand_path('../russian.tsv', __FILE__)
         | 
| 10 | 
            +
                  @tsv = CSV.open(table_filename, 'rb', :col_sep => "\t")
         | 
| 11 | 
            +
                  @header = @tsv.shift
         | 
| 12 | 
            +
                end
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                after do
         | 
| 15 | 
            +
                  @tsv.close
         | 
| 16 | 
            +
                end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                it 'should be parsed' do
         | 
| 19 | 
            +
                  until @tsv.eof?
         | 
| 20 | 
            +
                    Myasorubka::MSD.new(Russian, @tsv.shift.first[0]).must_be :valid?
         | 
| 21 | 
            +
                  end
         | 
| 22 | 
            +
                end
         | 
| 23 | 
            +
              end
         | 
| 24 | 
            +
            end
         | 
    
        data/spec/msd_spec.rb
    ADDED
    
    | @@ -0,0 +1,145 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative 'spec_helper'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Myasorubka
         | 
| 6 | 
            +
              describe MSD do
         | 
| 7 | 
            +
                describe 'Initializer' do
         | 
| 8 | 
            +
                  module ValidFoo; CATEGORIES = []; end
         | 
| 9 | 
            +
                  module InvalidFoo; end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                  it 'should work when appropriate Language is given' do
         | 
| 12 | 
            +
                    MSD.new(ValidFoo).must_be :valid?
         | 
| 13 | 
            +
                  end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  it 'should not work when Language has not CATEGORIES' do
         | 
| 16 | 
            +
                    lambda { MSD.new(InvalidFoo) }.must_raise ArgumentError
         | 
| 17 | 
            +
                  end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  it 'should work when Language is given with empty MSD line' do
         | 
| 20 | 
            +
                    MSD.new(ValidFoo, nil).must_be :valid?
         | 
| 21 | 
            +
                    MSD.new(ValidFoo, '').must_be :valid?
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
             | 
| 24 | 
            +
                  it 'should have empty grammemes, virgin POS and defined language' do
         | 
| 25 | 
            +
                    msd = MSD.new(ValidFoo)
         | 
| 26 | 
            +
                    msd[:pos].must_be_nil
         | 
| 27 | 
            +
                    msd.grammemes.must_equal({})
         | 
| 28 | 
            +
                    msd.language.must_equal ValidFoo
         | 
| 29 | 
            +
                  end
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                describe 'Attributes' do
         | 
| 33 | 
            +
                  before { @msd = MSD.new(MSD::Russian) }
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                  it 'should change POS over []=' do
         | 
| 36 | 
            +
                    @msd[:pos] = :residual
         | 
| 37 | 
            +
                    @msd[:pos].must_equal :residual
         | 
| 38 | 
            +
                  end
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                  it 'should change grammemes over []=' do
         | 
| 41 | 
            +
                    @msd[:pos] = :verb
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                    @msd[:tense].must_be_nil
         | 
| 44 | 
            +
                    @msd[:tense] = :past
         | 
| 45 | 
            +
                    @msd[:tense].must_equal :past
         | 
| 46 | 
            +
                  end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                  it 'should have reader for POS' do
         | 
| 49 | 
            +
                    @msd[:pos] = :residual
         | 
| 50 | 
            +
                    @msd.pos.must_equal @msd[:pos]
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                  it 'can merge attributes hash into itself' do
         | 
| 54 | 
            +
                    attrs = { :pos => :conjunction, :type => :coordinating,
         | 
| 55 | 
            +
                              :formation => :simple }
         | 
| 56 | 
            +
                    @msd.merge! attrs
         | 
| 57 | 
            +
                    pos = attrs.delete :pos
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    @msd.pos.must_equal pos
         | 
| 60 | 
            +
                    @msd.grammemes.must_equal attrs
         | 
| 61 | 
            +
                  end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                  it 'can self-validate' do
         | 
| 64 | 
            +
                    @msd[:pos] = :interjection
         | 
| 65 | 
            +
                    @msd.must_be :valid?
         | 
| 66 | 
            +
                  end
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                  it 'should break the validation when descriptors are invalid' do
         | 
| 69 | 
            +
                    @msd[:pos] = :zalupa
         | 
| 70 | 
            +
                    @msd.wont_be :valid?
         | 
| 71 | 
            +
                  end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                  it 'can generate regexp based on POS and grammemes' do
         | 
| 74 | 
            +
                    @msd[:pos] = :verb
         | 
| 75 | 
            +
                    @msd[:type] = :main
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                    re = @msd.to_regexp
         | 
| 78 | 
            +
                    ('Vmp' =~ re).must_equal 0
         | 
| 79 | 
            +
                    ('Nc-pl' =~ re).must_be_nil
         | 
| 80 | 
            +
                  end
         | 
| 81 | 
            +
                end
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                describe 'Generator' do
         | 
| 84 | 
            +
                  before { @msd = MSD.new(MSD::Russian) }
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                  it 'should not generate anything without POS tag' do
         | 
| 87 | 
            +
                    @msd[:number] = :singular
         | 
| 88 | 
            +
                    @msd.to_s.must_equal ''
         | 
| 89 | 
            +
                  end
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                  it 'should raise InvalidDescriptor when POS tag is invalid' do
         | 
| 92 | 
            +
                    @msd[:pos] = :zalupa
         | 
| 93 | 
            +
                    lambda { @msd.to_s }.must_raise MSD::InvalidDescriptor
         | 
| 94 | 
            +
                  end
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                  it 'should generate valid MSD lines when POS/grammemes are valid too' do
         | 
| 97 | 
            +
                    @msd[:pos] = :noun
         | 
| 98 | 
            +
                    @msd.to_s.must_equal 'N'
         | 
| 99 | 
            +
             | 
| 100 | 
            +
                    @msd[:animate] = :yes
         | 
| 101 | 
            +
                    @msd.to_s.must_equal 'N----y'
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                    @msd[:number] = :singular
         | 
| 104 | 
            +
                    @msd.to_s.must_equal 'N--s-y'
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                    @msd[:animate] = nil
         | 
| 107 | 
            +
                    @msd.to_s.must_equal 'N--s'
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                    @msd[:type] = :common
         | 
| 110 | 
            +
                    @msd.to_s.must_equal 'Nc-s'
         | 
| 111 | 
            +
                  end
         | 
| 112 | 
            +
                end
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                describe 'Parser' do
         | 
| 115 | 
            +
                  it 'should parse correctly composed MSD lines' do
         | 
| 116 | 
            +
                    msd = MSD.new(MSD::Russian, 'Ncmsnn')
         | 
| 117 | 
            +
                    msd.pos.must_equal :noun
         | 
| 118 | 
            +
                    msd.grammemes.must_equal({
         | 
| 119 | 
            +
                      :type => :common, :gender => :masculine, :number => :singular,
         | 
| 120 | 
            +
                      :case => :nominative, :animate => :no
         | 
| 121 | 
            +
                    })
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                    msd = MSD.new(MSD::Russian, 'Vm--1p---p')
         | 
| 124 | 
            +
                    msd.pos.must_equal :verb
         | 
| 125 | 
            +
                    msd.grammemes.must_equal({
         | 
| 126 | 
            +
                      :type => :main, :person => :first, :number => :plural,
         | 
| 127 | 
            +
                      :aspect => :progressive
         | 
| 128 | 
            +
                    })
         | 
| 129 | 
            +
                  end
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                  it 'should parse MSD lines generated by itself' do
         | 
| 132 | 
            +
                    gen = MSD.new(MSD::Russian)
         | 
| 133 | 
            +
                    gen[:pos] = :pronoun
         | 
| 134 | 
            +
                    gen[:person] = :third
         | 
| 135 | 
            +
                    gen[:gender] = :masculine
         | 
| 136 | 
            +
                    gen[:number] = :singular
         | 
| 137 | 
            +
                    gen[:case] = :instrumental
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                    msd = MSD.new(gen.language, gen.to_s)
         | 
| 140 | 
            +
                    msd.pos.must_equal gen.pos
         | 
| 141 | 
            +
                    msd.grammemes.must_equal gen.grammemes
         | 
| 142 | 
            +
                  end
         | 
| 143 | 
            +
                end
         | 
| 144 | 
            +
              end
         | 
| 145 | 
            +
            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rubygems'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            $:.unshift File.expand_path('../../lib', __FILE__)
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            if RUBY_VERSION == '1.8'
         | 
| 8 | 
            +
              gem 'minitest'
         | 
| 9 | 
            +
            end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            require 'minitest/autorun'
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            require 'myasorubka'
         | 
| 14 | 
            +
            require 'myasorubka/aot'
         | 
| 15 | 
            +
            require 'myasorubka/msd/russian'
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            Dir[File.expand_path('../support/**/*.rb', __FILE__)].each { |f| require f }
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,126 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: myasorubka
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: '0.1'
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Dmitry Ustalov
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2013-04-20 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: bundler
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - ~>
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '1.3'
         | 
| 20 | 
            +
              type: :development
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - ~>
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '1.3'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: minitest
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - '>='
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '2.11'
         | 
| 34 | 
            +
              type: :development
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - '>='
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '2.11'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: rake
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - '>='
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - '>='
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '0'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: unicode_utils
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ~>
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '1.4'
         | 
| 62 | 
            +
              type: :runtime
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ~>
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '1.4'
         | 
| 69 | 
            +
            description: Myasorubka is a morphological data processor.
         | 
| 70 | 
            +
            email:
         | 
| 71 | 
            +
            - dmitry@eveel.ru
         | 
| 72 | 
            +
            executables: []
         | 
| 73 | 
            +
            extensions: []
         | 
| 74 | 
            +
            extra_rdoc_files: []
         | 
| 75 | 
            +
            files:
         | 
| 76 | 
            +
            - .gitignore
         | 
| 77 | 
            +
            - .travis.yml
         | 
| 78 | 
            +
            - Gemfile
         | 
| 79 | 
            +
            - LICENSE.txt
         | 
| 80 | 
            +
            - README.md
         | 
| 81 | 
            +
            - Rakefile
         | 
| 82 | 
            +
            - aot-russian
         | 
| 83 | 
            +
            - lib/myasorubka.rb
         | 
| 84 | 
            +
            - lib/myasorubka/aot.rb
         | 
| 85 | 
            +
            - lib/myasorubka/aot/dictionary.rb
         | 
| 86 | 
            +
            - lib/myasorubka/aot/gramtab.rb
         | 
| 87 | 
            +
            - lib/myasorubka/aot/tags.rb
         | 
| 88 | 
            +
            - lib/myasorubka/msd.rb
         | 
| 89 | 
            +
            - lib/myasorubka/msd/english.rb
         | 
| 90 | 
            +
            - lib/myasorubka/msd/russian.rb
         | 
| 91 | 
            +
            - lib/myasorubka/version.rb
         | 
| 92 | 
            +
            - myasorubka.gemspec
         | 
| 93 | 
            +
            - spec/msd/russian.tsv
         | 
| 94 | 
            +
            - spec/msd/russian_spec.rb
         | 
| 95 | 
            +
            - spec/msd_spec.rb
         | 
| 96 | 
            +
            - spec/spec_helper.rb
         | 
| 97 | 
            +
            homepage: https://github.com/ustalov/myasorubka
         | 
| 98 | 
            +
            licenses:
         | 
| 99 | 
            +
            - MIT
         | 
| 100 | 
            +
            metadata: {}
         | 
| 101 | 
            +
            post_install_message: 
         | 
| 102 | 
            +
            rdoc_options: []
         | 
| 103 | 
            +
            require_paths:
         | 
| 104 | 
            +
            - lib
         | 
| 105 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 106 | 
            +
              requirements:
         | 
| 107 | 
            +
              - - '>='
         | 
| 108 | 
            +
                - !ruby/object:Gem::Version
         | 
| 109 | 
            +
                  version: '0'
         | 
| 110 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 111 | 
            +
              requirements:
         | 
| 112 | 
            +
              - - '>='
         | 
| 113 | 
            +
                - !ruby/object:Gem::Version
         | 
| 114 | 
            +
                  version: '0'
         | 
| 115 | 
            +
            requirements: []
         | 
| 116 | 
            +
            rubyforge_project: 
         | 
| 117 | 
            +
            rubygems_version: 2.0.3
         | 
| 118 | 
            +
            signing_key: 
         | 
| 119 | 
            +
            specification_version: 4
         | 
| 120 | 
            +
            summary: Myasorubka is a morphological data processor that supports AOT and MULTEXT-East
         | 
| 121 | 
            +
              notations.
         | 
| 122 | 
            +
            test_files:
         | 
| 123 | 
            +
            - spec/msd/russian.tsv
         | 
| 124 | 
            +
            - spec/msd/russian_spec.rb
         | 
| 125 | 
            +
            - spec/msd_spec.rb
         | 
| 126 | 
            +
            - spec/spec_helper.rb
         |