yandex_mystem 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.3 --create
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in yandex_mystem.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # Yandex Mystem
2
+
3
+ ## Introduction
4
+
5
+ Mystem is software provided by Yandex for non-commercial projects only. You can use it to detect the base forms of the words in a text and to perform a simple morphological analysis of Russian words.
6
+
7
+ ## License
8
+
9
+ First of all, read the license at http://company.yandex.ru/technologies/mystem/
10
+
11
+ `Mystem` is available for non-commercial use only.
12
+
13
+ ## OS
14
+
15
+ This gem contains executables for these platforms:
16
+
17
+ * Windows
18
+ * Linux 2.6 32-bit
19
+ * Linux 2.6 64-bit
20
+ * Mac OS X 10.5
21
+
22
+ ...out of six; the FreeBSD build is not included in the gem. If you need it, open a pull request or an issue.
23
+
24
+ ## Usage
25
+
26
+ YandexMystem::Base.stem 'О предложении в котором много слов.'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task default: :spec
Binary file
Binary file
data/app/mystem-mac ADDED
Binary file
Binary file
@@ -0,0 +1,51 @@
1
+ require 'open3'
2
+ require 'yandex_mystem/version'
3
+
4
+ module YandexMystem
5
+ class Base
6
+ # TODO add -i
7
+ def self.stem(text)
8
+ exec = Array(command).tap do |c|
9
+ c << '-e utf-8 -n'
10
+ end.join(' ')
11
+
12
+ data = Open3.popen3(exec) do |stdin, stdout, stderr|
13
+ stdin.write text
14
+ stdin.close
15
+ #stderr.read
16
+ stdout.read
17
+ end
18
+
19
+ data = data.scan(/^([^\{]+)\{(.+)\}$/).map do |(word, words)|
20
+ words = words.split('|').select do |w|
21
+ !(w =~ /.+\?\?$/)
22
+ end
23
+
24
+ [word, words]
25
+ end.flatten(1)
26
+
27
+ Hash[*data]
28
+ end
29
+
30
+ private
31
+
32
+ def self.command
33
+ postfix = if RUBY_PLATFORM =~ /(win|w)32$/
34
+ 'win.exe'
35
+ elsif RUBY_PLATFORM =~ /32.+linux$/
36
+ 'linux-32'
37
+ elsif RUBY_PLATFORM =~ /64.+linux$/
38
+ 'linux-64'
39
+ elsif RUBY_PLATFORM =~ /darwin/
40
+ 'mac'
41
+ elsif RUBY_PLATFORM =~ /freebsd/
42
+ raise 'Create an issue or add pull request on a github.'
43
+ else
44
+ raise 'Unknown OS'
45
+ end
46
+
47
+ path = Pathname.new(__FILE__) + '../../app/'
48
+ path + "mystem-#{postfix}"
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,3 @@
1
+ module YandexMystem
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,7 @@
1
+ require 'yandex_mystem'
2
+
3
+ RSpec.configure do |config|
4
+ config.treat_symbols_as_metadata_keys_with_true_values = true
5
+ config.run_all_when_everything_filtered = true
6
+ config.filter_run :focus
7
+ end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe YandexMystem do
5
+ it "should stem words" do
6
+ data = YandexMystem::Base.stem('мальчики мальчиков девочки девочек компьютеров компьютере сов пошли elements')
7
+ data['мальчики'].should eq ['мальчик']
8
+ data['мальчиков'].should eq ['мальчик', "мальчиков", "мальчиковый"]
9
+ data['девочки'].should eq ['девочка']
10
+ data['девочек'].should eq ['девочка']
11
+ data['сов'].should eq ['сова']
12
+ data['пошли'].should eq %w(пойти посылать)
13
+ data['elements'].should eq []
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/yandex_mystem/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Dmitry Polushkin"]
6
+ gem.email = ["dmitry.polushkin@gmail.com"]
7
+ gem.description = %q{Mystem is a software that provided by the Yandex only for non-commercial project. With use of it you can detect base forms of the words in a text, make a simple morphological analysis of russian words.}
8
+ gem.summary = %q{Yandex Mystem makes morphological analysis of a russian text}
9
+ gem.homepage = ""
10
+
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ gem.files = `git ls-files`.split("\n")
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ gem.name = "yandex_mystem"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = YandexMystem::VERSION
17
+
18
+ gem.add_development_dependency "rspec", '~> 2.8'
19
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yandex_mystem
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dmitry Polushkin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &18846940 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.8'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *18846940
25
+ description: Mystem is a software that provided by the Yandex only for non-commercial
26
+ project. With use of it you can detect base forms of the words in a text, make a
27
+ simple morphological analysis of russian words.
28
+ email:
29
+ - dmitry.polushkin@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - .rspec
36
+ - .rvmrc
37
+ - Gemfile
38
+ - README.md
39
+ - Rakefile
40
+ - app/mystem-linux-32
41
+ - app/mystem-linux-64
42
+ - app/mystem-mac
43
+ - app/mystem-win.exe
44
+ - lib/yandex_mystem.rb
45
+ - lib/yandex_mystem/version.rb
46
+ - spec/spec_helper.rb
47
+ - spec/yandex_mystem_spec.rb
48
+ - yandex_mystem.gemspec
49
+ homepage: ''
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.10
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Yandex Mystem makes morphological analysis of a russian text
73
+ test_files: []