yandex_mystem 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.3 --create
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org' # HTTPS endpoint, so gem downloads are not fetched over plain HTTP
2
+
3
+ # Specify your gem's dependencies in yandex_mystem.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,26 @@
1
+ # Yandex Mystem
2
+
3
+ ## Introduction
4
+
5
+ Mystem is software provided by Yandex for non-commercial projects only. With it you can detect the base forms of the words in a text and perform a simple morphological analysis of Russian words.
6
+
7
+ ## License
8
+
9
+ First of all, read the license at http://company.yandex.ru/technologies/mystem/
10
+
11
+ `Mystem` is available for non-commercial use only.
12
+
13
+ ## OS
14
+
15
+ This gem contains executables for these platforms:
16
+
17
+ * Windows
18
+ * Linux 2.6 32-bit
19
+ * Linux 2.6 64-bit
20
+ * Mac OS X 10.5
21
+
22
+ ...out of six; FreeBSD is not included in the gem. If you need it, open a pull request or an issue.
23
+
24
+ ## Usage
25
+
26
+ YandexMystem::Base.stem 'О предложении в котором много слов.'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks" # Bundler's gem packaging rake tasks
3
+ require 'rspec/core/rake_task' # provides RSpec::Core::RakeTask
4
+
5
+ RSpec::Core::RakeTask.new(:spec) # defines the `spec` rake task
6
+ task default: :spec # a bare `rake` runs the `spec` task
Binary file
Binary file
data/app/mystem-mac ADDED
Binary file
Binary file
@@ -0,0 +1,51 @@
require 'open3'
require 'pathname'
require 'yandex_mystem/version'

module YandexMystem
  # Wrapper around the platform-specific `mystem` executables shipped in the
  # gem's `app/` directory.
  class Base
    # Switches passed on every invocation (unchanged from the original
    # command line). `-e utf-8` selects the I/O encoding; the parser below
    # relies on the one-entry-per-line output produced with `-n`.
    MYSTEM_OPTIONS = %w[-e utf-8 -n].freeze

    # One line of mystem output: `word{variant1|variant2|...}`.
    OUTPUT_LINE = /^([^\{]+)\{(.+)\}$/

    # Variants ending in `??` are dropped from the result.
    # NOTE(review): presumably mystem's marker for unrecognised words — confirm.
    UNRECOGNIZED = /.+\?\?$/

    # Runs mystem over +text+ and collects the base forms of every word.
    #
    # TODO add -i
    #
    # @param text [#to_s] text to analyse (written to mystem's stdin)
    # @return [Hash{String => Array<String>}] each analysed word mapped to its
    #   base forms; the array is empty when every variant was filtered out
    # @raise [RuntimeError] when no bundled binary exists for this platform
    def self.stem(text)
      # Argument-vector form: no shell is involved, so an install path that
      # contains spaces still works. capture3 feeds stdin while draining
      # stdout and stderr concurrently, so large inputs cannot dead-lock on a
      # full pipe the way write-everything-then-read did.
      # The exit status is deliberately ignored, as in the original.
      output, _errors, _status =
        Open3.capture3(command.to_s, *MYSTEM_OPTIONS, stdin_data: text.to_s)

      # mystem was asked for UTF-8, so tag the bytes accordingly instead of
      # inheriting whatever the process locale happens to be.
      parse(output.force_encoding(Encoding::UTF_8))
    end

    # Turns raw mystem output into a word => base-forms hash.
    def self.parse(output)
      pairs = output.scan(OUTPUT_LINE).map do |word, variants|
        [word, variants.split('|').reject { |variant| variant =~ UNRECOGNIZED }]
      end

      # Hash[pairs] instead of Hash[*flat]: no giant argument splat.
      Hash[pairs]
    end
    private_class_method :parse

    # Absolute path of the bundled mystem binary for +platform+.
    #
    # Left public: the original `private` keyword never applied to
    # `def self.` methods, so existing callers may rely on this method.
    #
    # @param platform [String] a RUBY_PLATFORM-style identifier
    # @return [Pathname]
    # @raise [RuntimeError] for FreeBSD and for unrecognised platforms
    def self.command(platform = RUBY_PLATFORM)
      # darwin is tested first so that a platform string such as "darwin32"
      # can never be mistaken for a Windows "...win32" platform.
      suffix =
        if platform =~ /darwin/
          'mac'
        elsif platform =~ /mswin|mingw|(win|w)32$/
          'win.exe'
        elsif platform =~ /linux/ && platform =~ /i[3-6]86|32/
          # e.g. "i686-linux", which contains no literal "32"
          'linux-32'
        elsif platform =~ /linux/ && platform =~ /64/
          # also covers suffixed names such as "x86_64-linux-gnu"
          'linux-64'
        elsif platform =~ /freebsd/
          raise 'Create an issue or add pull request on a github.'
        else
          raise 'Unknown OS'
        end

      # Expanded against this file, so the result does not depend on the
      # current working directory.
      Pathname.new(File.expand_path("../../app/mystem-#{suffix}", __FILE__))
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module YandexMystem
2
+ VERSION = "0.0.1" # gem version string; read by yandex_mystem.gemspec as gem.version
3
+ end
@@ -0,0 +1,7 @@
1
+ require 'yandex_mystem'
2
+
3
+ RSpec.configure do |config|
4
+ config.treat_symbols_as_metadata_keys_with_true_values = true # lets a bare `:focus` tag stand for `focus: true`
5
+ config.run_all_when_everything_filtered = true # run the whole suite when nothing matches the filter
6
+ config.filter_run :focus # run only examples tagged :focus, when any exist
7
+ end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe YandexMystem do
5
+ it "should stem words" do # integration test: runs the bundled mystem binary for the current platform
6
+ data = YandexMystem::Base.stem('мальчики мальчиков девочки девочек компьютеров компьютере сов пошли elements')
7
+ data['мальчики'].should eq ['мальчик']
8
+ data['мальчиков'].should eq ['мальчик', "мальчиков", "мальчиковый"] # an ambiguous form yields several base forms
9
+ data['девочки'].should eq ['девочка']
10
+ data['девочек'].should eq ['девочка']
11
+ data['сов'].should eq ['сова']
12
+ data['пошли'].should eq %w(пойти посылать)
13
+ data['elements'].should eq [] # the word is still a key, but all of its variants were filtered out
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/yandex_mystem/version', __FILE__) # loads only YandexMystem::VERSION, not the whole library
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Dmitry Polushkin"]
6
+ gem.email = ["dmitry.polushkin@gmail.com"]
7
+ gem.description = %q{Mystem is a software that provided by the Yandex only for non-commercial project. With use of it you can detect base forms of the words in a text, make a simple morphological analysis of russian words.}
8
+ gem.summary = %q{Yandex Mystem makes morphological analysis of a russian text}
9
+ gem.homepage = "" # NOTE(review): homepage is empty — consider setting the project URL
10
+
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } # basenames of git-tracked files under bin/
12
+ gem.files = `git ls-files`.split("\n") # every git-tracked file, one per output line
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") # git-tracked files under test/, spec/ and features/
14
+ gem.name = "yandex_mystem"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = YandexMystem::VERSION # defined in lib/yandex_mystem/version.rb, required above
17
+
18
+ gem.add_development_dependency "rspec", '~> 2.8' # needed only to run the specs
19
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yandex_mystem
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dmitry Polushkin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-01-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &18846940 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '2.8'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *18846940
25
+ description: Mystem is a software that provided by the Yandex only for non-commercial
26
+ project. With use of it you can detect base forms of the words in a text, make a
27
+ simple morphological analysis of russian words.
28
+ email:
29
+ - dmitry.polushkin@gmail.com
30
+ executables: []
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - .gitignore
35
+ - .rspec
36
+ - .rvmrc
37
+ - Gemfile
38
+ - README.md
39
+ - Rakefile
40
+ - app/mystem-linux-32
41
+ - app/mystem-linux-64
42
+ - app/mystem-mac
43
+ - app/mystem-win.exe
44
+ - lib/yandex_mystem.rb
45
+ - lib/yandex_mystem/version.rb
46
+ - spec/spec_helper.rb
47
+ - spec/yandex_mystem_spec.rb
48
+ - yandex_mystem.gemspec
49
+ homepage: ''
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.10
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Yandex Mystem makes morphological analysis of a russian text
73
+ test_files: []