yandex_mystem 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.md +26 -0
- data/Rakefile +6 -0
- data/app/mystem-linux-32 +0 -0
- data/app/mystem-linux-64 +0 -0
- data/app/mystem-mac +0 -0
- data/app/mystem-win.exe +0 -0
- data/lib/yandex_mystem.rb +51 -0
- data/lib/yandex_mystem/version.rb +3 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/yandex_mystem_spec.rb +15 -0
- data/yandex_mystem.gemspec +19 -0
- metadata +73 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use 1.9.3 --create
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Yandex Mystem
|
2
|
+
|
3
|
+
## Introduction
|
4
|
+
|
5
|
+
Mystem is software provided by Yandex for non-commercial projects only. With it you can detect the base forms of the words in a text and perform a simple morphological analysis of Russian words.
|
6
|
+
|
7
|
+
## License
|
8
|
+
|
9
|
+
First of all, read the license at http://company.yandex.ru/technologies/mystem/
|
10
|
+
|
11
|
+
`Mystem` is available for non-commercial use only.
|
12
|
+
|
13
|
+
## OS
|
14
|
+
|
15
|
+
This gem contains executables for these platforms:
|
16
|
+
|
17
|
+
* Windows
|
18
|
+
* Linux 2.6 32-bit
|
19
|
+
* Linux 2.6 64-bit
|
20
|
+
* Mac OS X 10.5
|
21
|
+
|
22
|
+
...out of six; the FreeBSD build is not included in the gem. If you need it, open a pull request or an issue.
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
YandexMystem::Base.stem 'О предложении в котором много слов.'
|
data/Rakefile
ADDED
data/app/mystem-linux-32
ADDED
Binary file
|
data/app/mystem-linux-64
ADDED
Binary file
|
data/app/mystem-mac
ADDED
Binary file
|
data/app/mystem-win.exe
ADDED
Binary file
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'open3'
require 'pathname'
require 'yandex_mystem/version'
|
3
|
+
|
4
|
+
module YandexMystem
  # Wrapper around the `mystem` executables bundled in this gem's `app/`
  # directory. Text is piped to the binary and its output is turned into a
  # Hash that maps each word to its candidate base forms.
  class Base
    # Arguments handed to mystem on every run. The parser below depends on
    # UTF-8 output with one `word{...}` entry per line.
    # NOTE(review): flag meanings (-e = encoding, -n = one entry per line) are
    # assumed from mystem's usage text - confirm against the bundled binaries.
    MYSTEM_ARGS = %w(-e utf-8 -n).freeze

    # One line of mystem output: `word{form1|form2|...}`. The optional `\r`
    # tolerates CRLF line endings.
    ENTRY_PATTERN = /^([^\{]+)\{(.+)\}\r?$/

    # A candidate that mystem marked with a trailing `??`; such candidates are
    # dropped from the result.
    UNKNOWN_FORM = /.+\?\?$/

    # TODO add -i

    # Runs mystem over +text+ and collects the base forms of every word.
    #
    # @param text [String, nil] text to analyse
    # @return [Hash{String => Array<String>}] word => base forms; the array is
    #   empty when every candidate was marked with `??`. When a word occurs
    #   several times the last occurrence wins.
    # @raise [RuntimeError] when no bundled binary exists for this platform
    def self.stem(text)
      input = text.to_s
      # Nothing to analyse - skip spawning a process altogether.
      return {} if input.empty?

      # The program and its arguments are passed as separate strings, so no
      # shell is involved and an install path containing spaces still works.
      # capture3 feeds stdin while draining stdout and stderr concurrently,
      # which avoids a pipe deadlock on large inputs. As before, stderr and
      # the exit status are ignored.
      output, _errors, _status =
        Open3.capture3(command.to_s, *MYSTEM_ARGS, :stdin_data => input)

      parse(output)
    end

    # Absolute location of the mystem binary matching the current platform.
    #
    # NOTE: the bare `private` that used to precede this method had no effect
    # on a `def self.` method, so it has always been publicly callable; that is
    # kept for backward compatibility. Treat it as an internal helper.
    #
    # @return [Pathname]
    # @raise [RuntimeError] when the platform is unsupported
    def self.command
      Pathname.new(File.expand_path("../../app/mystem-#{binary_suffix}", __FILE__))
    end

    # Converts raw mystem output into the Hash returned by +stem+.
    def self.parse(output)
      # UTF-8 output was requested from mystem, so tag the bytes accordingly
      # instead of relying on the process locale.
      listing = output.dup.force_encoding(Encoding::UTF_8)

      pairs = listing.scan(ENTRY_PATTERN).map do |word, forms|
        [word, forms.split('|').reject { |form| form =~ UNKNOWN_FORM }]
      end

      # Hash[pairs] rather than Hash[*flat]: splatting one argument per word
      # can overflow the stack on large texts.
      Hash[pairs]
    end

    # Chooses the file-name suffix of the bundled binary for +platform+.
    # Branch order matters: the Windows check must come first.
    def self.binary_suffix(platform = RUBY_PLATFORM)
      case platform
      when /(win|w)32$|mingw|mswin/
        'win.exe'
      when /(32|i[3-6]86).+linux/
        # 32-bit Linux rubies report e.g. "i686-linux", which has no "32" in it.
        'linux-32'
      when /64.+linux/
        # Not anchored at the end so that "x86_64-linux-gnu" is accepted too.
        'linux-64'
      when /darwin/
        'mac'
      when /freebsd/
        raise 'Create an issue or add pull request on a github.'
      else
        raise 'Unknown OS'
      end
    end

    private_class_method :parse, :binary_suffix
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe YandexMystem do
  # Stems one sentence and compares the base forms returned for a selection
  # of its words. A word with no accepted base form maps to an empty array.
  it "should stem words" do
    sentence = 'мальчики мальчиков девочки девочек компьютеров компьютере сов пошли elements'

    expected_forms = {
      'мальчики'  => ['мальчик'],
      'мальчиков' => ['мальчик', "мальчиков", "мальчиковый"],
      'девочки'   => ['девочка'],
      'девочек'   => ['девочка'],
      'сов'       => ['сова'],
      'пошли'     => %w(пойти посылать),
      'elements'  => []
    }

    stems = YandexMystem::Base.stem(sentence)

    expected_forms.each do |word, forms|
      stems[word].should eq forms
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/yandex_mystem/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for yandex_mystem. The file lists are taken from what git
# tracks, so the gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "yandex_mystem"
  spec.version  = YandexMystem::VERSION
  spec.authors  = ["Dmitry Polushkin"]
  spec.email    = ["dmitry.polushkin@gmail.com"]
  spec.homepage = ""

  # Descriptions shown by rubygems
  spec.summary     = %q{Yandex Mystem makes morphological analysis of a russian text}
  spec.description = %q{Mystem is a software that provided by the Yandex only for non-commercial project. With use of it you can detect base forms of the words in a text, make a simple morphological analysis of russian words.}

  # Packaged content
  spec.files         = %x(git ls-files).split("\n")
  spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
  spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Only needed to run the specs
  spec.add_development_dependency "rspec", '~> 2.8'
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yandex_mystem
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dmitry Polushkin
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &18846940 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.8'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *18846940
|
25
|
+
description: Mystem is a software that provided by the Yandex only for non-commercial
|
26
|
+
project. With use of it you can detect base forms of the words in a text, make a
|
27
|
+
simple morphological analysis of russian words.
|
28
|
+
email:
|
29
|
+
- dmitry.polushkin@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- .rspec
|
36
|
+
- .rvmrc
|
37
|
+
- Gemfile
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
40
|
+
- app/mystem-linux-32
|
41
|
+
- app/mystem-linux-64
|
42
|
+
- app/mystem-mac
|
43
|
+
- app/mystem-win.exe
|
44
|
+
- lib/yandex_mystem.rb
|
45
|
+
- lib/yandex_mystem/version.rb
|
46
|
+
- spec/spec_helper.rb
|
47
|
+
- spec/yandex_mystem_spec.rb
|
48
|
+
- yandex_mystem.gemspec
|
49
|
+
homepage: ''
|
50
|
+
licenses: []
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.8.10
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: Yandex Mystem makes morphological analysis of a russian text
|
73
|
+
test_files: []
|