yandex_mystem 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/README.md +26 -0
- data/Rakefile +6 -0
- data/app/mystem-linux-32 +0 -0
- data/app/mystem-linux-64 +0 -0
- data/app/mystem-mac +0 -0
- data/app/mystem-win.exe +0 -0
- data/lib/yandex_mystem.rb +51 -0
- data/lib/yandex_mystem/version.rb +3 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/yandex_mystem_spec.rb +15 -0
- data/yandex_mystem.gemspec +19 -0
- metadata +73 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use 1.9.3 --create
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Yandex Mystem
|
2
|
+
|
3
|
+
## Introduction
|
4
|
+
|
5
|
+
Mystem is software provided by Yandex for non-commercial projects only. With it you can detect the base forms of the words in a text and perform a simple morphological analysis of Russian words.
|
6
|
+
|
7
|
+
## License
|
8
|
+
|
9
|
+
First of all, read the license at http://company.yandex.ru/technologies/mystem/
|
10
|
+
|
11
|
+
`Mystem` is available only for non-commercial use.
|
12
|
+
|
13
|
+
## OS
|
14
|
+
|
15
|
+
This gem contains executables for these platforms:
|
16
|
+
|
17
|
+
* Windows
|
18
|
+
* Linux 2.6 32-bit
|
19
|
+
* Linux 2.6 64-bit
|
20
|
+
* Mac OS X 10.5
|
21
|
+
|
22
|
+
...out of six. FreeBSD is not included in the gem. If you need it, open a pull request or an issue.
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
YandexMystem::Base.stem 'О предложении в котором много слов.'
|
data/Rakefile
ADDED
data/app/mystem-linux-32
ADDED
Binary file
|
data/app/mystem-linux-64
ADDED
Binary file
|
data/app/mystem-mac
ADDED
Binary file
|
data/app/mystem-win.exe
ADDED
Binary file
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'open3'
require 'pathname'
require 'yandex_mystem/version'
|
3
|
+
|
4
|
+
module YandexMystem
  # Runs the mystem executable shipped in the gem's app/ directory and turns
  # its output into a Hash that maps each word to its candidate base forms.
  class Base
    # Switches passed to mystem on every run.
    ARGUMENTS = %w[-e utf-8 -n].freeze

    # One output line of the shape `word{form|form|...}`.
    ENTRY = /^([^\{]+)\{(.+)\}$/

    # Candidate forms ending in `??`; these are dropped from the result.
    UNCERTAIN = /.+\?\?$/

    # TODO: add -i
    #
    # Feeds +text+ to mystem and collects the base forms it reports.
    #
    # @param text [#to_s] text to analyse
    # @return [Hash{String => Array<String>}] word => base forms; the array is
    #   empty when every candidate for the word ended in `??`
    # @raise [RuntimeError] when no executable is bundled for this platform
    def self.stem(text)
      parse_output(run_mystem(text.to_s))
    end

    # Location of the mystem executable for the running platform.
    #
    # The original bare `private` had no effect on this singleton method, so
    # it has always been callable from outside; it stays public so existing
    # callers keep working.
    #
    # @return [Pathname] absolute path inside the gem's app/ directory
    # @raise [RuntimeError] on FreeBSD and on unrecognised platforms
    def self.command
      postfix = binary_postfix(RUBY_PLATFORM)
      # Expanded against this file so the path stays valid after Dir.chdir.
      Pathname.new(File.expand_path('../../app', __FILE__)) + "mystem-#{postfix}"
    end

    # Runs mystem with +input+ on stdin and returns what it wrote to stdout.
    def self.run_mystem(input)
      # Argument-list form: no shell is involved, so an install path containing
      # spaces works. capture3 feeds stdin and drains stdout/stderr at the same
      # time, which avoids blocking on a full pipe; stderr is discarded.
      output, = Open3.capture3(command.to_s, *ARGUMENTS, :stdin_data => input)
      # mystem is asked for UTF-8 (`-e utf-8`), so tag the bytes accordingly
      # instead of relying on the locale's default external encoding.
      output.force_encoding(Encoding::UTF_8)
    end

    # Converts raw mystem output into the word => base forms Hash.
    # When a word occurs more than once, the last occurrence wins.
    def self.parse_output(output)
      output.scan(ENTRY).each_with_object({}) do |(word, forms), result|
        result[word] = forms.split('|').reject { |form| form =~ UNCERTAIN }
      end
    end

    # Maps a RUBY_PLATFORM-style string to the suffix of the bundled binary.
    def self.binary_postfix(platform)
      case platform
      when /mswin|mingw|(win|w)32$/
        'win.exe'
      when /(i[3-6]86|32).*linux/
        # 32-bit Linux reports itself as e.g. "i686-linux", without "32".
        'linux-32'
      when /64.+linux/
        # Unanchored so that "x86_64-linux-gnu" is accepted too.
        'linux-64'
      when /darwin/
        'mac'
      when /freebsd/
        raise 'Create an issue or add pull request on a github.'
      else
        raise 'Unknown OS'
      end
    end

    private_class_method :run_mystem, :parse_output, :binary_postfix
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe YandexMystem do
  it "should stem words" do
    sentence = 'мальчики мальчиков девочки девочек компьютеров компьютере сов пошли elements'

    # Base forms expected for each checked word, in the order they are verified.
    expected_forms = {
      'мальчики'  => ['мальчик'],
      'мальчиков' => ['мальчик', "мальчиков", "мальчиковый"],
      'девочки'   => ['девочка'],
      'девочек'   => ['девочка'],
      'сов'       => ['сова'],
      'пошли'     => %w(пойти посылать),
      'elements'  => []
    }

    stems = YandexMystem::Base.stem(sentence)

    expected_forms.each do |word, forms|
      stems[word].should eq forms
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/yandex_mystem/version', __FILE__)
|
3
|
+
|
4
|
+
# Packaging manifest for the yandex_mystem gem.
Gem::Specification.new do |spec|
  # Identity
  spec.name          = "yandex_mystem"
  spec.version       = YandexMystem::VERSION
  spec.authors       = ["Dmitry Polushkin"]
  spec.email         = ["dmitry.polushkin@gmail.com"]
  spec.homepage      = ""
  spec.summary       = %q{Yandex Mystem makes morphological analysis of a russian text}
  spec.description   = %q{Mystem is a software that provided by the Yandex only for non-commercial project. With use of it you can detect base forms of the words in a text, make a simple morphological analysis of russian words.}

  # File lists are produced by shelling out to `git ls-files`.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies
  spec.add_development_dependency "rspec", '~> 2.8'
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yandex_mystem
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Dmitry Polushkin
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-01-09 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &18846940 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.8'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *18846940
|
25
|
+
description: Mystem is a software that provided by the Yandex only for non-commercial
|
26
|
+
project. With use of it you can detect base forms of the words in a text, make a
|
27
|
+
simple morphological analysis of russian words.
|
28
|
+
email:
|
29
|
+
- dmitry.polushkin@gmail.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- .gitignore
|
35
|
+
- .rspec
|
36
|
+
- .rvmrc
|
37
|
+
- Gemfile
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
40
|
+
- app/mystem-linux-32
|
41
|
+
- app/mystem-linux-64
|
42
|
+
- app/mystem-mac
|
43
|
+
- app/mystem-win.exe
|
44
|
+
- lib/yandex_mystem.rb
|
45
|
+
- lib/yandex_mystem/version.rb
|
46
|
+
- spec/spec_helper.rb
|
47
|
+
- spec/yandex_mystem_spec.rb
|
48
|
+
- yandex_mystem.gemspec
|
49
|
+
homepage: ''
|
50
|
+
licenses: []
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options: []
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ! '>='
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.8.10
|
70
|
+
signing_key:
|
71
|
+
specification_version: 3
|
72
|
+
summary: Yandex Mystem makes morphological analysis of a russian text
|
73
|
+
test_files: []
|