morphy 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +2 -0
- data/lib/dictionary/dawg.dat +0 -0
- data/lib/dictionary/grammemes.txt +5008 -0
- data/lib/dictionary/paradigms.dat +0 -0
- data/lib/dictionary/prefixes.txt +509 -0
- data/lib/dictionary/suffixes.txt +4979 -0
- data/lib/morphy.rb +98 -0
- data/morphy.gemspec +19 -0
- metadata +76 -0
data/lib/morphy.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
require "dawg"
|
2
|
+
|
3
|
+
# Morphological analyzer: looks words up in a DAWG and describes each hit
# through the dictionary tables (prefixes, suffixes, grammemes, paradigms)
# that ship next to this file.
class Morphy
  # Directory that holds the bundled dictionary files.
  DICTIONARY_PATH = File.join(File.dirname(__FILE__), "dictionary")

  # One word form, tied to a paradigm and to a position inside that paradigm.
  #
  # A paradigm is read as a flat list of triples
  # (prefix id, suffix id, grammeme id); form number +index+ occupies
  # positions index*3 .. index*3+2. The first triple is the one used to
  # build the normal form.
  class Word
    attr_accessor :para_id

    # @param word [String] the surface form
    # @param para_id [#to_i] index into Morphy.paradigms
    # @param index [#to_i] number of this form inside the paradigm
    def initialize(word, para_id, index)
      @word = word
      @para_id = para_id.to_i
      @index = index.to_i
      @prefix_id = paradigm[@index * 3]
      @suffix_id = paradigm[@index * 3 + 1]
      @grammeme_id = paradigm[@index * 3 + 2]
    end

    # @return [String] the surface form as given to the constructor
    def to_s
      @word
    end

    # Rebuilds the word using the affixes of the first form of the paradigm.
    #
    # @return [String]
    def normal_form
      prefix = Morphy.prefixes[paradigm[0]]
      suffix = Morphy.suffixes[paradigm[1]]

      "#{prefix}#{stem}#{suffix}"
    end

    # @return [Array<String>] grammemes of this particular form
    def grammemes
      Morphy.grammemes[@grammeme_id]
    end

    # The word with this form's own prefix and suffix removed.
    #
    # Only a leading prefix and a trailing suffix are stripped. The previous
    # implementation used gsub!, which removed every occurrence of the affix
    # anywhere in the word (suffix "a" turned "banana" into "bnn").
    #
    # @return [String] a new string; the stored word is never mutated
    def stem
      prefix = Morphy.prefixes[@prefix_id]
      suffix = Morphy.suffixes[@suffix_id]

      base = @word.dup
      base = base[prefix.length..-1] if base.start_with?(prefix)
      base = base[0, base.length - suffix.length] if base.end_with?(suffix)
      base
    end

    # @param other [Word]
    # @return [Boolean] true when both words reference the same paradigm id
    def same_paradigm?(other)
      @para_id == other.para_id
    end

    # @return [String] the grammemes of this form joined with commas
    def tag
      grammemes.join(",")
    end

    # Every form of the paradigm, rebuilt around this word's stem.
    #
    # @return [Array<Word>] one Word per triple in the paradigm
    def lexemme
      base = stem # identical for every form, so compute it once
      (0...(paradigm.length / 3)).map do |index|
        prefix = Morphy.prefixes[paradigm[index * 3]]
        suffix = Morphy.suffixes[paradigm[index * 3 + 1]]
        Word.new(prefix + base + suffix, @para_id, index)
      end
    end

    # Finds the first form of the lexeme whose trailing grammemes equal the
    # requested ones.
    #
    # @param grammemes [Array<String>] grammemes the form must end with
    # @return [Word, nil] the matching form, or nil when none matches
    def inflect(grammemes)
      lexemme.find { |form| form.grammemes.last(grammemes.length) == grammemes }
    end

    private

    # The paradigm entry this word belongs to.
    def paradigm
      Morphy.paradigms[@para_id]
    end
  end

  # Loads the DAWG for this instance and makes sure the shared dictionary
  # tables are in memory.
  def initialize
    # Original author's open question: why is the DAWG eating so much memory?
    @dawg = Dawg.load(File.join(DICTIONARY_PATH, "dawg.dat"))

    Morphy.suffixes
    Morphy.prefixes
    Morphy.grammemes
    Morphy.paradigms
  end

  # The tables below are shared by all instances and are read from disk on
  # first access, so Word also works before any Morphy instance exists.

  # @return [Object] paradigm table unmarshalled from paradigms.dat
  def self.paradigms
    # Marshal.load must only ever see trusted data; this file ships with the gem.
    # binread keeps the marshalled bytes intact (no text-mode translation).
    @@paradigms ||= Marshal.load(File.binread(File.join(DICTIONARY_PATH, "paradigms.dat")))
  end

  # @return [Array<String>] one prefix per line of prefixes.txt
  def self.prefixes
    @@prefixes ||= File.read(File.join(DICTIONARY_PATH, "prefixes.txt")).split("\n")
  end

  # @return [Array<String>] one suffix per line of suffixes.txt
  def self.suffixes
    @@suffixes ||= File.read(File.join(DICTIONARY_PATH, "suffixes.txt")).split("\n")
  end

  # @return [Array<Array<String>>] one comma-split grammeme list per line of grammemes.txt
  def self.grammemes
    @@grammemes ||= File.read(File.join(DICTIONARY_PATH, "grammemes.txt"))
                        .split("\n")
                        .map { |line| line.split(",") }
  end

  # Looks the word up in the DAWG and wraps every hit in a Word.
  #
  # Each DAWG result is split on spaces into the word, its paradigm id and
  # its form index.
  #
  # @param word [String]
  # @return [Array<Word>]
  def find_similar(word)
    @dawg.find_similar(word).map do |result|
      form, para_id, index = result.split(" ")
      Word.new(form, para_id, index)
    end
  end

  # @return [String]
  def to_s
    "Morphy"
  end
end
|
data/morphy.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for morphy.
Gem::Specification.new do |spec|
  spec.name          = "morphy"
  spec.version       = "0.0.2"
  spec.date          = '2015-05-29'
  spec.authors       = ["Maksatbek Mansurov"]
  spec.email         = ["maksat.mansurov@gmail.com"]
  spec.description   = %q{Morphological analyzer (POS tagger + inflection engine) for Russian language in ruby. Inspired by pymorphy2}
  spec.summary       = %q{Morphological analyzer for Russian language in ruby. Inspired by pymorphy2}
  spec.homepage      = "https://github.com/baltavay/morphy"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks at build time.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/morphy.rb does `require "dawg"` at load time, so dawg has to be
  # installed together with this gem: a runtime dependency, not a
  # development-only one.
  spec.add_runtime_dependency 'dawg', '~> 0.0', '>= 0.0.2'
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: morphy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Maksatbek Mansurov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-29 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: dawg
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.0'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.0.2
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0.0'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.0.2
|
33
|
+
description: Morphological analyzer (POS tagger + inflection engine) for Russian language
|
34
|
+
in ruby. Inspired by pymorphy2
|
35
|
+
email:
|
36
|
+
- maksat.mansurov@gmail.com
|
37
|
+
executables: []
|
38
|
+
extensions: []
|
39
|
+
extra_rdoc_files: []
|
40
|
+
files:
|
41
|
+
- ".gitignore"
|
42
|
+
- Gemfile
|
43
|
+
- LICENSE
|
44
|
+
- README.md
|
45
|
+
- lib/dictionary/dawg.dat
|
46
|
+
- lib/dictionary/grammemes.txt
|
47
|
+
- lib/dictionary/paradigms.dat
|
48
|
+
- lib/dictionary/prefixes.txt
|
49
|
+
- lib/dictionary/suffixes.txt
|
50
|
+
- lib/morphy.rb
|
51
|
+
- morphy.gemspec
|
52
|
+
homepage: https://github.com/baltavay/morphy
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 2.2.2
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Morphological analyzer for Russian language in ruby. Inspired by pymorphy2
|
76
|
+
test_files: []
|