morphy 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/morphy.rb ADDED
@@ -0,0 +1,98 @@
1
+ require "dawg"
2
+
3
# Morphological analyzer backed by a DAWG word index plus four lookup tables
# (prefixes, suffixes, grammemes, paradigms) read from the bundled
# "dictionary" directory that sits next to this file.
class Morphy
  # Directory containing the dictionary files shipped with the library.
  DICTIONARY_DIR = File.join(File.dirname(__FILE__), "dictionary")

  # One word form, located by a paradigm id and a form index inside it.
  #
  # A paradigm is read as a flat array of triples; for form +i+ the entries
  # at i*3, i*3+1 and i*3+2 are the prefix id, suffix id and grammeme id.
  # The triple at index 0 is the one used to build the normal form.
  class Word
    attr_accessor :para_id

    # word    - the surface string of this form.
    # para_id - paradigm id (anything responding to to_i).
    # index   - index of this form inside the paradigm (responds to to_i).
    def initialize(word, para_id, index)
      @word = word
      @para_id = para_id.to_i
      @index = index.to_i
      offset = @index * 3
      @prefix_id = paradigm[offset]
      @suffix_id = paradigm[offset + 1]
      @grammeme_id = paradigm[offset + 2]
    end

    # Returns the surface string.
    def to_s
      @word
    end

    # Returns the stem wrapped in the prefix and suffix of the paradigm's
    # first form.
    def normal_form
      prefix = Morphy.prefixes[paradigm[0]]
      suffix = Morphy.suffixes[paradigm[1]]

      "#{prefix}#{stem}#{suffix}"
    end

    # Returns the grammeme list stored for this form.
    def grammemes
      Morphy.grammemes[@grammeme_id]
    end

    # Returns the word with this form's prefix removed from the front and
    # its suffix removed from the end.
    #
    # Only one leading and one trailing occurrence are stripped. Removing
    # every occurrence (as a plain gsub would) damages words in which the
    # affix text also appears inside the stem.
    def stem
      prefix = Morphy.prefixes[@prefix_id].to_s
      suffix = Morphy.suffixes[@suffix_id].to_s

      base = @word.dup
      if !prefix.empty? && base.start_with?(prefix)
        base = base[prefix.length..-1]
      end
      if !suffix.empty? && base.end_with?(suffix)
        base = base[0, base.length - suffix.length]
      end
      base
    end

    # True when +other+ belongs to the same paradigm as this word.
    def same_paradigm?(other)
      @para_id == other.para_id
    end

    # Returns this form's grammemes joined with commas.
    def tag
      Morphy.grammemes[@grammeme_id].join(",")
    end

    # Returns an Array with one Word per form of this word's paradigm, each
    # built from the shared stem and that form's prefix and suffix.
    def lexemme
      base = stem # identical for every form, so compute it once
      table = paradigm

      (0...(table.length / 3)).map do |form|
        prefix = Morphy.prefixes[table[form * 3]]
        suffix = Morphy.suffixes[table[form * 3 + 1]]
        Word.new("#{prefix}#{base}#{suffix}", @para_id, form)
      end
    end

    # Returns the first form of the lexeme whose trailing grammemes equal
    # +grammemes+ (an Array), or nil when no form matches.
    def inflect(grammemes)
      wanted = grammemes.length
      lexemme.find { |form| form.grammemes.last(wanted) == grammemes }
    end

    private

    # The paradigm table entry for this word.
    def paradigm
      Morphy.paradigms[@para_id]
    end
  end

  # Loads the DAWG index and makes sure the shared tables are in memory.
  def initialize
    # NOTE(review): the DAWG is loaded again for every Morphy instance, which
    # is the likely source of the memory usage the original author noticed;
    # consider sharing it once Dawg is confirmed safe to reuse.
    @dawg = Dawg.load(File.join(DICTIONARY_DIR, "dawg.dat"))

    # Touch each table so it is loaded eagerly, as before.
    Morphy.suffixes
    Morphy.prefixes
    Morphy.grammemes
    Morphy.paradigms
  end

  # Paradigm table, loaded on first use and shared by all instances.
  def self.paradigms
    # Marshal.load is acceptable only because this file ships with the
    # library; never point it at untrusted data. Read as binary so the bytes
    # are not altered by newline or encoding conversion.
    @@paradigms ||= Marshal.load(File.binread(File.join(DICTIONARY_DIR, "paradigms.dat")))
  end

  # Prefix strings indexed by prefix id, loaded on first use.
  def self.prefixes
    @@prefixes ||= read_lines("prefixes.txt")
  end

  # Suffix strings indexed by suffix id, loaded on first use.
  def self.suffixes
    @@suffixes ||= read_lines("suffixes.txt")
  end

  # Grammeme lists (Arrays of Strings) indexed by grammeme id, loaded on
  # first use. Each line of the file is split on commas.
  def self.grammemes
    @@grammemes ||= read_lines("grammemes.txt").map { |line| line.split(",") }
  end

  # Reads a dictionary text file into an Array with one entry per line.
  # File.readlines closes the file and, unlike split("\n"), keeps trailing
  # empty lines, so an empty affix at the end of a table keeps its index.
  def self.read_lines(name)
    File.readlines(File.join(DICTIONARY_DIR, name)).map { |line| line.chomp }
  end
  private_class_method :read_lines

  # Looks +word+ up in the DAWG and returns an Array of Word objects.
  # Each DAWG result is a space-separated "word para_id index" string.
  def find_similar(word)
    @dawg.find_similar(word).map do |entry|
      form, para_id, index = entry.split(" ")
      Word.new(form, para_id, index)
    end
  end

  def to_s
    "Morphy"
  end
end
data/morphy.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # coding: utf-8
2
+
3
# Gem specification for morphy.
Gem::Specification.new do |spec|
  spec.name          = "morphy"
  spec.version       = "0.0.2"
  spec.date          = '2015-05-29'
  spec.authors       = ["Maksatbek Mansurov"]
  spec.email         = ["maksat.mansurov@gmail.com"]
  spec.description   = %q{Morphological analyzer (POS tagger + inflection engine) for Russian language in ruby. Inspired by pymorphy2}
  spec.summary       = %q{Morphological analyzer for Russian language in ruby. Inspired by pymorphy2}
  spec.homepage      = "https://github.com/baltavay/morphy"
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/morphy.rb does `require "dawg"` at load time, so dawg has to be a
  # runtime dependency; as a development dependency it is not installed for
  # users of the gem and requiring morphy fails with LoadError.
  spec.add_runtime_dependency 'dawg', '~> 0.0', '>= 0.0.2'
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: morphy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Maksatbek Mansurov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dawg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.2
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '0.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.0.2
33
+ description: Morphological analyzer (POS tagger + inflection engine) for Russian language
34
+ in ruby. Inspired by pymorphy2
35
+ email:
36
+ - maksat.mansurov@gmail.com
37
+ executables: []
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".gitignore"
42
+ - Gemfile
43
+ - LICENSE
44
+ - README.md
45
+ - lib/dictionary/dawg.dat
46
+ - lib/dictionary/grammemes.txt
47
+ - lib/dictionary/paradigms.dat
48
+ - lib/dictionary/prefixes.txt
49
+ - lib/dictionary/suffixes.txt
50
+ - lib/morphy.rb
51
+ - morphy.gemspec
52
+ homepage: https://github.com/baltavay/morphy
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 2.2.2
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: Morphological analyzer for Russian language in ruby. Inspired by pymorphy2
76
+ test_files: []