morphy 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/morphy.rb ADDED
@@ -0,0 +1,98 @@
1
+ require "dawg"
2
+
3
# Morphological analyzer backed by a DAWG word index and a set of lookup
# tables (paradigms, prefixes, suffixes, grammemes) stored in the "dictionary"
# directory next to this file.
#
# The lookup tables are shared by all instances and by Morphy::Word. They are
# loaded once, the first time one of the class-level readers is called.
class Morphy
  # Directory that holds dawg.dat, paradigms.dat and the *.txt tables.
  DICTIONARY_DIR = File.join(File.dirname(__FILE__), "dictionary")

  # One word form, identified by its paradigm and its position inside it.
  #
  # A paradigm is indexed as consecutive triples, one per word form:
  # prefix id, suffix id, grammeme id. The first triple supplies the affixes
  # used by #normal_form.
  class Word
    attr_accessor :para_id

    # word    - the surface form.
    # para_id - paradigm id (anything responding to to_i).
    # index   - position of this form inside the paradigm (responds to to_i).
    def initialize(word, para_id, index)
      @word = word
      @para_id = para_id.to_i
      @index = index.to_i
      offset = @index * 3
      @prefix_id = paradigm[offset]
      @suffix_id = paradigm[offset + 1]
      @grammeme_id = paradigm[offset + 2]
    end

    # Returns the surface form this object was built with.
    def to_s
      @word
    end

    # Returns the stem wrapped in the affixes of the paradigm's first form.
    def normal_form
      prefix = Morphy.prefixes[paradigm[0]]
      suffix = Morphy.suffixes[paradigm[1]]

      "#{prefix}#{stem}#{suffix}"
    end

    # Returns the grammeme list (array of strings) for this form.
    def grammemes
      Morphy.grammemes[@grammeme_id]
    end

    # Returns a new string: the word without this form's prefix and suffix.
    #
    # The prefix is removed only from the start and the suffix only from the
    # end. Removing every occurrence (as a global substitution would) mangles
    # words whose stem happens to contain the affix, e.g. "banana" with
    # suffix "a" must give "banan", not "bnn".
    def stem
      # to_s: a missing table entry is treated as an empty affix.
      prefix = Morphy.prefixes[@prefix_id].to_s
      suffix = Morphy.suffixes[@suffix_id].to_s

      start = @word.start_with?(prefix) ? prefix.length : 0
      rest = @word[start..-1]
      rest = rest[0, rest.length - suffix.length] if rest.end_with?(suffix)
      rest
    end

    # True when +other+ belongs to the same paradigm as this word.
    def same_paradigm?(other)
      @para_id == other.para_id
    end

    # Returns the grammemes joined with commas.
    def tag
      grammemes.join(",")
    end

    # Returns every form of this word's paradigm as an array of Word objects,
    # in paradigm order.
    def lexemme
      base = stem # loop-invariant, compute once
      form_count = paradigm.length / 3

      (0...form_count).map do |index|
        prefix = Morphy.prefixes[paradigm[index * 3]]
        suffix = Morphy.suffixes[paradigm[index * 3 + 1]]
        Word.new("#{prefix}#{base}#{suffix}", @para_id, index)
      end
    end

    # Returns the first form of the paradigm whose grammeme list ends with
    # +grammemes+ (an array of strings), or nil when no form matches.
    def inflect(grammemes)
      lexemme.find { |form| form.grammemes.last(grammemes.length) == grammemes }
    end

    private

    # The paradigm table entry this word belongs to.
    def paradigm
      Morphy.paradigms[@para_id]
    end
  end

  # Loads the DAWG index for this instance and makes sure the shared lookup
  # tables are in memory.
  def initialize
    @dawg = Dawg.load(File.join(DICTIONARY_DIR, "dawg.dat")) # TODO: why it's eating so much memory?
    Morphy.suffixes
    Morphy.prefixes
    Morphy.grammemes
    Morphy.paradigms
  end

  # Paradigm table, loaded on first use.
  def self.paradigms
    # Marshal.load must only ever see trusted data; this file ships with the
    # library. Marshal output is binary, hence binread.
    @@paradigms ||= Marshal.load(File.binread(File.join(DICTIONARY_DIR, "paradigms.dat")))
  end

  # Prefix strings, one per line of prefixes.txt, loaded on first use.
  def self.prefixes
    @@prefixes ||= read_lines("prefixes.txt")
  end

  # Suffix strings, one per line of suffixes.txt, loaded on first use.
  def self.suffixes
    @@suffixes ||= read_lines("suffixes.txt")
  end

  # Grammeme lists: each line of grammemes.txt split on commas.
  def self.grammemes
    @@grammemes ||= read_lines("grammemes.txt").map { |line| line.split(",") }
  end

  # Reads a dictionary text file and returns its lines without newlines.
  # NOTE(review): read with the default external encoding; presumably the
  # tables are UTF-8 - confirm.
  def self.read_lines(name)
    File.read(File.join(DICTIONARY_DIR, name)).split("\n")
  end
  private_class_method :read_lines

  # Looks +word+ up in the DAWG and wraps every hit in a Word.
  #
  # Each DAWG result is expected to be a space-separated
  # "word para_id index" string.
  def find_similar(word)
    @dawg.find_similar(word).map do |entry|
      form, para_id, index = entry.split(" ")
      Word.new(form, para_id, index)
    end
  end

  def to_s
    "Morphy"
  end
end
data/morphy.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # coding: utf-8
2
+
3
# Gem specification for morphy.
Gem::Specification.new do |spec|
  spec.name          = "morphy"
  spec.version       = "0.0.2"
  spec.date          = '2015-05-29'
  spec.authors       = ["Maksatbek Mansurov"]
  spec.email         = ["maksat.mansurov@gmail.com"]
  spec.description   = %q{Morphological analyzer (POS tagger + inflection engine) for Russian language in ruby. Inspired by pymorphy2}
  spec.summary       = %q{Morphological analyzer for Russian language in ruby. Inspired by pymorphy2}
  spec.homepage      = "https://github.com/baltavay/morphy"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks at build time.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/morphy.rb does `require "dawg"` at load time, so dawg has to be
  # installed together with this gem: declare it as a runtime dependency,
  # not a development-only one.
  spec.add_runtime_dependency 'dawg', '~> 0.0', '>= 0.0.2'
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: morphy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Maksatbek Mansurov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dawg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.0.2
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '0.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.0.2
33
+ description: Morphological analyzer (POS tagger + inflection engine) for Russian language
34
+ in ruby. Inspired by pymorphy2
35
+ email:
36
+ - maksat.mansurov@gmail.com
37
+ executables: []
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".gitignore"
42
+ - Gemfile
43
+ - LICENSE
44
+ - README.md
45
+ - lib/dictionary/dawg.dat
46
+ - lib/dictionary/grammemes.txt
47
+ - lib/dictionary/paradigms.dat
48
+ - lib/dictionary/prefixes.txt
49
+ - lib/dictionary/suffixes.txt
50
+ - lib/morphy.rb
51
+ - morphy.gemspec
52
+ homepage: https://github.com/baltavay/morphy
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 2.2.2
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: Morphological analyzer for Russian language in ruby. Inspired by pymorphy2
76
+ test_files: []