morphy 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/LICENSE +0 -0
- data/README.md +8 -6
- data/lib/dictionary/{dawg.dat → dawg.bin} +0 -0
- data/lib/dictionary/grammemes.txt +0 -0
- data/lib/dictionary/paradigms.dat +0 -0
- data/lib/dictionary/prefixes.txt +0 -0
- data/lib/dictionary/suffixes.txt +0 -0
- data/lib/morphy.rb +37 -82
- data/lib/word.rb +61 -0
- data/morphy.gemspec +4 -4
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcbdc7a59f41e79c1cf9ac95ecec4887176c5024
|
4
|
+
data.tar.gz: 4277e263c6d63ba16540b196c0207d7ec98fadcd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0fddd3b4910808a4160474e9214ad09138a745fd0e95e61fe774017bfa782c09c8ac6f4cf411357e76b514338f2bf78154d4ce0f4fe02cd30ef0f33c19a1a78
|
7
|
+
data.tar.gz: c58a7f67a75eebd7223458927d79f678d0f25e360f9745287fbe5901f1e8ab9c27e3a2f49742e1a29806f3ab00ffa17dd7fdaece354e9d2337d0b4cb0cdc7960
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
@@ -15,19 +15,20 @@ Or install it yourself as:
|
|
15
15
|
|
16
16
|
$ gem install morphy
|
17
17
|
|
18
|
-
## Usage
|
19
|
-
# Words must be added in alphabetical order
|
18
|
+
## Usage
|
20
19
|
require "morphy"
|
21
|
-
|
20
|
+
|
22
21
|
morphy = Morphy.new
|
23
|
-
|
24
|
-
word = morphy.
|
22
|
+
|
23
|
+
word = morphy.query("облако").first
|
25
24
|
|
26
25
|
datv = word.inflect(["datv"])
|
27
26
|
datv.to_s
|
28
27
|
=> облаку
|
29
28
|
datv.inflect(["nomn"]).to_s
|
30
29
|
=> облако
|
30
|
+
datv.inflect(["nomn", "plur"]).to_s
|
31
|
+
=> облака
|
31
32
|
datv.lexemme.map(&:to_s)
|
32
33
|
=> ["облако", "облака", "облаку", "облако", "облаком", "облаке", "облака", "облаков", "облакам", "облака", "облаками", "облаках"]
|
33
34
|
datv.normal_form
|
@@ -35,6 +36,8 @@ Or install it yourself as:
|
|
35
36
|
|
36
37
|
|
37
38
|
|
39
|
+
|
40
|
+
|
38
41
|
## Contributing
|
39
42
|
|
40
43
|
1. Fork it
|
@@ -42,4 +45,3 @@ Or install it yourself as:
|
|
42
45
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
46
|
4. Push to the branch (`git push origin my-new-feature`)
|
44
47
|
5. Create new Pull Request
|
45
|
-
|
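data/lib/dictionary/{dawg.dat → dawg.bin}
CHANGED
Binary file
|
data/lib/dictionary/grammemes.txt
CHANGED
File without changes
|
data/lib/dictionary/paradigms.dat
CHANGED
File without changes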
|
data/lib/dictionary/prefixes.txt
CHANGED
File without changes
|
data/lib/dictionary/suffixes.txt
CHANGED
File without changes
|
data/lib/morphy.rb
CHANGED
@@ -1,98 +1,53 @@
|
|
1
1
|
require "dawg"
|
2
|
+
require_relative "word"
|
2
3
|
|
3
|
-
|
4
|
-
class Word
|
5
|
-
attr_accessor :para_id
|
6
|
-
|
7
|
-
def initialize(word,para_id,index)
|
8
|
-
@word = word
|
9
|
-
@para_id = para_id.to_i
|
10
|
-
@index = index.to_i
|
11
|
-
@prefix_id = Morphy.paradigms[@para_id][@index*3]
|
12
|
-
@suffix_id = Morphy.paradigms[@para_id][@index*3+1]
|
13
|
-
@grammeme_id = Morphy.paradigms[@para_id][@index*3+2]
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
def to_s
|
18
|
-
@word
|
19
|
-
end
|
20
|
-
|
21
|
-
def normal_form
|
22
|
-
prefix = Morphy.prefixes[Morphy.paradigms[@para_id][0]]
|
23
|
-
suffix = Morphy.suffixes[Morphy.paradigms[@para_id][1]]
|
24
|
-
|
25
|
-
"#{prefix}#{stem}#{suffix}"
|
26
|
-
end
|
27
|
-
def grammemes
|
28
|
-
Morphy.grammemes[@grammeme_id]
|
29
|
-
end
|
30
|
-
def stem
|
31
|
-
prefix = Morphy.prefixes[Morphy.paradigms[@para_id][0]]
|
32
|
-
suffix = Morphy.suffixes[Morphy.paradigms[@para_id][1]]
|
33
|
-
grammeme = Morphy.grammemes[Morphy.paradigms[@para_id][2]]
|
34
|
-
word = @word.dup
|
35
|
-
word.sub!(Morphy.prefixes[@prefix_id],"")
|
36
|
-
word = word.reverse.sub(Morphy.suffixes[@suffix_id],"").reverse
|
37
|
-
word
|
38
|
-
end
|
4
|
+
module Morphy
|
39
5
|
|
40
|
-
|
41
|
-
@para_id == other.para_id
|
42
|
-
end
|
6
|
+
class Morphy
|
43
7
|
|
44
|
-
def
|
45
|
-
|
8
|
+
def initialize
|
9
|
+
@dawg = Dawg.load("#{::Morphy.path}/dawg.bin")
|
46
10
|
end
|
47
11
|
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
Word.new(
|
54
|
-
end
|
12
|
+
def query(word)
|
13
|
+
results = @dawg.query(word)
|
14
|
+
results = results.map do |result|
|
15
|
+
result = result.to_s
|
16
|
+
word, para_id, index = result.split(' ')
|
17
|
+
Word.new(word, para_id, index)
|
18
|
+
end
|
19
|
+
results
|
55
20
|
end
|
56
21
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
return word if word.grammemes.last(grammemes.length) == grammemes
|
61
|
-
end
|
62
|
-
nil
|
63
|
-
end
|
22
|
+
def to_s
|
23
|
+
"Morphy"
|
24
|
+
end
|
64
25
|
end
|
65
|
-
def initialize
|
66
|
-
|
67
|
-
path = File.dirname(__FILE__)+"/dictionary/"
|
68
26
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
@@paradigms ||= Marshal.load(File.read("#{path}/paradigms.dat"))
|
74
|
-
end
|
75
|
-
def self.paradigms
|
76
|
-
@@paradigms
|
27
|
+
extend self
|
28
|
+
|
29
|
+
def new
|
30
|
+
Morphy.new
|
77
31
|
end
|
78
|
-
|
79
|
-
|
32
|
+
|
33
|
+
def path
|
34
|
+
File.dirname(__FILE__)+"/dictionary"
|
80
35
|
end
|
81
|
-
|
82
|
-
|
36
|
+
|
37
|
+
def paradigms
|
38
|
+
@@paradigms ||= Marshal.load(File.read("#{path}/paradigms.dat"))
|
83
39
|
end
|
84
|
-
|
85
|
-
|
40
|
+
|
41
|
+
def prefixes
|
42
|
+
@@prefixes ||= File.open("#{path}/prefixes.txt", 'r').read.split("\n")
|
86
43
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
word,para_id,index = result.split(" ")
|
91
|
-
Word.new(word,para_id,index)
|
92
|
-
end
|
93
|
-
results
|
44
|
+
|
45
|
+
def suffixes
|
46
|
+
@@suffixes ||= File.open("#{path}/suffixes.txt", 'r').read.split("\n")
|
94
47
|
end
|
95
|
-
|
96
|
-
|
48
|
+
|
49
|
+
def grammemes
|
50
|
+
@@grammemes ||= File.open("#{path}/grammemes.txt", 'r').read.split("\n").map{|g| g.split(",")}
|
97
51
|
end
|
98
|
-
|
52
|
+
|
53
|
+
end
|
data/lib/word.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
module Morphy
|
2
|
+
class Word
|
3
|
+
attr_accessor :para_id
|
4
|
+
|
5
|
+
def initialize(word,para_id,index)
|
6
|
+
@word = word
|
7
|
+
@para_id = para_id.to_i
|
8
|
+
@index = index.to_i
|
9
|
+
@prefix_id = ::Morphy.paradigms[@para_id][@index * 3]
|
10
|
+
@suffix_id = ::Morphy.paradigms[@para_id][@index * 3 + 1]
|
11
|
+
@grammeme_id = ::Morphy.paradigms[@para_id][@index * 3 + 2]
|
12
|
+
end
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
@word
|
16
|
+
end
|
17
|
+
|
18
|
+
def normal_form
|
19
|
+
self.inflect(["nomn"])
|
20
|
+
end
|
21
|
+
|
22
|
+
def grammemes
|
23
|
+
::Morphy.grammemes[@grammeme_id]
|
24
|
+
end
|
25
|
+
|
26
|
+
def stem
|
27
|
+
prefix = ::Morphy.prefixes[::Morphy.paradigms[@para_id][0]]
|
28
|
+
suffix = ::Morphy.suffixes[::Morphy.paradigms[@para_id][1]]
|
29
|
+
grammeme = ::Morphy.grammemes[::Morphy.paradigms[@para_id][2]]
|
30
|
+
word = @word.dup
|
31
|
+
word.sub!(::Morphy.prefixes[@prefix_id], '')
|
32
|
+
word = word.reverse.sub(::Morphy.suffixes[@suffix_id].reverse, '').reverse
|
33
|
+
word
|
34
|
+
end
|
35
|
+
|
36
|
+
def same_paradigm?(other)
|
37
|
+
@para_id == other.para_id
|
38
|
+
end
|
39
|
+
|
40
|
+
def tag
|
41
|
+
::Morphy.grammemes[@grammeme_id].join(',')
|
42
|
+
end
|
43
|
+
|
44
|
+
def lexemme
|
45
|
+
(0..(::Morphy.paradigms[@para_id].length / 3) - 1).map do |index|
|
46
|
+
prefix = ::Morphy.prefixes[::Morphy.paradigms[@para_id][index * 3]]
|
47
|
+
suffix = ::Morphy.suffixes[::Morphy.paradigms[@para_id][index * 3 + 1]]
|
48
|
+
grammeme = ::Morphy.grammemes[::Morphy.paradigms[@para_id][index * 3 + 2]]
|
49
|
+
Word.new(prefix + stem + suffix, @para_id, index)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def inflect(grammemes)
|
54
|
+
words = lexemme
|
55
|
+
words.each do |word|
|
56
|
+
return word if (word.grammemes & grammemes).length == grammemes.length
|
57
|
+
end
|
58
|
+
nil
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/morphy.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "morphy"
|
5
|
-
spec.version = "0.0.3"
|
6
|
-
spec.date = '
|
5
|
+
spec.version = "0.0.4"
|
6
|
+
spec.date = '2017-03-19'
|
7
7
|
spec.authors = ["Maksatbek Mansurov"]
|
8
8
|
spec.email = ["maksat.mansurov@gmail.com"]
|
9
9
|
spec.description = %q{Morphological analyzer (POS tagger + inflection engine) for Russian language in ruby. Inspired by pymorphy2}
|
@@ -14,6 +14,6 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.files = `git ls-files`.split($/)
|
15
15
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
16
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
17
|
-
spec.require_paths = ["lib"]
|
18
|
-
spec.
|
17
|
+
spec.require_paths = ["lib"]
|
18
|
+
spec.add_runtime_dependency 'dawg', '~> 0.0', '>= 0.0.4'
|
19
19
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: morphy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maksatbek Mansurov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dawg
|
@@ -19,8 +19,8 @@ dependencies:
|
|
19
19
|
version: '0.0'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.0.
|
23
|
-
type: :
|
22
|
+
version: 0.0.4
|
23
|
+
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '0.0'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.0.
|
32
|
+
version: 0.0.4
|
33
33
|
description: Morphological analyzer (POS tagger + inflection engine) for Russian language
|
34
34
|
in ruby. Inspired by pymorphy2
|
35
35
|
email:
|
@@ -42,12 +42,13 @@ files:
|
|
42
42
|
- Gemfile
|
43
43
|
- LICENSE
|
44
44
|
- README.md
|
45
|
-
- lib/dictionary/dawg.dat
|
45
|
+
- lib/dictionary/dawg.bin
|
46
46
|
- lib/dictionary/grammemes.txt
|
47
47
|
- lib/dictionary/paradigms.dat
|
48
48
|
- lib/dictionary/prefixes.txt
|
49
49
|
- lib/dictionary/suffixes.txt
|
50
50
|
- lib/morphy.rb
|
51
|
+
- lib/word.rb
|
51
52
|
- morphy.gemspec
|
52
53
|
homepage: https://github.com/baltavay/morphy
|
53
54
|
licenses:
|
@@ -69,7 +70,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
70
|
version: '0'
|
70
71
|
requirements: []
|
71
72
|
rubyforge_project:
|
72
|
-
rubygems_version: 2.
|
73
|
+
rubygems_version: 2.4.5.1
|
73
74
|
signing_key:
|
74
75
|
specification_version: 4
|
75
76
|
summary: Morphological analyzer for Russian language in ruby. Inspired by pymorphy2
|