morphy 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/LICENSE +0 -0
- data/README.md +8 -6
- data/lib/dictionary/{dawg.dat → dawg.bin} +0 -0
- data/lib/dictionary/grammemes.txt +0 -0
- data/lib/dictionary/paradigms.dat +0 -0
- data/lib/dictionary/prefixes.txt +0 -0
- data/lib/dictionary/suffixes.txt +0 -0
- data/lib/morphy.rb +37 -82
- data/lib/word.rb +61 -0
- data/morphy.gemspec +4 -4
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcbdc7a59f41e79c1cf9ac95ecec4887176c5024
|
4
|
+
data.tar.gz: 4277e263c6d63ba16540b196c0207d7ec98fadcd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c0fddd3b4910808a4160474e9214ad09138a745fd0e95e61fe774017bfa782c09c8ac6f4cf411357e76b514338f2bf78154d4ce0f4fe02cd30ef0f33c19a1a78
|
7
|
+
data.tar.gz: c58a7f67a75eebd7223458927d79f678d0f25e360f9745287fbe5901f1e8ab9c27e3a2f49742e1a29806f3ab00ffa17dd7fdaece354e9d2337d0b4cb0cdc7960
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE
CHANGED
File without changes
|
data/README.md
CHANGED
@@ -15,19 +15,20 @@ Or install it yourself as:
|
|
15
15
|
|
16
16
|
$ gem install morphy
|
17
17
|
|
18
|
-
## Usage
|
19
|
-
# Words must be added in alphabetical order
|
18
|
+
## Usage
|
20
19
|
require "morphy"
|
21
|
-
|
20
|
+
|
22
21
|
morphy = Morphy.new
|
23
|
-
|
24
|
-
word = morphy.
|
22
|
+
|
23
|
+
word = morphy.query("облако").first
|
25
24
|
|
26
25
|
datv = word.inflect(["datv"])
|
27
26
|
datv.to_s
|
28
27
|
=> облаку
|
29
28
|
datv.inflect(["nomn"]).to_s
|
30
29
|
=> облако
|
30
|
+
datv.inflect(["nomn", "plur"]).to_s
|
31
|
+
=> облака
|
31
32
|
datv.lexemme.map(&:to_s)
|
32
33
|
=> ["облако", "облака", "облаку", "облако", "облаком", "облаке", "облака", "облаков", "облакам", "облака", "облаками", "облаках"]
|
33
34
|
datv.normal_form
|
@@ -35,6 +36,8 @@ Or install it yourself as:
|
|
35
36
|
|
36
37
|
|
37
38
|
|
39
|
+
|
40
|
+
|
38
41
|
## Contributing
|
39
42
|
|
40
43
|
1. Fork it
|
@@ -42,4 +45,3 @@ Or install it yourself as:
|
|
42
45
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
46
|
4. Push to the branch (`git push origin my-new-feature`)
|
44
47
|
5. Create new Pull Request
|
45
|
-
|
Binary file
|
File without changes
|
File without changes
|
data/lib/dictionary/prefixes.txt
CHANGED
File without changes
|
data/lib/dictionary/suffixes.txt
CHANGED
File without changes
|
data/lib/morphy.rb
CHANGED
@@ -1,98 +1,53 @@
|
|
1
1
|
require "dawg"
|
2
|
+
require_relative "word"
|
2
3
|
|
3
|
-
|
4
|
-
class Word
|
5
|
-
attr_accessor :para_id
|
6
|
-
|
7
|
-
def initialize(word,para_id,index)
|
8
|
-
@word = word
|
9
|
-
@para_id = para_id.to_i
|
10
|
-
@index = index.to_i
|
11
|
-
@prefix_id = Morphy.paradigms[@para_id][@index*3]
|
12
|
-
@suffix_id = Morphy.paradigms[@para_id][@index*3+1]
|
13
|
-
@grammeme_id = Morphy.paradigms[@para_id][@index*3+2]
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
def to_s
|
18
|
-
@word
|
19
|
-
end
|
20
|
-
|
21
|
-
def normal_form
|
22
|
-
prefix = Morphy.prefixes[Morphy.paradigms[@para_id][0]]
|
23
|
-
suffix = Morphy.suffixes[Morphy.paradigms[@para_id][1]]
|
24
|
-
|
25
|
-
"#{prefix}#{stem}#{suffix}"
|
26
|
-
end
|
27
|
-
def grammemes
|
28
|
-
Morphy.grammemes[@grammeme_id]
|
29
|
-
end
|
30
|
-
def stem
|
31
|
-
prefix = Morphy.prefixes[Morphy.paradigms[@para_id][0]]
|
32
|
-
suffix = Morphy.suffixes[Morphy.paradigms[@para_id][1]]
|
33
|
-
grammeme = Morphy.grammemes[Morphy.paradigms[@para_id][2]]
|
34
|
-
word = @word.dup
|
35
|
-
word.sub!(Morphy.prefixes[@prefix_id],"")
|
36
|
-
word = word.reverse.sub(Morphy.suffixes[@suffix_id],"").reverse
|
37
|
-
word
|
38
|
-
end
|
4
|
+
module Morphy
|
39
5
|
|
40
|
-
|
41
|
-
@para_id == other.para_id
|
42
|
-
end
|
6
|
+
class Morphy
|
43
7
|
|
44
|
-
def
|
45
|
-
|
8
|
+
def initialize
|
9
|
+
@dawg = Dawg.load("#{::Morphy.path}/dawg.bin")
|
46
10
|
end
|
47
11
|
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
Word.new(
|
54
|
-
end
|
12
|
+
def query(word)
|
13
|
+
results = @dawg.query(word)
|
14
|
+
results = results.map do |result|
|
15
|
+
result = result.to_s
|
16
|
+
word, para_id, index = result.split(' ')
|
17
|
+
Word.new(word, para_id, index)
|
18
|
+
end
|
19
|
+
results
|
55
20
|
end
|
56
21
|
|
57
|
-
def
|
58
|
-
|
59
|
-
|
60
|
-
return word if word.grammemes.last(grammemes.length) == grammemes
|
61
|
-
end
|
62
|
-
nil
|
63
|
-
end
|
22
|
+
def to_s
|
23
|
+
"Morphy"
|
24
|
+
end
|
64
25
|
end
|
65
|
-
def initialize
|
66
|
-
|
67
|
-
path = File.dirname(__FILE__)+"/dictionary/"
|
68
26
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
@@paradigms ||= Marshal.load(File.read("#{path}/paradigms.dat"))
|
74
|
-
end
|
75
|
-
def self.paradigms
|
76
|
-
@@paradigms
|
27
|
+
extend self
|
28
|
+
|
29
|
+
def new
|
30
|
+
Morphy.new
|
77
31
|
end
|
78
|
-
|
79
|
-
|
32
|
+
|
33
|
+
def path
|
34
|
+
File.dirname(__FILE__)+"/dictionary"
|
80
35
|
end
|
81
|
-
|
82
|
-
|
36
|
+
|
37
|
+
def paradigms
|
38
|
+
@@paradigms ||= Marshal.load(File.read("#{path}/paradigms.dat"))
|
83
39
|
end
|
84
|
-
|
85
|
-
|
40
|
+
|
41
|
+
def prefixes
|
42
|
+
@@prefixes ||= File.open("#{path}/prefixes.txt", 'r').read.split("\n")
|
86
43
|
end
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
word,para_id,index = result.split(" ")
|
91
|
-
Word.new(word,para_id,index)
|
92
|
-
end
|
93
|
-
results
|
44
|
+
|
45
|
+
def suffixes
|
46
|
+
@@suffixes ||= File.open("#{path}/suffixes.txt", 'r').read.split("\n")
|
94
47
|
end
|
95
|
-
|
96
|
-
|
48
|
+
|
49
|
+
def grammemes
|
50
|
+
@@grammemes ||= File.open("#{path}/grammemes.txt", 'r').read.split("\n").map{|g| g.split(",")}
|
97
51
|
end
|
98
|
-
|
52
|
+
|
53
|
+
end
|
data/lib/word.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
module Morphy
|
2
|
+
class Word
|
3
|
+
attr_accessor :para_id
|
4
|
+
|
5
|
+
def initialize(word,para_id,index)
|
6
|
+
@word = word
|
7
|
+
@para_id = para_id.to_i
|
8
|
+
@index = index.to_i
|
9
|
+
@prefix_id = ::Morphy.paradigms[@para_id][@index * 3]
|
10
|
+
@suffix_id = ::Morphy.paradigms[@para_id][@index * 3 + 1]
|
11
|
+
@grammeme_id = ::Morphy.paradigms[@para_id][@index * 3 + 2]
|
12
|
+
end
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
@word
|
16
|
+
end
|
17
|
+
|
18
|
+
def normal_form
|
19
|
+
self.inflect(["nomn"])
|
20
|
+
end
|
21
|
+
|
22
|
+
def grammemes
|
23
|
+
::Morphy.grammemes[@grammeme_id]
|
24
|
+
end
|
25
|
+
|
26
|
+
def stem
|
27
|
+
prefix = ::Morphy.prefixes[::Morphy.paradigms[@para_id][0]]
|
28
|
+
suffix = ::Morphy.suffixes[::Morphy.paradigms[@para_id][1]]
|
29
|
+
grammeme = ::Morphy.grammemes[::Morphy.paradigms[@para_id][2]]
|
30
|
+
word = @word.dup
|
31
|
+
word.sub!(::Morphy.prefixes[@prefix_id], '')
|
32
|
+
word = word.reverse.sub(::Morphy.suffixes[@suffix_id].reverse, '').reverse
|
33
|
+
word
|
34
|
+
end
|
35
|
+
|
36
|
+
def same_paradigm?(other)
|
37
|
+
@para_id == other.para_id
|
38
|
+
end
|
39
|
+
|
40
|
+
def tag
|
41
|
+
::Morphy.grammemes[@grammeme_id].join(',')
|
42
|
+
end
|
43
|
+
|
44
|
+
def lexemme
|
45
|
+
(0..(::Morphy.paradigms[@para_id].length / 3) - 1).map do |index|
|
46
|
+
prefix = ::Morphy.prefixes[::Morphy.paradigms[@para_id][index * 3]]
|
47
|
+
suffix = ::Morphy.suffixes[::Morphy.paradigms[@para_id][index * 3 + 1]]
|
48
|
+
grammeme = ::Morphy.grammemes[::Morphy.paradigms[@para_id][index * 3 + 2]]
|
49
|
+
Word.new(prefix + stem + suffix, @para_id, index)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def inflect(grammemes)
|
54
|
+
words = lexemme
|
55
|
+
words.each do |word|
|
56
|
+
return word if (word.grammemes & grammemes).length == grammemes.length
|
57
|
+
end
|
58
|
+
nil
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
data/morphy.gemspec
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "morphy"
|
5
|
-
spec.version = "0.0.3"
|
6
|
-
spec.date = '
|
5
|
+
spec.version = "0.0.4"
|
6
|
+
spec.date = '2017-03-19'
|
7
7
|
spec.authors = ["Maksatbek Mansurov"]
|
8
8
|
spec.email = ["maksat.mansurov@gmail.com"]
|
9
9
|
spec.description = %q{Morphological analyzer (POS tagger + inflection engine) for Russian language in ruby. Inspired by pymorphy2}
|
@@ -14,6 +14,6 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.files = `git ls-files`.split($/)
|
15
15
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
16
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
17
|
-
spec.require_paths = ["lib"]
|
18
|
-
spec.
|
17
|
+
spec.require_paths = ["lib"]
|
18
|
+
spec.add_runtime_dependency 'dawg', '~> 0.0', '>= 0.0.4'
|
19
19
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: morphy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maksatbek Mansurov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dawg
|
@@ -19,8 +19,8 @@ dependencies:
|
|
19
19
|
version: '0.0'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.0.
|
23
|
-
type: :
|
22
|
+
version: 0.0.4
|
23
|
+
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: '0.0'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.0.
|
32
|
+
version: 0.0.4
|
33
33
|
description: Morphological analyzer (POS tagger + inflection engine) for Russian language
|
34
34
|
in ruby. Inspired by pymorphy2
|
35
35
|
email:
|
@@ -42,12 +42,13 @@ files:
|
|
42
42
|
- Gemfile
|
43
43
|
- LICENSE
|
44
44
|
- README.md
|
45
|
-
- lib/dictionary/dawg.dat
|
45
|
+
- lib/dictionary/dawg.bin
|
46
46
|
- lib/dictionary/grammemes.txt
|
47
47
|
- lib/dictionary/paradigms.dat
|
48
48
|
- lib/dictionary/prefixes.txt
|
49
49
|
- lib/dictionary/suffixes.txt
|
50
50
|
- lib/morphy.rb
|
51
|
+
- lib/word.rb
|
51
52
|
- morphy.gemspec
|
52
53
|
homepage: https://github.com/baltavay/morphy
|
53
54
|
licenses:
|
@@ -69,7 +70,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
70
|
version: '0'
|
70
71
|
requirements: []
|
71
72
|
rubyforge_project:
|
72
|
-
rubygems_version: 2.
|
73
|
+
rubygems_version: 2.4.5.1
|
73
74
|
signing_key:
|
74
75
|
specification_version: 4
|
75
76
|
summary: Morphological analyzer for Russian language in ruby. Inspired by pymorphy2
|