russian_word_forms 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +0 -0
- data/README.md +0 -0
- data/Rakefile +0 -0
- data/lib/russian_word_forms.rb +47 -52
- data/lib/russian_word_forms/dictionaries/abbrev.dic +0 -0
- data/lib/russian_word_forms/dictionaries/base.dic +0 -0
- data/lib/russian_word_forms/dictionaries/computer.dic +0 -0
- data/lib/russian_word_forms/dictionaries/for_name.dic +0 -0
- data/lib/russian_word_forms/dictionaries/geography.dic +0 -0
- data/lib/russian_word_forms/dictionaries/rare.dic +0 -0
- data/lib/russian_word_forms/dictionaries/russian.aff +3 -3
- data/lib/russian_word_forms/dictionaries/science.dic +0 -0
- data/lib/russian_word_forms/dictionary.rb +23 -23
- data/lib/russian_word_forms/rules.rb +37 -29
- data/lib/russian_word_forms/version.rb +1 -1
- data/russian_word_forms.gemspec +1 -0
- metadata +24 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ade7ff1c37d3f2c7285af4c39a9c103d30f36be1
|
4
|
+
data.tar.gz: 4aeb05584df63c15a3ea0ca5acf1aeb68d5bc5f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7c688c887c7b153f62b975fd75b91086ce8bce9cafd9724202a8eb888c8aec2baf518349bfcc7e19b1a5c5b5ea2cf5f2868a3252b5602a0c9ed52b746113e2e
|
7
|
+
data.tar.gz: ef355f059533a935ae5e6c5f77ae1b645f5fe64361cc0152f5f0a6dbe10830416b2042db02409b7f8e660ddb94debc220e314172475d5243a120cd05b11077e1
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE.txt
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/lib/russian_word_forms.rb
CHANGED
@@ -1,76 +1,71 @@
|
|
1
1
|
require "russian_word_forms/version"
|
2
2
|
require 'russian_word_forms/dictionary'
|
3
3
|
require 'russian_word_forms/rules'
|
4
|
+
require "unicode"
|
5
|
+
|
4
6
|
module RussianWordForms
|
5
|
-
@dictionary=Dictionary.new
|
6
|
-
@rules=Rules.new
|
7
|
-
|
8
|
-
def self.inflect(word)
|
9
|
-
word=word.mb_chars.upcase.to_s
|
10
|
-
flags=@dictionary.dictionary[word]
|
11
|
-
output=[]
|
12
|
-
output<<word if !flags.kind_of?(Array)
|
13
|
-
flags=@rules.rules.keys.join if flags.empty? # if not found in dictionary
|
14
|
-
flags.each_char do |flag|
|
15
|
-
rules=@rules.rules[flag]
|
16
|
-
rules.keys.each do |rule|
|
17
|
-
rules[rule].each do |affix|
|
18
|
-
left,right=affix.split(",")
|
19
|
-
if right
|
20
|
-
left=left[1..-1] if left[0]=='-'
|
21
|
-
right=right[1..-1] if right[0]=='-'
|
22
|
-
|
23
|
-
output<<word.gsub(/(#{left})$/,right) if word.match(/(#{rule})$/)
|
24
|
-
else
|
25
7
|
|
26
|
-
|
27
|
-
|
8
|
+
@@dictionary=Dictionary.new
|
9
|
+
@@rules=Rules.new
|
10
|
+
|
28
11
|
|
12
|
+
def self.inflect(word)
|
13
|
+
word = Unicode::upcase(word)
|
14
|
+
flags = @@dictionary.get_flags word
|
15
|
+
output = []
|
16
|
+
if flags
|
17
|
+
flags.each_char do |flag|
|
18
|
+
rules_keys = @@rules.rules[flag]
|
19
|
+
rules_keys.each do |key,rules|
|
20
|
+
rules.each do |rule|
|
21
|
+
if rule.suffix
|
22
|
+
output << word.gsub(/(#{rule.normal_suffix})$/i,rule.suffix) if word.match(/(#{rule.rule})$/i)
|
23
|
+
else
|
24
|
+
output << word+rule.normal_suffix if word.match(/(#{rule.rule})$/i)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
29
28
|
end
|
30
|
-
end
|
29
|
+
end
|
31
30
|
end
|
32
31
|
output.uniq
|
33
32
|
end
|
34
33
|
def self.get_base_form(word)
|
35
|
-
word=word.
|
36
|
-
flags
|
37
|
-
variants=[]
|
38
|
-
variants<<word if
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
tmp
|
49
|
-
variants<< tmp if tmp.match(/(#{rule})$/)
|
34
|
+
word = Unicode::upcase(word).to_s
|
35
|
+
flags = @@dictionary.dictionary[word]
|
36
|
+
variants = []
|
37
|
+
variants << word if flags
|
38
|
+
@@rules.rules.each do |flag,rules_keys|
|
39
|
+
rules_keys.each do |key,rules|
|
40
|
+
rules.each do |rule|
|
41
|
+
if rule.suffix && !rule.suffix.empty?
|
42
|
+
# puts "#{word} #{rule.suffix}"
|
43
|
+
|
44
|
+
if word.end_with? rule.suffix
|
45
|
+
tmp = word.gsub(rule.suffix,rule.normal_suffix)
|
46
|
+
# puts tmp
|
47
|
+
variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.get_flags(tmp)
|
50
48
|
end
|
49
|
+
|
51
50
|
else
|
52
|
-
if word.
|
53
|
-
tmp=word.gsub(
|
54
|
-
variants<<tmp if tmp.match(/(#{rule})$/)
|
51
|
+
if word.end_with?(rule.normal_suffix)
|
52
|
+
tmp = word.gsub(rule.normal_suffix,"")
|
53
|
+
variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.dictionary[tmp]
|
55
54
|
end
|
56
55
|
end
|
57
56
|
end
|
58
57
|
end
|
59
58
|
end
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
output<<variant
|
64
|
-
end
|
65
|
-
end
|
66
|
-
return output.uniq
|
59
|
+
|
60
|
+
|
61
|
+
return variants.uniq
|
67
62
|
end
|
68
|
-
|
63
|
+
|
69
64
|
def self.rules
|
70
|
-
|
65
|
+
@@rules
|
71
66
|
end
|
72
|
-
|
67
|
+
|
73
68
|
def self.dictionary
|
74
|
-
|
69
|
+
@@dictionary
|
75
70
|
end
|
76
71
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -16,8 +16,8 @@
|
|
16
16
|
# Affix table for Russian
|
17
17
|
#
|
18
18
|
|
19
|
-
nroffchars ().\\*
|
20
|
-
texchars ()\[]{}<\>\\$*.%
|
19
|
+
# nroffchars ().\\*
|
20
|
+
# texchars ()\[]{}<\>\\$*.%
|
21
21
|
|
22
22
|
# First we declare the character set.
|
23
23
|
|
@@ -69,7 +69,7 @@ texchars ()\[]{}<\>\\$*.%
|
|
69
69
|
|
70
70
|
#prefixes
|
71
71
|
|
72
|
-
suffixes
|
72
|
+
#suffixes
|
73
73
|
|
74
74
|
flag *L:
|
75
75
|
#
|
File without changes
|
@@ -1,35 +1,35 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require "unicode"
|
2
|
+
module RussianWordForms
|
3
|
+
|
4
|
+
class Dictionary
|
5
|
+
|
6
|
+
attr_accessor :dictionary
|
4
7
|
|
5
8
|
def initialize
|
9
|
+
@dictionary = Hash.new
|
6
10
|
load_dictionaries
|
7
|
-
end
|
8
|
-
|
11
|
+
end
|
12
|
+
|
13
|
+
def load_dictionary(file)
|
9
14
|
File.readlines(file).each do |line|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
@@dictionary[word[0]]=word[1]
|
16
|
-
else
|
17
|
-
@@dictionary[word[0]]=""
|
18
|
-
end
|
19
|
-
end
|
15
|
+
stem,flags = line.chomp.split('/')
|
16
|
+
stem = Unicode::upcase stem
|
17
|
+
stem.gsub!("Ё","Е")
|
18
|
+
@dictionary[stem]=flags
|
19
|
+
end
|
20
20
|
end
|
21
21
|
|
22
|
-
def load_dictionaries
|
23
|
-
files=Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
|
24
|
-
files.each do |file|
|
22
|
+
def load_dictionaries
|
23
|
+
files = Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
|
24
|
+
files.each do |file|
|
25
25
|
load_dictionary file
|
26
|
-
end
|
26
|
+
end
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
|
29
|
+
def get_flags(word)
|
30
|
+
@dictionary[word]
|
31
31
|
end
|
32
|
-
|
33
32
|
|
34
33
|
end
|
35
|
-
|
34
|
+
|
35
|
+
end
|
@@ -1,41 +1,49 @@
|
|
1
|
-
module RussianWordForms
|
2
|
-
class Rules
|
3
|
-
|
1
|
+
module RussianWordForms
|
2
|
+
class Rules
|
3
|
+
class Rule
|
4
|
+
attr_accessor :rule,:normal_suffix,:suffix
|
5
|
+
def initialize(rule,normal_suffix,suffix)
|
6
|
+
@rule = rule
|
7
|
+
@normal_suffix = normal_suffix
|
8
|
+
@suffix = suffix
|
9
|
+
end
|
10
|
+
end
|
11
|
+
attr_accessor :rules,:rules_without_flags
|
12
|
+
|
4
13
|
|
5
14
|
def initialize
|
15
|
+
@rules = Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
|
16
|
+
@rules_without_flags = Hash.new {|h2,k2| h2[k2]=[]}
|
6
17
|
load_rules
|
7
|
-
end
|
8
|
-
|
9
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
def load_file(file)
|
21
|
+
flag = ""
|
10
22
|
File.readlines(file).each do |line|
|
11
|
-
command=line.chomp.split('#')
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
rule
|
22
|
-
|
23
|
-
end
|
23
|
+
command,comments = line.chomp.split('#') # get rid of comments
|
24
|
+
if command && !command.empty?
|
25
|
+
if command.start_with? "flag"
|
26
|
+
flag = command[6..-2]
|
27
|
+
else
|
28
|
+
rule,suffixes = command.split.join.split(">")
|
29
|
+
normal_suffix,suffix = suffixes.split(",")
|
30
|
+
normal_suffix = normal_suffix[1..-1] if normal_suffix[0] == '-'
|
31
|
+
suffix=suffix[1..-1] if suffix && suffix[0] == '-'
|
32
|
+
@rules[flag][rule] << Rule.new(rule,normal_suffix,suffix)
|
33
|
+
@rules_without_flags[rule] << Rule.new(rule,normal_suffix,suffix)
|
34
|
+
end
|
24
35
|
end
|
25
|
-
end
|
36
|
+
end
|
26
37
|
end
|
27
38
|
|
28
|
-
def load_rules
|
29
|
-
files=Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
|
30
|
-
files.each do |file|
|
39
|
+
def load_rules
|
40
|
+
files = Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
|
41
|
+
files.each do |file|
|
31
42
|
load_file file
|
32
|
-
end
|
43
|
+
end
|
33
44
|
end
|
34
45
|
|
35
|
-
|
36
|
-
@@rules
|
37
|
-
end
|
38
|
-
|
46
|
+
|
39
47
|
|
40
48
|
end
|
41
|
-
end
|
49
|
+
end
|
data/russian_word_forms.gemspec
CHANGED
metadata
CHANGED
@@ -1,41 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: russian_word_forms
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maksatbek Mansurov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: unicode
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
41
55
|
description: Gem detects wordforms. It uses russian ispell dictionary written by Alexander
|
@@ -46,7 +60,7 @@ executables: []
|
|
46
60
|
extensions: []
|
47
61
|
extra_rdoc_files: []
|
48
62
|
files:
|
49
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
50
64
|
- Gemfile
|
51
65
|
- LICENSE.txt
|
52
66
|
- README.md
|
@@ -74,17 +88,17 @@ require_paths:
|
|
74
88
|
- lib
|
75
89
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
90
|
requirements:
|
77
|
-
- -
|
91
|
+
- - ">="
|
78
92
|
- !ruby/object:Gem::Version
|
79
93
|
version: '0'
|
80
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
95
|
requirements:
|
82
|
-
- -
|
96
|
+
- - ">="
|
83
97
|
- !ruby/object:Gem::Version
|
84
98
|
version: '0'
|
85
99
|
requirements: []
|
86
100
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.
|
101
|
+
rubygems_version: 2.2.2
|
88
102
|
signing_key:
|
89
103
|
specification_version: 4
|
90
104
|
summary: Gem detects wordforms
|