russian_word_forms 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +0 -0
- data/README.md +0 -0
- data/Rakefile +0 -0
- data/lib/russian_word_forms.rb +47 -52
- data/lib/russian_word_forms/dictionaries/abbrev.dic +0 -0
- data/lib/russian_word_forms/dictionaries/base.dic +0 -0
- data/lib/russian_word_forms/dictionaries/computer.dic +0 -0
- data/lib/russian_word_forms/dictionaries/for_name.dic +0 -0
- data/lib/russian_word_forms/dictionaries/geography.dic +0 -0
- data/lib/russian_word_forms/dictionaries/rare.dic +0 -0
- data/lib/russian_word_forms/dictionaries/russian.aff +3 -3
- data/lib/russian_word_forms/dictionaries/science.dic +0 -0
- data/lib/russian_word_forms/dictionary.rb +23 -23
- data/lib/russian_word_forms/rules.rb +37 -29
- data/lib/russian_word_forms/version.rb +1 -1
- data/russian_word_forms.gemspec +1 -0
- metadata +24 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ade7ff1c37d3f2c7285af4c39a9c103d30f36be1
|
4
|
+
data.tar.gz: 4aeb05584df63c15a3ea0ca5acf1aeb68d5bc5f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7c688c887c7b153f62b975fd75b91086ce8bce9cafd9724202a8eb888c8aec2baf518349bfcc7e19b1a5c5b5ea2cf5f2868a3252b5602a0c9ed52b746113e2e
|
7
|
+
data.tar.gz: ef355f059533a935ae5e6c5f77ae1b645f5fe64361cc0152f5f0a6dbe10830416b2042db02409b7f8e660ddb94debc220e314172475d5243a120cd05b11077e1
|
data/.gitignore
CHANGED
File without changes
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE.txt
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/Rakefile
CHANGED
File without changes
|
data/lib/russian_word_forms.rb
CHANGED
@@ -1,76 +1,71 @@
|
|
1
1
|
require "russian_word_forms/version"
|
2
2
|
require 'russian_word_forms/dictionary'
|
3
3
|
require 'russian_word_forms/rules'
|
4
|
+
require "unicode"
|
5
|
+
|
4
6
|
module RussianWordForms
|
5
|
-
@dictionary=Dictionary.new
|
6
|
-
@rules=Rules.new
|
7
|
-
|
8
|
-
def self.inflect(word)
|
9
|
-
word=word.mb_chars.upcase.to_s
|
10
|
-
flags=@dictionary.dictionary[word]
|
11
|
-
output=[]
|
12
|
-
output<<word if !flags.kind_of?(Array)
|
13
|
-
flags=@rules.rules.keys.join if flags.empty? # if not found in dictionary
|
14
|
-
flags.each_char do |flag|
|
15
|
-
rules=@rules.rules[flag]
|
16
|
-
rules.keys.each do |rule|
|
17
|
-
rules[rule].each do |affix|
|
18
|
-
left,right=affix.split(",")
|
19
|
-
if right
|
20
|
-
left=left[1..-1] if left[0]=='-'
|
21
|
-
right=right[1..-1] if right[0]=='-'
|
22
|
-
|
23
|
-
output<<word.gsub(/(#{left})$/,right) if word.match(/(#{rule})$/)
|
24
|
-
else
|
25
7
|
|
26
|
-
|
27
|
-
|
8
|
+
@@dictionary=Dictionary.new
|
9
|
+
@@rules=Rules.new
|
10
|
+
|
28
11
|
|
12
|
+
def self.inflect(word)
|
13
|
+
word = Unicode::upcase(word)
|
14
|
+
flags = @@dictionary.get_flags word
|
15
|
+
output = []
|
16
|
+
if flags
|
17
|
+
flags.each_char do |flag|
|
18
|
+
rules_keys = @@rules.rules[flag]
|
19
|
+
rules_keys.each do |key,rules|
|
20
|
+
rules.each do |rule|
|
21
|
+
if rule.suffix
|
22
|
+
output << word.gsub(/(#{rule.normal_suffix})$/i,rule.suffix) if word.match(/(#{rule.rule})$/i)
|
23
|
+
else
|
24
|
+
output << word+rule.normal_suffix if word.match(/(#{rule.rule})$/i)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
29
28
|
end
|
30
|
-
end
|
29
|
+
end
|
31
30
|
end
|
32
31
|
output.uniq
|
33
32
|
end
|
34
33
|
def self.get_base_form(word)
|
35
|
-
word=word.
|
36
|
-
flags
|
37
|
-
variants=[]
|
38
|
-
variants<<word if
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
tmp
|
49
|
-
variants<< tmp if tmp.match(/(#{rule})$/)
|
34
|
+
word = Unicode::upcase(word).to_s
|
35
|
+
flags = @@dictionary.dictionary[word]
|
36
|
+
variants = []
|
37
|
+
variants << word if flags
|
38
|
+
@@rules.rules.each do |flag,rules_keys|
|
39
|
+
rules_keys.each do |key,rules|
|
40
|
+
rules.each do |rule|
|
41
|
+
if rule.suffix && !rule.suffix.empty?
|
42
|
+
# puts "#{word} #{rule.suffix}"
|
43
|
+
|
44
|
+
if word.end_with? rule.suffix
|
45
|
+
tmp = word.gsub(rule.suffix,rule.normal_suffix)
|
46
|
+
# puts tmp
|
47
|
+
variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.get_flags(tmp)
|
50
48
|
end
|
49
|
+
|
51
50
|
else
|
52
|
-
if word.
|
53
|
-
tmp=word.gsub(
|
54
|
-
variants<<tmp if tmp.match(/(#{rule})$/)
|
51
|
+
if word.end_with?(rule.normal_suffix)
|
52
|
+
tmp = word.gsub(rule.normal_suffix,"")
|
53
|
+
variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.dictionary[tmp]
|
55
54
|
end
|
56
55
|
end
|
57
56
|
end
|
58
57
|
end
|
59
58
|
end
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
output<<variant
|
64
|
-
end
|
65
|
-
end
|
66
|
-
return output.uniq
|
59
|
+
|
60
|
+
|
61
|
+
return variants.uniq
|
67
62
|
end
|
68
|
-
|
63
|
+
|
69
64
|
def self.rules
|
70
|
-
|
65
|
+
@@rules
|
71
66
|
end
|
72
|
-
|
67
|
+
|
73
68
|
def self.dictionary
|
74
|
-
|
69
|
+
@@dictionary
|
75
70
|
end
|
76
71
|
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -16,8 +16,8 @@
|
|
16
16
|
# Affix table for Russian
|
17
17
|
#
|
18
18
|
|
19
|
-
nroffchars ().\\*
|
20
|
-
texchars ()\[]{}<\>\\$*.%
|
19
|
+
# nroffchars ().\\*
|
20
|
+
# texchars ()\[]{}<\>\\$*.%
|
21
21
|
|
22
22
|
# First we declare the character set.
|
23
23
|
|
@@ -69,7 +69,7 @@ texchars ()\[]{}<\>\\$*.%
|
|
69
69
|
|
70
70
|
#prefixes
|
71
71
|
|
72
|
-
suffixes
|
72
|
+
#suffixes
|
73
73
|
|
74
74
|
flag *L:
|
75
75
|
#
|
File without changes
|
@@ -1,35 +1,35 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
require "unicode"
|
2
|
+
module RussianWordForms
|
3
|
+
|
4
|
+
class Dictionary
|
5
|
+
|
6
|
+
attr_accessor :dictionary
|
4
7
|
|
5
8
|
def initialize
|
9
|
+
@dictionary = Hash.new
|
6
10
|
load_dictionaries
|
7
|
-
end
|
8
|
-
|
11
|
+
end
|
12
|
+
|
13
|
+
def load_dictionary(file)
|
9
14
|
File.readlines(file).each do |line|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
@@dictionary[word[0]]=word[1]
|
16
|
-
else
|
17
|
-
@@dictionary[word[0]]=""
|
18
|
-
end
|
19
|
-
end
|
15
|
+
stem,flags = line.chomp.split('/')
|
16
|
+
stem = Unicode::upcase stem
|
17
|
+
stem.gsub!("Ё","Е")
|
18
|
+
@dictionary[stem]=flags
|
19
|
+
end
|
20
20
|
end
|
21
21
|
|
22
|
-
def load_dictionaries
|
23
|
-
files=Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
|
24
|
-
files.each do |file|
|
22
|
+
def load_dictionaries
|
23
|
+
files = Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
|
24
|
+
files.each do |file|
|
25
25
|
load_dictionary file
|
26
|
-
end
|
26
|
+
end
|
27
27
|
end
|
28
28
|
|
29
|
-
def
|
30
|
-
|
29
|
+
def get_flags(word)
|
30
|
+
@dictionary[word]
|
31
31
|
end
|
32
|
-
|
33
32
|
|
34
33
|
end
|
35
|
-
|
34
|
+
|
35
|
+
end
|
@@ -1,41 +1,49 @@
|
|
1
|
-
module RussianWordForms
|
2
|
-
class Rules
|
3
|
-
|
1
|
+
module RussianWordForms
|
2
|
+
class Rules
|
3
|
+
class Rule
|
4
|
+
attr_accessor :rule,:normal_suffix,:suffix
|
5
|
+
def initialize(rule,normal_suffix,suffix)
|
6
|
+
@rule = rule
|
7
|
+
@normal_suffix = normal_suffix
|
8
|
+
@suffix = suffix
|
9
|
+
end
|
10
|
+
end
|
11
|
+
attr_accessor :rules,:rules_without_flags
|
12
|
+
|
4
13
|
|
5
14
|
def initialize
|
15
|
+
@rules = Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
|
16
|
+
@rules_without_flags = Hash.new {|h2,k2| h2[k2]=[]}
|
6
17
|
load_rules
|
7
|
-
end
|
8
|
-
|
9
|
-
|
18
|
+
end
|
19
|
+
|
20
|
+
def load_file(file)
|
21
|
+
flag = ""
|
10
22
|
File.readlines(file).each do |line|
|
11
|
-
command=line.chomp.split('#')
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
rule
|
22
|
-
|
23
|
-
end
|
23
|
+
command,comments = line.chomp.split('#') # get rid of comments
|
24
|
+
if command && !command.empty?
|
25
|
+
if command.start_with? "flag"
|
26
|
+
flag = command[6..-2]
|
27
|
+
else
|
28
|
+
rule,suffixes = command.split.join.split(">")
|
29
|
+
normal_suffix,suffix = suffixes.split(",")
|
30
|
+
normal_suffix = normal_suffix[1..-1] if normal_suffix[0] == '-'
|
31
|
+
suffix=suffix[1..-1] if suffix && suffix[0] == '-'
|
32
|
+
@rules[flag][rule] << Rule.new(rule,normal_suffix,suffix)
|
33
|
+
@rules_without_flags[rule] << Rule.new(rule,normal_suffix,suffix)
|
34
|
+
end
|
24
35
|
end
|
25
|
-
end
|
36
|
+
end
|
26
37
|
end
|
27
38
|
|
28
|
-
def load_rules
|
29
|
-
files=Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
|
30
|
-
files.each do |file|
|
39
|
+
def load_rules
|
40
|
+
files = Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
|
41
|
+
files.each do |file|
|
31
42
|
load_file file
|
32
|
-
end
|
43
|
+
end
|
33
44
|
end
|
34
45
|
|
35
|
-
|
36
|
-
@@rules
|
37
|
-
end
|
38
|
-
|
46
|
+
|
39
47
|
|
40
48
|
end
|
41
|
-
end
|
49
|
+
end
|
data/russian_word_forms.gemspec
CHANGED
metadata
CHANGED
@@ -1,41 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: russian_word_forms
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Maksatbek Mansurov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: unicode
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
41
55
|
description: Gem detects wordforms. It uses russian ispell dictionary written by Alexander
|
@@ -46,7 +60,7 @@ executables: []
|
|
46
60
|
extensions: []
|
47
61
|
extra_rdoc_files: []
|
48
62
|
files:
|
49
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
50
64
|
- Gemfile
|
51
65
|
- LICENSE.txt
|
52
66
|
- README.md
|
@@ -74,17 +88,17 @@ require_paths:
|
|
74
88
|
- lib
|
75
89
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
90
|
requirements:
|
77
|
-
- -
|
91
|
+
- - ">="
|
78
92
|
- !ruby/object:Gem::Version
|
79
93
|
version: '0'
|
80
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
95
|
requirements:
|
82
|
-
- -
|
96
|
+
- - ">="
|
83
97
|
- !ruby/object:Gem::Version
|
84
98
|
version: '0'
|
85
99
|
requirements: []
|
86
100
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.
|
101
|
+
rubygems_version: 2.2.2
|
88
102
|
signing_key:
|
89
103
|
specification_version: 4
|
90
104
|
summary: Gem detects wordforms
|