russian_word_forms 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 342af8888b394e9eb6f997faf378f7b6ff3e3845
4
- data.tar.gz: a44afb323002b6705b985a71db4b2f3e8ff0e316
3
+ metadata.gz: ade7ff1c37d3f2c7285af4c39a9c103d30f36be1
4
+ data.tar.gz: 4aeb05584df63c15a3ea0ca5acf1aeb68d5bc5f0
5
5
  SHA512:
6
- metadata.gz: 177c6eabe8c59b4721ca9b56a75eba54dadcc95171b11203bc30c0bdf58f950d6daa791eb43dd1cfc24f5b2b157ea8d3f72cacec7d0d60bca3e8fba0da6b7f5c
7
- data.tar.gz: a04c04d564bde2fb6c88e167d364000950a500938484483e6f1673841c1d135ea4e373fe6a39b85bf71ff4710fde881b7d44356688c8b500cf1aafff845f34dc
6
+ metadata.gz: c7c688c887c7b153f62b975fd75b91086ce8bce9cafd9724202a8eb888c8aec2baf518349bfcc7e19b1a5c5b5ea2cf5f2868a3252b5602a0c9ed52b746113e2e
7
+ data.tar.gz: ef355f059533a935ae5e6c5f77ae1b645f5fe64361cc0152f5f0a6dbe10830416b2042db02409b7f8e660ddb94debc220e314172475d5243a120cd05b11077e1
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
File without changes
data/README.md CHANGED
File without changes
data/Rakefile CHANGED
File without changes
@@ -1,76 +1,71 @@
1
1
  require "russian_word_forms/version"
2
2
  require 'russian_word_forms/dictionary'
3
3
  require 'russian_word_forms/rules'
4
+ require "unicode"
5
+
4
6
  module RussianWordForms
5
- @dictionary=Dictionary.new
6
- @rules=Rules.new
7
-
8
- def self.inflect(word)
9
- word=word.mb_chars.upcase.to_s
10
- flags=@dictionary.dictionary[word]
11
- output=[]
12
- output<<word if !flags.kind_of?(Array)
13
- flags=@rules.rules.keys.join if flags.empty? # if not found in dictionary
14
- flags.each_char do |flag|
15
- rules=@rules.rules[flag]
16
- rules.keys.each do |rule|
17
- rules[rule].each do |affix|
18
- left,right=affix.split(",")
19
- if right
20
- left=left[1..-1] if left[0]=='-'
21
- right=right[1..-1] if right[0]=='-'
22
-
23
- output<<word.gsub(/(#{left})$/,right) if word.match(/(#{rule})$/)
24
- else
25
7
 
26
- output<<word+left if word.match(/(#{rule})$/)
27
- end
8
+ @@dictionary=Dictionary.new
9
+ @@rules=Rules.new
10
+
28
11
 
12
+ def self.inflect(word)
13
+ word = Unicode::upcase(word)
14
+ flags = @@dictionary.get_flags word
15
+ output = []
16
+ if flags
17
+ flags.each_char do |flag|
18
+ rules_keys = @@rules.rules[flag]
19
+ rules_keys.each do |key,rules|
20
+ rules.each do |rule|
21
+ if rule.suffix
22
+ output << word.gsub(/(#{rule.normal_suffix})$/i,rule.suffix) if word.match(/(#{rule.rule})$/i)
23
+ else
24
+ output << word+rule.normal_suffix if word.match(/(#{rule.rule})$/i)
25
+ end
26
+
27
+ end
29
28
  end
30
- end
29
+ end
31
30
  end
32
31
  output.uniq
33
32
  end
34
33
  def self.get_base_form(word)
35
- word=word.mb_chars.upcase.to_s
36
- flags=@dictionary.dictionary[word]
37
- variants=[]
38
- variants<<word if !flags.kind_of?(Array)
39
- @rules.rules.keys.each do |flag|
40
- rules=@rules.rules[flag]
41
- rules.keys.each do |rule|
42
- rules[rule].each do |affix|
43
- left,right=affix.split(",")
44
- if right
45
- left=left[1..-1] if left[0]=='-'
46
- right=right[1..-1] if right[0]=='-'
47
- if word.match(/(#{right})$/)
48
- tmp=word.gsub(/(#{right})$/,left)
49
- variants<< tmp if tmp.match(/(#{rule})$/)
34
+ word = Unicode::upcase(word).to_s
35
+ flags = @@dictionary.dictionary[word]
36
+ variants = []
37
+ variants << word if flags
38
+ @@rules.rules.each do |flag,rules_keys|
39
+ rules_keys.each do |key,rules|
40
+ rules.each do |rule|
41
+ if rule.suffix && !rule.suffix.empty?
42
+ # puts "#{word} #{rule.suffix}"
43
+
44
+ if word.end_with? rule.suffix
45
+ tmp = word.gsub(rule.suffix,rule.normal_suffix)
46
+ # puts tmp
47
+ variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.get_flags(tmp)
50
48
  end
49
+
51
50
  else
52
- if word.match(/(#{left})$/)
53
- tmp=word.gsub(/(#{left})$/,"")
54
- variants<<tmp if tmp.match(/(#{rule})$/)
51
+ if word.end_with?(rule.normal_suffix)
52
+ tmp = word.gsub(rule.normal_suffix,"")
53
+ variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.dictionary[tmp]
55
54
  end
56
55
  end
57
56
  end
58
57
  end
59
58
  end
60
- output=[]
61
- variants.each do |variant|
62
- if !@dictionary.dictionary[variant].kind_of?(Array)&&self.inflect(variant).any? { |w| w==word }
63
- output<<variant
64
- end
65
- end
66
- return output.uniq
59
+
60
+
61
+ return variants.uniq
67
62
  end
68
-
63
+
69
64
  def self.rules
70
- @rules
65
+ @@rules
71
66
  end
72
-
67
+
73
68
  def self.dictionary
74
- @dictionary
69
+ @@dictionary
75
70
  end
76
71
  end
@@ -16,8 +16,8 @@
16
16
  # Affix table for Russian
17
17
  #
18
18
 
19
- nroffchars ().\\*
20
- texchars ()\[]{}<\>\\$*.%
19
+ # nroffchars ().\\*
20
+ # texchars ()\[]{}<\>\\$*.%
21
21
 
22
22
  # First we declare the character set.
23
23
 
@@ -69,7 +69,7 @@ texchars ()\[]{}<\>\\$*.%
69
69
 
70
70
  #prefixes
71
71
 
72
- suffixes
72
+ #suffixes
73
73
 
74
74
  flag *L:
75
75
  #
@@ -1,35 +1,35 @@
1
- module RussianWordForms
2
- class Dictionary
3
- @@dictionary=Hash.new {|h,k| h[k]=[]}
1
+ require "unicode"
2
+ module RussianWordForms
3
+
4
+ class Dictionary
5
+
6
+ attr_accessor :dictionary
4
7
 
5
8
  def initialize
9
+ @dictionary = Hash.new
6
10
  load_dictionaries
7
- end
8
- def load_dictionary(file)
11
+ end
12
+
13
+ def load_dictionary(file)
9
14
  File.readlines(file).each do |line|
10
- word=line.chomp.split('/')
11
- word[0]=word[0].mb_chars.upcase.to_s
12
- #
13
- word[0].gsub!("Ё","Е")
14
- if word.count>1
15
- @@dictionary[word[0]]=word[1]
16
- else
17
- @@dictionary[word[0]]=""
18
- end
19
- end
15
+ stem,flags = line.chomp.split('/')
16
+ stem = Unicode::upcase stem
17
+ stem.gsub!("Ё","Е")
18
+ @dictionary[stem]=flags
19
+ end
20
20
  end
21
21
 
22
- def load_dictionaries
23
- files=Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
24
- files.each do |file|
22
+ def load_dictionaries
23
+ files = Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
24
+ files.each do |file|
25
25
  load_dictionary file
26
- end
26
+ end
27
27
  end
28
28
 
29
- def dictionary
30
- @@dictionary
29
+ def get_flags(word)
30
+ @dictionary[word]
31
31
  end
32
-
33
32
 
34
33
  end
35
- end
34
+
35
+ end
@@ -1,41 +1,49 @@
1
- module RussianWordForms
2
- class Rules
3
- @@rules=Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
1
+ module RussianWordForms
2
+ class Rules
3
+ class Rule
4
+ attr_accessor :rule,:normal_suffix,:suffix
5
+ def initialize(rule,normal_suffix,suffix)
6
+ @rule = rule
7
+ @normal_suffix = normal_suffix
8
+ @suffix = suffix
9
+ end
10
+ end
11
+ attr_accessor :rules,:rules_without_flags
12
+
4
13
 
5
14
  def initialize
15
+ @rules = Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
16
+ @rules_without_flags = Hash.new {|h2,k2| h2[k2]=[]}
6
17
  load_rules
7
- end
8
- def load_file(file)
9
- flag=""
18
+ end
19
+
20
+ def load_file(file)
21
+ flag = ""
10
22
  File.readlines(file).each do |line|
11
- command=line.chomp.split('#')[0] # get rid of comments
12
-
13
- if command&&command!=""
14
- command=command.split(" ")
15
- if command.count==2 #
16
- case command[0]
17
- when "flag"
18
- flag=command[1][1..-2]
19
- end
20
- elsif command.count>2 # command
21
- rule=command.join.split(">")
22
- @@rules[flag][rule[0]]<<rule[1]
23
- end
23
+ command,comments = line.chomp.split('#') # get rid of comments
24
+ if command && !command.empty?
25
+ if command.start_with? "flag"
26
+ flag = command[6..-2]
27
+ else
28
+ rule,suffixes = command.split.join.split(">")
29
+ normal_suffix,suffix = suffixes.split(",")
30
+ normal_suffix = normal_suffix[1..-1] if normal_suffix[0] == '-'
31
+ suffix=suffix[1..-1] if suffix && suffix[0] == '-'
32
+ @rules[flag][rule] << Rule.new(rule,normal_suffix,suffix)
33
+ @rules_without_flags[rule] << Rule.new(rule,normal_suffix,suffix)
34
+ end
24
35
  end
25
- end
36
+ end
26
37
  end
27
38
 
28
- def load_rules
29
- files=Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
30
- files.each do |file|
39
+ def load_rules
40
+ files = Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
41
+ files.each do |file|
31
42
  load_file file
32
- end
43
+ end
33
44
  end
34
45
 
35
- def rules
36
- @@rules
37
- end
38
-
46
+
39
47
 
40
48
  end
41
- end
49
+ end
@@ -1,3 +1,3 @@
1
1
  module RussianWordForms
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "unicode"
23
24
  end
metadata CHANGED
@@ -1,41 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: russian_word_forms
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maksatbek Mansurov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-06 00:00:00.000000000 Z
11
+ date: 2015-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: unicode
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
54
  version: '0'
41
55
  description: Gem detects wordforms. It uses russian ispell dictionary written by Alexander
@@ -46,7 +60,7 @@ executables: []
46
60
  extensions: []
47
61
  extra_rdoc_files: []
48
62
  files:
49
- - .gitignore
63
+ - ".gitignore"
50
64
  - Gemfile
51
65
  - LICENSE.txt
52
66
  - README.md
@@ -74,17 +88,17 @@ require_paths:
74
88
  - lib
75
89
  required_ruby_version: !ruby/object:Gem::Requirement
76
90
  requirements:
77
- - - '>='
91
+ - - ">="
78
92
  - !ruby/object:Gem::Version
79
93
  version: '0'
80
94
  required_rubygems_version: !ruby/object:Gem::Requirement
81
95
  requirements:
82
- - - '>='
96
+ - - ">="
83
97
  - !ruby/object:Gem::Version
84
98
  version: '0'
85
99
  requirements: []
86
100
  rubyforge_project:
87
- rubygems_version: 2.0.14
101
+ rubygems_version: 2.2.2
88
102
  signing_key:
89
103
  specification_version: 4
90
104
  summary: Gem detects wordforms