russian_word_forms 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 342af8888b394e9eb6f997faf378f7b6ff3e3845
4
- data.tar.gz: a44afb323002b6705b985a71db4b2f3e8ff0e316
3
+ metadata.gz: ade7ff1c37d3f2c7285af4c39a9c103d30f36be1
4
+ data.tar.gz: 4aeb05584df63c15a3ea0ca5acf1aeb68d5bc5f0
5
5
  SHA512:
6
- metadata.gz: 177c6eabe8c59b4721ca9b56a75eba54dadcc95171b11203bc30c0bdf58f950d6daa791eb43dd1cfc24f5b2b157ea8d3f72cacec7d0d60bca3e8fba0da6b7f5c
7
- data.tar.gz: a04c04d564bde2fb6c88e167d364000950a500938484483e6f1673841c1d135ea4e373fe6a39b85bf71ff4710fde881b7d44356688c8b500cf1aafff845f34dc
6
+ metadata.gz: c7c688c887c7b153f62b975fd75b91086ce8bce9cafd9724202a8eb888c8aec2baf518349bfcc7e19b1a5c5b5ea2cf5f2868a3252b5602a0c9ed52b746113e2e
7
+ data.tar.gz: ef355f059533a935ae5e6c5f77ae1b645f5fe64361cc0152f5f0a6dbe10830416b2042db02409b7f8e660ddb94debc220e314172475d5243a120cd05b11077e1
data/.gitignore CHANGED
File without changes
data/Gemfile CHANGED
File without changes
File without changes
data/README.md CHANGED
File without changes
data/Rakefile CHANGED
File without changes
@@ -1,76 +1,71 @@
1
1
  require "russian_word_forms/version"
2
2
  require 'russian_word_forms/dictionary'
3
3
  require 'russian_word_forms/rules'
4
+ require "unicode"
5
+
4
6
  module RussianWordForms
5
- @dictionary=Dictionary.new
6
- @rules=Rules.new
7
-
8
- def self.inflect(word)
9
- word=word.mb_chars.upcase.to_s
10
- flags=@dictionary.dictionary[word]
11
- output=[]
12
- output<<word if !flags.kind_of?(Array)
13
- flags=@rules.rules.keys.join if flags.empty? # if not found in dictionary
14
- flags.each_char do |flag|
15
- rules=@rules.rules[flag]
16
- rules.keys.each do |rule|
17
- rules[rule].each do |affix|
18
- left,right=affix.split(",")
19
- if right
20
- left=left[1..-1] if left[0]=='-'
21
- right=right[1..-1] if right[0]=='-'
22
-
23
- output<<word.gsub(/(#{left})$/,right) if word.match(/(#{rule})$/)
24
- else
25
7
 
26
- output<<word+left if word.match(/(#{rule})$/)
27
- end
8
+ @@dictionary=Dictionary.new
9
+ @@rules=Rules.new
10
+
28
11
 
12
+ def self.inflect(word)
13
+ word = Unicode::upcase(word)
14
+ flags = @@dictionary.get_flags word
15
+ output = []
16
+ if flags
17
+ flags.each_char do |flag|
18
+ rules_keys = @@rules.rules[flag]
19
+ rules_keys.each do |key,rules|
20
+ rules.each do |rule|
21
+ if rule.suffix
22
+ output << word.gsub(/(#{rule.normal_suffix})$/i,rule.suffix) if word.match(/(#{rule.rule})$/i)
23
+ else
24
+ output << word+rule.normal_suffix if word.match(/(#{rule.rule})$/i)
25
+ end
26
+
27
+ end
29
28
  end
30
- end
29
+ end
31
30
  end
32
31
  output.uniq
33
32
  end
34
33
  def self.get_base_form(word)
35
- word=word.mb_chars.upcase.to_s
36
- flags=@dictionary.dictionary[word]
37
- variants=[]
38
- variants<<word if !flags.kind_of?(Array)
39
- @rules.rules.keys.each do |flag|
40
- rules=@rules.rules[flag]
41
- rules.keys.each do |rule|
42
- rules[rule].each do |affix|
43
- left,right=affix.split(",")
44
- if right
45
- left=left[1..-1] if left[0]=='-'
46
- right=right[1..-1] if right[0]=='-'
47
- if word.match(/(#{right})$/)
48
- tmp=word.gsub(/(#{right})$/,left)
49
- variants<< tmp if tmp.match(/(#{rule})$/)
34
+ word = Unicode::upcase(word).to_s
35
+ flags = @@dictionary.dictionary[word]
36
+ variants = []
37
+ variants << word if flags
38
+ @@rules.rules.each do |flag,rules_keys|
39
+ rules_keys.each do |key,rules|
40
+ rules.each do |rule|
41
+ if rule.suffix && !rule.suffix.empty?
42
+ # puts "#{word} #{rule.suffix}"
43
+
44
+ if word.end_with? rule.suffix
45
+ tmp = word.gsub(rule.suffix,rule.normal_suffix)
46
+ # puts tmp
47
+ variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.get_flags(tmp)
50
48
  end
49
+
51
50
  else
52
- if word.match(/(#{left})$/)
53
- tmp=word.gsub(/(#{left})$/,"")
54
- variants<<tmp if tmp.match(/(#{rule})$/)
51
+ if word.end_with?(rule.normal_suffix)
52
+ tmp = word.gsub(rule.normal_suffix,"")
53
+ variants << tmp if tmp != word && tmp.match(/(#{rule.rule})$/i) && @@dictionary.dictionary[tmp]
55
54
  end
56
55
  end
57
56
  end
58
57
  end
59
58
  end
60
- output=[]
61
- variants.each do |variant|
62
- if !@dictionary.dictionary[variant].kind_of?(Array)&&self.inflect(variant).any? { |w| w==word }
63
- output<<variant
64
- end
65
- end
66
- return output.uniq
59
+
60
+
61
+ return variants.uniq
67
62
  end
68
-
63
+
69
64
  def self.rules
70
- @rules
65
+ @@rules
71
66
  end
72
-
67
+
73
68
  def self.dictionary
74
- @dictionary
69
+ @@dictionary
75
70
  end
76
71
  end
@@ -16,8 +16,8 @@
16
16
  # Affix table for Russian
17
17
  #
18
18
 
19
- nroffchars ().\\*
20
- texchars ()\[]{}<\>\\$*.%
19
+ # nroffchars ().\\*
20
+ # texchars ()\[]{}<\>\\$*.%
21
21
 
22
22
  # First we declare the character set.
23
23
 
@@ -69,7 +69,7 @@ texchars ()\[]{}<\>\\$*.%
69
69
 
70
70
  #prefixes
71
71
 
72
- suffixes
72
+ #suffixes
73
73
 
74
74
  flag *L:
75
75
  #
@@ -1,35 +1,35 @@
1
- module RussianWordForms
2
- class Dictionary
3
- @@dictionary=Hash.new {|h,k| h[k]=[]}
1
+ require "unicode"
2
+ module RussianWordForms
3
+
4
+ class Dictionary
5
+
6
+ attr_accessor :dictionary
4
7
 
5
8
  def initialize
9
+ @dictionary = Hash.new
6
10
  load_dictionaries
7
- end
8
- def load_dictionary(file)
11
+ end
12
+
13
+ def load_dictionary(file)
9
14
  File.readlines(file).each do |line|
10
- word=line.chomp.split('/')
11
- word[0]=word[0].mb_chars.upcase.to_s
12
- #
13
- word[0].gsub!("Ё","Е")
14
- if word.count>1
15
- @@dictionary[word[0]]=word[1]
16
- else
17
- @@dictionary[word[0]]=""
18
- end
19
- end
15
+ stem,flags = line.chomp.split('/')
16
+ stem = Unicode::upcase stem
17
+ stem.gsub!("Ё","Е")
18
+ @dictionary[stem]=flags
19
+ end
20
20
  end
21
21
 
22
- def load_dictionaries
23
- files=Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
24
- files.each do |file|
22
+ def load_dictionaries
23
+ files = Dir[File.dirname(__FILE__)+"/dictionaries/*.dic"]
24
+ files.each do |file|
25
25
  load_dictionary file
26
- end
26
+ end
27
27
  end
28
28
 
29
- def dictionary
30
- @@dictionary
29
+ def get_flags(word)
30
+ @dictionary[word]
31
31
  end
32
-
33
32
 
34
33
  end
35
- end
34
+
35
+ end
@@ -1,41 +1,49 @@
1
- module RussianWordForms
2
- class Rules
3
- @@rules=Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
1
+ module RussianWordForms
2
+ class Rules
3
+ class Rule
4
+ attr_accessor :rule,:normal_suffix,:suffix
5
+ def initialize(rule,normal_suffix,suffix)
6
+ @rule = rule
7
+ @normal_suffix = normal_suffix
8
+ @suffix = suffix
9
+ end
10
+ end
11
+ attr_accessor :rules,:rules_without_flags
12
+
4
13
 
5
14
  def initialize
15
+ @rules = Hash.new {|h,k| h[k]=Hash.new {|h2,k2| h2[k2]=[]}}
16
+ @rules_without_flags = Hash.new {|h2,k2| h2[k2]=[]}
6
17
  load_rules
7
- end
8
- def load_file(file)
9
- flag=""
18
+ end
19
+
20
+ def load_file(file)
21
+ flag = ""
10
22
  File.readlines(file).each do |line|
11
- command=line.chomp.split('#')[0] # get rid of comments
12
-
13
- if command&&command!=""
14
- command=command.split(" ")
15
- if command.count==2 #
16
- case command[0]
17
- when "flag"
18
- flag=command[1][1..-2]
19
- end
20
- elsif command.count>2 # command
21
- rule=command.join.split(">")
22
- @@rules[flag][rule[0]]<<rule[1]
23
- end
23
+ command,comments = line.chomp.split('#') # get rid of comments
24
+ if command && !command.empty?
25
+ if command.start_with? "flag"
26
+ flag = command[6..-2]
27
+ else
28
+ rule,suffixes = command.split.join.split(">")
29
+ normal_suffix,suffix = suffixes.split(",")
30
+ normal_suffix = normal_suffix[1..-1] if normal_suffix[0] == '-'
31
+ suffix=suffix[1..-1] if suffix && suffix[0] == '-'
32
+ @rules[flag][rule] << Rule.new(rule,normal_suffix,suffix)
33
+ @rules_without_flags[rule] << Rule.new(rule,normal_suffix,suffix)
34
+ end
24
35
  end
25
- end
36
+ end
26
37
  end
27
38
 
28
- def load_rules
29
- files=Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
30
- files.each do |file|
39
+ def load_rules
40
+ files = Dir[File.dirname(__FILE__)+"/dictionaries/*.aff"]
41
+ files.each do |file|
31
42
  load_file file
32
- end
43
+ end
33
44
  end
34
45
 
35
- def rules
36
- @@rules
37
- end
38
-
46
+
39
47
 
40
48
  end
41
- end
49
+ end
@@ -1,3 +1,3 @@
1
1
  module RussianWordForms
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -20,4 +20,5 @@ Gem::Specification.new do |spec|
20
20
 
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
+ spec.add_development_dependency "unicode"
23
24
  end
metadata CHANGED
@@ -1,41 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: russian_word_forms
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maksatbek Mansurov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-06 00:00:00.000000000 Z
11
+ date: 2015-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.3'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - '>='
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: unicode
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
54
  version: '0'
41
55
  description: Gem detects wordforms. It uses russian ispell dictionary written by Alexander
@@ -46,7 +60,7 @@ executables: []
46
60
  extensions: []
47
61
  extra_rdoc_files: []
48
62
  files:
49
- - .gitignore
63
+ - ".gitignore"
50
64
  - Gemfile
51
65
  - LICENSE.txt
52
66
  - README.md
@@ -74,17 +88,17 @@ require_paths:
74
88
  - lib
75
89
  required_ruby_version: !ruby/object:Gem::Requirement
76
90
  requirements:
77
- - - '>='
91
+ - - ">="
78
92
  - !ruby/object:Gem::Version
79
93
  version: '0'
80
94
  required_rubygems_version: !ruby/object:Gem::Requirement
81
95
  requirements:
82
- - - '>='
96
+ - - ">="
83
97
  - !ruby/object:Gem::Version
84
98
  version: '0'
85
99
  requirements: []
86
100
  rubyforge_project:
87
- rubygems_version: 2.0.14
101
+ rubygems_version: 2.2.2
88
102
  signing_key:
89
103
  specification_version: 4
90
104
  summary: Gem detects wordforms