inferx 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,20 +2,18 @@ require 'redis'
2
2
 
3
3
  require 'inferx/version'
4
4
  require 'inferx/categories'
5
- require 'inferx/complementary/categories'
6
5
 
7
6
  class Inferx
8
7
 
9
8
  # @param [Hash] options other options are passed to Redis#initialize in
10
9
  # {https://github.com/redis/redis-rb redis}
11
10
  #
12
- # @option options [Boolean] :complementary
11
+ # @option options [Boolean] :complementary use complementary Bayes classifier
13
12
  # @option options [String] :namespace namespace of keys to be used to Redis
14
13
  # @option options [Boolean] :manual whether manual save, defaults to false
15
14
  def initialize(options = {})
16
15
  @complementary = !!options[:complementary]
17
- categories_class = @complementary ? Complementary::Categories : Categories
18
- @categories = categories_class.new(Redis.new(options), options)
16
+ @categories = Categories.new(Redis.new(options), options)
19
17
  end
20
18
 
21
19
  attr_reader :categories
@@ -1,25 +1,81 @@
1
- require 'inferx/adapter'
2
1
  require 'inferx/category'
2
+ require 'inferx/category/complementary'
3
+ require 'set'
3
4
 
4
5
  class Inferx
5
- class Categories < Adapter
6
+ class Categories
6
7
  include Enumerable
7
8
 
9
+ # @param [Redis] redis an instance of Redis
10
+ # @param [Hash] options
11
+ # @option options [Boolean] :complementary use complementary Bayes
12
+ # classifier
13
+ # @option options [String] :namespace namespace of keys to be used to Redis
14
+ # @option options [Boolean] :manual whether manual save, defaults to false
15
+ def initialize(redis, options = {})
16
+ @redis = redis
17
+ @category_class = options[:complementary] ? Category::Complementary : Category
18
+ parts = %w(inferx categories)
19
+ parts.insert(1, options[:namespace]) if options[:namespace]
20
+ @key = parts.join(':')
21
+ @manual = !!options[:manual]
22
+ @filter = nil
23
+ @except = Set.new
24
+ end
25
+
26
+ # Get key for access to categories on Redis.
27
+ #
28
+ # @attribute [r] key
29
+ # @return [String] the key
30
+ attr_reader :key
31
+
32
+ # Determine if manual save.
33
+ #
34
+ # @return [Boolean] whether manual save
35
+ def manual?
36
+ @manual
37
+ end
38
+
39
+ # Filter categories.
40
+ #
41
+ # @param [Array<String>] category_names category names
42
+ # @return [Inferx::Categories] categories filtered by the category names
43
+ def filter(*category_names)
44
+ category_names = category_names.map(&:to_s)
45
+
46
+ filtered do
47
+ @filter = @filter ? @filter & category_names : Set.new(category_names)
48
+ end
49
+ end
50
+
51
+ # Filter by excepting categories.
52
+ #
53
+ # @param [Array<String>] category_names category names
54
+ # @return [Inferx::Categories] categories filtered by the category names
55
+ def except(*category_names)
56
+ category_names = category_names.map(&:to_s)
57
+
58
+ filtered do
59
+ @except.merge(category_names)
60
+ end
61
+ end
62
+
8
63
  # Get all category names.
9
64
  #
10
65
  # @return [Array<String>] category names
11
66
  def all
12
- hkeys || []
67
+ all_in_visible.to_a
13
68
  end
14
69
 
15
- # Get a category according the name.
70
+ # Get category according to the name.
16
71
  #
17
- # @param [String] category_name a category name
18
- # @return [Inferx::Category] a category
72
+ # @param [String] category_name the category name
73
+ # @return [Inferx::Category] the category
19
74
  def get(category_name)
20
- size = hget(category_name)
75
+ size = @redis.hget(@key, category_name)
21
76
  raise ArgumentError, "#{category_name.inspect} is missing" unless size
22
- spawn_category(category_name, size.to_i)
77
+ raise ArgumentError, "#{category_name.inspect} does not exist in filtered categories" unless all_in_visible.include?(category_name.to_s)
78
+ make_category(category_name, size.to_i)
23
79
  end
24
80
  alias [] get
25
81
 
@@ -28,7 +84,7 @@ class Inferx
28
84
  # @param [Array<String>] category_names category names
29
85
  def add(*category_names)
30
86
  @redis.pipelined do
31
- category_names.each { |category_name| hsetnx(category_name, 0) }
87
+ category_names.each { |category_name| @redis.hsetnx(@key, category_name, 0) }
32
88
  @redis.save unless manual?
33
89
  end
34
90
  end
@@ -38,7 +94,7 @@ class Inferx
38
94
  # @param [Array<String>] category_names category names
39
95
  def remove(*category_names)
40
96
  @redis.pipelined do
41
- category_names.each { |category_name| hdel(category_name) }
97
+ category_names.each { |category_name| @redis.hdel(@key, category_name) }
42
98
  @redis.del(*category_names.map(&method(:make_category_key)))
43
99
  @redis.save unless manual?
44
100
  end
@@ -46,26 +102,125 @@ class Inferx
46
102
 
47
103
  # Determine if the category is defined.
48
104
  #
49
- # @param [String] category_name a category name
105
+ # @param [String] category_name the category name
50
106
  # @return whether the category is defined
51
107
  def exists?(category_name)
52
- hexists(category_name)
108
+ all_in_visible.include?(category_name.to_s)
53
109
  end
54
110
 
55
111
  # Apply process for each category.
56
112
  #
57
- # @yield a block to be called for every category
113
+ # @yield called for every category
58
114
  # @yieldparam [Inferx::Category] category a category
59
115
  def each
60
- hgetall.each do |category_name, size|
61
- yield spawn_category(category_name, size.to_i)
116
+ visible_category_names = all_in_visible
117
+
118
+ @redis.hgetall(@key).each do |category_name, size|
119
+ next unless visible_category_names.include?(category_name)
120
+ yield make_category(category_name, size.to_i)
62
121
  end
63
122
  end
64
123
 
65
- protected
124
+ # Inject the words to the training data of the categories.
125
+ #
126
+ # @param [Array<String>] words an array of words
127
+ # @return [Hash<String, Integer>] increase for each category
128
+ def inject(words)
129
+ category_names = all
130
+ return {} if category_names.empty?
131
+ return associate(category_names, 0) if words.empty?
132
+
133
+ increase = words.size
134
+ words = collect(words)
135
+
136
+ associate(category_names, increase) do
137
+ @redis.pipelined do
138
+ category_names.each do |category_name|
139
+ category_key = make_category_key(category_name)
140
+ words.each { |word, count| @redis.zincrby(category_key, count, word) }
141
+ @redis.hincrby(@key, category_name, increase)
142
+ end
143
+
144
+ @redis.save unless manual?
145
+ end
146
+ end
147
+ end
148
+
149
+ # Eject the words from the training data of the categories.
150
+ #
151
+ # @param [Array<String>] words an array of words
152
+ # @return [Hash<String, Integer>] decrease for each category
153
+ def eject(words)
154
+ category_names = all
155
+ return {} if category_names.empty?
156
+ return associate(category_names, 0) if words.empty?
157
+
158
+ decrease = words.size
159
+ words = collect(words)
160
+
161
+ associate(category_names, decrease) do |fluctuation|
162
+ all_scores = @redis.pipelined do
163
+ category_names.each do |category_name|
164
+ category_key = make_category_key(category_name)
165
+ words.each { |word, count| @redis.zincrby(category_key, -count, word) }
166
+ @redis.zremrangebyscore(category_key, '-inf', 0)
167
+ end
168
+ end
169
+
170
+ length = words.size
171
+
172
+ category_names.each_with_index do |category_name, index|
173
+ scores = all_scores[index * (length + 1), length]
174
+ initial = fluctuation[category_name]
175
+
176
+ fluctuation[category_name] = scores.inject(initial) do |decrease, score|
177
+ score = score.to_i
178
+ score < 0 ? decrease + score : decrease
179
+ end
180
+ end
181
+
182
+ @redis.pipelined do
183
+ fluctuation.each do |category_name, decrease|
184
+ @redis.hincrby(@key, category_name, -decrease)
185
+ end
186
+
187
+ @redis.save unless manual?
188
+ end
189
+ end
190
+ end
191
+
192
+ private
193
+
194
+ def filtered(&block)
195
+ dup.tap { |filtered| filtered.instance_eval(&block) }
196
+ end
197
+
198
+ def all_in_visible
199
+ all = Set.new(@redis.hkeys(@key) || [])
200
+ all &= @filter if @filter
201
+ all - @except
202
+ end
203
+
204
+ def make_category_key(category_name)
205
+ "#{@key}:#{category_name}"
206
+ end
207
+
208
+ def make_category(*args)
209
+ @category_class.new(@redis, self, *args)
210
+ end
211
+
212
+ def collect(words)
213
+ words.inject({}) do |hash, word|
214
+ hash[word] ||= 0
215
+ hash[word] += 1
216
+ hash
217
+ end
218
+ end
66
219
 
67
- def spawn_category(*args)
68
- spawn(Category, *args)
220
+ def associate(keys, value, &block)
221
+ keys_and_values = Hash[keys.map { |key| [key, value] }]
222
+ yield *(block.arity.zero? ? [] : [keys_and_values]) if block_given?
223
+ keys_and_values
69
224
  end
70
225
  end
71
226
  end
@@ -1,7 +1,5 @@
1
- require 'inferx/adapter'
2
-
3
1
  class Inferx
4
- class Category < Adapter
2
+ class Category
5
3
 
6
4
  def self.ready_for(method_name)
7
5
  define_method("ready_to_#{method_name}") do |&block|
@@ -12,51 +10,64 @@ class Inferx
12
10
  end
13
11
 
14
12
  # @param [Redis] redis an instance of Redis
13
+ # @param [Inferx::Categories] categories the categories
15
14
  # @param [String] name a category name
16
15
  # @param [Integer] size total of scores
17
- # @param [Hash] options
18
- # @option options [String] :namespace namespace of keys to be used to Redis
19
- # @option options [Boolean] :manual whether manual save, defaults to false
20
- def initialize(redis, name, size, options = {})
21
- super(redis, options)
16
+ def initialize(redis, categories, name, size)
17
+ @redis = redis
18
+ @categories = categories
19
+ @key = "#{categories.key}:#{name}"
22
20
  @name = name.to_s
23
21
  @size = size
24
22
  end
25
23
 
26
- # Get a category name.
24
+ # Get key for access to training data of the category on Redis.
25
+ #
26
+ # @attribute [r] key
27
+ # @return [String] the key
28
+ attr_reader :key
29
+
30
+ # Get name of the category.
27
31
  #
28
32
  # @attribute [r] name
29
- # @return [String] a category name
33
+ # @return [String] the name
34
+ attr_reader :name
30
35
 
31
36
  # Get total of scores.
32
37
  #
33
38
  # @attribute [r] size
34
39
  # @return [Integer] total of scores
35
- attr_reader :name, :size
40
+ attr_reader :size
36
41
 
37
42
  # Get words with scores in the category.
38
43
  #
39
44
  # @return [Hash<String, Integer>] words with scores
40
45
  def all
41
- words_with_scores = zrevrange(0, -1, :withscores => true)
42
- index = 1
43
- size = words_with_scores.size
46
+ words_with_scores = @redis.zrevrange(@key, 0, -1, :withscores => true)
44
47
 
45
- while index < size
46
- words_with_scores[index] = words_with_scores[index].to_i
47
- index += 2
48
- end
48
+ if !words_with_scores.empty? and words_with_scores.first.is_a?(Array)
49
+ words_with_scores.each { |pair| pair[1] = pair[1].to_i }
50
+ Hash[words_with_scores]
51
+ else
52
+ index = 1
53
+ size = words_with_scores.size
49
54
 
50
- Hash[*words_with_scores]
55
+ while index < size
56
+ words_with_scores[index] = words_with_scores[index].to_i
57
+ index += 2
58
+ end
59
+
60
+ Hash[*words_with_scores]
61
+ end
51
62
  end
52
63
 
53
- # Get score of a word.
64
+ # Get score of the word.
54
65
  #
55
- # @param [String] word a word
66
+ # @param [String] word the word
56
67
  # @return [Integer] when the word is member, score of the word
57
68
  # @return [nil] when the word is not member
58
69
  def get(word)
59
- score = zscore(word)
70
+ score = @redis.zscore(@key, word)
60
71
  score ? score.to_i : nil
61
72
  end
62
73
  alias [] get
@@ -65,18 +76,8 @@ class Inferx
65
76
  #
66
77
  # @param [Array<String>] words an array of words
67
78
  def train(words)
68
- return if words.empty?
69
-
70
- increase = words.size
71
- words = collect(words)
72
-
73
- @redis.pipelined do
74
- words.each { |word, count| zincrby(count, word) }
75
- hincrby(name, increase)
76
- @redis.save unless manual?
77
- end
78
-
79
- @size += increase
79
+ increases = @categories.filter(@name).inject(words)
80
+ @size += increases[@name]
80
81
  end
81
82
 
82
83
  # Prepare to enhance the training data. Use for high performance.
@@ -89,31 +90,8 @@ class Inferx
89
90
  #
90
91
  # @param [Array<String>] words an array of words
91
92
  def untrain(words)
92
- return if words.empty?
93
-
94
- decrease = words.size
95
- words = collect(words)
96
-
97
- scores = @redis.pipelined do
98
- words.each { |word, count| zincrby(-count, word) }
99
- zremrangebyscore('-inf', 0)
100
- end
101
-
102
- length = words.size
103
-
104
- scores[0, length].each do |score|
105
- score = score.to_i
106
- decrease += score if score < 0
107
- end
108
-
109
- return unless decrease > 0
110
-
111
- @redis.pipelined do
112
- hincrby(name, -decrease)
113
- @redis.save unless manual?
114
- end
115
-
116
- @size -= decrease
93
+ decreases = @categories.filter(@name).eject(words)
94
+ @size -= decreases[@name]
117
95
  end
118
96
 
119
97
  # Prepare to attenuate the training data giving words.
@@ -127,25 +105,11 @@ class Inferx
127
105
  # @param [Array<String>] words an array of words
128
106
  # @return [Array<Integer>] scores for each word
129
107
  def scores(words)
130
- scores = @redis.pipelined { words.map(&method(:zscore)) }
131
- scores.map { |score| score ? score.to_i : nil }
132
- end
133
-
134
- protected
135
-
136
- %w(zrevrange zscore zincrby zremrangebyscore).each do |command|
137
- define_method(command) do |*args|
138
- @category_key ||= make_category_key(@name)
139
- @redis.__send__(command, @category_key, *args)
108
+ scores = @redis.pipelined do
109
+ words.map { |word| @redis.zscore(@key, word) }
140
110
  end
141
- end
142
111
 
143
- def collect(words)
144
- words.inject({}) do |hash, word|
145
- hash[word] ||= 0
146
- hash[word] += 1
147
- hash
148
- end
112
+ scores.map { |score| score ? score.to_i : nil }
149
113
  end
150
114
  end
151
115
  end
@@ -0,0 +1,48 @@
1
+ require 'inferx/category'
2
+
3
+ class Inferx
4
+ class Category
5
+ class Complementary < Category
6
+
7
+ # Inject the words to the training data of the category.
8
+ #
9
+ # @param [Array<String>] words an array of words
10
+ alias inject train
11
+
12
+ # Prepare to inject the words to the training data of the category. Use
13
+ # for high performance.
14
+ #
15
+ # @yield [inject] process something
16
+ # @yieldparam [Proc] inject inject the words to the training data of the
17
+ # category
18
+ ready_for :inject
19
+
20
+ # Eject the words from the training data of the category.
21
+ #
22
+ # @param [Array<String>] words an array of words
23
+ alias eject untrain
24
+
25
+ # Prepare to eject the words from the training data of the category. Use
26
+ # for high performance.
27
+ #
28
+ # @yield [eject] process something
29
+ # @yieldparam [Proc] eject eject the words from the training data of the
30
+ # category
31
+ ready_for :eject
32
+
33
+ # Enhance the training data of other categories giving words.
34
+ #
35
+ # @param [Array<String>] words an array of words
36
+ def train(words)
37
+ @categories.except(@name).inject(words)
38
+ end
39
+
40
+ # Attenuate the training data of other categories giving words.
41
+ #
42
+ # @param [Array<String>] words an array of words
43
+ def untrain(words)
44
+ @categories.except(@name).eject(words)
45
+ end
46
+ end
47
+ end
48
+ end