inferx 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,20 +2,18 @@ require 'redis'
2
2
 
3
3
  require 'inferx/version'
4
4
  require 'inferx/categories'
5
- require 'inferx/complementary/categories'
6
5
 
7
6
  class Inferx
8
7
 
9
8
  # @param [Hash] options other options are passed to Redis#initialize in
10
9
  # {https://github.com/redis/redis-rb redis}
11
10
  #
12
- # @option options [Boolean] :complementary
11
+ # @option options [Boolean] :complementary use complementary Bayes classifier
13
12
  # @option options [String] :namespace namespace of keys to be used to Redis
14
13
  # @option options [Boolean] :manual whether manual save, defaults to false
15
14
  def initialize(options = {})
16
15
  @complementary = !!options[:complementary]
17
- categories_class = @complementary ? Complementary::Categories : Categories
18
- @categories = categories_class.new(Redis.new(options), options)
16
+ @categories = Categories.new(Redis.new(options), options)
19
17
  end
20
18
 
21
19
  attr_reader :categories
@@ -1,25 +1,81 @@
1
- require 'inferx/adapter'
2
1
  require 'inferx/category'
2
+ require 'inferx/category/complementary'
3
+ require 'set'
3
4
 
4
5
  class Inferx
5
- class Categories < Adapter
6
+ class Categories
6
7
  include Enumerable
7
8
 
9
+ # @param [Redis] redis an instance of Redis
10
+ # @param [Hash] options
11
+ # @option options [Boolean] :complementary use complementary Bayes
12
+ # classifier
13
+ # @option options [String] :namespace namespace of keys to be used to Redis
14
+ # @option options [Boolean] :manual whether manual save, defaults to false
15
+ def initialize(redis, options = {})
16
+ @redis = redis
17
+ @category_class = options[:complementary] ? Category::Complementary : Category
18
+ parts = %w(inferx categories)
19
+ parts.insert(1, options[:namespace]) if options[:namespace]
20
+ @key = parts.join(':')
21
+ @manual = !!options[:manual]
22
+ @filter = nil
23
+ @except = Set.new
24
+ end
25
+
26
+ # Get key for access to categories on Redis.
27
+ #
28
+ # @attribute [r] key
29
+ # @return [String] the key
30
+ attr_reader :key
31
+
32
+ # Determine if manual save.
33
+ #
34
+ # @return [Boolean] whether manual save
35
+ def manual?
36
+ @manual
37
+ end
38
+
39
+ # Filter categories.
40
+ #
41
+ # @param [Array<String>] category_names category names
42
+ # @return [Inferx::Categories] categories filtered by the category names
43
+ def filter(*category_names)
44
+ category_names = category_names.map(&:to_s)
45
+
46
+ filtered do
47
+ @filter = @filter ? @filter & category_names : Set.new(category_names)
48
+ end
49
+ end
50
+
51
+ # Filter by excepting categories.
52
+ #
53
+ # @param [Array<String>] category_names category names
54
+ # @return [Inferx::Categories] categories filtered by the category names
55
+ def except(*category_names)
56
+ category_names = category_names.map(&:to_s)
57
+
58
+ filtered do
59
+ @except.merge(category_names)
60
+ end
61
+ end
62
+
8
63
  # Get all category names.
9
64
  #
10
65
  # @return [Array<String>] category names
11
66
  def all
12
- hkeys || []
67
+ all_in_visible.to_a
13
68
  end
14
69
 
15
- # Get a category according the name.
70
+ # Get category according to the name.
16
71
  #
17
- # @param [String] category_name a category name
18
- # @return [Inferx::Category] a category
72
+ # @param [String] category_name the category name
73
+ # @return [Inferx::Category] the category
19
74
  def get(category_name)
20
- size = hget(category_name)
75
+ size = @redis.hget(@key, category_name)
21
76
  raise ArgumentError, "#{category_name.inspect} is missing" unless size
22
- spawn_category(category_name, size.to_i)
77
+ raise ArgumentError, "#{category_name.inspect} does not exist in filtered categories" unless all_in_visible.include?(category_name.to_s)
78
+ make_category(category_name, size.to_i)
23
79
  end
24
80
  alias [] get
25
81
 
@@ -28,7 +84,7 @@ class Inferx
28
84
  # @param [Array<String>] category_names category names
29
85
  def add(*category_names)
30
86
  @redis.pipelined do
31
- category_names.each { |category_name| hsetnx(category_name, 0) }
87
+ category_names.each { |category_name| @redis.hsetnx(@key, category_name, 0) }
32
88
  @redis.save unless manual?
33
89
  end
34
90
  end
@@ -38,7 +94,7 @@ class Inferx
38
94
  # @param [Array<String>] category_names category names
39
95
  def remove(*category_names)
40
96
  @redis.pipelined do
41
- category_names.each { |category_name| hdel(category_name) }
97
+ category_names.each { |category_name| @redis.hdel(@key, category_name) }
42
98
  @redis.del(*category_names.map(&method(:make_category_key)))
43
99
  @redis.save unless manual?
44
100
  end
@@ -46,26 +102,125 @@ class Inferx
46
102
 
47
103
  # Determine if the category is defined.
48
104
  #
49
- # @param [String] category_name a category name
105
+ # @param [String] category_name the category name
50
106
  # @return whether the category is defined
51
107
  def exists?(category_name)
52
- hexists(category_name)
108
+ all_in_visible.include?(category_name.to_s)
53
109
  end
54
110
 
55
111
  # Apply process for each category.
56
112
  #
57
- # @yield a block to be called for every category
113
+ # @yield called for every category
58
114
  # @yieldparam [Inferx::Category] category a category
59
115
  def each
60
- hgetall.each do |category_name, size|
61
- yield spawn_category(category_name, size.to_i)
116
+ visible_category_names = all_in_visible
117
+
118
+ @redis.hgetall(@key).each do |category_name, size|
119
+ next unless visible_category_names.include?(category_name)
120
+ yield make_category(category_name, size.to_i)
62
121
  end
63
122
  end
64
123
 
65
- protected
124
+ # Inject the words to the training data of the categories.
125
+ #
126
+ # @param [Array<String>] words an array of words
127
+ # @return [Hash<String, Integer>] increase for each category
128
+ def inject(words)
129
+ category_names = all
130
+ return {} if category_names.empty?
131
+ return associate(category_names, 0) if words.empty?
132
+
133
+ increase = words.size
134
+ words = collect(words)
135
+
136
+ associate(category_names, increase) do
137
+ @redis.pipelined do
138
+ category_names.each do |category_name|
139
+ category_key = make_category_key(category_name)
140
+ words.each { |word, count| @redis.zincrby(category_key, count, word) }
141
+ @redis.hincrby(@key, category_name, increase)
142
+ end
143
+
144
+ @redis.save unless manual?
145
+ end
146
+ end
147
+ end
148
+
149
+ # Eject the words from the training data of the categories.
150
+ #
151
+ # @param [Array<String>] words an array of words
152
+ # @return [Hash<String, Integer>] decrease for each category
153
+ def eject(words)
154
+ category_names = all
155
+ return {} if category_names.empty?
156
+ return associate(category_names, 0) if words.empty?
157
+
158
+ decrease = words.size
159
+ words = collect(words)
160
+
161
+ associate(category_names, decrease) do |fluctuation|
162
+ all_scores = @redis.pipelined do
163
+ category_names.each do |category_name|
164
+ category_key = make_category_key(category_name)
165
+ words.each { |word, count| @redis.zincrby(category_key, -count, word) }
166
+ @redis.zremrangebyscore(category_key, '-inf', 0)
167
+ end
168
+ end
169
+
170
+ length = words.size
171
+
172
+ category_names.each_with_index do |category_name, index|
173
+ scores = all_scores[index * (length + 1), length]
174
+ initial = fluctuation[category_name]
175
+
176
+ fluctuation[category_name] = scores.inject(initial) do |decrease, score|
177
+ score = score.to_i
178
+ score < 0 ? decrease + score : decrease
179
+ end
180
+ end
181
+
182
+ @redis.pipelined do
183
+ fluctuation.each do |category_name, decrease|
184
+ @redis.hincrby(@key, category_name, -decrease)
185
+ end
186
+
187
+ @redis.save unless manual?
188
+ end
189
+ end
190
+ end
191
+
192
+ private
193
+
194
+ def filtered(&block)
195
+ dup.tap { |filtered| filtered.instance_eval(&block) }
196
+ end
197
+
198
+ def all_in_visible
199
+ all = Set.new(@redis.hkeys(@key) || [])
200
+ all &= @filter if @filter
201
+ all - @except
202
+ end
203
+
204
+ def make_category_key(category_name)
205
+ "#{@key}:#{category_name}"
206
+ end
207
+
208
+ def make_category(*args)
209
+ @category_class.new(@redis, self, *args)
210
+ end
211
+
212
+ def collect(words)
213
+ words.inject({}) do |hash, word|
214
+ hash[word] ||= 0
215
+ hash[word] += 1
216
+ hash
217
+ end
218
+ end
66
219
 
67
- def spawn_category(*args)
68
- spawn(Category, *args)
220
+ def associate(keys, value, &block)
221
+ keys_and_values = Hash[keys.map { |key| [key, value] }]
222
+ yield *(block.arity.zero? ? [] : [keys_and_values]) if block_given?
223
+ keys_and_values
69
224
  end
70
225
  end
71
226
  end
@@ -1,7 +1,5 @@
1
- require 'inferx/adapter'
2
-
3
1
  class Inferx
4
- class Category < Adapter
2
+ class Category
5
3
 
6
4
  def self.ready_for(method_name)
7
5
  define_method("ready_to_#{method_name}") do |&block|
@@ -12,51 +10,64 @@ class Inferx
12
10
  end
13
11
 
14
12
  # @param [Redis] redis an instance of Redis
13
+ # @param [Inferx::Categories] categories the categories
15
14
  # @param [String] name a category name
16
15
  # @param [Integer] size total of scores
17
- # @param [Hash] options
18
- # @option options [String] :namespace namespace of keys to be used to Redis
19
- # @option options [Boolean] :manual whether manual save, defaults to false
20
- def initialize(redis, name, size, options = {})
21
- super(redis, options)
16
+ def initialize(redis, categories, name, size)
17
+ @redis = redis
18
+ @categories = categories
19
+ @key = "#{categories.key}:#{name}"
22
20
  @name = name.to_s
23
21
  @size = size
24
22
  end
25
23
 
26
- # Get a category name.
24
+ # Get key for access to training data of the category on Redis.
25
+ #
26
+ # @attribute [r] key
27
+ # @return [String] the key
28
+ attr_reader :key
29
+
30
+ # Get name of the category.
27
31
  #
28
32
  # @attribute [r] name
29
- # @return [String] a category name
33
+ # @return [String] the name
34
+ attr_reader :name
30
35
 
31
36
  # Get total of scores.
32
37
  #
33
38
  # @attribute [r] size
34
39
  # @return [Integer] total of scores
35
- attr_reader :name, :size
40
+ attr_reader :size
36
41
 
37
42
  # Get words with scores in the category.
38
43
  #
39
44
  # @return [Hash<String, Integer>] words with scores
40
45
  def all
41
- words_with_scores = zrevrange(0, -1, :withscores => true)
42
- index = 1
43
- size = words_with_scores.size
46
+ words_with_scores = @redis.zrevrange(@key, 0, -1, :withscores => true)
44
47
 
45
- while index < size
46
- words_with_scores[index] = words_with_scores[index].to_i
47
- index += 2
48
- end
48
+ if !words_with_scores.empty? and words_with_scores.first.is_a?(Array)
49
+ words_with_scores.each { |pair| pair[1] = pair[1].to_i }
50
+ Hash[words_with_scores]
51
+ else
52
+ index = 1
53
+ size = words_with_scores.size
49
54
 
50
- Hash[*words_with_scores]
55
+ while index < size
56
+ words_with_scores[index] = words_with_scores[index].to_i
57
+ index += 2
58
+ end
59
+
60
+ Hash[*words_with_scores]
61
+ end
51
62
  end
52
63
 
53
- # Get score of a word.
64
+ # Get score of the word.
54
65
  #
55
- # @param [String] word a word
66
+ # @param [String] word the word
56
67
  # @return [Integer] when the word is member, score of the word
57
68
  # @return [nil] when the word is not member
58
69
  def get(word)
59
- score = zscore(word)
70
+ score = @redis.zscore(@key, word)
60
71
  score ? score.to_i : nil
61
72
  end
62
73
  alias [] get
@@ -65,18 +76,8 @@ class Inferx
65
76
  #
66
77
  # @param [Array<String>] words an array of words
67
78
  def train(words)
68
- return if words.empty?
69
-
70
- increase = words.size
71
- words = collect(words)
72
-
73
- @redis.pipelined do
74
- words.each { |word, count| zincrby(count, word) }
75
- hincrby(name, increase)
76
- @redis.save unless manual?
77
- end
78
-
79
- @size += increase
79
+ increases = @categories.filter(@name).inject(words)
80
+ @size += increases[@name]
80
81
  end
81
82
 
82
83
  # Prepare to enhance the training data. Use for high performance.
@@ -89,31 +90,8 @@ class Inferx
89
90
  #
90
91
  # @param [Array<String>] words an array of words
91
92
  def untrain(words)
92
- return if words.empty?
93
-
94
- decrease = words.size
95
- words = collect(words)
96
-
97
- scores = @redis.pipelined do
98
- words.each { |word, count| zincrby(-count, word) }
99
- zremrangebyscore('-inf', 0)
100
- end
101
-
102
- length = words.size
103
-
104
- scores[0, length].each do |score|
105
- score = score.to_i
106
- decrease += score if score < 0
107
- end
108
-
109
- return unless decrease > 0
110
-
111
- @redis.pipelined do
112
- hincrby(name, -decrease)
113
- @redis.save unless manual?
114
- end
115
-
116
- @size -= decrease
93
+ decreases = @categories.filter(@name).eject(words)
94
+ @size -= decreases[@name]
117
95
  end
118
96
 
119
97
  # Prepare to attenuate the training data giving words.
@@ -127,25 +105,11 @@ class Inferx
127
105
  # @param [Array<String>] words an array of words
128
106
  # @return [Array<Integer>] scores for each word
129
107
  def scores(words)
130
- scores = @redis.pipelined { words.map(&method(:zscore)) }
131
- scores.map { |score| score ? score.to_i : nil }
132
- end
133
-
134
- protected
135
-
136
- %w(zrevrange zscore zincrby zremrangebyscore).each do |command|
137
- define_method(command) do |*args|
138
- @category_key ||= make_category_key(@name)
139
- @redis.__send__(command, @category_key, *args)
108
+ scores = @redis.pipelined do
109
+ words.map { |word| @redis.zscore(@key, word) }
140
110
  end
141
- end
142
111
 
143
- def collect(words)
144
- words.inject({}) do |hash, word|
145
- hash[word] ||= 0
146
- hash[word] += 1
147
- hash
148
- end
112
+ scores.map { |score| score ? score.to_i : nil }
149
113
  end
150
114
  end
151
115
  end
@@ -0,0 +1,48 @@
1
+ require 'inferx/category'
2
+
3
+ class Inferx
4
+ class Category
5
+ class Complementary < Category
6
+
7
+ # Inject the words to the training data of the category.
8
+ #
9
+ # @param [Array<String>] words an array of words
10
+ alias inject train
11
+
12
+ # Prepare to inject the words to the training data of the category. Use
13
+ # for high performance.
14
+ #
15
+ # @yield [inject] process something
16
+ # @yieldparam [Proc] inject inject the words to the training data of the
17
+ # category
18
+ ready_for :inject
19
+
20
+ # Eject the words from the training data of the category.
21
+ #
22
+ # @param [Array<String>] words an array of words
23
+ alias eject untrain
24
+
25
+ # Prepare to eject the words from the training data of the category. Use
26
+ # for high performance.
27
+ #
28
+ # @yield [eject] process something
29
+ # @yieldparam [Proc] eject eject the words from the training data of the
30
+ # category
31
+ ready_for :eject
32
+
33
+ # Enhance the training data of other categories giving words.
34
+ #
35
+ # @param [Array<String>] words an array of words
36
+ def train(words)
37
+ @categories.except(@name).inject(words)
38
+ end
39
+
40
+ # Attenuate the training data of other categories giving words.
41
+ #
42
+ # @param [Array<String>] words an array of words
43
+ def untrain(words)
44
+ @categories.except(@name).eject(words)
45
+ end
46
+ end
47
+ end
48
+ end