inferx 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/inferx.rb +2 -4
- data/lib/inferx/categories.rb +173 -18
- data/lib/inferx/category.rb +40 -76
- data/lib/inferx/category/complementary.rb +48 -0
- data/lib/inferx/version.rb +1 -1
- data/spec/inferx/categories_spec.rb +325 -59
- data/spec/inferx/category/complementary_spec.rb +76 -0
- data/spec/inferx/category_spec.rb +60 -119
- data/spec/inferx_spec.rb +1 -9
- data/spec/spec_helper.rb +7 -0
- metadata +5 -11
- data/lib/inferx/adapter.rb +0 -64
- data/lib/inferx/complementary/categories.rb +0 -14
- data/lib/inferx/complementary/category.rb +0 -108
- data/spec/inferx/adapter_spec.rb +0 -92
- data/spec/inferx/complementary/categories_spec.rb +0 -25
- data/spec/inferx/complementary/category_spec.rb +0 -139
data/lib/inferx.rb
CHANGED
@@ -2,20 +2,18 @@ require 'redis'
|
|
2
2
|
|
3
3
|
require 'inferx/version'
|
4
4
|
require 'inferx/categories'
|
5
|
-
require 'inferx/complementary/categories'
|
6
5
|
|
7
6
|
class Inferx
|
8
7
|
|
9
8
|
# @param [Hash] options other options are passed to Redis#initialize in
|
10
9
|
# {https://github.com/redis/redis-rb redis}
|
11
10
|
#
|
12
|
-
# @option options [Boolean] :complementary
|
11
|
+
# @option options [Boolean] :complementary use complementary Bayes classifier
|
13
12
|
# @option options [String] :namespace namespace for the keys used in Redis
|
14
13
|
# @option options [Boolean] :manual whether to save manually, defaults to false
|
15
14
|
def initialize(options = {})
|
16
15
|
@complementary = !!options[:complementary]
|
17
|
-
|
18
|
-
@categories = categories_class.new(Redis.new(options), options)
|
16
|
+
@categories = Categories.new(Redis.new(options), options)
|
19
17
|
end
|
20
18
|
|
21
19
|
attr_reader :categories
|
data/lib/inferx/categories.rb
CHANGED
@@ -1,25 +1,81 @@
|
|
1
|
-
require 'inferx/adapter'
|
2
1
|
require 'inferx/category'
|
2
|
+
require 'inferx/category/complementary'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
class Inferx
|
5
|
-
class Categories
|
6
|
+
class Categories
|
6
7
|
include Enumerable
|
7
8
|
|
9
|
+
# @param [Redis] redis an instance of Redis
|
10
|
+
# @param [Hash] options
|
11
|
+
# @option options [Boolean] :complementary use complementary Bayes
|
12
|
+
# classifier
|
13
|
+
# @option options [String] :namespace namespace for the keys used in Redis
|
14
|
+
# @option options [Boolean] :manual whether to save manually, defaults to false
|
15
|
+
def initialize(redis, options = {})
|
16
|
+
@redis = redis
|
17
|
+
@category_class = options[:complementary] ? Category::Complementary : Category
|
18
|
+
parts = %w(inferx categories)
|
19
|
+
parts.insert(1, options[:namespace]) if options[:namespace]
|
20
|
+
@key = parts.join(':')
|
21
|
+
@manual = !!options[:manual]
|
22
|
+
@filter = nil
|
23
|
+
@except = Set.new
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get key for access to categories on Redis.
|
27
|
+
#
|
28
|
+
# @attribute [r] key
|
29
|
+
# @return [String] the key
|
30
|
+
attr_reader :key
|
31
|
+
|
32
|
+
# Determine whether saving is manual.
|
33
|
+
#
|
34
|
+
# @return [Boolean] whether saving is manual
|
35
|
+
def manual?
|
36
|
+
@manual
|
37
|
+
end
|
38
|
+
|
39
|
+
# Filter categories.
|
40
|
+
#
|
41
|
+
# @param [Array<String>] category_names category names
|
42
|
+
# @return [Inferx::Categories] categories filtered by the category names
|
43
|
+
def filter(*category_names)
|
44
|
+
category_names = category_names.map(&:to_s)
|
45
|
+
|
46
|
+
filtered do
|
47
|
+
@filter = @filter ? @filter & category_names : Set.new(category_names)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Filter by excepting categories.
|
52
|
+
#
|
53
|
+
# @param [Array<String>] category_names category names
|
54
|
+
# @return [Inferx::Categories] categories filtered by the category names
|
55
|
+
def except(*category_names)
|
56
|
+
category_names = category_names.map(&:to_s)
|
57
|
+
|
58
|
+
filtered do
|
59
|
+
@except.merge(category_names)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
8
63
|
# Get all category names.
|
9
64
|
#
|
10
65
|
# @return [Array<String>] category names
|
11
66
|
def all
|
12
|
-
|
67
|
+
all_in_visible.to_a
|
13
68
|
end
|
14
69
|
|
15
|
-
# Get
|
70
|
+
# Get the category with the given name.
|
16
71
|
#
|
17
|
-
# @param [String] category_name
|
18
|
-
# @return [Inferx::Category]
|
72
|
+
# @param [String] category_name the category name
|
73
|
+
# @return [Inferx::Category] the category
|
19
74
|
def get(category_name)
|
20
|
-
size = hget(category_name)
|
75
|
+
size = @redis.hget(@key, category_name)
|
21
76
|
raise ArgumentError, "#{category_name.inspect} is missing" unless size
|
22
|
-
|
77
|
+
raise ArgumentError, "#{category_name.inspect} does not exist in filtered categories" unless all_in_visible.include?(category_name.to_s)
|
78
|
+
make_category(category_name, size.to_i)
|
23
79
|
end
|
24
80
|
alias [] get
|
25
81
|
|
@@ -28,7 +84,7 @@ class Inferx
|
|
28
84
|
# @param [Array<String>] category_names category names
|
29
85
|
def add(*category_names)
|
30
86
|
@redis.pipelined do
|
31
|
-
category_names.each { |category_name| hsetnx(category_name, 0) }
|
87
|
+
category_names.each { |category_name| @redis.hsetnx(@key, category_name, 0) }
|
32
88
|
@redis.save unless manual?
|
33
89
|
end
|
34
90
|
end
|
@@ -38,7 +94,7 @@ class Inferx
|
|
38
94
|
# @param [Array<String>] category_names category names
|
39
95
|
def remove(*category_names)
|
40
96
|
@redis.pipelined do
|
41
|
-
category_names.each { |category_name| hdel(category_name) }
|
97
|
+
category_names.each { |category_name| @redis.hdel(@key, category_name) }
|
42
98
|
@redis.del(*category_names.map(&method(:make_category_key)))
|
43
99
|
@redis.save unless manual?
|
44
100
|
end
|
@@ -46,26 +102,125 @@ class Inferx
|
|
46
102
|
|
47
103
|
# Determine if the category is defined.
|
48
104
|
#
|
49
|
-
# @param [String] category_name
|
105
|
+
# @param [String] category_name the category name
|
50
106
|
# @return whether the category is defined
|
51
107
|
def exists?(category_name)
|
52
|
-
|
108
|
+
all_in_visible.include?(category_name.to_s)
|
53
109
|
end
|
54
110
|
|
55
111
|
# Apply process for each category.
|
56
112
|
#
|
57
|
-
# @yield
|
113
|
+
# @yield called for every category
|
58
114
|
# @yieldparam [Inferx::Category] category a category
|
59
115
|
def each
|
60
|
-
|
61
|
-
|
116
|
+
visible_category_names = all_in_visible
|
117
|
+
|
118
|
+
@redis.hgetall(@key).each do |category_name, size|
|
119
|
+
next unless visible_category_names.include?(category_name)
|
120
|
+
yield make_category(category_name, size.to_i)
|
62
121
|
end
|
63
122
|
end
|
64
123
|
|
65
|
-
|
124
|
+
# Inject the words to the training data of the categories.
|
125
|
+
#
|
126
|
+
# @param [Array<String>] words an array of words
|
127
|
+
# @return [Hash<String, Integer>] increase for each category
|
128
|
+
def inject(words)
|
129
|
+
category_names = all
|
130
|
+
return {} if category_names.empty?
|
131
|
+
return associate(category_names, 0) if words.empty?
|
132
|
+
|
133
|
+
increase = words.size
|
134
|
+
words = collect(words)
|
135
|
+
|
136
|
+
associate(category_names, increase) do
|
137
|
+
@redis.pipelined do
|
138
|
+
category_names.each do |category_name|
|
139
|
+
category_key = make_category_key(category_name)
|
140
|
+
words.each { |word, count| @redis.zincrby(category_key, count, word) }
|
141
|
+
@redis.hincrby(@key, category_name, increase)
|
142
|
+
end
|
143
|
+
|
144
|
+
@redis.save unless manual?
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Eject the words from the training data of the categories.
|
150
|
+
#
|
151
|
+
# @param [Array<String>] words an array of words
|
152
|
+
# @return [Hash<String, Integer>] decrease for each category
|
153
|
+
def eject(words)
|
154
|
+
category_names = all
|
155
|
+
return {} if category_names.empty?
|
156
|
+
return associate(category_names, 0) if words.empty?
|
157
|
+
|
158
|
+
decrease = words.size
|
159
|
+
words = collect(words)
|
160
|
+
|
161
|
+
associate(category_names, decrease) do |fluctuation|
|
162
|
+
all_scores = @redis.pipelined do
|
163
|
+
category_names.each do |category_name|
|
164
|
+
category_key = make_category_key(category_name)
|
165
|
+
words.each { |word, count| @redis.zincrby(category_key, -count, word) }
|
166
|
+
@redis.zremrangebyscore(category_key, '-inf', 0)
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
length = words.size
|
171
|
+
|
172
|
+
category_names.each_with_index do |category_name, index|
|
173
|
+
scores = all_scores[index * (length + 1), length]
|
174
|
+
initial = fluctuation[category_name]
|
175
|
+
|
176
|
+
fluctuation[category_name] = scores.inject(initial) do |decrease, score|
|
177
|
+
score = score.to_i
|
178
|
+
score < 0 ? decrease + score : decrease
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
@redis.pipelined do
|
183
|
+
fluctuation.each do |category_name, decrease|
|
184
|
+
@redis.hincrby(@key, category_name, -decrease)
|
185
|
+
end
|
186
|
+
|
187
|
+
@redis.save unless manual?
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
def filtered(&block)
|
195
|
+
dup.tap { |filtered| filtered.instance_eval(&block) }
|
196
|
+
end
|
197
|
+
|
198
|
+
def all_in_visible
|
199
|
+
all = Set.new(@redis.hkeys(@key) || [])
|
200
|
+
all &= @filter if @filter
|
201
|
+
all - @except
|
202
|
+
end
|
203
|
+
|
204
|
+
def make_category_key(category_name)
|
205
|
+
"#{@key}:#{category_name}"
|
206
|
+
end
|
207
|
+
|
208
|
+
def make_category(*args)
|
209
|
+
@category_class.new(@redis, self, *args)
|
210
|
+
end
|
211
|
+
|
212
|
+
def collect(words)
|
213
|
+
words.inject({}) do |hash, word|
|
214
|
+
hash[word] ||= 0
|
215
|
+
hash[word] += 1
|
216
|
+
hash
|
217
|
+
end
|
218
|
+
end
|
66
219
|
|
67
|
-
def
|
68
|
-
|
220
|
+
def associate(keys, value, &block)
|
221
|
+
keys_and_values = Hash[keys.map { |key| [key, value] }]
|
222
|
+
yield *(block.arity.zero? ? [] : [keys_and_values]) if block_given?
|
223
|
+
keys_and_values
|
69
224
|
end
|
70
225
|
end
|
71
226
|
end
|
data/lib/inferx/category.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
require 'inferx/adapter'
|
2
|
-
|
3
1
|
class Inferx
|
4
|
-
class Category
|
2
|
+
class Category
|
5
3
|
|
6
4
|
def self.ready_for(method_name)
|
7
5
|
define_method("ready_to_#{method_name}") do |&block|
|
@@ -12,51 +10,64 @@ class Inferx
|
|
12
10
|
end
|
13
11
|
|
14
12
|
# @param [Redis] redis an instance of Redis
|
13
|
+
# @param [Inferx::Categories] categories the categories
|
15
14
|
# @param [String] name a category name
|
16
15
|
# @param [Integer] size total of scores
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
super(redis, options)
|
16
|
+
def initialize(redis, categories, name, size)
|
17
|
+
@redis = redis
|
18
|
+
@categories = categories
|
19
|
+
@key = "#{categories.key}:#{name}"
|
22
20
|
@name = name.to_s
|
23
21
|
@size = size
|
24
22
|
end
|
25
23
|
|
26
|
-
# Get
|
24
|
+
# Get key for access to training data of the category on Redis.
|
25
|
+
#
|
26
|
+
# @attribute [r] key
|
27
|
+
# @return [String] the key
|
28
|
+
attr_reader :key
|
29
|
+
|
30
|
+
# Get name of the category.
|
27
31
|
#
|
28
32
|
# @attribute [r] name
|
29
|
-
# @return [String]
|
33
|
+
# @return [String] the name
|
34
|
+
attr_reader :name
|
30
35
|
|
31
36
|
# Get total of scores.
|
32
37
|
#
|
33
38
|
# @attribute [r] size
|
34
39
|
# @return [Integer] total of scores
|
35
|
-
attr_reader :
|
40
|
+
attr_reader :size
|
36
41
|
|
37
42
|
# Get words with scores in the category.
|
38
43
|
#
|
39
44
|
# @return [Hash<String, Integer>] words with scores
|
40
45
|
def all
|
41
|
-
words_with_scores = zrevrange(0, -1, :withscores => true)
|
42
|
-
index = 1
|
43
|
-
size = words_with_scores.size
|
46
|
+
words_with_scores = @redis.zrevrange(@key, 0, -1, :withscores => true)
|
44
47
|
|
45
|
-
|
46
|
-
words_with_scores[
|
47
|
-
|
48
|
-
|
48
|
+
if !words_with_scores.empty? and words_with_scores.first.is_a?(Array)
|
49
|
+
words_with_scores.each { |pair| pair[1] = pair[1].to_i }
|
50
|
+
Hash[words_with_scores]
|
51
|
+
else
|
52
|
+
index = 1
|
53
|
+
size = words_with_scores.size
|
49
54
|
|
50
|
-
|
55
|
+
while index < size
|
56
|
+
words_with_scores[index] = words_with_scores[index].to_i
|
57
|
+
index += 2
|
58
|
+
end
|
59
|
+
|
60
|
+
Hash[*words_with_scores]
|
61
|
+
end
|
51
62
|
end
|
52
63
|
|
53
|
-
# Get score of
|
64
|
+
# Get score of the word.
|
54
65
|
#
|
55
|
-
# @param [String] word
|
66
|
+
# @param [String] word the word
|
56
67
|
# @return [Integer] score of the word, when the word is a member
|
57
68
|
# @return [nil] when the word is not a member
|
58
69
|
def get(word)
|
59
|
-
score = zscore(word)
|
70
|
+
score = @redis.zscore(@key, word)
|
60
71
|
score ? score.to_i : nil
|
61
72
|
end
|
62
73
|
alias [] get
|
@@ -65,18 +76,8 @@ class Inferx
|
|
65
76
|
#
|
66
77
|
# @param [Array<String>] words an array of words
|
67
78
|
def train(words)
|
68
|
-
|
69
|
-
|
70
|
-
increase = words.size
|
71
|
-
words = collect(words)
|
72
|
-
|
73
|
-
@redis.pipelined do
|
74
|
-
words.each { |word, count| zincrby(count, word) }
|
75
|
-
hincrby(name, increase)
|
76
|
-
@redis.save unless manual?
|
77
|
-
end
|
78
|
-
|
79
|
-
@size += increase
|
79
|
+
increases = @categories.filter(@name).inject(words)
|
80
|
+
@size += increases[@name]
|
80
81
|
end
|
81
82
|
|
82
83
|
# Prepare to enhance the training data. Use for high performance.
|
@@ -89,31 +90,8 @@ class Inferx
|
|
89
90
|
#
|
90
91
|
# @param [Array<String>] words an array of words
|
91
92
|
def untrain(words)
|
92
|
-
|
93
|
-
|
94
|
-
decrease = words.size
|
95
|
-
words = collect(words)
|
96
|
-
|
97
|
-
scores = @redis.pipelined do
|
98
|
-
words.each { |word, count| zincrby(-count, word) }
|
99
|
-
zremrangebyscore('-inf', 0)
|
100
|
-
end
|
101
|
-
|
102
|
-
length = words.size
|
103
|
-
|
104
|
-
scores[0, length].each do |score|
|
105
|
-
score = score.to_i
|
106
|
-
decrease += score if score < 0
|
107
|
-
end
|
108
|
-
|
109
|
-
return unless decrease > 0
|
110
|
-
|
111
|
-
@redis.pipelined do
|
112
|
-
hincrby(name, -decrease)
|
113
|
-
@redis.save unless manual?
|
114
|
-
end
|
115
|
-
|
116
|
-
@size -= decrease
|
93
|
+
decreases = @categories.filter(@name).eject(words)
|
94
|
+
@size -= decreases[@name]
|
117
95
|
end
|
118
96
|
|
119
97
|
# Prepare to attenuate the training data with the given words.
|
@@ -127,25 +105,11 @@ class Inferx
|
|
127
105
|
# @param [Array<String>] words an array of words
|
128
106
|
# @return [Array<Integer>] scores for each word
|
129
107
|
def scores(words)
|
130
|
-
scores = @redis.pipelined
|
131
|
-
|
132
|
-
end
|
133
|
-
|
134
|
-
protected
|
135
|
-
|
136
|
-
%w(zrevrange zscore zincrby zremrangebyscore).each do |command|
|
137
|
-
define_method(command) do |*args|
|
138
|
-
@category_key ||= make_category_key(@name)
|
139
|
-
@redis.__send__(command, @category_key, *args)
|
108
|
+
scores = @redis.pipelined do
|
109
|
+
words.map { |word| @redis.zscore(@key, word) }
|
140
110
|
end
|
141
|
-
end
|
142
111
|
|
143
|
-
|
144
|
-
words.inject({}) do |hash, word|
|
145
|
-
hash[word] ||= 0
|
146
|
-
hash[word] += 1
|
147
|
-
hash
|
148
|
-
end
|
112
|
+
scores.map { |score| score ? score.to_i : nil }
|
149
113
|
end
|
150
114
|
end
|
151
115
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'inferx/category'
|
2
|
+
|
3
|
+
class Inferx
|
4
|
+
class Category
|
5
|
+
class Complementary < Category
|
6
|
+
|
7
|
+
# Inject the words to the training data of the category.
|
8
|
+
#
|
9
|
+
# @param [Array<String>] words an array of words
|
10
|
+
alias inject train
|
11
|
+
|
12
|
+
# Prepare to inject the words to the training data of the category. Use
|
13
|
+
# for high performance.
|
14
|
+
#
|
15
|
+
# @yield [inject] process something
|
16
|
+
# @yieldparam [Proc] inject inject the words to the training data of the
|
17
|
+
# category
|
18
|
+
ready_for :inject
|
19
|
+
|
20
|
+
# Eject the words from the training data of the category.
|
21
|
+
#
|
22
|
+
# @param [Array<String>] words an array of words
|
23
|
+
alias eject untrain
|
24
|
+
|
25
|
+
# Prepare to eject the words from the training data of the category. Use
|
26
|
+
# for high performance.
|
27
|
+
#
|
28
|
+
# @yield [eject] process something
|
29
|
+
# @yieldparam [Proc] eject eject the words from the training data of the
|
30
|
+
# category
|
31
|
+
ready_for :eject
|
32
|
+
|
33
|
+
# Enhance the training data of other categories with the given words.
|
34
|
+
#
|
35
|
+
# @param [Array<String>] words an array of words
|
36
|
+
def train(words)
|
37
|
+
@categories.except(@name).inject(words)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Attenuate the training data of other categories with the given words.
|
41
|
+
#
|
42
|
+
# @param [Array<String>] words an array of words
|
43
|
+
def untrain(words)
|
44
|
+
@categories.except(@name).eject(words)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|