inferx 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/inferx.rb +2 -4
- data/lib/inferx/categories.rb +173 -18
- data/lib/inferx/category.rb +40 -76
- data/lib/inferx/category/complementary.rb +48 -0
- data/lib/inferx/version.rb +1 -1
- data/spec/inferx/categories_spec.rb +325 -59
- data/spec/inferx/category/complementary_spec.rb +76 -0
- data/spec/inferx/category_spec.rb +60 -119
- data/spec/inferx_spec.rb +1 -9
- data/spec/spec_helper.rb +7 -0
- metadata +5 -11
- data/lib/inferx/adapter.rb +0 -64
- data/lib/inferx/complementary/categories.rb +0 -14
- data/lib/inferx/complementary/category.rb +0 -108
- data/spec/inferx/adapter_spec.rb +0 -92
- data/spec/inferx/complementary/categories_spec.rb +0 -25
- data/spec/inferx/complementary/category_spec.rb +0 -139
data/lib/inferx.rb
CHANGED
@@ -2,20 +2,18 @@ require 'redis'
|
|
2
2
|
|
3
3
|
require 'inferx/version'
|
4
4
|
require 'inferx/categories'
|
5
|
-
require 'inferx/complementary/categories'
|
6
5
|
|
7
6
|
class Inferx
|
8
7
|
|
9
8
|
# @param [Hash] options other options are passed to Redis#initialize in
|
10
9
|
# {https://github.com/redis/redis-rb redis}
|
11
10
|
#
|
12
|
-
# @option options [Boolean] :complementary
|
11
|
+
# @option options [Boolean] :complementary use complementary Bayes classifier
|
13
12
|
# @option options [String] :namespace namespace of the keys used on Redis
|
14
13
|
# @option options [Boolean] :manual whether to save manually, defaults to false
|
15
14
|
def initialize(options = {})
|
16
15
|
@complementary = !!options[:complementary]
|
17
|
-
|
18
|
-
@categories = categories_class.new(Redis.new(options), options)
|
16
|
+
@categories = Categories.new(Redis.new(options), options)
|
19
17
|
end
|
20
18
|
|
21
19
|
attr_reader :categories
|
data/lib/inferx/categories.rb
CHANGED
@@ -1,25 +1,81 @@
|
|
1
|
-
require 'inferx/adapter'
|
2
1
|
require 'inferx/category'
|
2
|
+
require 'inferx/category/complementary'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
class Inferx
|
5
|
-
class Categories
|
6
|
+
class Categories
|
6
7
|
include Enumerable
|
7
8
|
|
9
|
+
# @param [Redis] redis an instance of Redis
|
10
|
+
# @param [Hash] options
|
11
|
+
# @option options [Boolean] :complementary use complementary Bayes
|
12
|
+
# classifier
|
13
|
+
# @option options [String] :namespace namespace of the keys used on Redis
|
14
|
+
# @option options [Boolean] :manual whether to save manually, defaults to false
|
15
|
+
def initialize(redis, options = {})
|
16
|
+
@redis = redis
|
17
|
+
@category_class = options[:complementary] ? Category::Complementary : Category
|
18
|
+
parts = %w(inferx categories)
|
19
|
+
parts.insert(1, options[:namespace]) if options[:namespace]
|
20
|
+
@key = parts.join(':')
|
21
|
+
@manual = !!options[:manual]
|
22
|
+
@filter = nil
|
23
|
+
@except = Set.new
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get key for access to categories on Redis.
|
27
|
+
#
|
28
|
+
# @attribute [r] key
|
29
|
+
# @return [String] the key
|
30
|
+
attr_reader :key
|
31
|
+
|
32
|
+
# Determine if manual save.
|
33
|
+
#
|
34
|
+
# @return [Boolean] whether manual save
|
35
|
+
def manual?
|
36
|
+
@manual
|
37
|
+
end
|
38
|
+
|
39
|
+
# Filter categories.
|
40
|
+
#
|
41
|
+
# @param [Array<String>] category_names category names
|
42
|
+
# @return [Inferx::Categories] categories filtered by the category names
|
43
|
+
def filter(*category_names)
|
44
|
+
category_names = category_names.map(&:to_s)
|
45
|
+
|
46
|
+
filtered do
|
47
|
+
@filter = @filter ? @filter & category_names : Set.new(category_names)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Filter by excepting categories.
|
52
|
+
#
|
53
|
+
# @param [Array<String>] category_names category names
|
54
|
+
# @return [Inferx::Categories] categories filtered by the category names
|
55
|
+
def except(*category_names)
|
56
|
+
category_names = category_names.map(&:to_s)
|
57
|
+
|
58
|
+
filtered do
|
59
|
+
@except.merge(category_names)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
8
63
|
# Get all category names.
|
9
64
|
#
|
10
65
|
# @return [Array<String>] category names
|
11
66
|
def all
|
12
|
-
|
67
|
+
all_in_visible.to_a
|
13
68
|
end
|
14
69
|
|
15
|
-
# Get
|
70
|
+
# Get category according to the name.
|
16
71
|
#
|
17
|
-
# @param [String] category_name
|
18
|
-
# @return [Inferx::Category]
|
72
|
+
# @param [String] category_name the category name
|
73
|
+
# @return [Inferx::Category] the category
|
19
74
|
def get(category_name)
|
20
|
-
size = hget(category_name)
|
75
|
+
size = @redis.hget(@key, category_name)
|
21
76
|
raise ArgumentError, "#{category_name.inspect} is missing" unless size
|
22
|
-
|
77
|
+
raise ArgumentError, "#{category_name.inspect} does not exist in filtered categories" unless all_in_visible.include?(category_name.to_s)
|
78
|
+
make_category(category_name, size.to_i)
|
23
79
|
end
|
24
80
|
alias [] get
|
25
81
|
|
@@ -28,7 +84,7 @@ class Inferx
|
|
28
84
|
# @param [Array<String>] category_names category names
|
29
85
|
def add(*category_names)
|
30
86
|
@redis.pipelined do
|
31
|
-
category_names.each { |category_name| hsetnx(category_name, 0) }
|
87
|
+
category_names.each { |category_name| @redis.hsetnx(@key, category_name, 0) }
|
32
88
|
@redis.save unless manual?
|
33
89
|
end
|
34
90
|
end
|
@@ -38,7 +94,7 @@ class Inferx
|
|
38
94
|
# @param [Array<String>] category_names category names
|
39
95
|
def remove(*category_names)
|
40
96
|
@redis.pipelined do
|
41
|
-
category_names.each { |category_name| hdel(category_name) }
|
97
|
+
category_names.each { |category_name| @redis.hdel(@key, category_name) }
|
42
98
|
@redis.del(*category_names.map(&method(:make_category_key)))
|
43
99
|
@redis.save unless manual?
|
44
100
|
end
|
@@ -46,26 +102,125 @@ class Inferx
|
|
46
102
|
|
47
103
|
# Determine if the category is defined.
|
48
104
|
#
|
49
|
-
# @param [String] category_name
|
105
|
+
# @param [String] category_name the category name
|
50
106
|
# @return whether the category is defined
|
51
107
|
def exists?(category_name)
|
52
|
-
|
108
|
+
all_in_visible.include?(category_name.to_s)
|
53
109
|
end
|
54
110
|
|
55
111
|
# Apply process for each category.
|
56
112
|
#
|
57
|
-
# @yield
|
113
|
+
# @yield called for every category
|
58
114
|
# @yieldparam [Inferx::Category] category a category
|
59
115
|
def each
|
60
|
-
|
61
|
-
|
116
|
+
visible_category_names = all_in_visible
|
117
|
+
|
118
|
+
@redis.hgetall(@key).each do |category_name, size|
|
119
|
+
next unless visible_category_names.include?(category_name)
|
120
|
+
yield make_category(category_name, size.to_i)
|
62
121
|
end
|
63
122
|
end
|
64
123
|
|
65
|
-
|
124
|
+
# Inject the words to the training data of the categories.
|
125
|
+
#
|
126
|
+
# @param [Array<String>] words an array of words
|
127
|
+
# @return [Hash<String, Integer>] increase for each category
|
128
|
+
def inject(words)
|
129
|
+
category_names = all
|
130
|
+
return {} if category_names.empty?
|
131
|
+
return associate(category_names, 0) if words.empty?
|
132
|
+
|
133
|
+
increase = words.size
|
134
|
+
words = collect(words)
|
135
|
+
|
136
|
+
associate(category_names, increase) do
|
137
|
+
@redis.pipelined do
|
138
|
+
category_names.each do |category_name|
|
139
|
+
category_key = make_category_key(category_name)
|
140
|
+
words.each { |word, count| @redis.zincrby(category_key, count, word) }
|
141
|
+
@redis.hincrby(@key, category_name, increase)
|
142
|
+
end
|
143
|
+
|
144
|
+
@redis.save unless manual?
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
# Eject the words from the training data of the categories.
|
150
|
+
#
|
151
|
+
# @param [Array<String>] words an array of words
|
152
|
+
# @return [Hash<String, Integer>] decrease for each category
|
153
|
+
def eject(words)
|
154
|
+
category_names = all
|
155
|
+
return {} if category_names.empty?
|
156
|
+
return associate(category_names, 0) if words.empty?
|
157
|
+
|
158
|
+
decrease = words.size
|
159
|
+
words = collect(words)
|
160
|
+
|
161
|
+
associate(category_names, decrease) do |fluctuation|
|
162
|
+
all_scores = @redis.pipelined do
|
163
|
+
category_names.each do |category_name|
|
164
|
+
category_key = make_category_key(category_name)
|
165
|
+
words.each { |word, count| @redis.zincrby(category_key, -count, word) }
|
166
|
+
@redis.zremrangebyscore(category_key, '-inf', 0)
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
length = words.size
|
171
|
+
|
172
|
+
category_names.each_with_index do |category_name, index|
|
173
|
+
scores = all_scores[index * (length + 1), length]
|
174
|
+
initial = fluctuation[category_name]
|
175
|
+
|
176
|
+
fluctuation[category_name] = scores.inject(initial) do |decrease, score|
|
177
|
+
score = score.to_i
|
178
|
+
score < 0 ? decrease + score : decrease
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
@redis.pipelined do
|
183
|
+
fluctuation.each do |category_name, decrease|
|
184
|
+
@redis.hincrby(@key, category_name, -decrease)
|
185
|
+
end
|
186
|
+
|
187
|
+
@redis.save unless manual?
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
private
|
193
|
+
|
194
|
+
def filtered(&block)
|
195
|
+
dup.tap { |filtered| filtered.instance_eval(&block) }
|
196
|
+
end
|
197
|
+
|
198
|
+
def all_in_visible
|
199
|
+
all = Set.new(@redis.hkeys(@key) || [])
|
200
|
+
all &= @filter if @filter
|
201
|
+
all - @except
|
202
|
+
end
|
203
|
+
|
204
|
+
def make_category_key(category_name)
|
205
|
+
"#{@key}:#{category_name}"
|
206
|
+
end
|
207
|
+
|
208
|
+
def make_category(*args)
|
209
|
+
@category_class.new(@redis, self, *args)
|
210
|
+
end
|
211
|
+
|
212
|
+
def collect(words)
|
213
|
+
words.inject({}) do |hash, word|
|
214
|
+
hash[word] ||= 0
|
215
|
+
hash[word] += 1
|
216
|
+
hash
|
217
|
+
end
|
218
|
+
end
|
66
219
|
|
67
|
-
def
|
68
|
-
|
220
|
+
def associate(keys, value, &block)
|
221
|
+
keys_and_values = Hash[keys.map { |key| [key, value] }]
|
222
|
+
yield *(block.arity.zero? ? [] : [keys_and_values]) if block_given?
|
223
|
+
keys_and_values
|
69
224
|
end
|
70
225
|
end
|
71
226
|
end
|
data/lib/inferx/category.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
|
-
require 'inferx/adapter'
|
2
|
-
|
3
1
|
class Inferx
|
4
|
-
class Category
|
2
|
+
class Category
|
5
3
|
|
6
4
|
def self.ready_for(method_name)
|
7
5
|
define_method("ready_to_#{method_name}") do |&block|
|
@@ -12,51 +10,64 @@ class Inferx
|
|
12
10
|
end
|
13
11
|
|
14
12
|
# @param [Redis] redis an instance of Redis
|
13
|
+
# @param [Inferx::Categories] categories the categories
|
15
14
|
# @param [String] name a category name
|
16
15
|
# @param [Integer] size total of scores
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
super(redis, options)
|
16
|
+
def initialize(redis, categories, name, size)
|
17
|
+
@redis = redis
|
18
|
+
@categories = categories
|
19
|
+
@key = "#{categories.key}:#{name}"
|
22
20
|
@name = name.to_s
|
23
21
|
@size = size
|
24
22
|
end
|
25
23
|
|
26
|
-
# Get
|
24
|
+
# Get key for access to training data of the category on Redis.
|
25
|
+
#
|
26
|
+
# @attribute [r] key
|
27
|
+
# @return [String] the key
|
28
|
+
attr_reader :key
|
29
|
+
|
30
|
+
# Get name of the category.
|
27
31
|
#
|
28
32
|
# @attribute [r] name
|
29
|
-
# @return [String]
|
33
|
+
# @return [String] the name
|
34
|
+
attr_reader :name
|
30
35
|
|
31
36
|
# Get total of scores.
|
32
37
|
#
|
33
38
|
# @attribute [r] size
|
34
39
|
# @return [Integer] total of scores
|
35
|
-
attr_reader :
|
40
|
+
attr_reader :size
|
36
41
|
|
37
42
|
# Get words with scores in the category.
|
38
43
|
#
|
39
44
|
# @return [Hash<String, Integer>] words with scores
|
40
45
|
def all
|
41
|
-
words_with_scores = zrevrange(0, -1, :withscores => true)
|
42
|
-
index = 1
|
43
|
-
size = words_with_scores.size
|
46
|
+
words_with_scores = @redis.zrevrange(@key, 0, -1, :withscores => true)
|
44
47
|
|
45
|
-
|
46
|
-
words_with_scores[
|
47
|
-
|
48
|
-
|
48
|
+
if !words_with_scores.empty? and words_with_scores.first.is_a?(Array)
|
49
|
+
words_with_scores.each { |pair| pair[1] = pair[1].to_i }
|
50
|
+
Hash[words_with_scores]
|
51
|
+
else
|
52
|
+
index = 1
|
53
|
+
size = words_with_scores.size
|
49
54
|
|
50
|
-
|
55
|
+
while index < size
|
56
|
+
words_with_scores[index] = words_with_scores[index].to_i
|
57
|
+
index += 2
|
58
|
+
end
|
59
|
+
|
60
|
+
Hash[*words_with_scores]
|
61
|
+
end
|
51
62
|
end
|
52
63
|
|
53
|
-
# Get score of
|
64
|
+
# Get score of the word.
|
54
65
|
#
|
55
|
-
# @param [String] word
|
66
|
+
# @param [String] word the word
|
56
67
|
# @return [Integer] when the word is member, score of the word
|
57
68
|
# @return [nil] when the word is not member
|
58
69
|
def get(word)
|
59
|
-
score = zscore(word)
|
70
|
+
score = @redis.zscore(@key, word)
|
60
71
|
score ? score.to_i : nil
|
61
72
|
end
|
62
73
|
alias [] get
|
@@ -65,18 +76,8 @@ class Inferx
|
|
65
76
|
#
|
66
77
|
# @param [Array<String>] words an array of words
|
67
78
|
def train(words)
|
68
|
-
|
69
|
-
|
70
|
-
increase = words.size
|
71
|
-
words = collect(words)
|
72
|
-
|
73
|
-
@redis.pipelined do
|
74
|
-
words.each { |word, count| zincrby(count, word) }
|
75
|
-
hincrby(name, increase)
|
76
|
-
@redis.save unless manual?
|
77
|
-
end
|
78
|
-
|
79
|
-
@size += increase
|
79
|
+
increases = @categories.filter(@name).inject(words)
|
80
|
+
@size += increases[@name]
|
80
81
|
end
|
81
82
|
|
82
83
|
# Prepare to enhance the training data. Use for high performance.
|
@@ -89,31 +90,8 @@ class Inferx
|
|
89
90
|
#
|
90
91
|
# @param [Array<String>] words an array of words
|
91
92
|
def untrain(words)
|
92
|
-
|
93
|
-
|
94
|
-
decrease = words.size
|
95
|
-
words = collect(words)
|
96
|
-
|
97
|
-
scores = @redis.pipelined do
|
98
|
-
words.each { |word, count| zincrby(-count, word) }
|
99
|
-
zremrangebyscore('-inf', 0)
|
100
|
-
end
|
101
|
-
|
102
|
-
length = words.size
|
103
|
-
|
104
|
-
scores[0, length].each do |score|
|
105
|
-
score = score.to_i
|
106
|
-
decrease += score if score < 0
|
107
|
-
end
|
108
|
-
|
109
|
-
return unless decrease > 0
|
110
|
-
|
111
|
-
@redis.pipelined do
|
112
|
-
hincrby(name, -decrease)
|
113
|
-
@redis.save unless manual?
|
114
|
-
end
|
115
|
-
|
116
|
-
@size -= decrease
|
93
|
+
decreases = @categories.filter(@name).eject(words)
|
94
|
+
@size -= decreases[@name]
|
117
95
|
end
|
118
96
|
|
119
97
|
# Prepare to attenuate the training data giving words.
|
@@ -127,25 +105,11 @@ class Inferx
|
|
127
105
|
# @param [Array<String>] words an array of words
|
128
106
|
# @return [Array<Integer, nil>] score for each word, nil when the word is not a member
|
129
107
|
def scores(words)
|
130
|
-
scores = @redis.pipelined
|
131
|
-
|
132
|
-
end
|
133
|
-
|
134
|
-
protected
|
135
|
-
|
136
|
-
%w(zrevrange zscore zincrby zremrangebyscore).each do |command|
|
137
|
-
define_method(command) do |*args|
|
138
|
-
@category_key ||= make_category_key(@name)
|
139
|
-
@redis.__send__(command, @category_key, *args)
|
108
|
+
scores = @redis.pipelined do
|
109
|
+
words.map { |word| @redis.zscore(@key, word) }
|
140
110
|
end
|
141
|
-
end
|
142
111
|
|
143
|
-
|
144
|
-
words.inject({}) do |hash, word|
|
145
|
-
hash[word] ||= 0
|
146
|
-
hash[word] += 1
|
147
|
-
hash
|
148
|
-
end
|
112
|
+
scores.map { |score| score ? score.to_i : nil }
|
149
113
|
end
|
150
114
|
end
|
151
115
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'inferx/category'
|
2
|
+
|
3
|
+
class Inferx
|
4
|
+
class Category
|
5
|
+
class Complementary < Category
|
6
|
+
|
7
|
+
# Inject the words to the training data of the category.
|
8
|
+
#
|
9
|
+
# @param [Array<String>] words an array of words
|
10
|
+
alias inject train
|
11
|
+
|
12
|
+
# Prepare to inject the words to the training data of the category. Use
|
13
|
+
# for high performance.
|
14
|
+
#
|
15
|
+
# @yield [inject] process something
|
16
|
+
# @yieldparam [Proc] inject inject the words to the training data of the
|
17
|
+
# category
|
18
|
+
ready_for :inject
|
19
|
+
|
20
|
+
# Eject the words from the training data of the category.
|
21
|
+
#
|
22
|
+
# @param [Array<String>] words an array of words
|
23
|
+
alias eject untrain
|
24
|
+
|
25
|
+
# Prepare to eject the words from the training data of the category. Use
|
26
|
+
# for high performance.
|
27
|
+
#
|
28
|
+
# @yield [eject] process something
|
29
|
+
# @yieldparam [Proc] eject eject the words from the training data of the
|
30
|
+
# category
|
31
|
+
ready_for :eject
|
32
|
+
|
33
|
+
# Enhance the training data of other categories with the given words.
|
34
|
+
#
|
35
|
+
# @param [Array<String>] words an array of words
|
36
|
+
def train(words)
|
37
|
+
@categories.except(@name).inject(words)
|
38
|
+
end
|
39
|
+
|
40
|
+
# Attenuate the training data of other categories with the given words.
|
41
|
+
#
|
42
|
+
# @param [Array<String>] words an array of words
|
43
|
+
def untrain(words)
|
44
|
+
@categories.except(@name).eject(words)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|