cofi_cost 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -0
- data/lib/cofi_cost.rb +167 -0
- data/test/unit/test_cofi_cost.rb +40 -0
- metadata +71 -0
data/README
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
Playground for collaborative filtering in Ruby using NArray and rb-gsl.
|
2
|
+
|
3
|
+
“Snowflakes, like people, are all different and beautiful,
|
4
|
+
but they can be a nuisance when they lose their identity in a mob”
|
5
|
+
|
6
|
+
Roadmap:
|
7
|
+
First release name: Graupel
|
8
|
+
Release Date: TBD
|
9
|
+
From http://en.wikipedia.org/wiki/Graupel Jan 1, 2012
|
10
|
+
"Graupel refers to precipitation that forms when supercooled droplets of water are collected and freeze on a falling snowflake, forming a 2–5 mm (0.079–0.197 in) ball of rime. The term graupel is the German word for this meteorological phenomenon. Graupel is sometimes referred to as small hail, although the World Meteorological Organization defines small hail as snow pellets encapsulated by ice, a precipitation halfway between graupel and hail."
|
data/lib/cofi_cost.rb
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
require 'gsl'
|
2
|
+
require 'matrix'
|
3
|
+
require 'narray'
|
4
|
+
|
5
|
+
include GSL::MultiMin
|
6
|
+
|
7
|
+
# Collaborative-filtering model: learns a feature vector per track and a
# preference vector (theta) per user from a ratings array, then predicts
# ratings as features * theta' plus the per-track mean rating.
#
# +ratings+ is an NArray whose shape[0] is the number of users and whose
# shape[1] is the number of tracks. Any entry that is not greater than 0 is
# treated as "not rated".
class CofiCost

  attr_accessor :ratings, :num_features, :cost, :lambda
  attr_reader :boolean_rated, :num_tracks, :num_users, :features, :theta, :ratings_mean, :ratings_norm, :predictions

  # ratings::      NArray of ratings (converted to float here).
  # num_features:: number of latent features to learn per track/user.
  # lambda::       regularization weight.
  # features::     optional initial features, shape [num_features, num_tracks];
  #                random-normal when nil (settable mainly for testing).
  # theta::        optional initial theta, shape [num_features, num_users];
  #                random-normal when nil (settable mainly for testing).
  def initialize(ratings, num_features, lambda, features = nil, theta = nil)
    @ratings = ratings.to_f # make sure it's a float for correct normalization
    @num_features = num_features
    @cost = 0
    @boolean_rated = @ratings > 0            # 1 where a rating exists, 0 where unrated
    @boolean_unrated = @boolean_rated.eq 0   # 1 where unrated, 0 where a rating exists
    @num_tracks = @ratings.shape[1]          # @ratings is users x tracks
    @num_users = @ratings.shape[0]

    # Initial parameters. Features are drawn before theta so that the order of
    # random draws is stable when both are generated.
    @features = features.nil? ? NArray.float(@num_features, @num_tracks).randomn : features
    @theta = theta.nil? ? NArray.float(@num_features, @num_users).randomn : theta

    # normalize_ratings fills these buffers in place, so they must exist first.
    @ratings_mean = NArray.float(1, @num_tracks).fill(0.0)
    @ratings_norm = NArray.float(@num_users, @num_tracks).fill(0.0)
    @ratings_mean, @ratings_norm = normalize_ratings
    @lambda = lambda
    @predictions = nil
  end

  # Computes, per track, the mean of the existing ratings and the
  # mean-subtracted ratings (unrated entries stay 0.0).
  #
  # Returns [@ratings_mean, @ratings_norm].
  #
  # NOTE(review): for a track with no rating > 0 the masked selection is
  # empty; what NArray#mean does then has not been verified here.
  def normalize_ratings
    @num_tracks.times do |track|
      track_ratings = @ratings[true, track]      # every user's entry for this track (rated or not)
      rated_mask = boolean_rated[true, track]    # 1 where a user actually rated this track
      @ratings_mean[track] = track_ratings[rated_mask].mean

      normalized = @ratings_norm[true, track]
      normalized[rated_mask] = track_ratings[rated_mask] - @ratings_mean[track]
      @ratings_norm[true, track] = normalized
    end
    return @ratings_mean, @ratings_norm
  end

  # Splits a flat parameter vector into theta and features.
  #
  # v:: object responding to #to_na (e.g. a GSL::Vector); the first
  #     @theta.size elements are theta, the remainder are features.
  #
  # Returns [theta, features], each reshaped so its first dimension matches
  # the current @theta / @features.
  def unroll_params(v)
    flat = v.to_na
    theta = flat.slice(0..@theta.size-1).reshape(@theta.shape[0], true)
    features = flat.slice(@theta.size..-1).reshape(@features.shape[0], true)
    return theta, features
  end

  # Residual of the model on the normalized ratings, zeroed where no rating
  # exists: (features * theta' - ratings_norm) .* boolean_rated
  def partial_cost_calc(theta,features)
    predicted = NArray.ref(NMatrix.ref(features) * NMatrix.ref(theta.transpose(1,0)))
    (predicted - @ratings_norm) * @boolean_rated
  end

  # Flattens theta followed by features into one GSL::Vector, the starting
  # point handed to the minimizer.
  #
  # hcat is defined in this file on NArray's singleton class (a class-level
  # method), so it has to be called as NArray.hcat(a, b); calling it on an
  # instance (a.hcat(b)) raises NoMethodError.
  def roll_up_theta_and_features
    flat_theta = @theta.reshape(true)
    flat_features = @features.reshape(true)
    GSL::Vector.alloc(NArray.hcat(flat_theta, flat_features))
  end

  # Stores the theta/features contained in the flat vector +x+ on the
  # instance. unroll_params already restores the original shapes.
  def unroll_params_init_shape(x)
    theta, features = unroll_params(x)
    @theta = theta
    @features = features
  end

  # Runs the GSL "conjugate_fr" minimizer on the cost function, starting from
  # the current theta/features, then stores the resulting theta, features,
  # cost and predictions on the instance. Progress is printed to stdout on
  # every iteration.
  #
  # max_iterations:: upper bound on minimizer iterations (default 10, the
  #                  previously hard-coded value). At least one iteration is
  #                  always performed.
  #
  # Returns the predictions array.
  def min_cost(max_iterations = 10)
    cost_f = Proc.new { |v|
      theta, features = unroll_params(v)
      # In octave:
      # 1/2 * sum(sum(((X * Theta.transpose - Y).*R).^2)) + lambda/2 * sum(sum((Theta).^2)) + lambda/2 * sum(sum((X).^2))
      #
      # NOTE(review): the expression below does not match the Octave formula
      # above: it has no 1/2 factor on the squared error, no Theta
      # regularization term, and @lambda/2 is integer division when lambda is
      # an Integer (0 for lambda = 1). Left unchanged because existing
      # expected values depend on it -- confirm and fix together with them.
      (partial_cost_calc(theta,features)**2).sum + @lambda/2 * (features**2).sum
    }
    cost_df = Proc.new { |v, df|
      theta, features = unroll_params(v)
      # In octave:
      # xgrad = ((X * Theta.transpose - Y).* R) * Theta + lambda * X # X_grad
      # thetagrad = ((X * Theta.transpose - Y).* R).transpose * X + lambda * Theta
      residual = partial_cost_calc(theta, features) # computed once, used by both gradients
      features_grad = (NArray.ref(NMatrix.ref(residual) * NMatrix.ref(theta)) + @lambda * features).flatten
      theta_grad = (NArray.ref(NMatrix.ref(residual.transpose(1,0)) * NMatrix.ref(features)) + @lambda * theta).flatten

      # NOTE(review): the gradient is packed as [features, theta] while the
      # parameter vector is packed as [theta, features] (see
      # roll_up_theta_and_features / unroll_params). That ordering looks
      # inconsistent; left unchanged because existing expected values depend
      # on it -- confirm and fix together with them.
      gradient = NArray.hcat(features_grad, theta_grad)

      # The values must be copied element by element: assigning
      # `df = gradient` would only rebind the block-local name and leave the
      # vector supplied by the minimizer untouched, so no step would be made.
      gradient.size.times { |i| df[i] = gradient[i] }
    }

    x = roll_up_theta_and_features
    cost_func = GSL::MultiMin::Function_fdf.alloc(cost_f, cost_df, x.size)

    # TODO: figure out which algorithm to use
    # http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html
    minimizer = GSL::MultiMin::FdfMinimizer.alloc("conjugate_fr", x.size)
    minimizer.set(cost_func, x, 0.01, 1e-4)

    iter = 0
    f = nil
    loop do
      iter += 1
      minimizer.iterate
      status = minimizer.test_gradient(1e-3)
      puts("Minimum found at") if status == GSL::SUCCESS
      x = minimizer.x
      f = minimizer.f
      printf("%5d %.5f %.5f %10.5f\n", iter, x[0], x[1], f)
      break unless status == GSL::CONTINUE && iter < max_iterations
    end

    unroll_params_init_shape(x)
    @cost = f
    @predictions = calc_predictions
  end

  # Predicted ratings for every user/track pair: features * theta' with the
  # per-track mean rating added back.
  def calc_predictions
    NArray.ref(NMatrix.ref(@features) * NMatrix.ref(@theta.transpose(1,0))) + @ratings_mean
  end

end
|
129
|
+
|
130
|
+
# Class-level concatenation helpers added to NArray. They are defined on the
# singleton class, so they are called as NArray.cat / NArray.vcat /
# NArray.hcat, not on instances.
class NArray
  class << self
    # Concatenates the given NArrays along dimension +dim+.
    #
    # dim::     Integer index of the dimension to join along (default 0).
    # narrays:: one or more NArrays. An array that has no dimension +dim+
    #           is extended IN PLACE with newdim!, so inputs may be mutated.
    #
    # The result uses the largest typecode among the inputs and takes its
    # other dimensions from the first array.
    #
    # Raises ArgumentError when +dim+ is not an Integer or no arrays are given.
    def cat(dim=0, *narrays)
      raise ArgumentError, "'dim' must be an integer" unless dim.is_a?(Integer)
      raise ArgumentError, "must have narrays to cat" if narrays.size == 0

      result_typecode = narrays.map { |na| na.typecode }.max
      narrays.uniq.each do |na|
        na.newdim!(dim) if na.shape[dim].nil?
      end

      shapes = narrays.map { |na| na.shape }
      result_shape = shapes.first.dup
      result_shape[dim] = shapes.inject(0) { |total, shape| total + shape[dim] }
      result = NArray.new(result_typecode, *result_shape)

      # Copy each input into its own slice of the result along +dim+.
      offset = 0
      narrays.each_with_index do |na, position|
        shape = shapes[position]
        stop = offset + shape[dim]
        selector = Array.new(shape.size, true)
        selector[dim] = (offset...stop)
        result[*selector] = na
        offset = stop
      end
      result
    end

    # Concatenates along dimension 1.
    def vcat(*narrays)
      cat(1, *narrays)
    end

    # Concatenates along dimension 0.
    def hcat(*narrays)
      cat(0, *narrays)
    end
  end
end
|
154
|
+
|
155
|
+
##ratings = NArray.float(4,5).indgen(0,2)
|
156
|
+
#ratings = NArray[[5.0,4.0,0.0,0.0],[3.0,0.0,0.0,0.0],[4.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0]]
|
157
|
+
#num_features = 2
|
158
|
+
#lambda = 1
|
159
|
+
#g = CofiCost.new(ratings, num_features, lambda)
|
160
|
+
#puts g.theta.nil?
|
161
|
+
#g.min_cost
|
162
|
+
#puts "new theta"
|
163
|
+
#puts g.theta.to_a.to_s
|
164
|
+
#puts "new features"
|
165
|
+
#puts g.features.to_a.to_s
|
166
|
+
#puts "predictions"
|
167
|
+
#puts g.predictions.to_a.to_s
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require_relative '../../lib/cofi_cost.rb'
|
3
|
+
require 'narray'
|
4
|
+
require 'gsl'
|
5
|
+
require 'matrix'
|
6
|
+
|
7
|
+
# Unit tests for CofiCost using fixed ratings, features and theta so that the
# optimizer starts from a known point.
class CofiCostTest < Test::Unit::TestCase

  # Largest absolute difference tolerated when comparing Floats produced by
  # the optimizer; exact equality on such values is brittle.
  FLOAT_TOLERANCE = 1e-9

  def setup
    ratings = NArray[[5.0,4.0,0.0,0.0],[3.0,0.0,0.0,0.0],[4.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0]]
    num_features = 2
    lambda = 1
    features = NArray[[0.139489,1.804804],[-0.501808,1.050885],[0.354079,-0.518884],[-0.015370,0.096253],[1.147623,-0.745562]]
    theta = NArray[[-0.079641,1.211386],[-0.130688,0.444762],[-0.789258,1.222232],[0.212132,-1.174545]]
    @c = CofiCost.new(ratings, num_features, lambda, features, theta)
  end

  def teardown
    @c = nil
  end

  # Runs the minimizer from the fixed starting point and checks the final
  # cost and one predicted rating.
  def test_happy_case
    @c.min_cost
    assert_in_delta 0.15929446605989878, @c.cost, FLOAT_TOLERANCE
    # Comparing the whole predictions array with assert_equal fails because
    # the literal below is rounded to 5 decimals while the computed values
    # are full-precision Floats; a single element is checked instead.
    # assert_equal NArray[[4.62547,3.91302,8.30084,1.59081],[2.96361,3.17939,1.88322,3.88434],[3.92356,4.32263,1.739,5.6172],[2.98132,3.06219,2.47359,3.3213],[2.93724,3.14111,1.33728,3.77855]], @c.predictions
    assert_in_delta 4.625468057637709, @c.predictions[0,0], FLOAT_TOLERANCE
  end

  # The per-track means and mean-subtracted ratings are computed in the
  # constructor.
  def test_normalize_ratings
    assert_equal NArray[[4.5],[3.0],[4.0],[3.0],[3.0]], @c.ratings_mean
    assert_equal NArray[[0.5,-0.5,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]], @c.ratings_norm
  end

  # The rolled-up vector is theta (flattened) followed by features (flattened).
  def test_roll_up_theta_and_features
    rolled = @c.roll_up_theta_and_features
    assert_equal GSL::Vector.alloc([-0.079641, 1.211386, -0.130688, 0.444762, -0.789258, 1.222232, 0.212132, -1.174545, 0.139489, 1.804804, -0.501808, 1.050885, 0.354079, -0.518884, -0.01537, 0.096253, 1.147623, -0.745562]), rolled
  end

end
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cofi_cost
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Thomas Wolfe
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-02 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: gsl
|
16
|
+
requirement: &83060750 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *83060750
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: narray
|
27
|
+
requirement: &83060170 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *83060170
|
36
|
+
description: Playground for collaborative filtering in Ruby using NArray and rb-gsl.
|
37
|
+
email: tomwolfe@gmail.com
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
extra_rdoc_files: []
|
41
|
+
files:
|
42
|
+
- lib/cofi_cost.rb
|
43
|
+
- README
|
44
|
+
- test/unit/test_cofi_cost.rb
|
45
|
+
homepage: http://github.com/tomwolfe/cofi_cost
|
46
|
+
licenses:
|
47
|
+
- MIT
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options: []
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 1.9.2
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
+
none: false
|
60
|
+
requirements:
|
61
|
+
- - ! '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
requirements:
|
65
|
+
- libgsl0-dev
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.8.11
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: Collaborative filtering
|
71
|
+
test_files: []
|