cofi_cost 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README +10 -0
  2. data/lib/cofi_cost.rb +167 -0
  3. data/test/unit/test_cofi_cost.rb +40 -0
  4. metadata +71 -0
data/README ADDED
@@ -0,0 +1,10 @@
1
+ Playground for collaborative filtering in Ruby using NArray and rb-gsl.
2
+
3
+ “Snowflakes, like people, are all different and beautiful,
4
+ but they can be a nuisance when they lose their identity in a mob”
5
+
6
+ Roadmap:
7
+ First release name: Graupel
8
+ Release Date: TBD
9
+ From http://en.wikipedia.org/wiki/Graupel Jan 1, 2012
10
+ "Graupel refers to precipitation that forms when supercooled droplets of water are collected and freeze on a falling snowflake, forming a 2–5 mm (0.079–0.197 in) ball of rime. The term graupel is the German word for this meteorological phenomenon. Graupel is sometimes referred to as small hail, although the World Meteorological Organization defines small hail as snow pellets encapsulated by ice, a precipitation halfway between graupel and hail."
data/lib/cofi_cost.rb ADDED
@@ -0,0 +1,167 @@
1
+ require 'gsl'
2
+ require 'matrix'
3
+ require 'narray'
4
+
5
+ include GSL::MultiMin
6
+
7
+ class CofiCost
8
+ # Collaborative-filtering model: learns @theta and @features from a ratings NArray by minimizing a cost with GSL.
9
+ attr_accessor :ratings, :num_features, :cost, :lambda
10
+ attr_reader :boolean_rated, :num_tracks, :num_users, :features, :theta, :ratings_mean, :ratings_norm, :predictions
11
+ # Stores the ratings (values > 0 count as rated), builds the rated mask, and mean-normalizes the ratings.
12
+ def initialize(ratings, num_features, lambda, features = nil, theta = nil)
13
+ @ratings = ratings.to_f # make sure it's a float for correct normalization
14
+ @num_features = num_features
15
+ @cost = 0
16
+ @boolean_rated = @ratings > 0 # mask: 1 where a rating exists (> 0), 0 where unrated
17
+ @boolean_unrated = @boolean_rated.eq 0 # inverse mask: 1 where unrated, 0 where rated (not referenced elsewhere in this class)
18
+ @num_tracks = @ratings.shape[1] # @ratings is users x tracks
19
+ @num_users = @ratings.shape[0]
20
+ # set initial parameters
21
+ # allow theta/features to be set for testing
22
+ if features.nil? then @features = NArray.float(@num_features, @num_tracks).randomn else @features = features end
23
+ if theta.nil? then @theta = NArray.float(@num_features, @num_users).randomn else @theta = theta end
24
+ @ratings_mean = NArray.float(1, @num_tracks).fill(0.0)
25
+ @ratings_norm = NArray.float(@num_users, @num_tracks).fill(0.0)
26
+ @ratings_mean, @ratings_norm = normalize_ratings # fills the two zeroed arrays allocated above
27
+ @lambda = lambda
28
+ @predictions = nil
29
+ end
30
+ # Mean-centers each track's existing ratings; fills and returns @ratings_mean and @ratings_norm.
31
+ def normalize_ratings
32
+ for i in 0..@num_tracks-1 # sadly, @num_tracks.each_index does not work with NArray yet
33
+ track_rating = @ratings[true,i] # get all user ratings for track i (including unrated)
34
+ boolean_track_rating = boolean_rated[true,i] # mask of the users who actually rated track i
35
+ @ratings_mean[i] = track_rating[boolean_track_rating].mean # NOTE(review): the selection is empty for a track nobody rated - confirm how .mean behaves then
36
+
37
+ track_norm = @ratings_norm[true,i]
38
+ track_norm[boolean_track_rating] = track_rating[boolean_track_rating] - @ratings_mean[i]
39
+ @ratings_norm[true,i] = track_norm # write the centered values back; unrated entries keep their initial 0.0
40
+ end
41
+ return @ratings_mean, @ratings_norm
42
+ end
43
+ # Splits a flat parameter vector back into theta (first @theta.size entries) and features (the rest).
44
+ def unroll_params(v)
45
+ v = v.to_na
46
+ theta = v.slice(0..@theta.size-1).reshape(@theta.shape[0],true)
47
+ features = v.slice(@theta.size..-1).reshape(@features.shape[0],true)
48
+ return theta, features
49
+ end
50
+ # Prediction error: (features x transposed theta) minus the normalized ratings, zeroed where no rating exists.
51
+ def partial_cost_calc(theta,features)
52
+ (NArray.ref(NMatrix.ref(features) * NMatrix.ref(theta.transpose(1,0))) - @ratings_norm) * @boolean_rated
53
+ end
54
+ # Flattens @theta, then @features, into one GSL vector used as the minimizer's starting point.
55
+ def roll_up_theta_and_features
56
+ # roll up theta and features together
57
+ # (oddly, NArray objects created don't seem to recognize the hcat method
58
+ # added to the open class NArray
59
+ # x = GSL:: Vector.alloc(@theta.reshape(true).hcat(@features.reshape(true)))
60
+ # will fail)
61
+ # I don't understand why this is/how to fix it.
62
+ theta_reshaped = @theta.reshape(true)
63
+ features_reshaped = @features.reshape(true)
64
+ rolled = NArray.hcat(theta_reshaped,features_reshaped)
65
+ GSL:: Vector.alloc(rolled) # starting point
66
+ end
67
+ # Unrolls the flat vector x and stores the result in @theta and @features.
68
+ def unroll_params_init_shape(x)
69
+ theta, features = unroll_params(x)
70
+ @theta = theta.reshape(@theta.shape[0],true)
71
+ @features = features.reshape(@features.shape[0],true)
72
+ end
73
+ # Runs the GSL "conjugate_fr" minimizer from the current parameters, then stores @theta, @features, @cost and @predictions.
74
+ def min_cost
75
+ cost_f = Proc.new { |v|
76
+ theta, features = unroll_params(v)
77
+ # In octave:
78
+ # 1/2 * sum(sum(((X * Theta.transpose - Y).*R).^2)) + lambda/2 * sum(sum((Theta).^2)) + lambda/2 * sum(sum((X).^2))
79
+ (partial_cost_calc(theta,features)**2).sum + @lambda/2 * (features**2).sum # NOTE(review): unlike the Octave formula above there is no 1/2 factor and no Theta penalty, and @lambda/2 is integer division (0) when lambda is the Integer 1 - confirm intent
80
+ }
81
+ cost_df = Proc.new { |v, df|
82
+ theta, features = unroll_params(v)
83
+ # In octave:
84
+ # xgrad = ((X * Theta.transpose - Y).* R) * Theta + lambda * X # X_grad
85
+ # thetagrad = ((X * Theta.transpose - Y).* R).transpose * X + lambda * Theta
86
+
87
+ # I realize this is a hack. I'm not totally sure why or how but just setting
88
+ # df = NArray.hcat(dfzero,dfone) results in no steps being made in gradient descent.
89
+ # ideas/suggestions welcome :)
90
+ dfzero = (NArray.ref(NMatrix.ref(partial_cost_calc(theta,features)) * NMatrix.ref(theta)) + @lambda * features).flatten
91
+ dfone = (NArray.ref(NMatrix.ref((partial_cost_calc(theta,features)).transpose(1,0)) * NMatrix.ref(features)) + @lambda * theta).flatten
92
+ dfcomp = NArray.hcat(dfzero,dfone) # NOTE(review): features-gradient comes first here, but the parameters are rolled theta-first - confirm the ordering
93
+ for i in 0..dfcomp.size-1 # again .each_index does not yet work with NArray
94
+ df[i] = dfcomp[i] # copy element-wise into the vector GSL passed in (rebinding df would not be seen by the caller)
95
+ end
96
+ }
97
+
98
+ x = roll_up_theta_and_features
99
+ cost_func = Function_fdf.alloc(cost_f, cost_df, x.size)
100
+
101
+ # TODO: figure out which algorithm to use
102
+ # http://www.gnu.org/software/gsl/manual/html_node/Multimin-Algorithms-with-Derivatives.html
103
+ minimizer = FdfMinimizer.alloc("conjugate_fr", x.size)
104
+ minimizer.set(cost_func, x, 0.01, 1e-4)
105
+
106
+ iter = 0
107
+ begin
108
+ iter += 1
109
+ status = minimizer.iterate() # this status is overwritten on the next line, so iterate() failures go unnoticed
110
+ status = minimizer.test_gradient(1e-3)
111
+ if status == GSL::SUCCESS
112
+ puts("Minimum found at")
113
+ end
114
+ x = minimizer.x
115
+ f = minimizer.f
116
+ printf("%5d %.5f %.5f %10.5f\n", iter, x[0], x[1], f) # iteration, first two parameters, current cost
117
+ end while status == GSL::CONTINUE and iter < 10 # stops after at most 10 iterations even if not converged
118
+
119
+ unroll_params_init_shape(x)
120
+ @cost = f
121
+ @predictions = calc_predictions
122
+ end
123
+ # Predicted ratings: (features x transposed theta) with @ratings_mean added back.
124
+ def calc_predictions
125
+ NArray.ref(NMatrix.ref(@features) * NMatrix.ref(@theta.transpose(1,0))) + @ratings_mean
126
+ end
127
+
128
+ end
129
+
130
+ class NArray # reopens NArray to add class-level concatenation helpers
131
+ class << self
132
+ def cat(dim=0, *narrays) # concatenates narrays along dimension dim and returns a new NArray
133
+ raise ArgumentError, "'dim' must be an integer" unless dim.is_a?(Integer)
134
+ raise ArgumentError, "must have narrays to cat" if narrays.size == 0
135
+ new_typecode = narrays.map(&:typecode).max # result uses the highest typecode among the inputs
136
+ narrays.uniq.each {|narray| narray.newdim!(dim) if narray.shape[dim].nil? } # adds a missing dimension in place, i.e. mutates the caller's arrays
137
+ shapes = narrays.map(&:shape)
138
+ new_dim_size = shapes.inject(0) {|sum,v| sum + v[dim] } # total length along dim
139
+ new_shape = shapes.first.dup # other dimensions are taken from the first input
140
+ new_shape[dim] = new_dim_size
141
+ narr = NArray.new(new_typecode, *new_shape)
142
+ range_cnt = 0
143
+ narrays.zip(shapes) do |narray, shape|
144
+ index = shape.map {true} # select everything on every dimension...
145
+ index[dim] = (range_cnt...(range_cnt += shape[dim])) # ...except dim, where this input gets the next slot; also advances range_cnt
146
+ narr[*index] = narray
147
+ end
148
+ narr
149
+ end
150
+ def vcat(*narrays) ; cat(1, *narrays) end # concatenate along dimension 1
151
+ def hcat(*narrays) ; cat(0, *narrays) end # concatenate along dimension 0
152
+ end
153
+ end
154
+
155
+ ##ratings = NArray.float(4,5).indgen(0,2)
156
+ #ratings = NArray[[5.0,4.0,0.0,0.0],[3.0,0.0,0.0,0.0],[4.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0]]
157
+ #num_features = 2
158
+ #lambda = 1
159
+ #g = CofiCost.new(ratings, num_features, lambda)
160
+ #puts g.theta.nil?
161
+ #g.min_cost
162
+ #puts "new theta"
163
+ #puts g.theta.to_a.to_s
164
+ #puts "new features"
165
+ #puts g.features.to_a.to_s
166
+ #puts "predictions"
167
+ #puts g.predictions.to_a.to_s
@@ -0,0 +1,40 @@
1
+ require 'test/unit'
2
+ require_relative '../../lib/cofi_cost.rb'
3
+ require 'narray'
4
+ require 'gsl'
5
+ require 'matrix'
6
+
7
+ class CofiCostTest < Test::Unit::TestCase
8
+ # Unit tests for CofiCost using fixed ratings, features and theta so results are reproducible.
9
+ def setup
10
+ ratings = NArray[[5.0,4.0,0.0,0.0],[3.0,0.0,0.0,0.0],[4.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0],[3.0,0.0,0.0,0.0]]
11
+ num_features = 2
12
+ lambda = 1
13
+ features = NArray[[0.139489,1.804804],[-0.501808,1.050885],[0.354079,-0.518884],[-0.015370,0.096253],[1.147623,-0.745562]]
14
+ theta = NArray[[-0.079641,1.211386],[-0.130688,0.444762],[-0.789258,1.222232],[0.212132,-1.174545]]
15
+ @c = CofiCost.new(ratings, num_features, lambda, features, theta) # explicit features/theta bypass the random initialization
16
+ end
17
+
18
+ def teardown
19
+ @c = nil
20
+ end
21
+ # Runs the minimizer and pins the resulting cost and first prediction.
22
+ def test_happy_case
23
+ @c.min_cost
24
+ assert_equal 0.15929446605989878, @c.cost # exact float equality: pins the current numerical behaviour of min_cost
25
+ # the following fails even though the values look equal (the literals are probably rounded to too few decimal places)
26
+ # assert_equal NArray[[4.62547,3.91302,8.30084,1.59081],[2.96361,3.17939,1.88322,3.88434],[3.92356,4.32263,1.739,5.6172],[2.98132,3.06219,2.47359,3.3213],[2.93724,3.14111,1.33728,3.77855]], @c.predictions
27
+ assert_equal 4.625468057637709, @c.predictions[0,0]
28
+ end
29
+ # Checks the per-track means and mean-centered ratings computed in the constructor.
30
+ def test_normalize_ratings
31
+ assert_equal NArray[[4.5],[3.0],[4.0],[3.0],[3.0]], @c.ratings_mean
32
+ assert_equal NArray[[0.5,-0.5,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]], @c.ratings_norm
33
+ end
34
+ # Checks that the rolled vector holds all theta values followed by all feature values.
35
+ def test_roll_up_theta_and_features
36
+ rolled = @c.roll_up_theta_and_features
37
+ assert_equal GSL:: Vector.alloc([-0.079641, 1.211386, -0.130688, 0.444762, -0.789258, 1.222232, 0.212132, -1.174545, 0.139489, 1.804804, -0.501808, 1.050885, 0.354079, -0.518884, -0.01537, 0.096253, 1.147623, -0.745562]), rolled
38
+ end
39
+
40
+ end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cofi_cost
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Thomas Wolfe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-02 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: gsl
16
+ requirement: &83060750 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *83060750
25
+ - !ruby/object:Gem::Dependency
26
+ name: narray
27
+ requirement: &83060170 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *83060170
36
+ description: Playground for collaborative filtering in Ruby using NArray and rb-gsl.
37
+ email: tomwolfe@gmail.com
38
+ executables: []
39
+ extensions: []
40
+ extra_rdoc_files: []
41
+ files:
42
+ - lib/cofi_cost.rb
43
+ - README
44
+ - test/unit/test_cofi_cost.rb
45
+ homepage: http://github.com/tomwolfe/cofi_cost
46
+ licenses:
47
+ - MIT
48
+ post_install_message:
49
+ rdoc_options: []
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: 1.9.2
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements:
65
+ - libgsl0-dev
66
+ rubyforge_project:
67
+ rubygems_version: 1.8.11
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Collaborative filtering
71
+ test_files: []