cmfrec 0.1.2 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c19c6069b22f666ab280851cdfcaab346c1c08599be2e97043bdc41da552b6f
4
- data.tar.gz: 6f0ffb63861b47568119e7014ff1c9d1a71fcf5852c6b1df2a8ca9b16bd239d3
3
+ metadata.gz: 34e8dc08914cbc418470cd7eb3adf3d33013b786319f4510212c80bf3629f3ca
4
+ data.tar.gz: c1b91a1f77b4b51a5ca4491376f8a02230ea54873f8c1b2b06f4761d6ddd0686
5
5
  SHA512:
6
- metadata.gz: 15104fb705c8bf1ffe3959846ddacde37a8845ee667c6ab334df6b1aacc7da42af894e2f94f1bfc35739943f8293b3f70e22358f7fc030e814c5dadbce4bb75e
7
- data.tar.gz: 8d7806b1500de927ecca3705ade074a2dc197a5e413a3a0e755f3d952aa8916b10a87f635e7958726e315a05376c216c6c5a79b6d33f4a61d723e4bbfbbcd382
6
+ metadata.gz: a3c57734379199196a4e3f51d9ec02b19ef1abac13d57a10ca3c20e9b76c9ee5db4b17d790330d41a9576c2ba28a9eeccafeb5760b54cfdf80a7431368895068
7
+ data.tar.gz: 5a24a77a6665854abb38916a22e8141a6cae637a51f98e3df3762566f2e73cb60b9bd9a25303df0411ea53dec3211bf7534711dc55c510311620341cbe4e4ac3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.1.6 (2021-08-12)
2
+
3
+ - Added `user_ids` and `item_ids` methods
4
+ - Added `user_id` argument to `user_factors`
5
+ - Added `item_id` argument to `item_factors`
6
+ - Added `user_id` argument to `user_bias`
7
+ - Added `item_id` argument to `item_bias`
8
+ - Added `item_ids` argument to `new_user_recs`
9
+ - Fixed order for `user_recs`
10
+
11
+ ## 0.1.5 (2021-08-10)
12
+
13
+ - Fixed issue with `user_recs` and `new_user_recs` returning rated items
14
+ - Fixed error with `new_user_recs`
15
+
16
+ ## 0.1.4 (2021-02-04)
17
+
18
+ - Added support for saving and loading recommenders
19
+ - Added `similar_users` and `similar_items`
20
+ - Improved ARM detection
21
+
22
+ ## 0.1.3 (2020-12-28)
23
+
24
+ - Added ARM shared library for Mac
25
+
1
26
  ## 0.1.2 (2020-12-09)
2
27
 
3
28
  - Added `load_movielens` method
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020 Andrew Kane
4
+ Copyright (c) 2020-2021 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -107,6 +107,26 @@ Get recommendations with only side information
107
107
  recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
108
108
  ```
109
109
 
110
+ ## Similarity
111
+
112
+ Add this line to your application’s Gemfile:
113
+
114
+ ```ruby
115
+ gem 'ngt'
116
+ ```
117
+
118
+ Get similar users
119
+
120
+ ```ruby
121
+ recommender.similar_users(user_id)
122
+ ```
123
+
124
+ Get similar items - “users who liked this item also liked”
125
+
126
+ ```ruby
127
+ recommender.similar_items(item_id)
128
+ ```
129
+
110
130
  ## Examples
111
131
 
112
132
  ### MovieLens
@@ -125,6 +145,35 @@ recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info
125
145
  recommender.predict(ratings.last(20000))
126
146
  ```
127
147
 
148
+ ### Ahoy
149
+
150
+ [Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
151
+
152
+ ```ruby
153
+ views = Ahoy::Event.
154
+ where(name: "Viewed post").
155
+ group(:user_id).
156
+ group("properties->>'post_id'"). # postgres syntax
157
+ count
158
+
159
+ data =
160
+ views.map do |(user_id, post_id), count|
161
+ {
162
+ user_id: user_id,
163
+ item_id: post_id,
164
+ value: count
165
+ }
166
+ end
167
+ ```
168
+
169
+ Create a recommender and get recommended posts for a user
170
+
171
+ ```ruby
172
+ recommender = Cmfrec::Recommender.new
173
+ recommender.fit(data)
174
+ recommender.user_recs(current_user.id)
175
+ ```
176
+
128
177
  ## Options
129
178
 
130
179
  Specify the number of factors and epochs
@@ -163,8 +212,33 @@ Or a Rover data frame
163
212
  Rover.read_csv("ratings.csv")
164
213
  ```
165
214
 
215
+ ## Storing Recommenders
216
+
217
+ Store the recommender
218
+
219
+ ```ruby
220
+ bin = Marshal.dump(recommender)
221
+ File.binwrite("recommender.bin", bin)
222
+ ```
223
+
224
+ > You can save it to a file, database, or any other storage system
225
+
226
+ Load a recommender
227
+
228
+ ```ruby
229
+ bin = File.binread("recommender.bin")
230
+ recommender = Marshal.load(bin)
231
+ ```
232
+
166
233
  ## Reference
167
234
 
235
+ Get ids
236
+
237
+ ```ruby
238
+ recommender.user_ids
239
+ recommender.item_ids
240
+ ```
241
+
168
242
  Get the global mean
169
243
 
170
244
  ```ruby
data/lib/cmfrec.rb CHANGED
@@ -19,7 +19,11 @@ module Cmfrec
19
19
  if Gem.win_platform?
20
20
  "cmfrec.dll"
21
21
  elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
22
- "libcmfrec.dylib"
22
+ if RbConfig::CONFIG["host_cpu"] =~ /arm/i
23
+ "libcmfrec.arm64.dylib"
24
+ else
25
+ "libcmfrec.dylib"
26
+ end
23
27
  else
24
28
  "libcmfrec.so"
25
29
  end
@@ -11,29 +11,193 @@ module Cmfrec
11
11
  item_bias: item_bias,
12
12
  add_implicit_features: add_implicit_features
13
13
  )
14
+
15
+ @fit = false
16
+ @user_map = {}
17
+ @item_map = {}
18
+ @user_info_map = {}
19
+ @item_info_map = {}
14
20
  end
15
21
 
16
22
  def fit(train_set, user_info: nil, item_info: nil)
23
+ reset
24
+ partial_fit(train_set, user_info: user_info, item_info: item_info)
25
+ end
26
+
27
+ def predict(data)
28
+ check_fit
29
+
30
+ data = to_dataset(data)
31
+
32
+ u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
33
+ i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
34
+
35
+ row = int_ptr(u)
36
+ col = int_ptr(i)
37
+ n_predict = data.size
38
+ predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
39
+
40
+ if @implicit
41
+ check_status FFI.predict_X_old_collective_implicit(
42
+ row, col, predicted, n_predict,
43
+ @a, @b,
44
+ @k, @k_user, @k_item, @k_main,
45
+ @m, @n,
46
+ @nthreads
47
+ )
48
+ else
49
+ check_status FFI.predict_X_old_collective_explicit(
50
+ row, col, predicted, n_predict,
51
+ @a, @bias_a,
52
+ @b, @bias_b,
53
+ @global_mean,
54
+ @k, @k_user, @k_item, @k_main,
55
+ @m, @n,
56
+ @nthreads
57
+ )
58
+ end
59
+
60
+ predictions = real_array(predicted)
61
+ predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
62
+ predictions
63
+ end
64
+
65
+ def user_recs(user_id, count: 5, item_ids: nil)
66
+ check_fit
67
+ user = @user_map[user_id]
68
+
69
+ if user
70
+ a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
71
+ a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
72
+ # @rated[user] will be nil for recommenders saved before 0.1.5
73
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys, item_ids: item_ids)
74
+ else
75
+ # no items if user is unknown
76
+ # TODO maybe most popular items
77
+ []
78
+ end
79
+ end
80
+
81
+ def new_user_recs(data, count: 5, user_info: nil, item_ids: nil)
82
+ check_fit
83
+
84
+ a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
85
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated, item_ids: item_ids)
86
+ end
87
+
88
+ def user_ids
89
+ @user_map.keys
90
+ end
91
+
92
+ def item_ids
93
+ @item_map.keys
94
+ end
95
+
96
+ def user_factors(user_id = nil)
97
+ read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main, user_id, @user_map)
98
+ end
99
+
100
+ def item_factors(item_id = nil)
101
+ read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main, item_id, @item_map)
102
+ end
103
+
104
+ def user_bias(user_id = nil)
105
+ read_bias(@bias_a, user_id, @user_map) if @bias_a
106
+ end
107
+
108
+ def item_bias(item_id = nil)
109
+ read_bias(@bias_b, item_id, @item_map) if @bias_b
110
+ end
111
+
112
+ def similar_items(item_id, count: 5)
113
+ check_fit
114
+ similar(item_id, @item_map, item_factors, count, item_index)
115
+ end
116
+ alias_method :item_recs, :similar_items
117
+
118
+ def similar_users(user_id, count: 5)
119
+ check_fit
120
+ similar(user_id, @user_map, user_factors, count, user_index)
121
+ end
122
+
123
+ private
124
+
125
+ def user_index
126
+ @user_index ||= create_index(user_factors)
127
+ end
128
+
129
+ def item_index
130
+ @item_index ||= create_index(item_factors)
131
+ end
132
+
133
+ def create_index(factors)
134
+ require "ngt"
135
+
136
+ index = Ngt::Index.new(@k, distance_type: "Cosine")
137
+ index.batch_insert(factors)
138
+ index
139
+ end
140
+
141
+ # TODO include bias
142
+ def similar(id, map, factors, count, index)
143
+ i = map[id]
144
+ if i
145
+ keys = map.keys
146
+ result = index.search(factors[i], size: count + 1)[1..-1]
147
+ result.map do |v|
148
+ {
149
+ # ids from batch_insert start at 1 instead of 0
150
+ item_id: keys[v[:id] - 1],
151
+ # convert cosine distance to cosine similarity
152
+ score: 1 - v[:distance]
153
+ }
154
+ end
155
+ else
156
+ []
157
+ end
158
+ end
159
+
160
+ def reset
161
+ @fit = false
162
+ @user_map.clear
163
+ @item_map.clear
164
+ @user_info_map.clear
165
+ @item_info_map.clear
166
+ @user_index = nil
167
+ @item_index = nil
168
+ end
169
+
170
+ # TODO resize pointers as needed and reset values for new memory
171
+ def partial_fit(train_set, user_info: nil, item_info: nil)
17
172
  train_set = to_dataset(train_set)
18
173
 
19
- @implicit = !train_set.any? { |v| v[:rating] }
174
+ unless @fit
175
+ @implicit = !train_set.any? { |v| v[:rating] }
176
+ end
177
+
20
178
  unless @implicit
21
179
  ratings = train_set.map { |o| o[:rating] }
22
180
  check_ratings(ratings)
23
181
  end
24
182
 
25
183
  check_training_set(train_set)
26
- create_maps(train_set)
184
+ update_maps(train_set)
27
185
 
28
186
  x_row = []
29
187
  x_col = []
30
188
  x_val = []
31
189
  value_key = @implicit ? :value : :rating
190
+ @rated = Hash.new { |hash, key| hash[key] = {} }
32
191
  train_set.each do |v|
33
- x_row << @user_map[v[:user_id]]
34
- x_col << @item_map[v[:item_id]]
192
+ u = @user_map[v[:user_id]]
193
+ i = @item_map[v[:item_id]]
194
+ @rated[u][i] = true
195
+
196
+ x_row << u
197
+ x_col << i
35
198
  x_val << (v[value_key] || 1)
36
199
  end
200
+ @rated.default = nil
37
201
 
38
202
  @m = @user_map.size
39
203
  @n = @item_map.size
@@ -52,16 +216,14 @@ module Cmfrec
52
216
  uu = nil
53
217
  ii = nil
54
218
 
55
- @user_info_map = {}
219
+ # side info
56
220
  u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
57
-
58
- @item_info_map = {}
59
221
  i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
60
222
 
61
223
  @precompute_for_predictions = false
62
224
 
63
225
  # initialize w/ normal distribution
64
- reset_values = true
226
+ reset_values = !@fit
65
227
 
66
228
  @a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
67
229
  @b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
@@ -75,16 +237,7 @@ module Cmfrec
75
237
  i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
76
238
 
77
239
  if @implicit
78
- @w_main_multiplier = 1.0
79
- @alpha = 1.0
80
- @adjust_weight = false # downweight?
81
- @apply_log_transf = false
82
-
83
- # different defaults
84
- @lambda_ = 1e0
85
- @w_user = 10
86
- @w_item = 10
87
- @finalize_chol = false
240
+ set_implicit_vars
88
241
 
89
242
  args = [
90
243
  @a, @b,
@@ -175,104 +328,13 @@ module Cmfrec
175
328
  @global_mean = real_array(glob_mean).first
176
329
  end
177
330
 
178
- @u_colmeans = real_array(u_colmeans)
179
- @i_colmeans = real_array(i_colmeans)
180
- @u_colmeans_ptr = u_colmeans
181
-
182
- self
183
- end
184
-
185
- def predict(data)
186
- check_fit
187
-
188
- data = to_dataset(data)
189
-
190
- u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
191
- i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
192
-
193
- row = int_ptr(u)
194
- col = int_ptr(i)
195
- n_predict = data.size
196
- predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
197
-
198
- if @implicit
199
- check_status FFI.predict_X_old_collective_implicit(
200
- row, col, predicted, n_predict,
201
- @a, @b,
202
- @k, @k_user, @k_item, @k_main,
203
- @m, @n,
204
- @nthreads
205
- )
206
- else
207
- check_status FFI.predict_X_old_collective_explicit(
208
- row, col, predicted, n_predict,
209
- @a, @bias_a,
210
- @b, @bias_b,
211
- @global_mean,
212
- @k, @k_user, @k_item, @k_main,
213
- @m, @n,
214
- @nthreads
215
- )
216
- end
217
-
218
- predictions = real_array(predicted)
219
- predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
220
- predictions
221
- end
222
-
223
- def user_recs(user_id, count: 5, item_ids: nil)
224
- check_fit
225
- user = @user_map[user_id]
226
-
227
- if user
228
- if item_ids
229
- # remove missing ids
230
- item_ids = item_ids.select { |v| @item_map[v] }
231
-
232
- data = item_ids.map { |v| {user_id: user_id, item_id: v} }
233
- scores = predict(data)
234
-
235
- item_ids.zip(scores).map do |item_id, score|
236
- {item_id: item_id, score: score}
237
- end
238
- else
239
- a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
240
- a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
241
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
242
- end
243
- else
244
- # no items if user is unknown
245
- # TODO maybe most popular items
246
- []
247
- end
248
- end
249
-
250
- # TODO add item_ids
251
- def new_user_recs(data, count: 5, user_info: nil)
252
- check_fit
253
-
254
- a_vec, a_bias = factors_warm(data, user_info: user_info)
255
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
256
- end
257
-
258
- def user_factors
259
- read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
260
- end
261
-
262
- def item_factors
263
- read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
264
- end
331
+ @u_colmeans = u_colmeans
265
332
 
266
- def user_bias
267
- read_bias(@bias_a) if @bias_a
268
- end
333
+ @fit = true
269
334
 
270
- def item_bias
271
- read_bias(@bias_b) if @bias_b
335
+ self
272
336
  end
273
337
 
274
- private
275
-
276
338
  def set_params(
277
339
  k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
278
340
  item_bias: true, add_implicit_features: false,
@@ -329,15 +391,14 @@ module Cmfrec
329
391
  @nthreads = nthreads
330
392
  end
331
393
 
332
- def create_maps(train_set)
333
- user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
334
- item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
335
-
336
- raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
337
- raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
394
+ def update_maps(train_set)
395
+ raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
396
+ raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
338
397
 
339
- @user_map = user_ids.zip(user_ids.size.times).to_h
340
- @item_map = item_ids.zip(item_ids.size.times).to_h
398
+ train_set.each do |v|
399
+ @user_map[v[:user_id]] ||= @user_map.size
400
+ @item_map[v[:item_id]] ||= @item_map.size
401
+ end
341
402
  end
342
403
 
343
404
  def check_ratings(ratings)
@@ -354,7 +415,7 @@ module Cmfrec
354
415
  end
355
416
 
356
417
  def check_fit
357
- raise "Not fit" unless defined?(@implicit)
418
+ raise "Not fit" unless @fit
358
419
  end
359
420
 
360
421
  def to_dataset(dataset)
@@ -376,26 +437,59 @@ module Cmfrec
376
437
  end
377
438
  end
378
439
 
379
- def read_factors(ptr, d1, d2)
380
- arr = []
381
- offset = 0
440
+ def read_factors(ptr, d1, d2, id, map)
382
441
  width = d2 * Fiddle::SIZEOF_DOUBLE
383
- d1.times do |i|
384
- arr << ptr[offset, width].unpack("d*")
385
- offset += width
442
+ if id
443
+ i = map[id]
444
+ ptr[i * width, width].unpack("d*") if i
445
+ else
446
+ arr = []
447
+ offset = 0
448
+ d1.times do |i|
449
+ arr << ptr[offset, width].unpack("d*")
450
+ offset += width
451
+ end
452
+ arr
386
453
  end
387
- arr
388
454
  end
389
455
 
390
- def read_bias(ptr)
391
- real_array(ptr)
456
+ def read_bias(ptr, id, map)
457
+ if id
458
+ i = map[id]
459
+ ptr[i * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") if i
460
+ else
461
+ real_array(ptr)
462
+ end
392
463
  end
393
464
 
394
- def top_n(a_vec:, a_bias:, count:)
395
- include_ix = nil
396
- n_include = 0
397
- exclude_ix = nil
398
- n_exclude = 0
465
+ def top_n(a_vec:, a_bias:, count:, rated: nil, item_ids: nil)
466
+ if item_ids
467
+ # remove missing ids
468
+ item_ids = item_ids.map { |v| @item_map[v] }.compact
469
+ return [] if item_ids.empty?
470
+
471
+ include_ix = int_ptr(item_ids)
472
+ n_include = item_ids.size
473
+
474
+ # TODO uncomment in 0.2.0
475
+ count = n_include # if n_include < count
476
+ else
477
+ include_ix = nil
478
+ n_include = 0
479
+ end
480
+
481
+ if rated && !item_ids
482
+ # assumes rated is unique and all items are known
483
+ # calling code is responsible for this
484
+ exclude_ix = int_ptr(rated)
485
+ n_exclude = rated.size
486
+ remaining = @item_map.size - n_exclude
487
+ return [] if remaining == 0
488
+ count = remaining if remaining < count
489
+ else
490
+ exclude_ix = nil
491
+ n_exclude = 0
492
+ end
399
493
 
400
494
  outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
401
495
  outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
@@ -425,6 +519,16 @@ module Cmfrec
425
519
  data = to_dataset(data)
426
520
  user_info = to_dataset(user_info) if user_info
427
521
 
522
+ # remove unknown items
523
+ data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
524
+
525
+ if unknown_data.any?
526
+ # TODO warn for unknown items?
527
+ # warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
528
+ end
529
+
530
+ item_ids = data.map { |d| @item_map[d[:item_id]] }
531
+
428
532
  nnz = data.size
429
533
  a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
430
534
  bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
@@ -465,7 +569,7 @@ module Cmfrec
465
569
  check_ratings(ratings)
466
570
  end
467
571
  xa = real_ptr(ratings)
468
- x_col = int_ptr(data.map { |d| d[:item_id] })
572
+ x_col = int_ptr(item_ids)
469
573
  else
470
574
  xa = nil
471
575
  x_col = nil
@@ -479,7 +583,7 @@ module Cmfrec
479
583
  u_vec_sp, u_vec_x_col, nnz_u_vec,
480
584
  @na_as_zero_user,
481
585
  @nonneg,
482
- @u_colmeans_ptr,
586
+ @u_colmeans,
483
587
  @b, @n, @c,
484
588
  xa, x_col, nnz,
485
589
  @k, @k_user, @k_item, @k_main,
@@ -505,7 +609,7 @@ module Cmfrec
505
609
  @na_as_zero_user, @na_as_zero,
506
610
  @nonneg,
507
611
  @c, cb,
508
- @global_mean, @bias_b, @u_colmeans_ptr,
612
+ @global_mean, @bias_b, @u_colmeans,
509
613
  xa, x_col, nnz, xa_dense,
510
614
  @n, weight, @b, @bi,
511
615
  @add_implicit_features,
@@ -528,7 +632,7 @@ module Cmfrec
528
632
  check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
529
633
  end
530
634
 
531
- [a_vec, real_array(bias_a).first]
635
+ [a_vec, real_array(bias_a).first, item_ids.uniq]
532
636
  end
533
637
 
534
638
  # convert boolean to int
@@ -585,5 +689,126 @@ module Cmfrec
585
689
  def real_array(ptr)
586
690
  ptr.to_s(ptr.size).unpack("d*")
587
691
  end
692
+
693
+ def set_implicit_vars
694
+ @w_main_multiplier = 1.0
695
+ @alpha = 1.0
696
+ @adjust_weight = false # downweight?
697
+ @apply_log_transf = false
698
+
699
+ # different defaults
700
+ @lambda_ = 1e0
701
+ @w_user = 10
702
+ @w_item = 10
703
+ @finalize_chol = false
704
+ end
705
+
706
+ def dump_ptr(ptr)
707
+ ptr.to_s(ptr.size) if ptr
708
+ end
709
+
710
+ def load_ptr(str)
711
+ Fiddle::Pointer[str] if str
712
+ end
713
+
714
+ def marshal_dump
715
+ obj = {
716
+ implicit: @implicit
717
+ }
718
+
719
+ # options
720
+ obj[:factors] = @k
721
+ obj[:epochs] = @niter
722
+ obj[:verbose] = @verbose
723
+
724
+ # factors
725
+ obj[:user_map] = @user_map
726
+ obj[:item_map] = @item_map
727
+ obj[:rated] = @rated
728
+ obj[:user_factors] = dump_ptr(@a)
729
+ obj[:item_factors] = dump_ptr(@b)
730
+
731
+ # bias
732
+ obj[:user_bias] = dump_ptr(@bias_a)
733
+ obj[:item_bias] = dump_ptr(@bias_b)
734
+
735
+ # mean
736
+ obj[:global_mean] = @global_mean
737
+
738
+ # side info
739
+ obj[:user_info_map] = @user_info_map
740
+ obj[:item_info_map] = @item_info_map
741
+ obj[:user_info_factors] = dump_ptr(@c)
742
+ obj[:item_info_factors] = dump_ptr(@d)
743
+
744
+ # implicit features
745
+ obj[:add_implicit_features] = @add_implicit_features
746
+ obj[:user_factors_implicit] = dump_ptr(@ai)
747
+ obj[:item_factors_implicit] = dump_ptr(@bi)
748
+
749
+ unless @implicit
750
+ obj[:min_rating] = @min_rating
751
+ obj[:max_rating] = @max_rating
752
+ end
753
+
754
+ obj[:user_means] = dump_ptr(@u_colmeans)
755
+
756
+ obj
757
+ end
758
+
759
+ def marshal_load(obj)
760
+ @implicit = obj[:implicit]
761
+
762
+ # options
763
+ set_params(
764
+ k: obj[:factors],
765
+ niter: obj[:epochs],
766
+ verbose: obj[:verbose],
767
+ user_bias: !obj[:user_bias].nil?,
768
+ item_bias: !obj[:item_bias].nil?,
769
+ add_implicit_features: obj[:add_implicit_features]
770
+ )
771
+
772
+ # factors
773
+ @user_map = obj[:user_map]
774
+ @item_map = obj[:item_map]
775
+ @rated = obj[:rated] || {}
776
+ @a = load_ptr(obj[:user_factors])
777
+ @b = load_ptr(obj[:item_factors])
778
+
779
+ # bias
780
+ @bias_a = load_ptr(obj[:user_bias])
781
+ @bias_b = load_ptr(obj[:item_bias])
782
+
783
+ # mean
784
+ @global_mean = obj[:global_mean]
785
+
786
+ # side info
787
+ @user_info_map = obj[:user_info_map]
788
+ @item_info_map = obj[:item_info_map]
789
+ @c = load_ptr(obj[:user_info_factors])
790
+ @d = load_ptr(obj[:item_info_factors])
791
+
792
+ # implicit features
793
+ @add_implicit_features = obj[:add_implicit_features]
794
+ @ai = load_ptr(obj[:user_factors_implicit])
795
+ @bi = load_ptr(obj[:item_factors_implicit])
796
+
797
+ unless @implicit
798
+ @min_rating = obj[:min_rating]
799
+ @max_rating = obj[:max_rating]
800
+ end
801
+
802
+ @u_colmeans = load_ptr(obj[:user_means])
803
+
804
+ @m = @user_map.size
805
+ @n = @item_map.size
806
+ @m_u = @user_info_map.size
807
+ @n_i = @item_info_map.size
808
+
809
+ set_implicit_vars if @implicit
810
+
811
+ @fit = @m > 0
812
+ end
588
813
  end
589
814
  end
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.6"
3
3
  end
Binary file
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-10 00:00:00.000000000 Z
11
+ date: 2021-08-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
- email: andrew@chartkick.com
14
+ email: andrew@ankane.org
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
@@ -25,6 +25,7 @@ files:
25
25
  - lib/cmfrec/recommender.rb
26
26
  - lib/cmfrec/version.rb
27
27
  - vendor/LICENSE.txt
28
+ - vendor/libcmfrec.arm64.dylib
28
29
  - vendor/libcmfrec.dylib
29
30
  - vendor/libcmfrec.so
30
31
  homepage: https://github.com/ankane/cmfrec
@@ -46,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
46
47
  - !ruby/object:Gem::Version
47
48
  version: '0'
48
49
  requirements: []
49
- rubygems_version: 3.1.4
50
+ rubygems_version: 3.2.22
50
51
  signing_key:
51
52
  specification_version: 4
52
53
  summary: Recommendations for Ruby using collective matrix factorization