cmfrec 0.1.2 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c19c6069b22f666ab280851cdfcaab346c1c08599be2e97043bdc41da552b6f
4
- data.tar.gz: 6f0ffb63861b47568119e7014ff1c9d1a71fcf5852c6b1df2a8ca9b16bd239d3
3
+ metadata.gz: 34e8dc08914cbc418470cd7eb3adf3d33013b786319f4510212c80bf3629f3ca
4
+ data.tar.gz: c1b91a1f77b4b51a5ca4491376f8a02230ea54873f8c1b2b06f4761d6ddd0686
5
5
  SHA512:
6
- metadata.gz: 15104fb705c8bf1ffe3959846ddacde37a8845ee667c6ab334df6b1aacc7da42af894e2f94f1bfc35739943f8293b3f70e22358f7fc030e814c5dadbce4bb75e
7
- data.tar.gz: 8d7806b1500de927ecca3705ade074a2dc197a5e413a3a0e755f3d952aa8916b10a87f635e7958726e315a05376c216c6c5a79b6d33f4a61d723e4bbfbbcd382
6
+ metadata.gz: a3c57734379199196a4e3f51d9ec02b19ef1abac13d57a10ca3c20e9b76c9ee5db4b17d790330d41a9576c2ba28a9eeccafeb5760b54cfdf80a7431368895068
7
+ data.tar.gz: 5a24a77a6665854abb38916a22e8141a6cae637a51f98e3df3762566f2e73cb60b9bd9a25303df0411ea53dec3211bf7534711dc55c510311620341cbe4e4ac3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.1.6 (2021-08-12)
2
+
3
+ - Added `user_ids` and `item_ids` methods
4
+ - Added `user_id` argument to `user_factors`
5
+ - Added `item_id` argument to `item_factors`
6
+ - Added `user_id` argument to `user_bias`
7
+ - Added `item_id` argument to `item_bias`
8
+ - Added `item_ids` argument to `new_user_recs`
9
+ - Fixed order for `user_recs`
10
+
11
+ ## 0.1.5 (2021-08-10)
12
+
13
+ - Fixed issue with `user_recs` and `new_user_recs` returning rated items
14
+ - Fixed error with `new_user_recs`
15
+
16
+ ## 0.1.4 (2021-02-04)
17
+
18
+ - Added support for saving and loading recommenders
19
+ - Added `similar_users` and `similar_items`
20
+ - Improved ARM detection
21
+
22
+ ## 0.1.3 (2020-12-28)
23
+
24
+ - Added ARM shared library for Mac
25
+
1
26
  ## 0.1.2 (2020-12-09)
2
27
 
3
28
  - Added `load_movielens` method
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2020 David Cortes
4
- Copyright (c) 2020 Andrew Kane
4
+ Copyright (c) 2020-2021 Andrew Kane
5
5
 
6
6
  All rights reserved.
7
7
 
data/README.md CHANGED
@@ -107,6 +107,26 @@ Get recommendations with only side information
107
107
  recommender.new_user_recs([], user_info: {cats: 0, dogs: 2})
108
108
  ```
109
109
 
110
+ ## Similarity
111
+
112
+ Finding similar users and items requires the `ngt` gem. Add this line to your application’s Gemfile:
113
+
114
+ ```ruby
115
+ gem 'ngt'
116
+ ```
117
+
118
+ Get similar users
119
+
120
+ ```ruby
121
+ recommender.similar_users(user_id)
122
+ ```
123
+
124
+ Get similar items - “users who liked this item also liked”
125
+
126
+ ```ruby
127
+ recommender.similar_items(item_id)
128
+ ```
129
+
110
130
  ## Examples
111
131
 
112
132
  ### MovieLens
@@ -125,6 +145,35 @@ recommender.fit(ratings.first(80000), user_info: user_info, item_info: item_info
125
145
  recommender.predict(ratings.last(20000))
126
146
  ```
127
147
 
148
+ ### Ahoy
149
+
150
+ [Ahoy](https://github.com/ankane/ahoy) is a great source for implicit feedback
151
+
152
+ ```ruby
153
+ views = Ahoy::Event.
154
+ where(name: "Viewed post").
155
+ group(:user_id).
156
+ group("properties->>'post_id'"). # postgres syntax
157
+ count
158
+
159
+ data =
160
+ views.map do |(user_id, post_id), count|
161
+ {
162
+ user_id: user_id,
163
+ item_id: post_id,
164
+ value: count
165
+ }
166
+ end
167
+ ```
168
+
169
+ Create a recommender and get recommended posts for a user
170
+
171
+ ```ruby
172
+ recommender = Cmfrec::Recommender.new
173
+ recommender.fit(data)
174
+ recommender.user_recs(current_user.id)
175
+ ```
176
+
128
177
  ## Options
129
178
 
130
179
  Specify the number of factors and epochs
@@ -163,8 +212,33 @@ Or a Rover data frame
163
212
  Rover.read_csv("ratings.csv")
164
213
  ```
165
214
 
215
+ ## Storing Recommenders
216
+
217
+ Store the recommender
218
+
219
+ ```ruby
220
+ bin = Marshal.dump(recommender)
221
+ File.binwrite("recommender.bin", bin)
222
+ ```
223
+
224
+ > You can save it to a file, database, or any other storage system
225
+
226
+ Load a recommender
227
+
228
+ ```ruby
229
+ bin = File.binread("recommender.bin")
230
+ recommender = Marshal.load(bin)
231
+ ```
232
+
166
233
  ## Reference
167
234
 
235
+ Get ids
236
+
237
+ ```ruby
238
+ recommender.user_ids
239
+ recommender.item_ids
240
+ ```
241
+
168
242
  Get the global mean
169
243
 
170
244
  ```ruby
data/lib/cmfrec.rb CHANGED
@@ -19,7 +19,11 @@ module Cmfrec
19
19
  if Gem.win_platform?
20
20
  "cmfrec.dll"
21
21
  elsif RbConfig::CONFIG["host_os"] =~ /darwin/i
22
- "libcmfrec.dylib"
22
+ if RbConfig::CONFIG["host_cpu"] =~ /arm/i
23
+ "libcmfrec.arm64.dylib"
24
+ else
25
+ "libcmfrec.dylib"
26
+ end
23
27
  else
24
28
  "libcmfrec.so"
25
29
  end
data/lib/cmfrec/recommender.rb CHANGED
@@ -11,29 +11,193 @@ module Cmfrec
11
11
  item_bias: item_bias,
12
12
  add_implicit_features: add_implicit_features
13
13
  )
14
+
15
+ @fit = false
16
+ @user_map = {}
17
+ @item_map = {}
18
+ @user_info_map = {}
19
+ @item_info_map = {}
14
20
  end
15
21
 
16
22
  def fit(train_set, user_info: nil, item_info: nil)
23
+ reset
24
+ partial_fit(train_set, user_info: user_info, item_info: item_info)
25
+ end
26
+
27
+ def predict(data)
28
+ check_fit
29
+
30
+ data = to_dataset(data)
31
+
32
+ u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
33
+ i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
34
+
35
+ row = int_ptr(u)
36
+ col = int_ptr(i)
37
+ n_predict = data.size
38
+ predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
39
+
40
+ if @implicit
41
+ check_status FFI.predict_X_old_collective_implicit(
42
+ row, col, predicted, n_predict,
43
+ @a, @b,
44
+ @k, @k_user, @k_item, @k_main,
45
+ @m, @n,
46
+ @nthreads
47
+ )
48
+ else
49
+ check_status FFI.predict_X_old_collective_explicit(
50
+ row, col, predicted, n_predict,
51
+ @a, @bias_a,
52
+ @b, @bias_b,
53
+ @global_mean,
54
+ @k, @k_user, @k_item, @k_main,
55
+ @m, @n,
56
+ @nthreads
57
+ )
58
+ end
59
+
60
+ predictions = real_array(predicted)
61
+ predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
62
+ predictions
63
+ end
64
+
65
+ def user_recs(user_id, count: 5, item_ids: nil)
66
+ check_fit
67
+ user = @user_map[user_id]
68
+
69
+ if user
70
+ a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
71
+ a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
72
+ # @rated[user] will be nil for recommenders saved before 0.1.5
73
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys, item_ids: item_ids)
74
+ else
75
+ # no items if user is unknown
76
+ # TODO maybe most popular items
77
+ []
78
+ end
79
+ end
80
+
81
+ def new_user_recs(data, count: 5, user_info: nil, item_ids: nil)
82
+ check_fit
83
+
84
+ a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
85
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated, item_ids: item_ids)
86
+ end
87
+
88
+ def user_ids
89
+ @user_map.keys
90
+ end
91
+
92
+ def item_ids
93
+ @item_map.keys
94
+ end
95
+
96
+ def user_factors(user_id = nil)
97
+ read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main, user_id, @user_map)
98
+ end
99
+
100
+ def item_factors(item_id = nil)
101
+ read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main, item_id, @item_map)
102
+ end
103
+
104
+ def user_bias(user_id = nil)
105
+ read_bias(@bias_a, user_id, @user_map) if @bias_a
106
+ end
107
+
108
+ def item_bias(item_id = nil)
109
+ read_bias(@bias_b, item_id, @item_map) if @bias_b
110
+ end
111
+
112
+ def similar_items(item_id, count: 5)
113
+ check_fit
114
+ similar(item_id, @item_map, item_factors, count, item_index)
115
+ end
116
+ alias_method :item_recs, :similar_items
117
+
118
+ def similar_users(user_id, count: 5)
119
+ check_fit
120
+ similar(user_id, @user_map, user_factors, count, user_index)
121
+ end
122
+
123
+ private
124
+
125
+ def user_index
126
+ @user_index ||= create_index(user_factors)
127
+ end
128
+
129
+ def item_index
130
+ @item_index ||= create_index(item_factors)
131
+ end
132
+
133
+ def create_index(factors)
134
+ require "ngt"
135
+
136
+ index = Ngt::Index.new(@k, distance_type: "Cosine")
137
+ index.batch_insert(factors)
138
+ index
139
+ end
140
+
141
+ # TODO include bias
142
+ def similar(id, map, factors, count, index)
143
+ i = map[id]
144
+ if i
145
+ keys = map.keys
146
+ result = index.search(factors[i], size: count + 1)[1..-1]
147
+ result.map do |v|
148
+ {
149
+ # ids from batch_insert start at 1 instead of 0
150
+ item_id: keys[v[:id] - 1],
151
+ # convert cosine distance to cosine similarity
152
+ score: 1 - v[:distance]
153
+ }
154
+ end
155
+ else
156
+ []
157
+ end
158
+ end
159
+
160
+ def reset
161
+ @fit = false
162
+ @user_map.clear
163
+ @item_map.clear
164
+ @user_info_map.clear
165
+ @item_info_map.clear
166
+ @user_index = nil
167
+ @item_index = nil
168
+ end
169
+
170
+ # TODO resize pointers as needed and reset values for new memory
171
+ def partial_fit(train_set, user_info: nil, item_info: nil)
17
172
  train_set = to_dataset(train_set)
18
173
 
19
- @implicit = !train_set.any? { |v| v[:rating] }
174
+ unless @fit
175
+ @implicit = !train_set.any? { |v| v[:rating] }
176
+ end
177
+
20
178
  unless @implicit
21
179
  ratings = train_set.map { |o| o[:rating] }
22
180
  check_ratings(ratings)
23
181
  end
24
182
 
25
183
  check_training_set(train_set)
26
- create_maps(train_set)
184
+ update_maps(train_set)
27
185
 
28
186
  x_row = []
29
187
  x_col = []
30
188
  x_val = []
31
189
  value_key = @implicit ? :value : :rating
190
+ @rated = Hash.new { |hash, key| hash[key] = {} }
32
191
  train_set.each do |v|
33
- x_row << @user_map[v[:user_id]]
34
- x_col << @item_map[v[:item_id]]
192
+ u = @user_map[v[:user_id]]
193
+ i = @item_map[v[:item_id]]
194
+ @rated[u][i] = true
195
+
196
+ x_row << u
197
+ x_col << i
35
198
  x_val << (v[value_key] || 1)
36
199
  end
200
+ @rated.default = nil
37
201
 
38
202
  @m = @user_map.size
39
203
  @n = @item_map.size
@@ -52,16 +216,14 @@ module Cmfrec
52
216
  uu = nil
53
217
  ii = nil
54
218
 
55
- @user_info_map = {}
219
+ # side info
56
220
  u_row, u_col, u_sp, nnz_u, @m_u, p_ = process_info(user_info, @user_map, @user_info_map, :user_id)
57
-
58
- @item_info_map = {}
59
221
  i_row, i_col, i_sp, nnz_i, @n_i, q = process_info(item_info, @item_map, @item_info_map, :item_id)
60
222
 
61
223
  @precompute_for_predictions = false
62
224
 
63
225
  # initialize w/ normal distribution
64
- reset_values = true
226
+ reset_values = !@fit
65
227
 
66
228
  @a = Fiddle::Pointer.malloc([@m, @m_u].max * (@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
67
229
  @b = Fiddle::Pointer.malloc([@n, @n_i].max * (@k_item + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
@@ -75,16 +237,7 @@ module Cmfrec
75
237
  i_colmeans = Fiddle::Pointer.malloc(q * Fiddle::SIZEOF_DOUBLE)
76
238
 
77
239
  if @implicit
78
- @w_main_multiplier = 1.0
79
- @alpha = 1.0
80
- @adjust_weight = false # downweight?
81
- @apply_log_transf = false
82
-
83
- # different defaults
84
- @lambda_ = 1e0
85
- @w_user = 10
86
- @w_item = 10
87
- @finalize_chol = false
240
+ set_implicit_vars
88
241
 
89
242
  args = [
90
243
  @a, @b,
@@ -175,104 +328,13 @@ module Cmfrec
175
328
  @global_mean = real_array(glob_mean).first
176
329
  end
177
330
 
178
- @u_colmeans = real_array(u_colmeans)
179
- @i_colmeans = real_array(i_colmeans)
180
- @u_colmeans_ptr = u_colmeans
181
-
182
- self
183
- end
184
-
185
- def predict(data)
186
- check_fit
187
-
188
- data = to_dataset(data)
189
-
190
- u = data.map { |v| @user_map[v[:user_id]] || @user_map.size }
191
- i = data.map { |v| @item_map[v[:item_id]] || @item_map.size }
192
-
193
- row = int_ptr(u)
194
- col = int_ptr(i)
195
- n_predict = data.size
196
- predicted = Fiddle::Pointer.malloc(n_predict * Fiddle::SIZEOF_DOUBLE)
197
-
198
- if @implicit
199
- check_status FFI.predict_X_old_collective_implicit(
200
- row, col, predicted, n_predict,
201
- @a, @b,
202
- @k, @k_user, @k_item, @k_main,
203
- @m, @n,
204
- @nthreads
205
- )
206
- else
207
- check_status FFI.predict_X_old_collective_explicit(
208
- row, col, predicted, n_predict,
209
- @a, @bias_a,
210
- @b, @bias_b,
211
- @global_mean,
212
- @k, @k_user, @k_item, @k_main,
213
- @m, @n,
214
- @nthreads
215
- )
216
- end
217
-
218
- predictions = real_array(predicted)
219
- predictions.map! { |v| v.nan? ? @global_mean : v } if @implicit
220
- predictions
221
- end
222
-
223
- def user_recs(user_id, count: 5, item_ids: nil)
224
- check_fit
225
- user = @user_map[user_id]
226
-
227
- if user
228
- if item_ids
229
- # remove missing ids
230
- item_ids = item_ids.select { |v| @item_map[v] }
231
-
232
- data = item_ids.map { |v| {user_id: user_id, item_id: v} }
233
- scores = predict(data)
234
-
235
- item_ids.zip(scores).map do |item_id, score|
236
- {item_id: item_id, score: score}
237
- end
238
- else
239
- a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
240
- a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
241
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
242
- end
243
- else
244
- # no items if user is unknown
245
- # TODO maybe most popular items
246
- []
247
- end
248
- end
249
-
250
- # TODO add item_ids
251
- def new_user_recs(data, count: 5, user_info: nil)
252
- check_fit
253
-
254
- a_vec, a_bias = factors_warm(data, user_info: user_info)
255
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
256
- end
257
-
258
- def user_factors
259
- read_factors(@a, [@m, @m_u].max, @k_user + @k + @k_main)
260
- end
261
-
262
- def item_factors
263
- read_factors(@b, [@n, @n_i].max, @k_item + @k + @k_main)
264
- end
331
+ @u_colmeans = u_colmeans
265
332
 
266
- def user_bias
267
- read_bias(@bias_a) if @bias_a
268
- end
333
+ @fit = true
269
334
 
270
- def item_bias
271
- read_bias(@bias_b) if @bias_b
335
+ self
272
336
  end
273
337
 
274
- private
275
-
276
338
  def set_params(
277
339
  k: 40, lambda_: 1e+1, method: "als", use_cg: true, user_bias: true,
278
340
  item_bias: true, add_implicit_features: false,
@@ -329,15 +391,14 @@ module Cmfrec
329
391
  @nthreads = nthreads
330
392
  end
331
393
 
332
- def create_maps(train_set)
333
- user_ids = train_set.map { |v| v[:user_id] }.uniq.sort
334
- item_ids = train_set.map { |v| v[:item_id] }.uniq.sort
335
-
336
- raise ArgumentError, "Missing user_id" if user_ids.any?(&:nil?)
337
- raise ArgumentError, "Missing item_id" if item_ids.any?(&:nil?)
394
+ def update_maps(train_set)
395
+ raise ArgumentError, "Missing user_id" if train_set.any? { |v| v[:user_id].nil? }
396
+ raise ArgumentError, "Missing item_id" if train_set.any? { |v| v[:item_id].nil? }
338
397
 
339
- @user_map = user_ids.zip(user_ids.size.times).to_h
340
- @item_map = item_ids.zip(item_ids.size.times).to_h
398
+ train_set.each do |v|
399
+ @user_map[v[:user_id]] ||= @user_map.size
400
+ @item_map[v[:item_id]] ||= @item_map.size
401
+ end
341
402
  end
342
403
 
343
404
  def check_ratings(ratings)
@@ -354,7 +415,7 @@ module Cmfrec
354
415
  end
355
416
 
356
417
  def check_fit
357
- raise "Not fit" unless defined?(@implicit)
418
+ raise "Not fit" unless @fit
358
419
  end
359
420
 
360
421
  def to_dataset(dataset)
@@ -376,26 +437,59 @@ module Cmfrec
376
437
  end
377
438
  end
378
439
 
379
- def read_factors(ptr, d1, d2)
380
- arr = []
381
- offset = 0
440
+ def read_factors(ptr, d1, d2, id, map)
382
441
  width = d2 * Fiddle::SIZEOF_DOUBLE
383
- d1.times do |i|
384
- arr << ptr[offset, width].unpack("d*")
385
- offset += width
442
+ if id
443
+ i = map[id]
444
+ ptr[i * width, width].unpack("d*") if i
445
+ else
446
+ arr = []
447
+ offset = 0
448
+ d1.times do |i|
449
+ arr << ptr[offset, width].unpack("d*")
450
+ offset += width
451
+ end
452
+ arr
386
453
  end
387
- arr
388
454
  end
389
455
 
390
- def read_bias(ptr)
391
- real_array(ptr)
456
+ def read_bias(ptr, id, map)
457
+ if id
458
+ i = map[id]
459
+ ptr[i * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") if i
460
+ else
461
+ real_array(ptr)
462
+ end
392
463
  end
393
464
 
394
- def top_n(a_vec:, a_bias:, count:)
395
- include_ix = nil
396
- n_include = 0
397
- exclude_ix = nil
398
- n_exclude = 0
465
+ def top_n(a_vec:, a_bias:, count:, rated: nil, item_ids: nil)
466
+ if item_ids
467
+ # remove missing ids
468
+ item_ids = item_ids.map { |v| @item_map[v] }.compact
469
+ return [] if item_ids.empty?
470
+
471
+ include_ix = int_ptr(item_ids)
472
+ n_include = item_ids.size
473
+
474
+ # TODO uncomment in 0.2.0
475
+ count = n_include # if n_include < count
476
+ else
477
+ include_ix = nil
478
+ n_include = 0
479
+ end
480
+
481
+ if rated && !item_ids
482
+ # assumes rated is unique and all items are known
483
+ # calling code is responsible for this
484
+ exclude_ix = int_ptr(rated)
485
+ n_exclude = rated.size
486
+ remaining = @item_map.size - n_exclude
487
+ return [] if remaining == 0
488
+ count = remaining if remaining < count
489
+ else
490
+ exclude_ix = nil
491
+ n_exclude = 0
492
+ end
399
493
 
400
494
  outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
401
495
  outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
@@ -425,6 +519,16 @@ module Cmfrec
425
519
  data = to_dataset(data)
426
520
  user_info = to_dataset(user_info) if user_info
427
521
 
522
+ # remove unknown items
523
+ data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
524
+
525
+ if unknown_data.any?
526
+ # TODO warn for unknown items?
527
+ # warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
528
+ end
529
+
530
+ item_ids = data.map { |d| @item_map[d[:item_id]] }
531
+
428
532
  nnz = data.size
429
533
  a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
430
534
  bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
@@ -465,7 +569,7 @@ module Cmfrec
465
569
  check_ratings(ratings)
466
570
  end
467
571
  xa = real_ptr(ratings)
468
- x_col = int_ptr(data.map { |d| d[:item_id] })
572
+ x_col = int_ptr(item_ids)
469
573
  else
470
574
  xa = nil
471
575
  x_col = nil
@@ -479,7 +583,7 @@ module Cmfrec
479
583
  u_vec_sp, u_vec_x_col, nnz_u_vec,
480
584
  @na_as_zero_user,
481
585
  @nonneg,
482
- @u_colmeans_ptr,
586
+ @u_colmeans,
483
587
  @b, @n, @c,
484
588
  xa, x_col, nnz,
485
589
  @k, @k_user, @k_item, @k_main,
@@ -505,7 +609,7 @@ module Cmfrec
505
609
  @na_as_zero_user, @na_as_zero,
506
610
  @nonneg,
507
611
  @c, cb,
508
- @global_mean, @bias_b, @u_colmeans_ptr,
612
+ @global_mean, @bias_b, @u_colmeans,
509
613
  xa, x_col, nnz, xa_dense,
510
614
  @n, weight, @b, @bi,
511
615
  @add_implicit_features,
@@ -528,7 +632,7 @@ module Cmfrec
528
632
  check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
529
633
  end
530
634
 
531
- [a_vec, real_array(bias_a).first]
635
+ [a_vec, real_array(bias_a).first, item_ids.uniq]
532
636
  end
533
637
 
534
638
  # convert boolean to int
@@ -585,5 +689,126 @@ module Cmfrec
585
689
  def real_array(ptr)
586
690
  ptr.to_s(ptr.size).unpack("d*")
587
691
  end
692
+
693
+ def set_implicit_vars
694
+ @w_main_multiplier = 1.0
695
+ @alpha = 1.0
696
+ @adjust_weight = false # downweight?
697
+ @apply_log_transf = false
698
+
699
+ # different defaults
700
+ @lambda_ = 1e0
701
+ @w_user = 10
702
+ @w_item = 10
703
+ @finalize_chol = false
704
+ end
705
+
706
+ def dump_ptr(ptr)
707
+ ptr.to_s(ptr.size) if ptr
708
+ end
709
+
710
+ def load_ptr(str)
711
+ Fiddle::Pointer[str] if str
712
+ end
713
+
714
+ def marshal_dump
715
+ obj = {
716
+ implicit: @implicit
717
+ }
718
+
719
+ # options
720
+ obj[:factors] = @k
721
+ obj[:epochs] = @niter
722
+ obj[:verbose] = @verbose
723
+
724
+ # factors
725
+ obj[:user_map] = @user_map
726
+ obj[:item_map] = @item_map
727
+ obj[:rated] = @rated
728
+ obj[:user_factors] = dump_ptr(@a)
729
+ obj[:item_factors] = dump_ptr(@b)
730
+
731
+ # bias
732
+ obj[:user_bias] = dump_ptr(@bias_a)
733
+ obj[:item_bias] = dump_ptr(@bias_b)
734
+
735
+ # mean
736
+ obj[:global_mean] = @global_mean
737
+
738
+ # side info
739
+ obj[:user_info_map] = @user_info_map
740
+ obj[:item_info_map] = @item_info_map
741
+ obj[:user_info_factors] = dump_ptr(@c)
742
+ obj[:item_info_factors] = dump_ptr(@d)
743
+
744
+ # implicit features
745
+ obj[:add_implicit_features] = @add_implicit_features
746
+ obj[:user_factors_implicit] = dump_ptr(@ai)
747
+ obj[:item_factors_implicit] = dump_ptr(@bi)
748
+
749
+ unless @implicit
750
+ obj[:min_rating] = @min_rating
751
+ obj[:max_rating] = @max_rating
752
+ end
753
+
754
+ obj[:user_means] = dump_ptr(@u_colmeans)
755
+
756
+ obj
757
+ end
758
+
759
+ def marshal_load(obj)
760
+ @implicit = obj[:implicit]
761
+
762
+ # options
763
+ set_params(
764
+ k: obj[:factors],
765
+ niter: obj[:epochs],
766
+ verbose: obj[:verbose],
767
+ user_bias: !obj[:user_bias].nil?,
768
+ item_bias: !obj[:item_bias].nil?,
769
+ add_implicit_features: obj[:add_implicit_features]
770
+ )
771
+
772
+ # factors
773
+ @user_map = obj[:user_map]
774
+ @item_map = obj[:item_map]
775
+ @rated = obj[:rated] || {}
776
+ @a = load_ptr(obj[:user_factors])
777
+ @b = load_ptr(obj[:item_factors])
778
+
779
+ # bias
780
+ @bias_a = load_ptr(obj[:user_bias])
781
+ @bias_b = load_ptr(obj[:item_bias])
782
+
783
+ # mean
784
+ @global_mean = obj[:global_mean]
785
+
786
+ # side info
787
+ @user_info_map = obj[:user_info_map]
788
+ @item_info_map = obj[:item_info_map]
789
+ @c = load_ptr(obj[:user_info_factors])
790
+ @d = load_ptr(obj[:item_info_factors])
791
+
792
+ # implicit features
793
+ @add_implicit_features = obj[:add_implicit_features]
794
+ @ai = load_ptr(obj[:user_factors_implicit])
795
+ @bi = load_ptr(obj[:item_factors_implicit])
796
+
797
+ unless @implicit
798
+ @min_rating = obj[:min_rating]
799
+ @max_rating = obj[:max_rating]
800
+ end
801
+
802
+ @u_colmeans = load_ptr(obj[:user_means])
803
+
804
+ @m = @user_map.size
805
+ @n = @item_map.size
806
+ @m_u = @user_info_map.size
807
+ @n_i = @item_info_map.size
808
+
809
+ set_implicit_vars if @implicit
810
+
811
+ @fit = @m > 0
812
+ end
588
813
  end
589
814
  end
data/lib/cmfrec/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.6"
3
3
  end
Binary file
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-10 00:00:00.000000000 Z
11
+ date: 2021-08-12 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
- email: andrew@chartkick.com
14
+ email: andrew@ankane.org
15
15
  executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
@@ -25,6 +25,7 @@ files:
25
25
  - lib/cmfrec/recommender.rb
26
26
  - lib/cmfrec/version.rb
27
27
  - vendor/LICENSE.txt
28
+ - vendor/libcmfrec.arm64.dylib
28
29
  - vendor/libcmfrec.dylib
29
30
  - vendor/libcmfrec.so
30
31
  homepage: https://github.com/ankane/cmfrec
@@ -46,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
46
47
  - !ruby/object:Gem::Version
47
48
  version: '0'
48
49
  requirements: []
49
- rubygems_version: 3.1.4
50
+ rubygems_version: 3.2.22
50
51
  signing_key:
51
52
  specification_version: 4
52
53
  summary: Recommendations for Ruby using collective matrix factorization