cmfrec 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bb7b07ae46500a545f1a130dfc5648aa3f925f9b5766a6c70a1652c7b5732182
4
- data.tar.gz: e89a6d1900cda651dc6b0aac2899050e28680cddfb6b39b6b5eacfe467b59aad
3
+ metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
4
+ data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
5
5
  SHA512:
6
- metadata.gz: 117aa6952fe0ab8ddebfaece6655cf479a7adbab7d6f634e7d3428c72824a410812c037ae006366180a9691a6d160d8065b777a9c10a33a5ccfefedb28c99ec6
7
- data.tar.gz: 57985a055705b820226a2aa1451453383ee3509e43225f8fdb09e713c4530754b0b608f7d1b4814973b43e3d625f824f9f87939687d015b352cc8905f7b4f118
6
+ metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
7
+ data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.5 (2021-08-10)
2
+
3
+ - Fixed issue with `user_recs` and `new_user_recs` returning rated items
4
+ - Fixed error with `new_user_recs` (item ids in `data` are now mapped to internal item indices, and unknown items are ignored)
5
+
1
6
  ## 0.1.4 (2021-02-04)
2
7
 
3
8
  - Added support for saving and loading recommenders
@@ -67,6 +67,7 @@ module Cmfrec
67
67
  user = @user_map[user_id]
68
68
 
69
69
  if user
70
+ # TODO use top_n for item_ids as well
70
71
  if item_ids
71
72
  # remove missing ids
72
73
  item_ids = item_ids.select { |v| @item_map[v] }
@@ -80,7 +81,8 @@ module Cmfrec
80
81
  else
81
82
  a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
82
83
  a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
83
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
84
+ # @rated[user] will be nil for recommenders saved before 0.1.5
85
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
84
86
  end
85
87
  else
86
88
  # no items if user is unknown
@@ -93,8 +95,8 @@ module Cmfrec
93
95
  def new_user_recs(data, count: 5, user_info: nil)
94
96
  check_fit
95
97
 
96
- a_vec, a_bias = factors_warm(data, user_info: user_info)
97
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
98
+ a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
99
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
98
100
  end
99
101
 
100
102
  def user_factors
@@ -191,11 +193,17 @@ module Cmfrec
191
193
  x_col = []
192
194
  x_val = []
193
195
  value_key = @implicit ? :value : :rating
196
+ @rated = Hash.new { |hash, key| hash[key] = {} }
194
197
  train_set.each do |v|
195
- x_row << @user_map[v[:user_id]]
196
- x_col << @item_map[v[:item_id]]
198
+ u = @user_map[v[:user_id]]
199
+ i = @item_map[v[:item_id]]
200
+ @rated[u][i] = true
201
+
202
+ x_row << u
203
+ x_col << i
197
204
  x_val << (v[value_key] || 1)
198
205
  end
206
+ @rated.default = nil
199
207
 
200
208
  @m = @user_map.size
201
209
  @n = @item_map.size
@@ -450,11 +458,22 @@ module Cmfrec
450
458
  real_array(ptr)
451
459
  end
452
460
 
453
- def top_n(a_vec:, a_bias:, count:)
461
+ def top_n(a_vec:, a_bias:, count:, rated: nil)
454
462
  include_ix = nil
455
463
  n_include = 0
456
- exclude_ix = nil
457
- n_exclude = 0
464
+
465
+ if rated
466
+ # assumes rated is unique and all items are known
467
+ # calling code is responsible for this
468
+ exclude_ix = int_ptr(rated)
469
+ n_exclude = rated.size
470
+ remaining = @item_map.size - n_exclude
471
+ return [] if remaining == 0
472
+ count = remaining if remaining < count
473
+ else
474
+ exclude_ix = nil
475
+ n_exclude = 0
476
+ end
458
477
 
459
478
  outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
460
479
  outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
@@ -484,6 +503,16 @@ module Cmfrec
484
503
  data = to_dataset(data)
485
504
  user_info = to_dataset(user_info) if user_info
486
505
 
506
+ # remove unknown items
507
+ data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
508
+
509
+ if unknown_data.any?
510
+ # TODO warn for unknown items?
511
+ # warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
512
+ end
513
+
514
+ item_ids = data.map { |d| @item_map[d[:item_id]] }
515
+
487
516
  nnz = data.size
488
517
  a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
489
518
  bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
@@ -524,7 +553,7 @@ module Cmfrec
524
553
  check_ratings(ratings)
525
554
  end
526
555
  xa = real_ptr(ratings)
527
- x_col = int_ptr(data.map { |d| d[:item_id] })
556
+ x_col = int_ptr(item_ids)
528
557
  else
529
558
  xa = nil
530
559
  x_col = nil
@@ -587,7 +616,7 @@ module Cmfrec
587
616
  check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
588
617
  end
589
618
 
590
- [a_vec, real_array(bias_a).first]
619
+ [a_vec, real_array(bias_a).first, item_ids.uniq]
591
620
  end
592
621
 
593
622
  # convert boolean to int
@@ -679,6 +708,7 @@ module Cmfrec
679
708
  # factors
680
709
  obj[:user_map] = @user_map
681
710
  obj[:item_map] = @item_map
711
+ obj[:rated] = @rated
682
712
  obj[:user_factors] = dump_ptr(@a)
683
713
  obj[:item_factors] = dump_ptr(@b)
684
714
 
@@ -726,6 +756,7 @@ module Cmfrec
726
756
  # factors
727
757
  @user_map = obj[:user_map]
728
758
  @item_map = obj[:item_map]
759
+ @rated = obj[:rated] || {}
729
760
  @a = load_ptr(obj[:user_factors])
730
761
  @b = load_ptr(obj[:item_factors])
731
762
 
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-05 00:00:00.000000000 Z
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
49
  requirements: []
50
- rubygems_version: 3.2.3
50
+ rubygems_version: 3.2.22
51
51
  signing_key:
52
52
  specification_version: 4
53
53
  summary: Recommendations for Ruby using collective matrix factorization