cmfrec 0.1.4 → 0.1.5

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bb7b07ae46500a545f1a130dfc5648aa3f925f9b5766a6c70a1652c7b5732182
4
- data.tar.gz: e89a6d1900cda651dc6b0aac2899050e28680cddfb6b39b6b5eacfe467b59aad
3
+ metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
4
+ data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
5
5
  SHA512:
6
- metadata.gz: 117aa6952fe0ab8ddebfaece6655cf479a7adbab7d6f634e7d3428c72824a410812c037ae006366180a9691a6d160d8065b777a9c10a33a5ccfefedb28c99ec6
7
- data.tar.gz: 57985a055705b820226a2aa1451453383ee3509e43225f8fdb09e713c4530754b0b608f7d1b4814973b43e3d625f824f9f87939687d015b352cc8905f7b4f118
6
+ metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
7
+ data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.5 (2021-08-10)
2
+
3
+ - Fixed issue with `user_recs` and `new_user_recs` returning rated items
4
+ - Fixed error with `new_user_recs`
5
+
1
6
  ## 0.1.4 (2021-02-04)
2
7
 
3
8
  - Added support for saving and loading recommenders
@@ -67,6 +67,7 @@ module Cmfrec
67
67
  user = @user_map[user_id]
68
68
 
69
69
  if user
70
+ # TODO use top_n for item_ids as well
70
71
  if item_ids
71
72
  # remove missing ids
72
73
  item_ids = item_ids.select { |v| @item_map[v] }
@@ -80,7 +81,8 @@ module Cmfrec
80
81
  else
81
82
  a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
82
83
  a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
83
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
84
+ # @rated[user] will be nil for recommenders saved before 0.1.5
85
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
84
86
  end
85
87
  else
86
88
  # no items if user is unknown
@@ -93,8 +95,8 @@ module Cmfrec
93
95
  def new_user_recs(data, count: 5, user_info: nil)
94
96
  check_fit
95
97
 
96
- a_vec, a_bias = factors_warm(data, user_info: user_info)
97
- top_n(a_vec: a_vec, a_bias: a_bias, count: count)
98
+ a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
99
+ top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
98
100
  end
99
101
 
100
102
  def user_factors
@@ -191,11 +193,17 @@ module Cmfrec
191
193
  x_col = []
192
194
  x_val = []
193
195
  value_key = @implicit ? :value : :rating
196
+ @rated = Hash.new { |hash, key| hash[key] = {} }
194
197
  train_set.each do |v|
195
- x_row << @user_map[v[:user_id]]
196
- x_col << @item_map[v[:item_id]]
198
+ u = @user_map[v[:user_id]]
199
+ i = @item_map[v[:item_id]]
200
+ @rated[u][i] = true
201
+
202
+ x_row << u
203
+ x_col << i
197
204
  x_val << (v[value_key] || 1)
198
205
  end
206
+ @rated.default = nil
199
207
 
200
208
  @m = @user_map.size
201
209
  @n = @item_map.size
@@ -450,11 +458,22 @@ module Cmfrec
450
458
  real_array(ptr)
451
459
  end
452
460
 
453
- def top_n(a_vec:, a_bias:, count:)
461
+ def top_n(a_vec:, a_bias:, count:, rated: nil)
454
462
  include_ix = nil
455
463
  n_include = 0
456
- exclude_ix = nil
457
- n_exclude = 0
464
+
465
+ if rated
466
+ # assumes rated is unique and all items are known
467
+ # calling code is responsible for this
468
+ exclude_ix = int_ptr(rated)
469
+ n_exclude = rated.size
470
+ remaining = @item_map.size - n_exclude
471
+ return [] if remaining == 0
472
+ count = remaining if remaining < count
473
+ else
474
+ exclude_ix = nil
475
+ n_exclude = 0
476
+ end
458
477
 
459
478
  outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
460
479
  outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
@@ -484,6 +503,16 @@ module Cmfrec
484
503
  data = to_dataset(data)
485
504
  user_info = to_dataset(user_info) if user_info
486
505
 
506
+ # remove unknown items
507
+ data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
508
+
509
+ if unknown_data.any?
510
+ # TODO warn for unknown items?
511
+ # warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
512
+ end
513
+
514
+ item_ids = data.map { |d| @item_map[d[:item_id]] }
515
+
487
516
  nnz = data.size
488
517
  a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
489
518
  bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
@@ -524,7 +553,7 @@ module Cmfrec
524
553
  check_ratings(ratings)
525
554
  end
526
555
  xa = real_ptr(ratings)
527
- x_col = int_ptr(data.map { |d| d[:item_id] })
556
+ x_col = int_ptr(item_ids)
528
557
  else
529
558
  xa = nil
530
559
  x_col = nil
@@ -587,7 +616,7 @@ module Cmfrec
587
616
  check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
588
617
  end
589
618
 
590
- [a_vec, real_array(bias_a).first]
619
+ [a_vec, real_array(bias_a).first, item_ids.uniq]
591
620
  end
592
621
 
593
622
  # convert boolean to int
@@ -679,6 +708,7 @@ module Cmfrec
679
708
  # factors
680
709
  obj[:user_map] = @user_map
681
710
  obj[:item_map] = @item_map
711
+ obj[:rated] = @rated
682
712
  obj[:user_factors] = dump_ptr(@a)
683
713
  obj[:item_factors] = dump_ptr(@b)
684
714
 
@@ -726,6 +756,7 @@ module Cmfrec
726
756
  # factors
727
757
  @user_map = obj[:user_map]
728
758
  @item_map = obj[:item_map]
759
+ @rated = obj[:rated] || {}
729
760
  @a = load_ptr(obj[:user_factors])
730
761
  @b = load_ptr(obj[:item_factors])
731
762
 
@@ -1,3 +1,3 @@
1
1
  module Cmfrec
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cmfrec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-05 00:00:00.000000000 Z
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
49
  requirements: []
50
- rubygems_version: 3.2.3
50
+ rubygems_version: 3.2.22
51
51
  signing_key:
52
52
  specification_version: 4
53
53
  summary: Recommendations for Ruby using collective matrix factorization