cmfrec 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/cmfrec/recommender.rb +41 -10
- data/lib/cmfrec/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
|
4
|
+
data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
|
7
|
+
data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
|
data/CHANGELOG.md
CHANGED
data/lib/cmfrec/recommender.rb
CHANGED
@@ -67,6 +67,7 @@ module Cmfrec
|
|
67
67
|
user = @user_map[user_id]
|
68
68
|
|
69
69
|
if user
|
70
|
+
# TODO use top_n for item_ids as well
|
70
71
|
if item_ids
|
71
72
|
# remove missing ids
|
72
73
|
item_ids = item_ids.select { |v| @item_map[v] }
|
@@ -80,7 +81,8 @@ module Cmfrec
|
|
80
81
|
else
|
81
82
|
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
82
83
|
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
83
|
-
|
84
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
|
84
86
|
end
|
85
87
|
else
|
86
88
|
# no items if user is unknown
|
@@ -93,8 +95,8 @@ module Cmfrec
|
|
93
95
|
def new_user_recs(data, count: 5, user_info: nil)
|
94
96
|
check_fit
|
95
97
|
|
96
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
97
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
98
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
99
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
|
98
100
|
end
|
99
101
|
|
100
102
|
def user_factors
|
@@ -191,11 +193,17 @@ module Cmfrec
|
|
191
193
|
x_col = []
|
192
194
|
x_val = []
|
193
195
|
value_key = @implicit ? :value : :rating
|
196
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
194
197
|
train_set.each do |v|
|
195
|
-
|
196
|
-
|
198
|
+
u = @user_map[v[:user_id]]
|
199
|
+
i = @item_map[v[:item_id]]
|
200
|
+
@rated[u][i] = true
|
201
|
+
|
202
|
+
x_row << u
|
203
|
+
x_col << i
|
197
204
|
x_val << (v[value_key] || 1)
|
198
205
|
end
|
206
|
+
@rated.default = nil
|
199
207
|
|
200
208
|
@m = @user_map.size
|
201
209
|
@n = @item_map.size
|
@@ -450,11 +458,22 @@ module Cmfrec
|
|
450
458
|
real_array(ptr)
|
451
459
|
end
|
452
460
|
|
453
|
-
def top_n(a_vec:, a_bias:, count:)
|
461
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil)
|
454
462
|
include_ix = nil
|
455
463
|
n_include = 0
|
456
|
-
|
457
|
-
|
464
|
+
|
465
|
+
if rated
|
466
|
+
# assumes rated is unique and all items are known
|
467
|
+
# calling code is responsible for this
|
468
|
+
exclude_ix = int_ptr(rated)
|
469
|
+
n_exclude = rated.size
|
470
|
+
remaining = @item_map.size - n_exclude
|
471
|
+
return [] if remaining == 0
|
472
|
+
count = remaining if remaining < count
|
473
|
+
else
|
474
|
+
exclude_ix = nil
|
475
|
+
n_exclude = 0
|
476
|
+
end
|
458
477
|
|
459
478
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
460
479
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -484,6 +503,16 @@ module Cmfrec
|
|
484
503
|
data = to_dataset(data)
|
485
504
|
user_info = to_dataset(user_info) if user_info
|
486
505
|
|
506
|
+
# remove unknown items
|
507
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
508
|
+
|
509
|
+
if unknown_data.any?
|
510
|
+
# TODO warn for unknown items?
|
511
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
512
|
+
end
|
513
|
+
|
514
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
515
|
+
|
487
516
|
nnz = data.size
|
488
517
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
489
518
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -524,7 +553,7 @@ module Cmfrec
|
|
524
553
|
check_ratings(ratings)
|
525
554
|
end
|
526
555
|
xa = real_ptr(ratings)
|
527
|
-
x_col = int_ptr(
|
556
|
+
x_col = int_ptr(item_ids)
|
528
557
|
else
|
529
558
|
xa = nil
|
530
559
|
x_col = nil
|
@@ -587,7 +616,7 @@ module Cmfrec
|
|
587
616
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
588
617
|
end
|
589
618
|
|
590
|
-
[a_vec, real_array(bias_a).first]
|
619
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
591
620
|
end
|
592
621
|
|
593
622
|
# convert boolean to int
|
@@ -679,6 +708,7 @@ module Cmfrec
|
|
679
708
|
# factors
|
680
709
|
obj[:user_map] = @user_map
|
681
710
|
obj[:item_map] = @item_map
|
711
|
+
obj[:rated] = @rated
|
682
712
|
obj[:user_factors] = dump_ptr(@a)
|
683
713
|
obj[:item_factors] = dump_ptr(@b)
|
684
714
|
|
@@ -726,6 +756,7 @@ module Cmfrec
|
|
726
756
|
# factors
|
727
757
|
@user_map = obj[:user_map]
|
728
758
|
@item_map = obj[:item_map]
|
759
|
+
@rated = obj[:rated] || {}
|
729
760
|
@a = load_ptr(obj[:user_factors])
|
730
761
|
@b = load_ptr(obj[:item_factors])
|
731
762
|
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '0'
|
49
49
|
requirements: []
|
50
|
-
rubygems_version: 3.2.
|
50
|
+
rubygems_version: 3.2.22
|
51
51
|
signing_key:
|
52
52
|
specification_version: 4
|
53
53
|
summary: Recommendations for Ruby using collective matrix factorization
|