cmfrec 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/cmfrec/recommender.rb +41 -10
- data/lib/cmfrec/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a200b99780aeee83c5a0190e593c3806c4140aa4c096b0ef2c112fd21a858b7
|
4
|
+
data.tar.gz: 27b354dd491ca1d7a728d4ef2318c72c56d76c691286bc267c74d3b8dafd7c5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c1ceefeac9131a804d94b6da78c7a6614f50c15d144841277829e5279ce2583872a681e972e336e21cb82d49466dee9f5f1de6a482fe8a99ab7aa5176ab0e5c
|
7
|
+
data.tar.gz: 930b20b017b92555071699b38d903230da3ca1fc7b8b91b9e2f96ae3ae4dda9c8c289a47117beaa1e2539e5c9d622fe958d431f5106e8ab7c4809ea17e9fb6e8
|
data/CHANGELOG.md
CHANGED
data/lib/cmfrec/recommender.rb
CHANGED
@@ -67,6 +67,7 @@ module Cmfrec
|
|
67
67
|
user = @user_map[user_id]
|
68
68
|
|
69
69
|
if user
|
70
|
+
# TODO use top_n for item_ids as well
|
70
71
|
if item_ids
|
71
72
|
# remove missing ids
|
72
73
|
item_ids = item_ids.select { |v| @item_map[v] }
|
@@ -80,7 +81,8 @@ module Cmfrec
|
|
80
81
|
else
|
81
82
|
a_vec = @a[user * @k * Fiddle::SIZEOF_DOUBLE, @k * Fiddle::SIZEOF_DOUBLE]
|
82
83
|
a_bias = @bias_a ? @bias_a[user * Fiddle::SIZEOF_DOUBLE, Fiddle::SIZEOF_DOUBLE].unpack1("d") : 0
|
83
|
-
|
84
|
+
# @rated[user] will be nil for recommenders saved before 0.1.5
|
85
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: (@rated[user] || {}).keys)
|
84
86
|
end
|
85
87
|
else
|
86
88
|
# no items if user is unknown
|
@@ -93,8 +95,8 @@ module Cmfrec
|
|
93
95
|
def new_user_recs(data, count: 5, user_info: nil)
|
94
96
|
check_fit
|
95
97
|
|
96
|
-
a_vec, a_bias = factors_warm(data, user_info: user_info)
|
97
|
-
top_n(a_vec: a_vec, a_bias: a_bias, count: count)
|
98
|
+
a_vec, a_bias, rated = factors_warm(data, user_info: user_info)
|
99
|
+
top_n(a_vec: a_vec, a_bias: a_bias, count: count, rated: rated)
|
98
100
|
end
|
99
101
|
|
100
102
|
def user_factors
|
@@ -191,11 +193,17 @@ module Cmfrec
|
|
191
193
|
x_col = []
|
192
194
|
x_val = []
|
193
195
|
value_key = @implicit ? :value : :rating
|
196
|
+
@rated = Hash.new { |hash, key| hash[key] = {} }
|
194
197
|
train_set.each do |v|
|
195
|
-
|
196
|
-
|
198
|
+
u = @user_map[v[:user_id]]
|
199
|
+
i = @item_map[v[:item_id]]
|
200
|
+
@rated[u][i] = true
|
201
|
+
|
202
|
+
x_row << u
|
203
|
+
x_col << i
|
197
204
|
x_val << (v[value_key] || 1)
|
198
205
|
end
|
206
|
+
@rated.default = nil
|
199
207
|
|
200
208
|
@m = @user_map.size
|
201
209
|
@n = @item_map.size
|
@@ -450,11 +458,22 @@ module Cmfrec
|
|
450
458
|
real_array(ptr)
|
451
459
|
end
|
452
460
|
|
453
|
-
def top_n(a_vec:, a_bias:, count:)
|
461
|
+
def top_n(a_vec:, a_bias:, count:, rated: nil)
|
454
462
|
include_ix = nil
|
455
463
|
n_include = 0
|
456
|
-
|
457
|
-
|
464
|
+
|
465
|
+
if rated
|
466
|
+
# assumes rated is unique and all items are known
|
467
|
+
# calling code is responsible for this
|
468
|
+
exclude_ix = int_ptr(rated)
|
469
|
+
n_exclude = rated.size
|
470
|
+
remaining = @item_map.size - n_exclude
|
471
|
+
return [] if remaining == 0
|
472
|
+
count = remaining if remaining < count
|
473
|
+
else
|
474
|
+
exclude_ix = nil
|
475
|
+
n_exclude = 0
|
476
|
+
end
|
458
477
|
|
459
478
|
outp_ix = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_INT)
|
460
479
|
outp_score = Fiddle::Pointer.malloc(count * Fiddle::SIZEOF_DOUBLE)
|
@@ -484,6 +503,16 @@ module Cmfrec
|
|
484
503
|
data = to_dataset(data)
|
485
504
|
user_info = to_dataset(user_info) if user_info
|
486
505
|
|
506
|
+
# remove unknown items
|
507
|
+
data, unknown_data = data.partition { |d| @item_map[d[:item_id]] }
|
508
|
+
|
509
|
+
if unknown_data.any?
|
510
|
+
# TODO warn for unknown items?
|
511
|
+
# warn "[cmfrec] Unknown items: #{unknown_data.map { |d| d[:item_id] }.join(", ")}"
|
512
|
+
end
|
513
|
+
|
514
|
+
item_ids = data.map { |d| @item_map[d[:item_id]] }
|
515
|
+
|
487
516
|
nnz = data.size
|
488
517
|
a_vec = Fiddle::Pointer.malloc((@k_user + @k + @k_main) * Fiddle::SIZEOF_DOUBLE)
|
489
518
|
bias_a = Fiddle::Pointer.malloc(Fiddle::SIZEOF_DOUBLE)
|
@@ -524,7 +553,7 @@ module Cmfrec
|
|
524
553
|
check_ratings(ratings)
|
525
554
|
end
|
526
555
|
xa = real_ptr(ratings)
|
527
|
-
x_col = int_ptr(
|
556
|
+
x_col = int_ptr(item_ids)
|
528
557
|
else
|
529
558
|
xa = nil
|
530
559
|
x_col = nil
|
@@ -587,7 +616,7 @@ module Cmfrec
|
|
587
616
|
check_status FFI.factors_collective_explicit_single(*fiddle_args(args))
|
588
617
|
end
|
589
618
|
|
590
|
-
[a_vec, real_array(bias_a).first]
|
619
|
+
[a_vec, real_array(bias_a).first, item_ids.uniq]
|
591
620
|
end
|
592
621
|
|
593
622
|
# convert boolean to int
|
@@ -679,6 +708,7 @@ module Cmfrec
|
|
679
708
|
# factors
|
680
709
|
obj[:user_map] = @user_map
|
681
710
|
obj[:item_map] = @item_map
|
711
|
+
obj[:rated] = @rated
|
682
712
|
obj[:user_factors] = dump_ptr(@a)
|
683
713
|
obj[:item_factors] = dump_ptr(@b)
|
684
714
|
|
@@ -726,6 +756,7 @@ module Cmfrec
|
|
726
756
|
# factors
|
727
757
|
@user_map = obj[:user_map]
|
728
758
|
@item_map = obj[:item_map]
|
759
|
+
@rated = obj[:rated] || {}
|
729
760
|
@a = load_ptr(obj[:user_factors])
|
730
761
|
@b = load_ptr(obj[:item_factors])
|
731
762
|
|
data/lib/cmfrec/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cmfrec
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '0'
|
49
49
|
requirements: []
|
50
|
-
rubygems_version: 3.2.
|
50
|
+
rubygems_version: 3.2.22
|
51
51
|
signing_key:
|
52
52
|
specification_version: 4
|
53
53
|
summary: Recommendations for Ruby using collective matrix factorization
|