nextrec 0.1.3__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +9 -10
- nextrec/basic/callback.py +0 -1
- nextrec/basic/dataloader.py +127 -168
- nextrec/basic/features.py +27 -24
- nextrec/basic/layers.py +159 -328
- nextrec/basic/loggers.py +37 -50
- nextrec/basic/metrics.py +147 -255
- nextrec/basic/model.py +462 -817
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +12 -16
- nextrec/data/preprocessor.py +252 -276
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +22 -30
- nextrec/loss/match_losses.py +83 -116
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +61 -70
- nextrec/models/match/dssm_v2.py +51 -61
- nextrec/models/match/mind.py +71 -89
- nextrec/models/match/sdm.py +81 -93
- nextrec/models/match/youtube_dnn.py +53 -62
- nextrec/models/multi_task/esmm.py +43 -49
- nextrec/models/multi_task/mmoe.py +56 -65
- nextrec/models/multi_task/ple.py +65 -92
- nextrec/models/multi_task/share_bottom.py +42 -48
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +30 -39
- nextrec/models/ranking/autoint.py +57 -70
- nextrec/models/ranking/dcn.py +35 -43
- nextrec/models/ranking/deepfm.py +28 -34
- nextrec/models/ranking/dien.py +79 -115
- nextrec/models/ranking/din.py +60 -84
- nextrec/models/ranking/fibinet.py +35 -51
- nextrec/models/ranking/fm.py +26 -28
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +31 -30
- nextrec/models/ranking/widedeep.py +31 -36
- nextrec/models/ranking/xdeepfm.py +39 -46
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +15 -23
- nextrec/utils/optimizer.py +10 -14
- {nextrec-0.1.3.dist-info → nextrec-0.1.7.dist-info}/METADATA +16 -7
- nextrec-0.1.7.dist-info/RECORD +51 -0
- nextrec-0.1.3.dist-info/RECORD +0 -51
- {nextrec-0.1.3.dist-info → nextrec-0.1.7.dist-info}/WHEEL +0 -0
- {nextrec-0.1.3.dist-info → nextrec-0.1.7.dist-info}/licenses/LICENSE +0 -0
nextrec/data/__init__.py
CHANGED
|
@@ -19,9 +19,9 @@ from nextrec.data.data_utils import (
|
|
|
19
19
|
from nextrec.data import data_utils
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
'collate_fn',
|
|
23
|
+
'get_column_data',
|
|
24
|
+
'split_dict_random',
|
|
25
|
+
'build_eval_candidates',
|
|
26
|
+
'data_utils',
|
|
27
27
|
]
|
nextrec/data/data_utils.py
CHANGED
|
@@ -10,7 +10,6 @@ import torch
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import pandas as pd
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
def collate_fn(batch):
|
|
15
14
|
"""
|
|
16
15
|
Custom collate function for batching tuples of tensors.
|
|
@@ -31,12 +30,12 @@ def collate_fn(batch):
|
|
|
31
30
|
|
|
32
31
|
num_tensors = len(batch[0])
|
|
33
32
|
result = []
|
|
34
|
-
|
|
33
|
+
|
|
35
34
|
for i in range(num_tensors):
|
|
36
35
|
tensor_list = [item[i] for item in batch]
|
|
37
36
|
stacked = torch.cat(tensor_list, dim=0)
|
|
38
37
|
result.append(stacked)
|
|
39
|
-
|
|
38
|
+
|
|
40
39
|
return tuple(result)
|
|
41
40
|
|
|
42
41
|
|
|
@@ -54,9 +53,7 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
|
|
|
54
53
|
raise KeyError(f"Unsupported data type for extracting column {name}")
|
|
55
54
|
|
|
56
55
|
|
|
57
|
-
def split_dict_random(
|
|
58
|
-
data_dict: dict, test_size: float = 0.2, random_state: int | None = None
|
|
59
|
-
):
|
|
56
|
+
def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
|
|
60
57
|
"""Randomly split a dictionary of data into training and testing sets."""
|
|
61
58
|
lengths = [len(v) for v in data_dict.values()]
|
|
62
59
|
if len(set(lengths)) != 1:
|
|
@@ -72,13 +69,13 @@ def split_dict_random(
|
|
|
72
69
|
if isinstance(v, np.ndarray):
|
|
73
70
|
return v[idx]
|
|
74
71
|
elif isinstance(v, pd.Series):
|
|
75
|
-
return v.iloc[idx].to_numpy()
|
|
72
|
+
return v.iloc[idx].to_numpy()
|
|
76
73
|
else:
|
|
77
|
-
v_arr = np.asarray(v, dtype=object)
|
|
74
|
+
v_arr = np.asarray(v, dtype=object)
|
|
78
75
|
return v_arr[idx]
|
|
79
76
|
|
|
80
77
|
train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
|
|
81
|
-
test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
|
|
78
|
+
test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
|
|
82
79
|
return train_dict, test_dict
|
|
83
80
|
|
|
84
81
|
|
|
@@ -93,7 +90,7 @@ def build_eval_candidates(
|
|
|
93
90
|
num_neg_per_pos: int = 50,
|
|
94
91
|
random_seed: int = 2025,
|
|
95
92
|
) -> pd.DataFrame:
|
|
96
|
-
"""Build evaluation candidates with positive and negative samples for each user."""
|
|
93
|
+
"""Build evaluation candidates with positive and negative samples for each user. """
|
|
97
94
|
rng = np.random.default_rng(random_seed)
|
|
98
95
|
|
|
99
96
|
users = df_all[user_col].unique()
|
|
@@ -102,7 +99,8 @@ def build_eval_candidates(
|
|
|
102
99
|
rows = []
|
|
103
100
|
|
|
104
101
|
user_hist_items = {
|
|
105
|
-
u: df_all[df_all[user_col] == u][item_col].unique()
|
|
102
|
+
u: df_all[df_all[user_col] == u][item_col].unique()
|
|
103
|
+
for u in users
|
|
106
104
|
}
|
|
107
105
|
|
|
108
106
|
for u in users:
|
|
@@ -114,9 +112,7 @@ def build_eval_candidates(
|
|
|
114
112
|
pos_items = pos_items[:num_pos_per_user]
|
|
115
113
|
seen_items = set(user_hist_items[u])
|
|
116
114
|
|
|
117
|
-
neg_pool = np.setdiff1d(
|
|
118
|
-
all_items, np.fromiter(seen_items, dtype=all_items.dtype)
|
|
119
|
-
)
|
|
115
|
+
neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
|
|
120
116
|
if len(neg_pool) == 0:
|
|
121
117
|
continue
|
|
122
118
|
|
|
@@ -131,6 +127,6 @@ def build_eval_candidates(
|
|
|
131
127
|
rows.append((u, ni, 0))
|
|
132
128
|
|
|
133
129
|
eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
|
|
134
|
-
eval_df = eval_df.merge(user_features, on=user_col, how="left")
|
|
135
|
-
eval_df = eval_df.merge(item_features, on=item_col, how="left")
|
|
130
|
+
eval_df = eval_df.merge(user_features, on=user_col, how='left')
|
|
131
|
+
eval_df = eval_df.merge(item_features, on=item_col, how='left')
|
|
136
132
|
return eval_df
|