nextrec-0.1.1-py3-none-any.whl → nextrec-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -9
- nextrec/basic/callback.py +1 -0
- nextrec/basic/dataloader.py +168 -127
- nextrec/basic/features.py +24 -27
- nextrec/basic/layers.py +328 -159
- nextrec/basic/loggers.py +50 -37
- nextrec/basic/metrics.py +255 -147
- nextrec/basic/model.py +817 -462
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +16 -12
- nextrec/data/preprocessor.py +276 -252
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +30 -22
- nextrec/loss/match_losses.py +116 -83
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +70 -61
- nextrec/models/match/dssm_v2.py +61 -51
- nextrec/models/match/mind.py +89 -71
- nextrec/models/match/sdm.py +93 -81
- nextrec/models/match/youtube_dnn.py +62 -53
- nextrec/models/multi_task/esmm.py +49 -43
- nextrec/models/multi_task/mmoe.py +65 -56
- nextrec/models/multi_task/ple.py +92 -65
- nextrec/models/multi_task/share_bottom.py +48 -42
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +39 -30
- nextrec/models/ranking/autoint.py +70 -57
- nextrec/models/ranking/dcn.py +43 -35
- nextrec/models/ranking/deepfm.py +34 -28
- nextrec/models/ranking/dien.py +115 -79
- nextrec/models/ranking/din.py +84 -60
- nextrec/models/ranking/fibinet.py +51 -35
- nextrec/models/ranking/fm.py +28 -26
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +30 -31
- nextrec/models/ranking/widedeep.py +36 -31
- nextrec/models/ranking/xdeepfm.py +46 -39
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +23 -15
- nextrec/utils/optimizer.py +14 -10
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
- nextrec-0.1.2.dist-info/RECORD +51 -0
- nextrec-0.1.1.dist-info/RECORD +0 -51
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/data/__init__.py
CHANGED
```diff
@@ -19,9 +19,9 @@ from nextrec.data.data_utils import (
 from nextrec.data import data_utils
 
 __all__ = [
-
-
-
-
-
+    "collate_fn",
+    "get_column_data",
+    "split_dict_random",
+    "build_eval_candidates",
+    "data_utils",
 ]
```
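For orientation, a hedged sketch of the import surface this `__all__` exposes. Only `get_column_data`'s type hints and its `KeyError` fallback are visible in this diff, so the dict/DataFrame column extraction shown here is an assumption:

```python
# Hedged sketch of the re-exported API. Only get_column_data's type hints
# and KeyError fallback are visible in the diff, so the exact return
# values here are assumptions.
import pandas as pd

from nextrec.data import get_column_data

as_dict = {"user_id": [1, 2, 3]}
as_frame = pd.DataFrame(as_dict)

print(get_column_data(as_dict, "user_id"))   # the dict's column
print(get_column_data(as_frame, "user_id"))  # the DataFrame's column
```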
nextrec/data/data_utils.py
CHANGED
```diff
@@ -10,6 +10,7 @@ import torch
 import numpy as np
 import pandas as pd
 
+
 def collate_fn(batch):
     """
     Custom collate function for batching tuples of tensors.
@@ -30,12 +31,12 @@ def collate_fn(batch):
 
     num_tensors = len(batch[0])
     result = []
-
+
     for i in range(num_tensors):
         tensor_list = [item[i] for item in batch]
         stacked = torch.cat(tensor_list, dim=0)
         result.append(stacked)
-
+
     return tuple(result)
 
 
```
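The hunks above only touch blank lines; the visible body shows `collate_fn` concatenating same-position tensors with `torch.cat(..., dim=0)` rather than stacking them, so each sample is expected to already carry a leading batch dimension. A minimal sketch of that behavior (the sample shapes are illustrative, not from the package):

```python
# Sketch of collate_fn as defined above: each sample is a tuple of tensors
# with a leading batch dimension, because same-position tensors are
# concatenated with torch.cat(dim=0) rather than stacked.
import torch

from nextrec.data import collate_fn

batch = [
    (torch.ones(1, 4), torch.zeros(1)),     # (features, label) for sample 1
    (torch.ones(1, 4) * 2, torch.ones(1)),  # (features, label) for sample 2
]
features, labels = collate_fn(batch)
print(features.shape, labels.shape)  # torch.Size([2, 4]) torch.Size([2])
```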
```diff
@@ -53,7 +54,9 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
         raise KeyError(f"Unsupported data type for extracting column {name}")
 
 
-def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
+def split_dict_random(
+    data_dict: dict, test_size: float = 0.2, random_state: int | None = None
+):
     """Randomly split a dictionary of data into training and testing sets."""
     lengths = [len(v) for v in data_dict.values()]
     if len(set(lengths)) != 1:
@@ -69,13 +72,13 @@ def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
         if isinstance(v, np.ndarray):
             return v[idx]
         elif isinstance(v, pd.Series):
-            return v.iloc[idx].to_numpy()
+            return v.iloc[idx].to_numpy()
         else:
-            v_arr = np.asarray(v, dtype=object)
+            v_arr = np.asarray(v, dtype=object)
             return v_arr[idx]
 
     train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
-    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
+    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
     return train_dict, test_dict
 
 
```
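A small usage sketch of `split_dict_random`, built from the signature and the `take` helper shown above; the train/test index generation sits outside these hunks, so the 80/20 sizes assume the usual `test_size` semantics:

```python
# Sketch of split_dict_random on a column-per-key dict. The index
# generation is outside the visible hunks, so the 80/20 sizes below
# assume the conventional test_size semantics.
import numpy as np

from nextrec.data import split_dict_random

data = {
    "user_id": np.arange(100),                   # np.ndarray column
    "label": np.random.randint(0, 2, size=100),  # columns must share one length
}
train, test = split_dict_random(data, test_size=0.2, random_state=42)
print(len(train["user_id"]), len(test["user_id"]))  # expected: 80 20
```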
```diff
@@ -90,7 +93,7 @@ def build_eval_candidates(
     num_neg_per_pos: int = 50,
     random_seed: int = 2025,
 ) -> pd.DataFrame:
-    """Build evaluation candidates with positive and negative samples for each user."""
+    """Build evaluation candidates with positive and negative samples for each user."""
     rng = np.random.default_rng(random_seed)
 
     users = df_all[user_col].unique()
@@ -99,8 +102,7 @@ def build_eval_candidates(
     rows = []
 
     user_hist_items = {
-        u: df_all[df_all[user_col] == u][item_col].unique()
-        for u in users
+        u: df_all[df_all[user_col] == u][item_col].unique() for u in users
     }
 
     for u in users:
@@ -112,7 +114,9 @@ def build_eval_candidates(
         pos_items = pos_items[:num_pos_per_user]
         seen_items = set(user_hist_items[u])
 
-        neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
+        neg_pool = np.setdiff1d(
+            all_items, np.fromiter(seen_items, dtype=all_items.dtype)
+        )
         if len(neg_pool) == 0:
             continue
 
@@ -127,6 +131,6 @@ def build_eval_candidates(
             rows.append((u, ni, 0))
 
     eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
-    eval_df = eval_df.merge(user_features, on=user_col, how="left")
-    eval_df = eval_df.merge(item_features, on=item_col, how="left")
+    eval_df = eval_df.merge(user_features, on=user_col, how="left")
+    eval_df = eval_df.merge(item_features, on=item_col, how="left")
     return eval_df
```
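Only the tail of `build_eval_candidates`' signature is visible in these hunks; the keyword names in the sketch below are inferred from the function body (`df_all`, `user_col`, `item_col`, `label_col`, `user_features`, `item_features`, `num_pos_per_user`) and should be treated as assumptions, not the confirmed parameter list:

```python
# Hedged sketch: keyword names are inferred from the function body in the
# diff above and are assumptions, not the confirmed signature.
import pandas as pd

from nextrec.data import build_eval_candidates

df_all = pd.DataFrame(
    {"user_id": [1, 1, 2, 2], "item_id": [10, 11, 10, 12], "label": [1, 1, 1, 1]}
)
user_features = pd.DataFrame({"user_id": [1, 2], "age": [23, 31]})
item_features = pd.DataFrame({"item_id": [10, 11, 12], "price": [9.9, 4.5, 7.0]})

eval_df = build_eval_candidates(
    df_all=df_all,
    user_col="user_id",
    item_col="item_id",
    label_col="label",
    user_features=user_features,
    item_features=item_features,
    num_pos_per_user=1,
    num_neg_per_pos=2,
    random_seed=2025,
)
# Expected result: one row per (user, candidate) with label 1 for held-out
# positives and 0 for sampled negatives, left-merged with both feature tables.
```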