nextrec-0.1.1-py3-none-any.whl → nextrec-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -9
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/dataloader.py +168 -127
  6. nextrec/basic/features.py +24 -27
  7. nextrec/basic/layers.py +328 -159
  8. nextrec/basic/loggers.py +50 -37
  9. nextrec/basic/metrics.py +255 -147
  10. nextrec/basic/model.py +817 -462
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +16 -12
  13. nextrec/data/preprocessor.py +276 -252
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +30 -22
  16. nextrec/loss/match_losses.py +116 -83
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +70 -61
  19. nextrec/models/match/dssm_v2.py +61 -51
  20. nextrec/models/match/mind.py +89 -71
  21. nextrec/models/match/sdm.py +93 -81
  22. nextrec/models/match/youtube_dnn.py +62 -53
  23. nextrec/models/multi_task/esmm.py +49 -43
  24. nextrec/models/multi_task/mmoe.py +65 -56
  25. nextrec/models/multi_task/ple.py +92 -65
  26. nextrec/models/multi_task/share_bottom.py +48 -42
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +39 -30
  29. nextrec/models/ranking/autoint.py +70 -57
  30. nextrec/models/ranking/dcn.py +43 -35
  31. nextrec/models/ranking/deepfm.py +34 -28
  32. nextrec/models/ranking/dien.py +115 -79
  33. nextrec/models/ranking/din.py +84 -60
  34. nextrec/models/ranking/fibinet.py +51 -35
  35. nextrec/models/ranking/fm.py +28 -26
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +30 -31
  38. nextrec/models/ranking/widedeep.py +36 -31
  39. nextrec/models/ranking/xdeepfm.py +46 -39
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +23 -15
  43. nextrec/utils/optimizer.py +14 -10
  44. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
  45. nextrec-0.1.2.dist-info/RECORD +51 -0
  46. nextrec-0.1.1.dist-info/RECORD +0 -51
  47. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/data/__init__.py CHANGED
@@ -19,9 +19,9 @@ from nextrec.data.data_utils import (
 from nextrec.data import data_utils
 
 __all__ = [
-    'collate_fn',
-    'get_column_data',
-    'split_dict_random',
-    'build_eval_candidates',
-    'data_utils',
+    "collate_fn",
+    "get_column_data",
+    "split_dict_random",
+    "build_eval_candidates",
+    "data_utils",
 ]
nextrec/data/data_utils.py CHANGED
@@ -10,6 +10,7 @@ import torch
 import numpy as np
 import pandas as pd
 
+
 def collate_fn(batch):
     """
     Custom collate function for batching tuples of tensors.
@@ -30,12 +31,12 @@ def collate_fn(batch):
 
     num_tensors = len(batch[0])
     result = []
-
+
     for i in range(num_tensors):
         tensor_list = [item[i] for item in batch]
         stacked = torch.cat(tensor_list, dim=0)
         result.append(stacked)
-
+
     return tuple(result)
 
 
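For reference, the collate_fn touched above concatenates each tensor slot across batch items with torch.cat along dim=0 rather than stacking a new axis, so every dataset item is expected to already carry a leading batch dimension. A minimal sketch of that behavior, with hypothetical shapes:

import torch

# Each item is a tuple of tensors that already have a leading batch axis.
batch = [
    (torch.randn(1, 8), torch.tensor([1.0])),
    (torch.randn(1, 8), torch.tensor([0.0])),
]

# Mirrors the loop in the hunk above: gather slot i from every item,
# then concatenate along the existing batch axis.
num_tensors = len(batch[0])
result = []
for i in range(num_tensors):
    result.append(torch.cat([item[i] for item in batch], dim=0))

features, labels = tuple(result)
print(features.shape, labels.shape)  # torch.Size([2, 8]) torch.Size([2])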
@@ -53,7 +54,9 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
     raise KeyError(f"Unsupported data type for extracting column {name}")
 
 
-def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
+def split_dict_random(
+    data_dict: dict, test_size: float = 0.2, random_state: int | None = None
+):
     """Randomly split a dictionary of data into training and testing sets."""
     lengths = [len(v) for v in data_dict.values()]
     if len(set(lengths)) != 1:
@@ -69,13 +72,13 @@ def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|No
         if isinstance(v, np.ndarray):
             return v[idx]
         elif isinstance(v, pd.Series):
-            return v.iloc[idx].to_numpy()
+            return v.iloc[idx].to_numpy()
         else:
-            v_arr = np.asarray(v, dtype=object)
+            v_arr = np.asarray(v, dtype=object)
             return v_arr[idx]
 
     train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
-    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
+    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
     return train_dict, test_dict
 
 
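The reformatting above is behavior-preserving: split_dict_random validates that every value in the dict has the same length, draws a random index split, and routes each value through take(), which handles np.ndarray, pd.Series, and plain sequences. A hedged usage sketch (the column names are made up for illustration):

import numpy as np
import pandas as pd
from nextrec.data.data_utils import split_dict_random

data = {
    "user_id": np.arange(10),                 # ndarray: indexed directly
    "item_id": pd.Series(np.arange(10, 20)),  # Series: .iloc then .to_numpy()
    "label": [0, 1] * 5,                      # list: coerced via np.asarray
}
train, test = split_dict_random(data, test_size=0.2, random_state=42)
print(len(train["user_id"]), len(test["user_id"]))  # expected 8 and 2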
@@ -90,7 +93,7 @@ def build_eval_candidates(
     num_neg_per_pos: int = 50,
     random_seed: int = 2025,
 ) -> pd.DataFrame:
-    """Build evaluation candidates with positive and negative samples for each user. """
+    """Build evaluation candidates with positive and negative samples for each user."""
     rng = np.random.default_rng(random_seed)
 
     users = df_all[user_col].unique()
@@ -99,8 +102,7 @@ def build_eval_candidates(
     rows = []
 
     user_hist_items = {
-        u: df_all[df_all[user_col] == u][item_col].unique()
-        for u in users
+        u: df_all[df_all[user_col] == u][item_col].unique() for u in users
     }
 
     for u in users:
@@ -112,7 +114,9 @@ def build_eval_candidates(
         pos_items = pos_items[:num_pos_per_user]
         seen_items = set(user_hist_items[u])
 
-        neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
+        neg_pool = np.setdiff1d(
+            all_items, np.fromiter(seen_items, dtype=all_items.dtype)
+        )
         if len(neg_pool) == 0:
             continue
 
@@ -127,6 +131,6 @@ def build_eval_candidates(
             rows.append((u, ni, 0))
 
     eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
-    eval_df = eval_df.merge(user_features, on=user_col, how='left')
-    eval_df = eval_df.merge(item_features, on=item_col, how='left')
+    eval_df = eval_df.merge(user_features, on=user_col, how="left")
+    eval_df = eval_df.merge(item_features, on=item_col, how="left")
     return eval_df
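Only part of build_eval_candidates' signature is visible in these hunks (num_neg_per_pos=50 and random_seed=2025 are shown; df_all, user_col, item_col, label_col, user_features, item_features, and num_pos_per_user appear in the body), so the call below is an assumption pieced together from the body rather than a documented API: an interaction frame plus per-user and per-item feature frames that get merged back onto the sampled rows.

import pandas as pd
from nextrec.data.data_utils import build_eval_candidates

df_all = pd.DataFrame({
    "user_id": [1, 1, 1, 2, 2],
    "item_id": [10, 11, 12, 10, 13],
    "label": [1, 1, 1, 1, 1],
})
user_features = pd.DataFrame({"user_id": [1, 2], "age": [25, 31]})
item_features = pd.DataFrame({"item_id": [10, 11, 12, 13], "price": [9.9, 4.5, 7.0, 3.2]})

# Hypothetical call shape: argument names other than num_neg_per_pos and
# random_seed are inferred from the function body, not a visible signature.
eval_df = build_eval_candidates(
    df_all,
    user_features=user_features,
    item_features=item_features,
    user_col="user_id",
    item_col="item_id",
    label_col="label",
    num_pos_per_user=1,
    num_neg_per_pos=1,
    random_seed=2025,
)
# Rows are (user, item, label) candidates, with label 1 for held positives
# and 0 for sampled negatives, plus left-joined user/item features.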