nextrec 0.1.4__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +9 -10
  4. nextrec/basic/callback.py +0 -1
  5. nextrec/basic/dataloader.py +127 -168
  6. nextrec/basic/features.py +27 -24
  7. nextrec/basic/layers.py +159 -328
  8. nextrec/basic/loggers.py +37 -50
  9. nextrec/basic/metrics.py +147 -255
  10. nextrec/basic/model.py +462 -817
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +12 -16
  13. nextrec/data/preprocessor.py +252 -276
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +22 -30
  16. nextrec/loss/match_losses.py +83 -116
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +61 -70
  19. nextrec/models/match/dssm_v2.py +51 -61
  20. nextrec/models/match/mind.py +71 -89
  21. nextrec/models/match/sdm.py +81 -93
  22. nextrec/models/match/youtube_dnn.py +53 -62
  23. nextrec/models/multi_task/esmm.py +43 -49
  24. nextrec/models/multi_task/mmoe.py +56 -65
  25. nextrec/models/multi_task/ple.py +65 -92
  26. nextrec/models/multi_task/share_bottom.py +42 -48
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +30 -39
  29. nextrec/models/ranking/autoint.py +57 -70
  30. nextrec/models/ranking/dcn.py +35 -43
  31. nextrec/models/ranking/deepfm.py +28 -34
  32. nextrec/models/ranking/dien.py +79 -115
  33. nextrec/models/ranking/din.py +60 -84
  34. nextrec/models/ranking/fibinet.py +35 -51
  35. nextrec/models/ranking/fm.py +26 -28
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +31 -30
  38. nextrec/models/ranking/widedeep.py +31 -36
  39. nextrec/models/ranking/xdeepfm.py +39 -46
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +15 -23
  43. nextrec/utils/optimizer.py +10 -14
  44. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/METADATA +16 -7
  45. nextrec-0.1.7.dist-info/RECORD +51 -0
  46. nextrec-0.1.4.dist-info/RECORD +0 -51
  47. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/licenses/LICENSE +0 -0
nextrec/data/__init__.py CHANGED
@@ -19,9 +19,9 @@ from nextrec.data.data_utils import (
 from nextrec.data import data_utils
 
 __all__ = [
-    "collate_fn",
-    "get_column_data",
-    "split_dict_random",
-    "build_eval_candidates",
-    "data_utils",
+    'collate_fn',
+    'get_column_data',
+    'split_dict_random',
+    'build_eval_candidates',
+    'data_utils',
 ]
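The hunk above only swaps double quotes for single quotes in __all__; the exported names are unchanged. As a quick sanity sketch (assuming the package is installed), the re-exported surface can be imported directly from nextrec.data:

from nextrec.data import (
    collate_fn,
    get_column_data,
    split_dict_random,
    build_eval_candidates,
    data_utils,
)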
nextrec/data/data_utils.py CHANGED
Note: several +/- pairs below differ only in whitespace; the visible text is identical.
@@ -10,7 +10,6 @@ import torch
 import numpy as np
 import pandas as pd
 
-
 def collate_fn(batch):
     """
     Custom collate function for batching tuples of tensors.
@@ -31,12 +30,12 @@ def collate_fn(batch):
 
     num_tensors = len(batch[0])
     result = []
-
+
     for i in range(num_tensors):
         tensor_list = [item[i] for item in batch]
         stacked = torch.cat(tensor_list, dim=0)
         result.append(stacked)
-
+
     return tuple(result)
 
 
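Only blank-line whitespace changed inside collate_fn. For context, a hedged usage sketch: the function concatenates with torch.cat along dim 0 rather than torch.stack, so each dataset item is expected to already carry a leading batch dimension. PairDataset below is a hypothetical toy dataset written to that contract:

import torch
from torch.utils.data import DataLoader, Dataset
from nextrec.data import collate_fn

class PairDataset(Dataset):
    # Each item is a tuple of tensors with a leading batch dimension of 1,
    # so collate_fn's torch.cat(..., dim=0) yields proper batch shapes.
    def __len__(self):
        return 8

    def __getitem__(self, i):
        return torch.full((1, 3), float(i)), torch.tensor([i])

loader = DataLoader(PairDataset(), batch_size=4, collate_fn=collate_fn)
features, labels = next(iter(loader))
print(features.shape, labels.shape)  # torch.Size([4, 3]) torch.Size([4])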
@@ -54,9 +53,7 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
     raise KeyError(f"Unsupported data type for extracting column {name}")
 
 
-def split_dict_random(
-    data_dict: dict, test_size: float = 0.2, random_state: int | None = None
-):
+def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
     """Randomly split a dictionary of data into training and testing sets."""
     lengths = [len(v) for v in data_dict.values()]
     if len(set(lengths)) != 1:
@@ -72,13 +69,13 @@ def split_dict_random(
         if isinstance(v, np.ndarray):
             return v[idx]
         elif isinstance(v, pd.Series):
-            return v.iloc[idx].to_numpy()
+            return v.iloc[idx].to_numpy()
         else:
-            v_arr = np.asarray(v, dtype=object)
+            v_arr = np.asarray(v, dtype=object)
             return v_arr[idx]
 
     train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
-    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
+    test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
     return train_dict, test_dict
 
 
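The signature reflow above does not change behavior. A small usage sketch (keys and values are illustrative; the mixed value types exercise the ndarray / Series / fallback branches of take):

import numpy as np
import pandas as pd
from nextrec.data import split_dict_random

data = {
    'user_id': np.arange(10),                 # ndarray branch
    'item_id': pd.Series(np.arange(10) % 3),  # Series branch
    'label': [0, 1] * 5,                      # list -> np.asarray(..., dtype=object) fallback
}
train, test = split_dict_random(data, test_size=0.2, random_state=42)
print(len(train['user_id']), len(test['user_id']))  # 8 2 with test_size=0.2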
@@ -93,7 +90,7 @@ def build_eval_candidates(
     num_neg_per_pos: int = 50,
     random_seed: int = 2025,
 ) -> pd.DataFrame:
-    """Build evaluation candidates with positive and negative samples for each user."""
+    """Build evaluation candidates with positive and negative samples for each user. """
     rng = np.random.default_rng(random_seed)
 
     users = df_all[user_col].unique()
@@ -102,7 +99,8 @@ def build_eval_candidates(
     rows = []
 
     user_hist_items = {
-        u: df_all[df_all[user_col] == u][item_col].unique() for u in users
+        u: df_all[df_all[user_col] == u][item_col].unique()
+        for u in users
     }
 
     for u in users:
@@ -114,9 +112,7 @@
         pos_items = pos_items[:num_pos_per_user]
         seen_items = set(user_hist_items[u])
 
-        neg_pool = np.setdiff1d(
-            all_items, np.fromiter(seen_items, dtype=all_items.dtype)
-        )
+        neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
         if len(neg_pool) == 0:
             continue
 
@@ -131,6 +127,6 @@
            rows.append((u, ni, 0))
 
     eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
-    eval_df = eval_df.merge(user_features, on=user_col, how="left")
-    eval_df = eval_df.merge(item_features, on=item_col, how="left")
+    eval_df = eval_df.merge(user_features, on=user_col, how='left')
+    eval_df = eval_df.merge(item_features, on=item_col, how='left')
     return eval_df
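A hedged usage sketch for build_eval_candidates. Every keyword name below appears somewhere in the hunks above, but the full signature is not visible in this diff, so treating them all as keyword parameters (and their order) is an assumption; the data values are illustrative:

import pandas as pd
from nextrec.data import build_eval_candidates

# Interaction log plus per-user / per-item feature tables.
df_all = pd.DataFrame({
    'user_id': [1, 1, 2, 2, 2],
    'item_id': [10, 11, 10, 12, 13],
})
user_features = pd.DataFrame({'user_id': [1, 2], 'age': [23, 31]})
item_features = pd.DataFrame({'item_id': [10, 11, 12, 13], 'price': [5, 9, 3, 7]})

eval_df = build_eval_candidates(
    df_all=df_all,
    user_col='user_id',
    item_col='item_id',
    label_col='label',
    user_features=user_features,
    item_features=item_features,
    num_pos_per_user=1,
    num_neg_per_pos=2,  # default is 50 per the hunk; kept small for the toy item pool
    random_seed=2025,   # default per the hunk
)
# One row per candidate: label 1 for held positives, 0 for sampled negatives,
# with user/item features left-joined on user_col / item_col.
print(eval_df)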