nextrec 0.4.32__py3-none-any.whl → 0.4.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextrec/loss/grad_norm.py CHANGED
@@ -2,12 +2,40 @@
  GradNorm loss weighting for multi-task learning.
 
  Date: create on 27/10/2025
- Checkpoint: edit on 24/12/2025
+ Checkpoint: edit on 22/01/2026
  Author: Yang Zhou, zyaztec@gmail.com
 
  Reference:
  Chen, Zhao, et al. "GradNorm: Gradient Normalization for Adaptive Loss Balancing
  in Deep Multitask Networks." ICML 2018.
+
+ Pseudocode:
+ ---
+ Initialize w_i = 1
+ Record L_i(0)
+
+ for each step:
+ 1. Forward: compute each task loss L_i
+ 2. Compute G_i = ||∇_W (w_i * L_i)||
+ 3. Compute r_i = (L_i / L_i(0)) / mean_j(L_j / L_j(0))
+ 4. Compute target: Ĝ_i = mean(G) * r_i^α
+ 5. L_grad = sum_i |G_i - Ĝ_i|
+ 6. Update w_i using ∇ L_grad
+ 7. Backprop with sum_i (w_i * L_i) to update the model
+
  """
 
  from __future__ import annotations
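The pseudocode added in this hunk maps directly onto a short training loop. The sketch below is a minimal illustration of that loop, not the package's GradNormLossWeighting API: the two-task toy model, the optimizer settings, and the final weight renormalization are all assumptions made for the example.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

trunk = nn.Linear(8, 16)                                    # shared parameters W
heads = nn.ModuleList([nn.Linear(16, 1) for _ in range(2)])
w = nn.Parameter(torch.ones(2))                             # task weights w_i, initialized to 1
opt_w = torch.optim.Adam([w], lr=0.025)
opt_model = torch.optim.Adam([*trunk.parameters(), *heads.parameters()], lr=1e-3)
alpha, initial_losses = 1.5, None

for step in range(10):
    x, targets = torch.randn(32, 8), [torch.randn(32, 1) for _ in heads]
    feats = trunk(x)
    losses = [F.mse_loss(head(feats), t) for head, t in zip(heads, targets)]
    L = torch.stack(losses)
    if initial_losses is None:
        initial_losses = L.detach()                         # record L_i(0)

    shared = list(trunk.parameters())
    # G_i = ||grad_W (w_i * L_i)|| over the shared parameters
    G = torch.stack([
        torch.cat([g.flatten() for g in torch.autograd.grad(
            w[i] * losses[i], shared, retain_graph=True, create_graph=True)]).norm()
        for i in range(len(losses))
    ])
    r = L.detach() / initial_losses                         # loss ratios L_i / L_i(0)
    r = r / r.mean()                                        # inverse training rate r_i
    target = (G.mean() * r ** alpha).detach()               # target norms Ĝ_i, held constant
    grad_loss = (G - target).abs().sum()                    # L_grad

    opt_w.zero_grad()
    grad_loss.backward(retain_graph=True)                   # step 6: update w_i with ∇L_grad
    opt_w.step()

    opt_model.zero_grad()
    (w.detach() * L).sum().backward()                       # step 7: backprop Σ w_i · L_i
    opt_model.step()

    with torch.no_grad():                                   # renormalize so Σ w_i = num_tasks (assumed)
        w.clamp_(min=1e-6)
        w.mul_(len(losses) / w.sum())
```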
@@ -15,6 +43,7 @@ from __future__ import annotations
  from typing import Iterable
 
  import torch
+ import torch.distributed as dist
  import torch.nn as nn
  import torch.nn.functional as F
 
@@ -23,7 +52,15 @@ def get_grad_norm_shared_params(
      model,
      shared_modules=None,
  ):
+     """
+     Get shared parameters for GradNorm.
+
+     Args:
+         model: A PyTorch model instance with a grad_norm_shared_modules attribute.
+         shared_modules: Optional list of module names to consider as shared.
+     """
      if not shared_modules:
+         # If no specific shared modules are provided, treat all trainable parameters as shared
          return [p for p in model.parameters() if p.requires_grad]
      shared_params = []
      seen = set()
@@ -35,26 +72,10 @@ def get_grad_norm_shared_params(
              if param.requires_grad and id(param) not in seen:
                  shared_params.append(param)
                  seen.add(id(param))
-     if not shared_params:
-         return [p for p in model.parameters() if p.requires_grad]
      return shared_params
 
 
  class GradNormLossWeighting:
-     """
-     Adaptive multi-task loss weighting with GradNorm.
-
-     Args:
-         nums_task: Number of tasks.
-         alpha: GradNorm balancing strength.
-         lr: Learning rate for the weight optimizer.
-         init_weights: Optional initial weights per task.
-         device: Torch device for weights.
-         ema_decay: Optional EMA decay for smoothing loss ratios.
-         init_ema_steps: Number of steps to build EMA for initial losses.
-         init_ema_decay: EMA decay for initial losses when init_ema_steps > 0.
-         eps: Small value for numerical stability.
-     """
 
      def __init__(
          self,
@@ -63,58 +84,43 @@ class GradNormLossWeighting:
          lr: float = 0.025,
          init_weights: Iterable[float] | None = None,
          device: torch.device | str | None = None,
-         ema_decay: float | None = None,
-         init_ema_steps: int = 0,
-         init_ema_decay: float = 0.9,
          eps: float = 1e-8,
      ) -> None:
+         """
+         Adaptive multi-task loss weighting with GradNorm.
+
+         Args:
+             nums_task: Number of tasks.
+             alpha: GradNorm balancing strength.
+             lr: Learning rate for the weight optimizer.
+             init_weights: Optional initial weights per task.
+             device: Torch device for weights.
+             eps: Small value for numerical stability.
+         """
+
          if nums_task <= 1:
              raise ValueError("GradNorm requires nums_task > 1.")
+
          self.nums_task = nums_task
          self.alpha = alpha
          self.eps = eps
-         if ema_decay is not None:
-             ema_decay = ema_decay
-             if ema_decay < 0.0 or ema_decay >= 1.0:
-                 raise ValueError("ema_decay must be in [0.0, 1.0).")
-         self.ema_decay = ema_decay
-         self.init_ema_steps = init_ema_steps
-         if self.init_ema_steps < 0:
-             raise ValueError("init_ema_steps must be >= 0.")
-         self.init_ema_decay = init_ema_decay
-         if self.init_ema_decay < 0.0 or self.init_ema_decay >= 1.0:
-             raise ValueError("init_ema_decay must be in [0.0, 1.0).")
-         self.init_ema_count = 0
 
          if init_weights is None:
              weights = torch.ones(self.nums_task, dtype=torch.float32)
          else:
              weights = torch.tensor(list(init_weights), dtype=torch.float32)
-             if weights.numel() != self.nums_task:
-                 raise ValueError(
-                     "init_weights length must match nums_task for GradNorm."
-                 )
+
          if device is not None:
              weights = weights.to(device)
          self.weights = nn.Parameter(weights)
          self.optimizer = torch.optim.Adam([self.weights], lr=float(lr))
 
          self.initial_losses = None
-         self.initial_losses_ema = None
-         self.loss_ema = None
          self.pending_grad = None
 
-     def to(self, device):
-         device = torch.device(device)
-         self.weights.data = self.weights.data.to(device)
-         if self.initial_losses is not None:
-             self.initial_losses = self.initial_losses.to(device)
-         if self.initial_losses_ema is not None:
-             self.initial_losses_ema = self.initial_losses_ema.to(device)
-         if self.loss_ema is not None:
-             self.loss_ema = self.loss_ema.to(device)
-         return self
-
      def compute_weighted_loss(
          self,
          task_losses: list[torch.Tensor],
@@ -122,6 +128,8 @@ class GradNormLossWeighting:
      ) -> torch.Tensor:
          """
          Return weighted total loss and update task weights with GradNorm.
+
+         BaseModel uses this method to compute the weighted loss when self.grad_norm is enabled.
          """
          if len(task_losses) != self.nums_task:
              raise ValueError(
@@ -136,19 +144,7 @@ class GradNormLossWeighting:
              [loss.item() for loss in task_losses], device=self.weights.device
          )
          if self.initial_losses is None:
-             if self.init_ema_steps > 0:
-                 if self.initial_losses_ema is None:
-                     self.initial_losses_ema = loss_values
-                 else:
-                     self.initial_losses_ema = (
-                         self.init_ema_decay * self.initial_losses_ema
-                         + (1.0 - self.init_ema_decay) * loss_values
-                     )
-                 self.init_ema_count += 1
-                 if self.init_ema_count >= self.init_ema_steps:
-                     self.initial_losses = self.initial_losses_ema.clone()
-             else:
-                 self.initial_losses = loss_values
+             self.initial_losses = loss_values.clone()
 
          weights_detached = self.weights.detach()
          weighted_losses = [
@@ -157,25 +153,14 @@ class GradNormLossWeighting:
          total_loss = torch.stack(weighted_losses).sum()
 
          grad_norms = self.compute_grad_norms(task_losses, shared_params)
+
+         # compute the inverse training rate: inv_rate = loss_ratio / mean(loss_ratio)
          with torch.no_grad():
-             if self.ema_decay is not None:
-                 if self.loss_ema is None:
-                     self.loss_ema = loss_values
-                 else:
-                     self.loss_ema = (
-                         self.ema_decay * self.loss_ema
-                         + (1.0 - self.ema_decay) * loss_values
-                     )
-                 ratio_source = self.loss_ema
-             else:
-                 ratio_source = loss_values
              if self.initial_losses is not None:
                  base_initial = self.initial_losses
-             elif self.initial_losses_ema is not None:
-                 base_initial = self.initial_losses_ema
              else:
                  base_initial = loss_values
-             loss_ratios = ratio_source / (base_initial + self.eps)
+             loss_ratios = loss_values / (base_initial + self.eps)
              inv_rate = loss_ratios / (loss_ratios.mean() + self.eps)
              target = grad_norms.mean() * (inv_rate**self.alpha)
 
@@ -187,6 +172,7 @@ class GradNormLossWeighting:
 
      def compute_grad_norms(self, task_losses, shared_params):
          grad_norms = []
+         # compute the gradient norm for each task: grad_norm = sqrt(sum(grad^2))
          for i, task_loss in enumerate(task_losses):
              grads = torch.autograd.grad(
                  self.weights[i] * task_loss,
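The new comment in this hunk describes a per-task L2 norm taken over all shared parameters. Below is a standalone sketch of that quantity, assuming a scalar loss and a plain parameter list rather than the class's actual method:

```python
import torch

def grad_l2_norm(loss: torch.Tensor, params: list[torch.nn.Parameter]) -> torch.Tensor:
    """sqrt(sum(grad^2)) over all shared parameters, keeping the graph so GradNorm can differentiate it."""
    grads = torch.autograd.grad(loss, params, retain_graph=True, create_graph=True)
    return torch.sqrt(sum((g ** 2).sum() for g in grads))
```

The value equals `torch.cat([g.flatten() for g in grads]).norm()`, i.e. the L2 norm of the concatenated gradient vector.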
@@ -230,3 +216,19 @@ class GradNormLossWeighting:
              self.weights.copy_(w)
 
          self.pending_grad = None
+
+     def sync(self) -> None:
+         """
+         Synchronize GradNorm buffers across DDP ranks.
+
+         - pending_grad: averaged so all ranks update weights consistently
+         - initial_losses: averaged so the baseline loss is consistent
+         """
+
+         world_size = dist.get_world_size()
+         if self.pending_grad is not None:
+             dist.all_reduce(self.pending_grad, op=dist.ReduceOp.SUM)
+             self.pending_grad /= world_size
+         if self.initial_losses is not None:
+             dist.all_reduce(self.initial_losses, op=dist.ReduceOp.SUM)
+             self.initial_losses /= world_size
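The SUM-then-divide pattern used in the new sync() method is the usual way to average a tensor across ranks with torch.distributed. A minimal standalone illustration, assuming a process group has already been initialized elsewhere:

```python
import torch
import torch.distributed as dist

def average_across_ranks(t: torch.Tensor) -> torch.Tensor:
    """All-reduce with SUM, then divide by world size: an element-wise mean over ranks."""
    dist.all_reduce(t, op=dist.ReduceOp.SUM)  # in-place: every rank ends up with the global sum
    t /= dist.get_world_size()                # turn the sum into a mean
    return t
```

Because every rank ends up with the same averaged pending_grad and initial_losses, the per-rank weight optimizers stay consistent without broadcasting the weights themselves.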
@@ -54,6 +54,7 @@ from nextrec.basic.model import BaseModel
  from nextrec.utils.model import get_mlp_output_dim
  from nextrec.utils.types import TaskTypeInput
 
+
  class CGCLayer(nn.Module):
      """
      CGC (Customized Gate Control) block used by PLE.
@@ -45,6 +45,7 @@ from nextrec.basic.heads import TaskHead
  from nextrec.basic.model import BaseModel
  from nextrec.utils.types import TaskTypeInput
 
+
  class ShareBottom(BaseModel):
      @property
      def model_name(self):
@@ -29,7 +29,7 @@ from nextrec.data.dataloader import RecDataLoader
  from nextrec.data.data_processing import get_column_data
  from nextrec.utils.console import display_metrics_table
  from nextrec.utils.data import FILE_FORMAT_CONFIG, check_streaming_support
- from nextrec.utils.feature import to_list
+ from nextrec.utils.torch_utils import to_list
  from nextrec.utils.torch_utils import to_numpy
 
 
nextrec/utils/__init__.py CHANGED
@@ -36,7 +36,7 @@ from .data import (
      resolve_file_paths,
  )
  from .embedding import get_auto_embedding_dim
- from .feature import to_list
+ from .torch_utils import as_float, to_list
  from .model import (
      compute_pair_scores,
      get_mlp_output_dim,
@@ -90,6 +90,7 @@ __all__ = [
      "normalize_task_loss",
      # Feature utilities
      "to_list",
+     "as_float",
      # Config utilities
      "resolve_path",
      "safe_value",
nextrec/utils/config.py CHANGED
@@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Tuple
  import pandas as pd
  import torch
 
- from nextrec.utils.feature import to_list
+ from nextrec.utils.torch_utils import to_list
 
  if TYPE_CHECKING:
      from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
nextrec/utils/console.py CHANGED
@@ -36,7 +36,7 @@ from rich.progress import (
  from rich.table import Table
  from rich.text import Text
 
- from nextrec.utils.feature import as_float, to_list
+ from nextrec.utils.torch_utils import as_float, to_list
 
  T = TypeVar("T")
 
@@ -5,14 +5,15 @@ This module groups device setup, distributed helpers, optimizers/schedulers,
  initialization, and tensor helpers.
 
  Date: create on 27/10/2025
- Checkpoint: edit on 27/12/2025
+ Checkpoint: edit on 22/01/2026
  Author: Yang Zhou, zyaztec@gmail.com
  """
 
  from __future__ import annotations
 
  import logging
- from typing import Any, Dict, Iterable, Literal
+ import numbers
+ from typing import Any, Dict, Iterable
 
  import numpy as np
  import torch
@@ -22,7 +23,55 @@ from torch.utils.data import DataLoader, IterableDataset
  from torch.utils.data.distributed import DistributedSampler
 
  from nextrec.basic.loggers import colorize
- from nextrec.utils.types import OptimizerName, SchedulerName
+ from nextrec.utils.types import (
+     EmbeddingInitType,
+     InitializerActivationType,
+     OptimizerName,
+     SchedulerName,
+ )
+
+
+ def to_list(value: str | list[str] | None) -> list[str]:
+     if value is None:
+         return []
+     if isinstance(value, str):
+         return [value]
+     return list(value)
+
+
+ def as_float(value: Any) -> float | None:
+     if isinstance(value, numbers.Number):
+         return float(value)
+     if hasattr(value, "item"):
+         try:
+             return float(value.item())
+         except Exception:
+             return None
+     return None
+
+
+ def to_numpy(values: Any) -> np.ndarray:
+     if isinstance(values, torch.Tensor):
+         return values.detach().cpu().numpy()
+     return np.asarray(values)
+
+
+ def to_tensor(
+     value: Any, dtype: torch.dtype, device: torch.device | str | None = None
+ ) -> torch.Tensor:
+     if value is None:
+         raise ValueError("[Tensor Utils Error] Cannot convert None to tensor.")
+     tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
+     if tensor.dtype != dtype:
+         tensor = tensor.to(dtype=dtype)
+
+     if device is not None:
+         target_device = (
+             device if isinstance(device, torch.device) else torch.device(device)
+         )
+         if tensor.device != target_device:
+             tensor = tensor.to(target_device)
+     return tensor
 
 
  def resolve_nonlinearity(activation: str) -> str:
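The helpers added in this hunk are small enough that their behavior is fully visible above; a quick usage sketch follows, with the import path inferred from the updated imports elsewhere in this diff:

```python
import torch
from nextrec.utils.torch_utils import as_float, to_list, to_numpy, to_tensor

to_list(None)                  # []
to_list("user_id")             # ["user_id"]
to_list(["age", "gender"])     # ["age", "gender"]

as_float(3)                    # 3.0
as_float(torch.tensor(0.25))   # 0.25, via .item()
as_float("n/a")                # None

to_numpy(torch.arange(3))      # array([0, 1, 2])
to_tensor([1, 2, 3], dtype=torch.float32, device="cpu")  # tensor([1., 2., 3.])
```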
@@ -56,30 +105,8 @@ def resolve_gain(activation: str, param: Dict[str, Any]) -> float:
 
 
  def get_initializer(
-     init_type: Literal[
-         "xavier_uniform",
-         "xavier_normal",
-         "kaiming_uniform",
-         "kaiming_normal",
-         "orthogonal",
-         "normal",
-         "uniform",
-     ] = "normal",
-     activation: Literal[
-         "linear",
-         "conv1d",
-         "conv2d",
-         "conv3d",
-         "conv_transpose1d",
-         "conv_transpose2d",
-         "conv_transpose3d",
-         "sigmoid",
-         "tanh",
-         "relu",
-         "leaky_relu",
-         "selu",
-         "gelu",
-     ] = "linear",
+     init_type: EmbeddingInitType = "normal",
+     activation: InitializerActivationType = "linear",
      param: Dict[str, Any] | None = None,
  ):
      param = param or {}
@@ -108,7 +135,7 @@ def get_initializer(
          elif init_type == "uniform":
              nn.init.uniform_(tensor, a=param.get("a", -0.05), b=param.get("b", 0.05))
          else:
-             raise ValueError(f"Unknown init_type: {init_type}")
+             raise ValueError(f"[Initializer Error] Unknown init_type: {init_type}")
          return tensor
 
      return initializer_fn
@@ -172,12 +199,14 @@ def get_optimizer(
          elif opt_name == "rmsprop":
              opt_class = torch.optim.RMSprop
          else:
-             raise NotImplementedError(f"Unsupported optimizer: {optimizer}")
+             raise NotImplementedError(
+                 f"[Optimizer Error] Unsupported optimizer: {optimizer}"
+             )
          optimizer_fn = opt_class(params=params, **optimizer_params)
      elif isinstance(optimizer, torch.optim.Optimizer):
          optimizer_fn = optimizer
      else:
-         raise TypeError(f"Invalid optimizer type: {type(optimizer)}")
+         raise TypeError(f"[Optimizer Error] Invalid optimizer type: {type(optimizer)}")
      return optimizer_fn
 
 
@@ -203,7 +232,9 @@ def get_scheduler(
                  optimizer, **scheduler_params
              )
          else:
-             raise NotImplementedError(f"Unsupported scheduler: {scheduler}")
+             raise NotImplementedError(
+                 f"[Scheduler Error] Unsupported scheduler: {scheduler}"
+             )
      elif isinstance(scheduler, type) and issubclass(
          scheduler,
          (torch.optim.lr_scheduler._LRScheduler, torch.optim.lr_scheduler.LRScheduler),
@@ -215,35 +246,11 @@ def get_scheduler(
      ):
          scheduler_fn = scheduler
      else:
-         raise TypeError(f"Invalid scheduler type: {type(scheduler)}")
+         raise TypeError(f"[Scheduler Error] Invalid scheduler type: {type(scheduler)}")
 
      return scheduler_fn
 
 
- def to_numpy(values: Any) -> np.ndarray:
-     if isinstance(values, torch.Tensor):
-         return values.detach().cpu().numpy()
-     return np.asarray(values)
-
-
- def to_tensor(
-     value: Any, dtype: torch.dtype, device: torch.device | str | None = None
- ) -> torch.Tensor:
-     if value is None:
-         raise ValueError("[Tensor Utils Error] Cannot convert None to tensor.")
-     tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
-     if tensor.dtype != dtype:
-         tensor = tensor.to(dtype=dtype)
-
-     if device is not None:
-         target_device = (
-             device if isinstance(device, torch.device) else torch.device(device)
-         )
-         if tensor.device != target_device:
-             tensor = tensor.to(target_device)
-     return tensor
-
-
  def init_process_group(
      distributed: bool, rank: int, world_size: int, device_id: int | None = None
  ) -> None:
nextrec/utils/types.py CHANGED
@@ -64,6 +64,40 @@ TaskTypeName = Literal["binary", "regression"]
 
  TaskTypeInput = TaskTypeName | str
 
+ EmbeddingInitType = Literal[
+     "normal",
+     "uniform",
+     "xavier_uniform",
+     "xavier_normal",
+     "kaiming_uniform",
+     "kaiming_normal",
+     "orthogonal",
+ ]
+
+ SequenceCombinerType = Literal[
+     "mean",
+     "sum",
+     "concat",
+     "dot_attention",
+     "self_attention",
+ ]
+
+ InitializerActivationType = Literal[
+     "linear",
+     "conv1d",
+     "conv2d",
+     "conv3d",
+     "conv_transpose1d",
+     "conv_transpose2d",
+     "conv_transpose3d",
+     "sigmoid",
+     "tanh",
+     "relu",
+     "leaky_relu",
+     "selu",
+     "gelu",
+ ]
+
  MetricsName = Literal[
      "auc",
      "gauc",
@@ -97,4 +131,13 @@ MetricsName = Literal[
      "mrr@5",
      "mrr@10",
      "mrr@20",
+     "topk_recall@5",
+     "topk_recall@10",
+     "topk_recall@20",
+     "topk_precision@5",
+     "topk_precision@10",
+     "topk_precision@20",
+     "lift@5",
+     "lift@10",
+     "lift@20",
  ]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nextrec
- Version: 0.4.32
+ Version: 0.4.34
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -69,7 +69,7 @@ Description-Content-Type: text/markdown
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
- ![Version](https://img.shields.io/badge/Version-0.4.32-orange.svg)
+ ![Version](https://img.shields.io/badge/Version-0.4.34-orange.svg)
  [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/zerolovesea/NextRec)
 
  Chinese Documentation | [English Version](README_en.md)
@@ -254,11 +254,11 @@ nextrec --mode=predict --predict_config=path/to/predict_config.yaml
 
  Prediction results are always saved to `{checkpoint_path}/predictions/{name}.{save_data_format}`.
 
- > As of the current version 0.4.32, the NextRec CLI supports single-machine training; distributed training features are still under development.
+ > As of the current version 0.4.34, the NextRec CLI supports single-machine training; distributed training features are still under development.
 
  ## Supported Platforms
 
- The current latest version is 0.4.32. All models and test code have been verified on the platforms below; if you run into compatibility issues, please open an issue with a bug report and your system version:
+ The current latest version is 0.4.34. All models and test code have been verified on the platforms below; if you run into compatibility issues, please open an issue with a bug report and your system version:
 
  | Platform | Configuration |
  |------|------|