nextrec 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextrec/loss/grad_norm.py CHANGED
@@ -2,7 +2,7 @@
2
2
  GradNorm loss weighting for multi-task learning.
3
3
 
4
4
  Date: create on 27/10/2025
5
- Checkpoint: edit on 20/12/2025
5
+ Checkpoint: edit on 24/12/2025
6
6
  Author: Yang Zhou,zyaztec@gmail.com
7
7
 
8
8
  Reference:
@@ -45,7 +45,7 @@ class GradNormLossWeighting:
45
45
  Adaptive multi-task loss weighting with GradNorm.
46
46
 
47
47
  Args:
48
- num_tasks: Number of tasks.
48
+ nums_task: Number of tasks.
49
49
  alpha: GradNorm balancing strength.
50
50
  lr: Learning rate for the weight optimizer.
51
51
  init_weights: Optional initial weights per task.
@@ -58,7 +58,7 @@ class GradNormLossWeighting:
58
58
 
59
59
  def __init__(
60
60
  self,
61
- num_tasks: int,
61
+ nums_task: int,
62
62
  alpha: float = 1.5,
63
63
  lr: float = 0.025,
64
64
  init_weights: Iterable[float] | None = None,
@@ -68,9 +68,9 @@ class GradNormLossWeighting:
68
68
  init_ema_decay: float = 0.9,
69
69
  eps: float = 1e-8,
70
70
  ) -> None:
71
- if num_tasks <= 1:
72
- raise ValueError("GradNorm requires num_tasks > 1.")
73
- self.num_tasks = num_tasks
71
+ if nums_task <= 1:
72
+ raise ValueError("GradNorm requires nums_task > 1.")
73
+ self.nums_task = nums_task
74
74
  self.alpha = alpha
75
75
  self.eps = eps
76
76
  if ema_decay is not None:
@@ -87,12 +87,12 @@ class GradNormLossWeighting:
87
87
  self.init_ema_count = 0
88
88
 
89
89
  if init_weights is None:
90
- weights = torch.ones(self.num_tasks, dtype=torch.float32)
90
+ weights = torch.ones(self.nums_task, dtype=torch.float32)
91
91
  else:
92
92
  weights = torch.tensor(list(init_weights), dtype=torch.float32)
93
- if weights.numel() != self.num_tasks:
93
+ if weights.numel() != self.nums_task:
94
94
  raise ValueError(
95
- "init_weights length must match num_tasks for GradNorm."
95
+ "init_weights length must match nums_task for GradNorm."
96
96
  )
97
97
  if device is not None:
98
98
  weights = weights.to(device)
@@ -123,9 +123,9 @@ class GradNormLossWeighting:
123
123
  """
124
124
  Return weighted total loss and update task weights with GradNorm.
125
125
  """
126
- if len(task_losses) != self.num_tasks:
126
+ if len(task_losses) != self.nums_task:
127
127
  raise ValueError(
128
- f"Expected {self.num_tasks} task losses, got {len(task_losses)}."
128
+ f"Expected {self.nums_task} task losses, got {len(task_losses)}."
129
129
  )
130
130
  shared_params = [p for p in shared_params if p.requires_grad]
131
131
  if not shared_params:
@@ -152,7 +152,7 @@ class GradNormLossWeighting:
152
152
 
153
153
  weights_detached = self.weights.detach()
154
154
  weighted_losses = [
155
- weights_detached[i] * task_losses[i] for i in range(self.num_tasks)
155
+ weights_detached[i] * task_losses[i] for i in range(self.nums_task)
156
156
  ]
157
157
  total_loss = torch.stack(weighted_losses).sum()
158
158
 
@@ -226,7 +226,7 @@ class GradNormLossWeighting:
226
226
 
227
227
  with torch.no_grad():
228
228
  w = self.weights.clamp(min=self.eps)
229
- w = w * self.num_tasks / (w.sum() + self.eps)
229
+ w = w * self.nums_task / (w.sum() + self.eps)
230
230
  self.weights.copy_(w)
231
231
 
232
232
  self.pending_grad = None
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 29/11/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach
@@ -101,17 +101,17 @@ class ESMM(BaseModel):
101
101
  f"ESMM requires exactly 2 targets (ctr and ctcvr), got {len(target)}"
102
102
  )
103
103
 
104
- self.num_tasks = len(target)
104
+ self.nums_task = len(target)
105
105
  resolved_task = task
106
106
  if resolved_task is None:
107
107
  resolved_task = self.default_task
108
108
  elif isinstance(resolved_task, str):
109
- resolved_task = [resolved_task] * self.num_tasks
110
- elif len(resolved_task) == 1 and self.num_tasks > 1:
111
- resolved_task = resolved_task * self.num_tasks
112
- elif len(resolved_task) != self.num_tasks:
109
+ resolved_task = [resolved_task] * self.nums_task
110
+ elif len(resolved_task) == 1 and self.nums_task > 1:
111
+ resolved_task = resolved_task * self.nums_task
112
+ elif len(resolved_task) != self.nums_task:
113
113
  raise ValueError(
114
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
114
+ f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
115
115
  )
116
116
  # resolved_task is now guaranteed to be a list[str]
117
117
 
@@ -140,9 +140,7 @@ class ESMM(BaseModel):
140
140
  # CVR tower
141
141
  self.cvr_tower = MLP(input_dim=input_dim, output_layer=True, **cvr_params)
142
142
  self.grad_norm_shared_modules = ["embedding"]
143
- self.prediction_layer = TaskHead(
144
- task_type=self.default_task, task_dims=[1, 1]
145
- )
143
+ self.prediction_layer = TaskHead(task_type=self.default_task, task_dims=[1, 1])
146
144
  # Register regularization weights
147
145
  self.register_regularization_weights(
148
146
  embedding_attr="embedding", include_modules=["ctr_tower", "cvr_tower"]
@@ -168,4 +166,4 @@ class ESMM(BaseModel):
168
166
 
169
167
  # Output: [CTR, CTCVR], We supervise CTR with click labels and CTCVR with conversion labels
170
168
  y = torch.cat([ctr, ctcvr], dim=1) # [B, 2]
171
- return y # [B, 2], where y[:, 0] is CTR and y[:, 1] is CTCVR
169
+ return y # [B, 2], where y[:, 0] is CTR and y[:, 1] is CTCVR
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 29/11/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Ma J, Zhao Z, Yi X, et al. Modeling task relationships in multi-task learning with
@@ -67,9 +67,9 @@ class MMOE(BaseModel):
67
67
 
68
68
  @property
69
69
  def default_task(self):
70
- num_tasks = getattr(self, "num_tasks", None)
71
- if num_tasks is not None and num_tasks > 0:
72
- return ["binary"] * num_tasks
70
+ nums_task = getattr(self, "nums_task", None)
71
+ if nums_task is not None and nums_task > 0:
72
+ return ["binary"] * nums_task
73
73
  return ["binary"]
74
74
 
75
75
  def __init__(
@@ -107,18 +107,18 @@ class MMOE(BaseModel):
107
107
  elif isinstance(target, str):
108
108
  target = [target]
109
109
 
110
- self.num_tasks = len(target) if target else 1
110
+ self.nums_task = len(target) if target else 1
111
111
 
112
112
  resolved_task = task
113
113
  if resolved_task is None:
114
114
  resolved_task = self.default_task
115
115
  elif isinstance(resolved_task, str):
116
- resolved_task = [resolved_task] * self.num_tasks
117
- elif len(resolved_task) == 1 and self.num_tasks > 1:
118
- resolved_task = resolved_task * self.num_tasks
119
- elif len(resolved_task) != self.num_tasks:
116
+ resolved_task = [resolved_task] * self.nums_task
117
+ elif len(resolved_task) == 1 and self.nums_task > 1:
118
+ resolved_task = resolved_task * self.nums_task
119
+ elif len(resolved_task) != self.nums_task:
120
120
  raise ValueError(
121
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
121
+ f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
122
122
  )
123
123
 
124
124
  super(MMOE, self).__init__(
@@ -138,12 +138,12 @@ class MMOE(BaseModel):
138
138
  self.loss = loss
139
139
 
140
140
  # Number of tasks and experts
141
- self.num_tasks = len(target)
141
+ self.nums_task = len(target)
142
142
  self.num_experts = num_experts
143
143
 
144
- if len(tower_params_list) != self.num_tasks:
144
+ if len(tower_params_list) != self.nums_task:
145
145
  raise ValueError(
146
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
146
+ f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
147
147
  )
148
148
 
149
149
  self.embedding = EmbeddingLayer(features=self.all_features)
@@ -163,7 +163,7 @@ class MMOE(BaseModel):
163
163
 
164
164
  # Task-specific gates
165
165
  self.gates = nn.ModuleList()
166
- for _ in range(self.num_tasks):
166
+ for _ in range(self.nums_task):
167
167
  gate = nn.Sequential(nn.Linear(input_dim, num_experts), nn.Softmax(dim=1))
168
168
  self.gates.append(gate)
169
169
  self.grad_norm_shared_modules = ["embedding", "experts", "gates"]
@@ -174,7 +174,7 @@ class MMOE(BaseModel):
174
174
  tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
175
175
  self.towers.append(tower)
176
176
  self.prediction_layer = TaskHead(
177
- task_type=self.default_task, task_dims=[1] * self.num_tasks
177
+ task_type=self.default_task, task_dims=[1] * self.nums_task
178
178
  )
179
179
  # Register regularization weights
180
180
  self.register_regularization_weights(
@@ -199,7 +199,7 @@ class MMOE(BaseModel):
199
199
 
200
200
  # Task-specific processing
201
201
  task_outputs = []
202
- for task_idx in range(self.num_tasks):
202
+ for task_idx in range(self.nums_task):
203
203
  # Gate weights for this task: [B, num_experts]
204
204
  gate_weights = self.gates[task_idx](input_flat) # [B, num_experts]
205
205
 
@@ -218,6 +218,6 @@ class MMOE(BaseModel):
218
218
  tower_output = self.towers[task_idx](gated_output) # [B, 1]
219
219
  task_outputs.append(tower_output)
220
220
 
221
- # Stack outputs: [B, num_tasks]
221
+ # Stack outputs: [B, nums_task]
222
222
  y = torch.cat(task_outputs, dim=1)
223
- return self.prediction_layer(y)
223
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 29/11/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Tang H, Liu J, Zhao M, et al. Progressive layered extraction (PLE): A novel
@@ -64,18 +64,18 @@ class CGCLayer(nn.Module):
64
64
  def __init__(
65
65
  self,
66
66
  input_dim: int,
67
- num_tasks: int,
67
+ nums_task: int,
68
68
  num_shared_experts: int,
69
69
  num_specific_experts: int,
70
70
  shared_expert_params: dict,
71
71
  specific_expert_params: dict | list[dict],
72
72
  ):
73
73
  super().__init__()
74
- if num_tasks < 1:
75
- raise ValueError("num_tasks must be >= 1")
74
+ if nums_task < 1:
75
+ raise ValueError("nums_task must be >= 1")
76
76
 
77
77
  specific_params_list = self.normalize_specific_params(
78
- specific_expert_params, num_tasks
78
+ specific_expert_params, nums_task
79
79
  )
80
80
 
81
81
  self.output_dim = get_mlp_output_dim(shared_expert_params, input_dim)
@@ -121,23 +121,23 @@ class CGCLayer(nn.Module):
121
121
  nn.Linear(input_dim, task_gate_expert_num),
122
122
  nn.Softmax(dim=1),
123
123
  )
124
- for _ in range(num_tasks)
124
+ for _ in range(nums_task)
125
125
  ]
126
126
  )
127
- shared_gate_expert_num = num_shared_experts + num_specific_experts * num_tasks
127
+ shared_gate_expert_num = num_shared_experts + num_specific_experts * nums_task
128
128
  self.shared_gate = nn.Sequential(
129
129
  nn.Linear(input_dim, shared_gate_expert_num),
130
130
  nn.Softmax(dim=1),
131
131
  )
132
132
 
133
- self.num_tasks = num_tasks
133
+ self.nums_task = nums_task
134
134
 
135
135
  def forward(
136
136
  self, task_inputs: list[torch.Tensor], shared_input: torch.Tensor
137
137
  ) -> tuple[list[torch.Tensor], torch.Tensor]:
138
- if len(task_inputs) != self.num_tasks:
138
+ if len(task_inputs) != self.nums_task:
139
139
  raise ValueError(
140
- f"Expected {self.num_tasks} task inputs, got {len(task_inputs)}"
140
+ f"Expected {self.nums_task} task inputs, got {len(task_inputs)}"
141
141
  )
142
142
 
143
143
  shared_outputs = [expert(shared_input) for expert in self.shared_experts]
@@ -146,7 +146,7 @@ class CGCLayer(nn.Module):
146
146
  new_task_fea: list[torch.Tensor] = []
147
147
  all_specific_for_shared: list[torch.Tensor] = []
148
148
 
149
- for task_idx in range(self.num_tasks):
149
+ for task_idx in range(self.nums_task):
150
150
  task_input = task_inputs[task_idx]
151
151
  task_specific_outputs = [expert(task_input) for expert in self.specific_experts[task_idx]] # type: ignore
152
152
  all_specific_for_shared.extend(task_specific_outputs)
@@ -168,15 +168,15 @@ class CGCLayer(nn.Module):
168
168
 
169
169
  @staticmethod
170
170
  def normalize_specific_params(
171
- params: dict | list[dict], num_tasks: int
171
+ params: dict | list[dict], nums_task: int
172
172
  ) -> list[dict]:
173
173
  if isinstance(params, list):
174
- if len(params) != num_tasks:
174
+ if len(params) != nums_task:
175
175
  raise ValueError(
176
- f"Length of specific_expert_params ({len(params)}) must match num_tasks ({num_tasks})."
176
+ f"Length of specific_expert_params ({len(params)}) must match nums_task ({nums_task})."
177
177
  )
178
178
  return [p.copy() for p in params]
179
- return [params.copy() for _ in range(num_tasks)]
179
+ return [params.copy() for _ in range(nums_task)]
180
180
 
181
181
 
182
182
  class PLE(BaseModel):
@@ -195,9 +195,9 @@ class PLE(BaseModel):
195
195
 
196
196
  @property
197
197
  def default_task(self):
198
- num_tasks = getattr(self, "num_tasks", None)
199
- if num_tasks is not None and num_tasks > 0:
200
- return ["binary"] * num_tasks
198
+ nums_task = getattr(self, "nums_task", None)
199
+ if nums_task is not None and nums_task > 0:
200
+ return ["binary"] * nums_task
201
201
  return ["binary"]
202
202
 
203
203
  def __init__(
@@ -225,18 +225,18 @@ class PLE(BaseModel):
225
225
  **kwargs,
226
226
  ):
227
227
 
228
- self.num_tasks = len(target)
228
+ self.nums_task = len(target)
229
229
 
230
230
  resolved_task = task
231
231
  if resolved_task is None:
232
232
  resolved_task = self.default_task
233
233
  elif isinstance(resolved_task, str):
234
- resolved_task = [resolved_task] * self.num_tasks
235
- elif len(resolved_task) == 1 and self.num_tasks > 1:
236
- resolved_task = resolved_task * self.num_tasks
237
- elif len(resolved_task) != self.num_tasks:
234
+ resolved_task = [resolved_task] * self.nums_task
235
+ elif len(resolved_task) == 1 and self.nums_task > 1:
236
+ resolved_task = resolved_task * self.nums_task
237
+ elif len(resolved_task) != self.nums_task:
238
238
  raise ValueError(
239
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
239
+ f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
240
240
  )
241
241
 
242
242
  super(PLE, self).__init__(
@@ -257,15 +257,15 @@ class PLE(BaseModel):
257
257
  if self.loss is None:
258
258
  self.loss = "bce"
259
259
  # Number of tasks, experts, and levels
260
- self.num_tasks = len(target)
260
+ self.nums_task = len(target)
261
261
  self.num_shared_experts = num_shared_experts
262
262
  self.num_specific_experts = num_specific_experts
263
263
  self.num_levels = num_levels
264
264
  if optimizer_params is None:
265
265
  optimizer_params = {}
266
- if len(tower_params_list) != self.num_tasks:
266
+ if len(tower_params_list) != self.nums_task:
267
267
  raise ValueError(
268
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
268
+ f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
269
269
  )
270
270
  # Embedding layer
271
271
  self.embedding = EmbeddingLayer(features=self.all_features)
@@ -288,7 +288,7 @@ class PLE(BaseModel):
288
288
  level_input_dim = input_dim if level == 0 else expert_output_dim
289
289
  cgc_layer = CGCLayer(
290
290
  input_dim=level_input_dim,
291
- num_tasks=self.num_tasks,
291
+ nums_task=self.nums_task,
292
292
  num_shared_experts=num_shared_experts,
293
293
  num_specific_experts=num_specific_experts,
294
294
  shared_expert_params=shared_expert_params,
@@ -304,7 +304,7 @@ class PLE(BaseModel):
304
304
  tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
305
305
  self.towers.append(tower)
306
306
  self.prediction_layer = TaskHead(
307
- task_type=self.default_task, task_dims=[1] * self.num_tasks
307
+ task_type=self.default_task, task_dims=[1] * self.nums_task
308
308
  )
309
309
  # Register regularization weights
310
310
  self.register_regularization_weights(
@@ -322,7 +322,7 @@ class PLE(BaseModel):
322
322
  input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
323
323
 
324
324
  # Initial features for each task and shared
325
- task_fea = [input_flat for _ in range(self.num_tasks)]
325
+ task_fea = [input_flat for _ in range(self.nums_task)]
326
326
  shared_fea = input_flat
327
327
 
328
328
  # Progressive Layered Extraction: CGC
@@ -331,10 +331,10 @@ class PLE(BaseModel):
331
331
 
332
332
  # task tower
333
333
  task_outputs = []
334
- for task_idx in range(self.num_tasks):
334
+ for task_idx in range(self.nums_task):
335
335
  tower_output = self.towers[task_idx](task_fea[task_idx]) # [B, 1]
336
336
  task_outputs.append(tower_output)
337
337
 
338
- # [B, num_tasks]
338
+ # [B, nums_task]
339
339
  y = torch.cat(task_outputs, dim=1)
340
- return self.prediction_layer(y)
340
+ return self.prediction_layer(y)
@@ -1,5 +1,6 @@
1
1
  """
2
2
  Date: create on 28/11/2025
3
+ Checkpoint: edit on 23/12/2025
3
4
  Author: Yang Zhou,zyaztec@gmail.com
4
5
  Reference:
5
6
  [1] Wang et al. "POSO: Personalized Cold Start Modules for Large-scale Recommender Systems", 2021.
@@ -196,7 +197,7 @@ class POSOMMoE(nn.Module):
196
197
  pc_dim: int, # for poso feature dimension
197
198
  num_experts: int,
198
199
  expert_hidden_dims: list[int],
199
- num_tasks: int,
200
+ nums_task: int,
200
201
  activation: str = "relu",
201
202
  expert_dropout: float = 0.0,
202
203
  gate_hidden_dim: int = 32, # for poso gate hidden dimension
@@ -205,7 +206,7 @@ class POSOMMoE(nn.Module):
205
206
  ) -> None:
206
207
  super().__init__()
207
208
  self.num_experts = num_experts
208
- self.num_tasks = num_tasks
209
+ self.nums_task = nums_task
209
210
 
210
211
  # Experts built with framework MLP, same as standard MMoE
211
212
  self.experts = nn.ModuleList(
@@ -226,7 +227,7 @@ class POSOMMoE(nn.Module):
226
227
 
227
228
  # Task-specific gates: gate_t(x) over experts
228
229
  self.gates = nn.ModuleList(
229
- [nn.Linear(input_dim, num_experts) for _ in range(num_tasks)]
230
+ [nn.Linear(input_dim, num_experts) for _ in range(nums_task)]
230
231
  )
231
232
  self.gate_use_softmax = gate_use_softmax
232
233
 
@@ -248,7 +249,7 @@ class POSOMMoE(nn.Module):
248
249
  """
249
250
  x: (B, input_dim)
250
251
  pc: (B, pc_dim)
251
- return: list of task outputs z_t with length num_tasks, each (B, D)
252
+ return: list of task outputs z_t with length nums_task, each (B, D)
252
253
  """
253
254
  # 1) Expert outputs with POSO PC gate
254
255
  masked_expert_outputs = []
@@ -262,7 +263,7 @@ class POSOMMoE(nn.Module):
262
263
 
263
264
  # 2) Task gates depend on x as in standard MMoE
264
265
  task_outputs: list[torch.Tensor] = []
265
- for t in range(self.num_tasks):
266
+ for t in range(self.nums_task):
266
267
  logits = self.gates[t](x) # (B, E)
267
268
  if self.gate_use_softmax:
268
269
  gate = F.softmax(logits, dim=1)
@@ -289,9 +290,9 @@ class POSO(BaseModel):
289
290
 
290
291
  @property
291
292
  def default_task(self) -> list[str]:
292
- num_tasks = getattr(self, "num_tasks", None)
293
- if num_tasks is not None and num_tasks > 0:
294
- return ["binary"] * num_tasks
293
+ nums_task = getattr(self, "nums_task", None)
294
+ if nums_task is not None and nums_task > 0:
295
+ return ["binary"] * nums_task
295
296
  return ["binary"]
296
297
 
297
298
  def __init__(
@@ -333,24 +334,24 @@ class POSO(BaseModel):
333
334
  dense_l2_reg: float = 1e-4,
334
335
  **kwargs,
335
336
  ):
336
- self.num_tasks = len(target)
337
+ self.nums_task = len(target)
337
338
 
338
- # Normalize task to match num_tasks
339
+ # Normalize task to match nums_task
339
340
  resolved_task = task
340
341
  if resolved_task is None:
341
342
  resolved_task = self.default_task
342
343
  elif isinstance(resolved_task, str):
343
- resolved_task = [resolved_task] * self.num_tasks
344
- elif len(resolved_task) == 1 and self.num_tasks > 1:
345
- resolved_task = resolved_task * self.num_tasks
346
- elif len(resolved_task) != self.num_tasks:
344
+ resolved_task = [resolved_task] * self.nums_task
345
+ elif len(resolved_task) == 1 and self.nums_task > 1:
346
+ resolved_task = resolved_task * self.nums_task
347
+ elif len(resolved_task) != self.nums_task:
347
348
  raise ValueError(
348
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
349
+ f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
349
350
  )
350
351
 
351
- if len(tower_params_list) != self.num_tasks:
352
+ if len(tower_params_list) != self.nums_task:
352
353
  raise ValueError(
353
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
354
+ f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
354
355
  )
355
356
 
356
357
  super().__init__(
@@ -466,7 +467,7 @@ class POSO(BaseModel):
466
467
  pc_dim=self.pc_input_dim,
467
468
  num_experts=num_experts,
468
469
  expert_hidden_dims=expert_hidden_dims,
469
- num_tasks=self.num_tasks,
470
+ nums_task=self.nums_task,
470
471
  activation=expert_activation,
471
472
  expert_dropout=expert_dropout,
472
473
  gate_hidden_dim=expert_gate_hidden_dim,
@@ -490,7 +491,7 @@ class POSO(BaseModel):
490
491
  self.grad_norm_shared_modules = ["embedding", "mmoe"]
491
492
  self.prediction_layer = TaskHead(
492
493
  task_type=self.default_task,
493
- task_dims=[1] * self.num_tasks,
494
+ task_dims=[1] * self.nums_task,
494
495
  )
495
496
  include_modules = (
496
497
  ["towers", "tower_heads"]
@@ -525,4 +526,4 @@ class POSO(BaseModel):
525
526
  task_outputs.append(logit)
526
527
 
527
528
  y = torch.cat(task_outputs, dim=1)
528
- return self.prediction_layer(y)
529
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 24/11/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Caruana R. Multitask learning[J]. Machine Learning, 1997, 28: 41-75.
@@ -55,9 +55,9 @@ class ShareBottom(BaseModel):
55
55
 
56
56
  @property
57
57
  def default_task(self):
58
- num_tasks = getattr(self, "num_tasks", None)
59
- if num_tasks is not None and num_tasks > 0:
60
- return ["binary"] * num_tasks
58
+ nums_task = getattr(self, "nums_task", None)
59
+ if nums_task is not None and nums_task > 0:
60
+ return ["binary"] * nums_task
61
61
  return ["binary"]
62
62
 
63
63
  def __init__(
@@ -83,18 +83,18 @@ class ShareBottom(BaseModel):
83
83
 
84
84
  optimizer_params = optimizer_params or {}
85
85
 
86
- self.num_tasks = len(target)
86
+ self.nums_task = len(target)
87
87
 
88
88
  resolved_task = task
89
89
  if resolved_task is None:
90
90
  resolved_task = self.default_task
91
91
  elif isinstance(resolved_task, str):
92
- resolved_task = [resolved_task] * self.num_tasks
93
- elif len(resolved_task) == 1 and self.num_tasks > 1:
94
- resolved_task = resolved_task * self.num_tasks
95
- elif len(resolved_task) != self.num_tasks:
92
+ resolved_task = [resolved_task] * self.nums_task
93
+ elif len(resolved_task) == 1 and self.nums_task > 1:
94
+ resolved_task = resolved_task * self.nums_task
95
+ elif len(resolved_task) != self.nums_task:
96
96
  raise ValueError(
97
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
97
+ f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
98
98
  )
99
99
 
100
100
  super(ShareBottom, self).__init__(
@@ -115,10 +115,10 @@ class ShareBottom(BaseModel):
115
115
  if self.loss is None:
116
116
  self.loss = "bce"
117
117
  # Number of tasks
118
- self.num_tasks = len(target)
119
- if len(tower_params_list) != self.num_tasks:
118
+ self.nums_task = len(target)
119
+ if len(tower_params_list) != self.nums_task:
120
120
  raise ValueError(
121
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
121
+ f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
122
122
  )
123
123
  # Embedding layer
124
124
  self.embedding = EmbeddingLayer(features=self.all_features)
@@ -144,7 +144,7 @@ class ShareBottom(BaseModel):
144
144
  tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
145
145
  self.towers.append(tower)
146
146
  self.prediction_layer = TaskHead(
147
- task_type=self.default_task, task_dims=[1] * self.num_tasks
147
+ task_type=self.default_task, task_dims=[1] * self.nums_task
148
148
  )
149
149
  # Register regularization weights
150
150
  self.register_regularization_weights(
@@ -170,6 +170,6 @@ class ShareBottom(BaseModel):
170
170
  tower_output = tower(bottom_output) # [B, 1]
171
171
  task_outputs.append(tower_output)
172
172
 
173
- # Stack outputs: [B, num_tasks]
173
+ # Stack outputs: [B, nums_task]
174
174
  y = torch.cat(task_outputs, dim=1)
175
- return self.prediction_layer(y)
175
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 09/12/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
@@ -244,4 +244,4 @@ class AFM(BaseModel):
244
244
  y_afm = self.output_projection(weighted_sum)
245
245
 
246
246
  y = y_linear + y_afm
247
- return self.prediction_layer(y)
247
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 09/12/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
@@ -207,4 +207,4 @@ class AutoInt(BaseModel):
207
207
  start_dim=1
208
208
  ) # [B, num_fields * att_embedding_dim]
209
209
  y = self.fc(attention_output_flat) # [B, 1]
210
- return self.prediction_layer(y)
210
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 09/12/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
@@ -198,4 +198,4 @@ class DCN(BaseModel):
198
198
 
199
199
  # Final prediction
200
200
  y = self.final_layer(combined)
201
- return self.prediction_layer(y)
201
+ return self.prediction_layer(y)
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
- Checkpoint: edit on 09/12/2025
3
+ Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
6
  [1] R. Wang et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for
@@ -302,4 +302,4 @@ class DCNv2(BaseModel):
302
302
  combined = cross_out
303
303
 
304
304
  logit = self.final_layer(combined)
305
- return self.prediction_layer(logit)
305
+ return self.prediction_layer(logit)