nextrec 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +9 -10
  4. nextrec/basic/callback.py +0 -1
  5. nextrec/basic/dataloader.py +127 -168
  6. nextrec/basic/features.py +27 -24
  7. nextrec/basic/layers.py +159 -328
  8. nextrec/basic/loggers.py +37 -50
  9. nextrec/basic/metrics.py +147 -255
  10. nextrec/basic/model.py +462 -817
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +12 -16
  13. nextrec/data/preprocessor.py +252 -276
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +22 -30
  16. nextrec/loss/match_losses.py +83 -116
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +61 -70
  19. nextrec/models/match/dssm_v2.py +51 -61
  20. nextrec/models/match/mind.py +71 -89
  21. nextrec/models/match/sdm.py +81 -93
  22. nextrec/models/match/youtube_dnn.py +53 -62
  23. nextrec/models/multi_task/esmm.py +43 -49
  24. nextrec/models/multi_task/mmoe.py +56 -65
  25. nextrec/models/multi_task/ple.py +65 -92
  26. nextrec/models/multi_task/share_bottom.py +42 -48
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +30 -39
  29. nextrec/models/ranking/autoint.py +57 -70
  30. nextrec/models/ranking/dcn.py +35 -43
  31. nextrec/models/ranking/deepfm.py +28 -34
  32. nextrec/models/ranking/dien.py +79 -115
  33. nextrec/models/ranking/din.py +60 -84
  34. nextrec/models/ranking/fibinet.py +35 -51
  35. nextrec/models/ranking/fm.py +26 -28
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +31 -30
  38. nextrec/models/ranking/widedeep.py +31 -36
  39. nextrec/models/ranking/xdeepfm.py +39 -46
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +15 -23
  43. nextrec/utils/optimizer.py +10 -14
  44. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/METADATA +16 -7
  45. nextrec-0.1.8.dist-info/RECORD +51 -0
  46. nextrec-0.1.4.dist-info/RECORD +0 -51
  47. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/ple.py
@@ -17,13 +17,13 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class PLE(BaseModel):
     """
     Progressive Layered Extraction
-
+
     PLE is an advanced multi-task learning model that extends MMOE by introducing
     both task-specific experts and shared experts at each level. It uses a progressive
     routing mechanism where experts from level k feed into gates at level k+1.
     This design better captures task-specific and shared information progressively.
     """
-
+
     @property
     def model_name(self):
         return "PLE"
@@ -31,31 +31,29 @@ class PLE(BaseModel):
     @property
     def task_type(self):
         return self.task if isinstance(self.task, list) else [self.task]
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        shared_expert_params: dict,
-        specific_expert_params: dict,
-        num_shared_experts: int,
-        num_specific_experts: int,
-        num_levels: int,
-        tower_params_list: list[dict],
-        target: list[str],
-        task: str | list[str] = "binary",
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature],
+                 sparse_features: list[SparseFeature],
+                 sequence_features: list[SequenceFeature],
+                 shared_expert_params: dict,
+                 specific_expert_params: dict,
+                 num_shared_experts: int,
+                 num_specific_experts: int,
+                 num_levels: int,
+                 tower_params_list: list[dict],
+                 target: list[str],
+                 task: str | list[str] = 'binary',
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(PLE, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -68,13 +66,13 @@ class PLE(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks, experts, and levels
         self.num_tasks = len(target)
         self.num_shared_experts = num_shared_experts
@@ -82,12 +80,10 @@ class PLE(BaseModel):
         self.num_levels = num_levels
         if optimizer_params is None:
             optimizer_params = {}
-
+
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
-                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
-            )
-
+            raise ValueError(f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})")
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -95,60 +91,42 @@ class PLE(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum(
-            [
-                f.embedding_dim
-                for f in self.all_features
-                if not isinstance(f, DenseFeature)
-            ]
-        )
-        dense_input_dim = sum(
-            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
-        )
+        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
+        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Get expert output dimension
-        if "dims" in shared_expert_params and len(shared_expert_params["dims"]) > 0:
-            expert_output_dim = shared_expert_params["dims"][-1]
+        if 'dims' in shared_expert_params and len(shared_expert_params['dims']) > 0:
+            expert_output_dim = shared_expert_params['dims'][-1]
         else:
             expert_output_dim = input_dim
-
+
         # Build extraction layers (CGC layers)
         self.shared_experts_layers = nn.ModuleList()  # [num_levels]
         self.specific_experts_layers = nn.ModuleList()  # [num_levels, num_tasks]
-        self.gates_layers = (
-            nn.ModuleList()
-        )  # [num_levels, num_tasks + 1] (+1 for shared gate)
-
+        self.gates_layers = nn.ModuleList()  # [num_levels, num_tasks + 1] (+1 for shared gate)
+
         for level in range(num_levels):
             # Input dimension for this level
             level_input_dim = input_dim if level == 0 else expert_output_dim
-
+
             # Shared experts for this level
             shared_experts = nn.ModuleList()
             for _ in range(num_shared_experts):
-                expert = MLP(
-                    input_dim=level_input_dim,
-                    output_layer=False,
-                    **shared_expert_params,
-                )
+                expert = MLP(input_dim=level_input_dim, output_layer=False, **shared_expert_params)
                 shared_experts.append(expert)
             self.shared_experts_layers.append(shared_experts)
-
+
             # Task-specific experts for this level
             specific_experts_for_tasks = nn.ModuleList()
             for _ in range(self.num_tasks):
                 task_experts = nn.ModuleList()
                 for _ in range(num_specific_experts):
-                    expert = MLP(
-                        input_dim=level_input_dim,
-                        output_layer=False,
-                        **specific_expert_params,
-                    )
+                    expert = MLP(input_dim=level_input_dim, output_layer=False, **specific_expert_params)
                     task_experts.append(expert)
                 specific_experts_for_tasks.append(task_experts)
             self.specific_experts_layers.append(specific_experts_for_tasks)
-
+
             # Gates for this level (num_tasks task gates + 1 shared gate)
             gates = nn.ModuleList()
             # Task-specific gates
@@ -156,42 +134,40 @@ class PLE(BaseModel):
             for _ in range(self.num_tasks):
                 gate = nn.Sequential(
                     nn.Linear(level_input_dim, num_experts_for_task_gate),
-                    nn.Softmax(dim=1),
+                    nn.Softmax(dim=1)
                 )
                 gates.append(gate)
             # Shared gate: contains all tasks' specific experts + shared experts
             # expert counts = num_shared_experts + num_specific_experts * num_tasks
-            num_experts_for_shared_gate = (
-                num_shared_experts + num_specific_experts * self.num_tasks
-            )
+            num_experts_for_shared_gate = num_shared_experts + num_specific_experts * self.num_tasks
             shared_gate = nn.Sequential(
                 nn.Linear(level_input_dim, num_experts_for_shared_gate),
-                nn.Softmax(dim=1),
+                nn.Softmax(dim=1)
             )
             gates.append(shared_gate)
             self.gates_layers.append(gates)
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type, task_dims=[1] * self.num_tasks
+            task_type=self.task_type,
+            task_dims=[1] * self.num_tasks
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding",
-            include_modules=[
-                "shared_experts_layers",
-                "specific_experts_layers",
-                "gates_layers",
-                "towers",
-            ],
+            embedding_attr='embedding',
+            include_modules=['shared_experts_layers', 'specific_experts_layers', 'gates_layers', 'towers']
         )

-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )

     def forward(self, x):
         # Get all embeddings and flatten
@@ -203,17 +179,13 @@ class PLE(BaseModel):

         # Progressive Layered Extraction: CGC
         for level in range(self.num_levels):
-            shared_experts = self.shared_experts_layers[
-                level
-            ]  # ModuleList[num_shared_experts]
-            specific_experts = self.specific_experts_layers[
-                level
-            ]  # ModuleList[num_tasks][num_specific_experts]
-            gates = self.gates_layers[level]  # ModuleList[num_tasks + 1]
+            shared_experts = self.shared_experts_layers[level]  # ModuleList[num_shared_experts]
+            specific_experts = self.specific_experts_layers[level]  # ModuleList[num_tasks][num_specific_experts]
+            gates = self.gates_layers[level]  # ModuleList[num_tasks + 1]

             # Compute shared experts output for this level
             # shared_expert_list: List[Tensor[B, expert_dim]]
-            shared_expert_list = [expert(shared_fea) for expert in shared_experts]  # type: ignore[list-item]
+            shared_expert_list = [expert(shared_fea) for expert in shared_experts] # type: ignore[list-item]
             # [num_shared_experts, B, expert_dim]
             shared_expert_outputs = torch.stack(shared_expert_list, dim=0)

@@ -226,7 +198,7 @@ class PLE(BaseModel):
                 current_task_in = task_fea[task_idx]

                 # Specific task experts for this task
-                task_expert_modules = specific_experts[task_idx]  # type: ignore
+                task_expert_modules = specific_experts[task_idx] # type: ignore

                 # Specific task expert output list List[Tensor[B, expert_dim]]
                 task_specific_list = []
@@ -242,7 +214,8 @@ class PLE(BaseModel):
                 # Input for gate: shared_experts + own specific task experts
                 # [num_shared + num_specific, B, expert_dim]
                 all_expert_outputs = torch.cat(
-                    [shared_expert_outputs, task_specific_outputs], dim=0
+                    [shared_expert_outputs, task_specific_outputs],
+                    dim=0
                 )
                 # [B, num_experts, expert_dim]
                 all_expert_outputs_t = all_expert_outputs.permute(1, 0, 2)
@@ -266,7 +239,7 @@ class PLE(BaseModel):
                 all_for_shared = torch.stack(all_for_shared_list, dim=1)

                 # [B, num_all_experts]
-                shared_gate_weights = gates[self.num_tasks](shared_fea)  # type: ignore
+                shared_gate_weights = gates[self.num_tasks](shared_fea) # type: ignore
                 # [B, 1, num_all_experts]
                 shared_gate_weights = shared_gate_weights.unsqueeze(1)

@@ -284,4 +257,4 @@ class PLE(BaseModel):

         # [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
-        return self.prediction_layer(y)
+        return self.prediction_layer(y)
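
For orientation, here is a minimal sketch of how the 0.1.8 PLE constructor shown above might be called. Only the PLE keyword arguments come from the signature in this diff; the DenseFeature/SparseFeature argument names and the import path are assumptions.

# Hypothetical usage of the refactored PLE constructor; DenseFeature /
# SparseFeature argument names are assumed, not shown in this diff.
from nextrec.basic.features import DenseFeature, SparseFeature
from nextrec.models.multi_task.ple import PLE

dense = [DenseFeature(name="price")]  # assumed signature
sparse = [SparseFeature(name="item_id", vocab_size=10_000, embedding_dim=16)]  # assumed signature

model = PLE(
    dense_features=dense,
    sparse_features=sparse,
    sequence_features=[],
    shared_expert_params={"dims": [128, 64]},  # dims[-1] becomes expert_output_dim
    specific_expert_params={"dims": [128, 64]},
    num_shared_experts=2,
    num_specific_experts=2,
    num_levels=2,
    tower_params_list=[{"dims": [32]}, {"dims": [32]}],  # must match len(target)
    target=["click", "conversion"],
    task=["binary", "binary"],
)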

nextrec/models/multi_task/share_bottom.py
@@ -23,27 +23,25 @@ class ShareBottom(BaseModel):
     def task_type(self):
         # Multi-task model, return list of task types
         return self.task if isinstance(self.task, list) else [self.task]
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        bottom_params: dict,
-        tower_params_list: list[dict],
-        target: list[str],
-        task: str | list[str] = "binary",
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature],
+                 sparse_features: list[SparseFeature],
+                 sequence_features: list[SequenceFeature],
+                 bottom_params: dict,
+                 tower_params_list: list[dict],
+                 target: list[str],
+                 task: str | list[str] = 'binary',
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(ShareBottom, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -56,20 +54,18 @@ class ShareBottom(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks
         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
-                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
-            )
-
+            raise ValueError(f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})")
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -77,56 +73,54 @@ class ShareBottom(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum(
-            [
-                f.embedding_dim
-                for f in self.all_features
-                if not isinstance(f, DenseFeature)
-            ]
-        )
-        dense_input_dim = sum(
-            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
-        )
+        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
+        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Shared bottom network
         self.bottom = MLP(input_dim=input_dim, output_layer=False, **bottom_params)
-
+
         # Get bottom output dimension
-        if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
-            bottom_output_dim = bottom_params["dims"][-1]
+        if 'dims' in bottom_params and len(bottom_params['dims']) > 0:
+            bottom_output_dim = bottom_params['dims'][-1]
         else:
             bottom_output_dim = input_dim
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type, task_dims=[1] * self.num_tasks
+            task_type=self.task_type,
+            task_dims=[1] * self.num_tasks
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding", include_modules=["bottom", "towers"]
+            embedding_attr='embedding',
+            include_modules=['bottom', 'towers']
         )

-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Shared bottom
         bottom_output = self.bottom(input_flat)  # [B, bottom_dim]
-
+
         # Task-specific towers
         task_outputs = []
         for tower in self.towers:
             tower_output = tower(bottom_output)  # [B, 1]
             task_outputs.append(tower_output)
-
+
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)
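
The forward pass above is the textbook shared-bottom pattern: one trunk MLP feeds a per-task tower, and tower outputs are concatenated to [B, num_tasks]. A self-contained sketch with plain torch modules standing in for nextrec's MLP and PredictionLayer:

# Stand-in sketch of the shared-bottom forward pass; plain nn modules
# replace nextrec's MLP and PredictionLayer classes.
import torch
import torch.nn as nn

input_dim, bottom_dim, num_tasks = 24, 64, 2
bottom = nn.Sequential(nn.Linear(input_dim, bottom_dim), nn.ReLU())   # shared trunk
towers = nn.ModuleList(nn.Linear(bottom_dim, 1) for _ in range(num_tasks))

x = torch.randn(8, input_dim)                  # [B, input_dim]
bottom_output = bottom(x)                      # [B, bottom_dim]
y = torch.cat([tower(bottom_output) for tower in towers], dim=1)  # [B, num_tasks]
probs = torch.sigmoid(y)                       # binary heads, as PredictionLayer would apply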

nextrec/models/ranking/__init__.py
@@ -7,11 +7,11 @@ from .din import DIN
 from .dien import DIEN

 __all__ = [
-    "DeepFM",
-    "AutoInt",
-    "WideDeep",
-    "xDeepFM",
-    "DCN",
-    "DIN",
-    "DIEN",
+    'DeepFM',
+    'AutoInt',
+    'WideDeep',
+    'xDeepFM',
+    'DCN',
+    'DIN',
+    'DIEN',
 ]

nextrec/models/ranking/afm.py
@@ -23,26 +23,24 @@ class AFM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature] | list = [],
-        sparse_features: list[SparseFeature] | list = [],
-        sequence_features: list[SequenceFeature] | list = [],
-        attention_dim: int = 32,
-        attention_dropout: float = 0.0,
-        target: list[str] | list = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature] | list = [],
+                 sparse_features: list[SparseFeature] | list = [],
+                 sequence_features: list[SequenceFeature] | list = [],
+                 attention_dim: int = 32,
+                 attention_dropout: float = 0.0,
+                 target: list[str] | list = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -55,25 +53,21 @@ class AFM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError(
-                "AFM requires at least two sparse/sequence features to build pairwise interactions."
-            )
+            raise ValueError("AFM requires at least two sparse/sequence features to build pairwise interactions.")

         # Assume uniform embedding dimension across FM fields
         self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError(
-                "All FM features must share the same embedding_dim for AFM."
-            )
+            raise ValueError("All FM features must share the same embedding_dim for AFM.")

         self.embedding = EmbeddingLayer(features=self.fm_features)

@@ -88,21 +82,18 @@ class AFM(BaseModel):

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding",
-            include_modules=[
-                "linear",
-                "attention_linear",
-                "attention_p",
-                "output_projection",
-            ],
+            embedding_attr='embedding',
+            include_modules=['linear', 'attention_linear', 'attention_p', 'output_projection']
         )

-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )

     def forward(self, x):
-        field_emb = self.embedding(
-            x=x, features=self.fm_features, squeeze_dim=False
-        )  # [B, F, D]
+        field_emb = self.embedding(x=x, features=self.fm_features, squeeze_dim=False) # [B, F, D]
         input_linear = field_emb.flatten(start_dim=1)
         y_linear = self.linear(input_linear)
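
The forward() hunk is truncated here, but the modules registered above (attention_linear, attention_p, output_projection) match the canonical AFM interaction term. A sketch of that computation, assuming nextrec follows the standard formulation; the module internals are guesses, only the names come from the diff:

# Canonical AFM pairwise attention, sketched with plain torch; module
# names mirror the include_modules list above, internals are assumed.
import itertools
import torch
import torch.nn as nn

B, F, D, A = 8, 4, 16, 32                        # batch, fields, embedding_dim, attention_dim
field_emb = torch.randn(B, F, D)                 # [B, F, D], as in the diff comment

attention_linear = nn.Sequential(nn.Linear(D, A), nn.ReLU())
attention_p = nn.Linear(A, 1, bias=False)        # scores each pair
output_projection = nn.Linear(D, 1)

# All F*(F-1)/2 element-wise products v_i * v_j
idx_i, idx_j = zip(*itertools.combinations(range(F), 2))
pair = field_emb[:, list(idx_i), :] * field_emb[:, list(idx_j), :]  # [B, P, D]

alpha = torch.softmax(attention_p(attention_linear(pair)), dim=1)   # [B, P, 1], attention over pairs
y_interaction = output_projection((alpha * pair).sum(dim=1))        # [B, 1]
# the final logit would combine y_linear with y_interaction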