nextrec 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -9
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/dataloader.py +168 -127
  6. nextrec/basic/features.py +24 -27
  7. nextrec/basic/layers.py +328 -159
  8. nextrec/basic/loggers.py +50 -37
  9. nextrec/basic/metrics.py +255 -147
  10. nextrec/basic/model.py +817 -462
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +16 -12
  13. nextrec/data/preprocessor.py +276 -252
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +30 -22
  16. nextrec/loss/match_losses.py +116 -83
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +70 -61
  19. nextrec/models/match/dssm_v2.py +61 -51
  20. nextrec/models/match/mind.py +89 -71
  21. nextrec/models/match/sdm.py +93 -81
  22. nextrec/models/match/youtube_dnn.py +62 -53
  23. nextrec/models/multi_task/esmm.py +49 -43
  24. nextrec/models/multi_task/mmoe.py +65 -56
  25. nextrec/models/multi_task/ple.py +92 -65
  26. nextrec/models/multi_task/share_bottom.py +48 -42
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +39 -30
  29. nextrec/models/ranking/autoint.py +70 -57
  30. nextrec/models/ranking/dcn.py +43 -35
  31. nextrec/models/ranking/deepfm.py +34 -28
  32. nextrec/models/ranking/dien.py +115 -79
  33. nextrec/models/ranking/din.py +84 -60
  34. nextrec/models/ranking/fibinet.py +51 -35
  35. nextrec/models/ranking/fm.py +28 -26
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +30 -31
  38. nextrec/models/ranking/widedeep.py +36 -31
  39. nextrec/models/ranking/xdeepfm.py +46 -39
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +23 -15
  43. nextrec/utils/optimizer.py +14 -10
  44. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
  45. nextrec-0.1.2.dist-info/RECORD +51 -0
  46. nextrec-0.1.1.dist-info/RECORD +0 -51
  47. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/fibinet.py
@@ -30,25 +30,27 @@ class FiBiNET(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 mlp_params: dict = {},
-                 bilinear_type: str = "field_interaction",
-                 senet_reduction: int = 3,
-                 target: list[str] | list = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        bilinear_type: str = "field_interaction",
+        senet_reduction: int = 3,
+        target: list[str] | list = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(FiBiNET, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -61,28 +63,36 @@ class FiBiNET(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.linear_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.interaction_features = sparse_features + sequence_features

         if len(self.interaction_features) < 2:
-            raise ValueError("FiBiNET requires at least two sparse/sequence features for interactions.")
+            raise ValueError(
+                "FiBiNET requires at least two sparse/sequence features for interactions."
+            )

         self.embedding = EmbeddingLayer(features=self.deep_features)

         self.num_fields = len(self.interaction_features)
         self.embedding_dim = self.interaction_features[0].embedding_dim
-        if any(f.embedding_dim != self.embedding_dim for f in self.interaction_features):
-            raise ValueError("All interaction features must share the same embedding_dim in FiBiNET.")
-
-        self.senet = SENETLayer(num_fields=self.num_fields, reduction_ratio=senet_reduction)
+        if any(
+            f.embedding_dim != self.embedding_dim for f in self.interaction_features
+        ):
+            raise ValueError(
+                "All interaction features must share the same embedding_dim in FiBiNET."
+            )
+
+        self.senet = SENETLayer(
+            num_fields=self.num_fields, reduction_ratio=senet_reduction
+        )
         self.bilinear_standard = BiLinearInteractionLayer(
             input_dim=self.embedding_dim,
             num_fields=self.num_fields,
@@ -104,21 +114,27 @@ class FiBiNET(BaseModel):

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'senet', 'bilinear_standard', 'bilinear_senet', 'mlp']
+            embedding_attr="embedding",
+            include_modules=[
+                "linear",
+                "senet",
+                "bilinear_standard",
+                "bilinear_senet",
+                "mlp",
+            ],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
-        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=True)
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=True
+        )
         y_linear = self.linear(input_linear)

-        field_emb = self.embedding(x=x, features=self.interaction_features, squeeze_dim=False)
+        field_emb = self.embedding(
+            x=x, features=self.interaction_features, squeeze_dim=False
+        )

         senet_emb = self.senet(field_emb)
         bilinear_standard = self.bilinear_standard(field_emb).flatten(start_dim=1)
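The SENETLayer(num_fields=..., reduction_ratio=senet_reduction) wired up above reweights the field embeddings before the bilinear interactions. As a point of reference only, here is a minimal standalone sketch of that squeeze-and-excitation idea in PyTorch; it is not the nextrec implementation, and the internals of the real SENETLayer may differ:

```python
import torch
import torch.nn as nn


class ToySENET(nn.Module):
    """Toy squeeze-and-excitation reweighting over field embeddings (illustrative only)."""

    def __init__(self, num_fields: int, reduction_ratio: int = 3):
        super().__init__()
        reduced = max(1, num_fields // reduction_ratio)
        self.excite = nn.Sequential(
            nn.Linear(num_fields, reduced),
            nn.ReLU(),
            nn.Linear(reduced, num_fields),
            nn.ReLU(),
        )

    def forward(self, field_emb: torch.Tensor) -> torch.Tensor:
        # field_emb: [batch, num_fields, embedding_dim]
        squeezed = field_emb.mean(dim=-1)         # squeeze: [batch, num_fields]
        weights = self.excite(squeezed)           # excite:  [batch, num_fields]
        return field_emb * weights.unsqueeze(-1)  # reweighted field embeddings


# Shape check with placeholder sizes (batch=4, fields=5, dim=8).
emb = torch.randn(4, 5, 8)
print(ToySENET(num_fields=5, reduction_ratio=3)(emb).shape)  # torch.Size([4, 5, 8])
```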
nextrec/models/ranking/fm.py
@@ -9,7 +9,12 @@ Reference:
 import torch.nn as nn

 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, FM as FMInteraction, LR, PredictionLayer
+from nextrec.basic.layers import (
+    EmbeddingLayer,
+    FM as FMInteraction,
+    LR,
+    PredictionLayer,
+)
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature


@@ -21,22 +26,24 @@ class FM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 target: list[str] | list = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        target: list[str] | list = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(FM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -49,13 +56,13 @@ class FM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) == 0:
             raise ValueError("FM requires at least one sparse or sequence feature.")
@@ -69,15 +76,10 @@ class FM(BaseModel):

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear']
+            embedding_attr="embedding", include_modules=["linear"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         input_fm = self.embedding(x=x, features=self.fm_features, squeeze_dim=False)
nextrec/models/ranking/masknet.py
@@ -23,26 +23,28 @@ class MaskNet(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 num_blocks: int = 3,
-                 mask_hidden_dim: int = 64,
-                 block_dropout: float = 0.1,
-                 mlp_params: dict = {},
-                 target: list[str] | list = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        num_blocks: int = 3,
+        mask_hidden_dim: int = 64,
+        block_dropout: float = 0.1,
+        mlp_params: dict = {},
+        target: list[str] | list = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(MaskNet, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -55,13 +57,13 @@ class MaskNet(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.mask_features = sparse_features + sequence_features
         if len(self.mask_features) == 0:
             raise ValueError("MaskNet requires at least one sparse/sequence feature.")
@@ -70,7 +72,9 @@ class MaskNet(BaseModel):
         self.num_fields = len(self.mask_features)
         self.embedding_dim = self.mask_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.mask_features):
-            raise ValueError("MaskNet expects identical embedding_dim across mask_features.")
+            raise ValueError(
+                "MaskNet expects identical embedding_dim across mask_features."
+            )

         self.num_blocks = max(1, num_blocks)
         self.field_dim = self.num_fields * self.embedding_dim
@@ -82,7 +86,7 @@ class MaskNet(BaseModel):
                 nn.Sequential(
                     nn.Linear(self.field_dim, mask_hidden_dim),
                     nn.ReLU(),
-                    nn.Linear(mask_hidden_dim, self.num_fields)
+                    nn.Linear(mask_hidden_dim, self.num_fields),
                 )
             )

@@ -91,15 +95,11 @@ class MaskNet(BaseModel):
         self.prediction_layer = PredictionLayer(task_type=self.task_type)

         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'mask_generators', 'final_mlp']
+            embedding_attr="embedding",
+            include_modules=["linear", "mask_generators", "final_mlp"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)
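The mask-generator block reformatted above maps the flattened field embeddings (field_dim = num_fields * embedding_dim) down to mask_hidden_dim and back up to one value per field. A quick standalone shape check, using placeholder sizes rather than values taken from the package:

```python
import torch
import torch.nn as nn

num_fields, embedding_dim, mask_hidden_dim = 6, 16, 64  # placeholder sizes
field_dim = num_fields * embedding_dim                   # 96, as computed in __init__

# Same structure as the reformatted block in the diff above.
mask_generator = nn.Sequential(
    nn.Linear(field_dim, mask_hidden_dim),
    nn.ReLU(),
    nn.Linear(mask_hidden_dim, num_fields),
)

flat_fields = torch.randn(4, field_dim)  # [batch, field_dim]
mask = mask_generator(flat_fields)       # [batch, num_fields], one logit per field
print(mask.shape)                        # torch.Size([4, 6])
```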
nextrec/models/ranking/pnn.py
@@ -22,25 +22,27 @@ class PNN(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 mlp_params: dict = {},
-                 product_type: str = "inner",
-                 outer_product_dim: int | None = None,
-                 target: list[str] | list = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        product_type: str = "inner",
+        outer_product_dim: int | None = None,
+        target: list[str] | list = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(PNN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -53,13 +55,13 @@ class PNN(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.field_features = sparse_features + sequence_features
         if len(self.field_features) < 2:
             raise ValueError("PNN requires at least two sparse/sequence features.")
@@ -68,7 +70,9 @@ class PNN(BaseModel):
         self.num_fields = len(self.field_features)
         self.embedding_dim = self.field_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
-            raise ValueError("All field features must share the same embedding_dim for PNN.")
+            raise ValueError(
+                "All field features must share the same embedding_dim for PNN."
+            )

         self.product_type = product_type.lower()
         if self.product_type not in {"inner", "outer"}:
@@ -87,19 +91,14 @@ class PNN(BaseModel):
         self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)

-        modules = ['mlp']
+        modules = ["mlp"]
         if self.product_type == "outer":
-            modules.append('kernel')
+            modules.append("kernel")
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=modules
+            embedding_attr="embedding", include_modules=modules
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         field_emb = self.embedding(x=x, features=self.field_features, squeeze_dim=False)
nextrec/models/ranking/widedeep.py
@@ -24,23 +24,25 @@ class WideDeep(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 target: list[str] = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        target: list[str] = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -53,16 +55,16 @@ class WideDeep(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features

@@ -72,30 +74,33 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)
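The wrapped sums above only compute the deep MLP's input width: each non-dense feature contributes its embedding_dim, and each dense feature contributes its embedding_dim when set, falling back to 1. A worked example with assumed feature sizes (not taken from the package):

```python
# Assumed example: three sparse/sequence features with embedding_dim 16,
# plus two dense features, one with embedding_dim=4 and one without (counts as 1).
sparse_dims = [16, 16, 16]
dense_dims = [4, None]

deep_emb_dim_total = sum(sparse_dims)                 # 48
dense_input_dim = sum((d or 1) for d in dense_dims)   # 4 + 1 = 5

mlp_input_dim = deep_emb_dim_total + dense_input_dim  # 53, passed to MLP(input_dim=...)
print(mlp_input_dim)
```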
nextrec/models/ranking/xdeepfm.py
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
+    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
     knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """
@@ -25,25 +25,27 @@ class xDeepFM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 cin_size: list[int] = [128, 128],
-                 split_half: bool = True,
-                 target: list[str] = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 device: str = 'cpu',
-                 model_id: str = "baseline",
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        cin_size: list[int] = [128, 128],
+        split_half: bool = True,
+        target: list[str] = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -56,16 +58,16 @@ class xDeepFM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features

@@ -75,39 +77,44 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'cin', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)
-
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=False
+        )
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear)  # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
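In the reformatted forward pass, input_linear is returned with squeeze_dim=False, one embedding per field, and the linear branch flattens it before LR(linear_dim). A small shape illustration with placeholder sizes shows why linear_dim equals the sum of the field embedding dims:

```python
import torch

batch, num_fields, embedding_dim = 4, 5, 8         # placeholder sizes
input_linear = torch.randn(batch, num_fields, embedding_dim)

flat = input_linear.flatten(start_dim=1)            # [batch, num_fields * embedding_dim]
print(flat.shape)                                   # torch.Size([4, 40])

# With equal embedding_dim per field, num_fields * embedding_dim is the same as
# sum(f.embedding_dim for f in linear_features), i.e. the linear_dim passed to LR.
```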