nextrec-0.1.4-py3-none-any.whl → nextrec-0.1.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +9 -10
  4. nextrec/basic/callback.py +0 -1
  5. nextrec/basic/dataloader.py +127 -168
  6. nextrec/basic/features.py +27 -24
  7. nextrec/basic/layers.py +159 -328
  8. nextrec/basic/loggers.py +37 -50
  9. nextrec/basic/metrics.py +147 -255
  10. nextrec/basic/model.py +462 -817
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +12 -16
  13. nextrec/data/preprocessor.py +252 -276
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +22 -30
  16. nextrec/loss/match_losses.py +83 -116
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +61 -70
  19. nextrec/models/match/dssm_v2.py +51 -61
  20. nextrec/models/match/mind.py +71 -89
  21. nextrec/models/match/sdm.py +81 -93
  22. nextrec/models/match/youtube_dnn.py +53 -62
  23. nextrec/models/multi_task/esmm.py +43 -49
  24. nextrec/models/multi_task/mmoe.py +56 -65
  25. nextrec/models/multi_task/ple.py +65 -92
  26. nextrec/models/multi_task/share_bottom.py +42 -48
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +30 -39
  29. nextrec/models/ranking/autoint.py +57 -70
  30. nextrec/models/ranking/dcn.py +35 -43
  31. nextrec/models/ranking/deepfm.py +28 -34
  32. nextrec/models/ranking/dien.py +79 -115
  33. nextrec/models/ranking/din.py +60 -84
  34. nextrec/models/ranking/fibinet.py +35 -51
  35. nextrec/models/ranking/fm.py +26 -28
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +31 -30
  38. nextrec/models/ranking/widedeep.py +31 -36
  39. nextrec/models/ranking/xdeepfm.py +39 -46
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +15 -23
  43. nextrec/utils/optimizer.py +10 -14
  44. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/METADATA +16 -7
  45. nextrec-0.1.7.dist-info/RECORD +51 -0
  46. nextrec-0.1.4.dist-info/RECORD +0 -51
  47. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/licenses/LICENSE +0 -0

nextrec/models/ranking/fibinet.py

@@ -30,27 +30,25 @@ class FiBiNET(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature] | list = [],
-        sparse_features: list[SparseFeature] | list = [],
-        sequence_features: list[SequenceFeature] | list = [],
-        mlp_params: dict = {},
-        bilinear_type: str = "field_interaction",
-        senet_reduction: int = 3,
-        target: list[str] | list = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature] | list = [],
+                 sparse_features: list[SparseFeature] | list = [],
+                 sequence_features: list[SequenceFeature] | list = [],
+                 mlp_params: dict = {},
+                 bilinear_type: str = "field_interaction",
+                 senet_reduction: int = 3,
+                 target: list[str] | list = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(FiBiNET, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -63,36 +61,28 @@ class FiBiNET(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.linear_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.interaction_features = sparse_features + sequence_features
 
         if len(self.interaction_features) < 2:
-            raise ValueError(
-                "FiBiNET requires at least two sparse/sequence features for interactions."
-            )
+            raise ValueError("FiBiNET requires at least two sparse/sequence features for interactions.")
 
         self.embedding = EmbeddingLayer(features=self.deep_features)
 
         self.num_fields = len(self.interaction_features)
         self.embedding_dim = self.interaction_features[0].embedding_dim
-        if any(
-            f.embedding_dim != self.embedding_dim for f in self.interaction_features
-        ):
-            raise ValueError(
-                "All interaction features must share the same embedding_dim in FiBiNET."
-            )
-
-        self.senet = SENETLayer(
-            num_fields=self.num_fields, reduction_ratio=senet_reduction
-        )
+        if any(f.embedding_dim != self.embedding_dim for f in self.interaction_features):
+            raise ValueError("All interaction features must share the same embedding_dim in FiBiNET.")
+
+        self.senet = SENETLayer(num_fields=self.num_fields, reduction_ratio=senet_reduction)
         self.bilinear_standard = BiLinearInteractionLayer(
             input_dim=self.embedding_dim,
             num_fields=self.num_fields,
@@ -114,27 +104,21 @@ class FiBiNET(BaseModel):
 
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding",
-            include_modules=[
-                "linear",
-                "senet",
-                "bilinear_standard",
-                "bilinear_senet",
-                "mlp",
-            ],
+            embedding_attr='embedding',
+            include_modules=['linear', 'senet', 'bilinear_standard', 'bilinear_senet', 'mlp']
         )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
-        input_linear = self.embedding(
-            x=x, features=self.linear_features, squeeze_dim=True
-        )
+        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=True)
         y_linear = self.linear(input_linear)
 
-        field_emb = self.embedding(
-            x=x, features=self.interaction_features, squeeze_dim=False
-        )
+        field_emb = self.embedding(x=x, features=self.interaction_features, squeeze_dim=False)
         senet_emb = self.senet(field_emb)
 
         bilinear_standard = self.bilinear_standard(field_emb).flatten(start_dim=1)

nextrec/models/ranking/fm.py

@@ -9,12 +9,7 @@ Reference:
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import (
-    EmbeddingLayer,
-    FM as FMInteraction,
-    LR,
-    PredictionLayer,
-)
+from nextrec.basic.layers import EmbeddingLayer, FM as FMInteraction, LR, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
 
@@ -26,24 +21,22 @@ class FM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature] | list = [],
-        sparse_features: list[SparseFeature] | list = [],
-        sequence_features: list[SequenceFeature] | list = [],
-        target: list[str] | list = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature] | list = [],
+                 sparse_features: list[SparseFeature] | list = [],
+                 sequence_features: list[SequenceFeature] | list = [],
+                 target: list[str] | list = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(FM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -56,13 +49,13 @@ class FM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) == 0:
             raise ValueError("FM requires at least one sparse or sequence feature.")
@@ -76,10 +69,15 @@ class FM(BaseModel):
 
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding", include_modules=["linear"]
+            embedding_attr='embedding',
+            include_modules=['linear']
         )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
         input_fm = self.embedding(x=x, features=self.fm_features, squeeze_dim=False)

nextrec/models/ranking/masknet.py

@@ -23,28 +23,26 @@ class MaskNet(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature] | list = [],
-        sparse_features: list[SparseFeature] | list = [],
-        sequence_features: list[SequenceFeature] | list = [],
-        num_blocks: int = 3,
-        mask_hidden_dim: int = 64,
-        block_dropout: float = 0.1,
-        mlp_params: dict = {},
-        target: list[str] | list = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature] | list = [],
+                 sparse_features: list[SparseFeature] | list = [],
+                 sequence_features: list[SequenceFeature] | list = [],
+                 num_blocks: int = 3,
+                 mask_hidden_dim: int = 64,
+                 block_dropout: float = 0.1,
+                 mlp_params: dict = {},
+                 target: list[str] | list = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(MaskNet, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -57,13 +55,13 @@ class MaskNet(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.mask_features = sparse_features + sequence_features
         if len(self.mask_features) == 0:
             raise ValueError("MaskNet requires at least one sparse/sequence feature.")
@@ -72,9 +70,7 @@ class MaskNet(BaseModel):
         self.num_fields = len(self.mask_features)
         self.embedding_dim = self.mask_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.mask_features):
-            raise ValueError(
-                "MaskNet expects identical embedding_dim across mask_features."
-            )
+            raise ValueError("MaskNet expects identical embedding_dim across mask_features.")
 
         self.num_blocks = max(1, num_blocks)
         self.field_dim = self.num_fields * self.embedding_dim
@@ -86,7 +82,7 @@ class MaskNet(BaseModel):
                 nn.Sequential(
                     nn.Linear(self.field_dim, mask_hidden_dim),
                     nn.ReLU(),
-                    nn.Linear(mask_hidden_dim, self.num_fields),
+                    nn.Linear(mask_hidden_dim, self.num_fields)
                 )
             )
 
@@ -95,11 +91,15 @@ class MaskNet(BaseModel):
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         self._register_regularization_weights(
-            embedding_attr="embedding",
-            include_modules=["linear", "mask_generators", "final_mlp"],
+            embedding_attr='embedding',
+            include_modules=['linear', 'mask_generators', 'final_mlp']
         )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
         field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)

nextrec/models/ranking/pnn.py

@@ -22,27 +22,25 @@ class PNN(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature] | list = [],
-        sparse_features: list[SparseFeature] | list = [],
-        sequence_features: list[SequenceFeature] | list = [],
-        mlp_params: dict = {},
-        product_type: str = "inner",
-        outer_product_dim: int | None = None,
-        target: list[str] | list = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature] | list = [],
+                 sparse_features: list[SparseFeature] | list = [],
+                 sequence_features: list[SequenceFeature] | list = [],
+                 mlp_params: dict = {},
+                 product_type: str = "inner",
+                 outer_product_dim: int | None = None,
+                 target: list[str] | list = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(PNN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -55,13 +53,13 @@ class PNN(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.field_features = sparse_features + sequence_features
         if len(self.field_features) < 2:
             raise ValueError("PNN requires at least two sparse/sequence features.")
@@ -70,9 +68,7 @@ class PNN(BaseModel):
         self.num_fields = len(self.field_features)
         self.embedding_dim = self.field_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
-            raise ValueError(
-                "All field features must share the same embedding_dim for PNN."
-            )
+            raise ValueError("All field features must share the same embedding_dim for PNN.")
 
         self.product_type = product_type.lower()
         if self.product_type not in {"inner", "outer"}:
@@ -91,14 +87,19 @@ class PNN(BaseModel):
         self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
-        modules = ["mlp"]
+        modules = ['mlp']
         if self.product_type == "outer":
-            modules.append("kernel")
+            modules.append('kernel')
         self._register_regularization_weights(
-            embedding_attr="embedding", include_modules=modules
+            embedding_attr='embedding',
+            include_modules=modules
         )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
         field_emb = self.embedding(x=x, features=self.field_features, squeeze_dim=False)

nextrec/models/ranking/widedeep.py

@@ -24,25 +24,23 @@ class WideDeep(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        mlp_params: dict,
-        target: list[str] = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature],
+                 sparse_features: list[SparseFeature],
+                 sequence_features: list[SequenceFeature],
+                 mlp_params: dict,
+                 target: list[str] = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -55,16 +53,16 @@ class WideDeep(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
        )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -74,33 +72,30 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
-        deep_emb_dim_total = sum(
-            [
-                f.embedding_dim
-                for f in self.deep_features
-                if not isinstance(f, DenseFeature)
-            ]
-        )
-        dense_input_dim = sum(
-            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
-        )
+        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding", include_modules=["linear", "mlp"]
+            embedding_attr='embedding',
+            include_modules=['linear', 'mlp']
         )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep) # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)

nextrec/models/ranking/xdeepfm.py

@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions 
+    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on 
     knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """
@@ -25,27 +25,25 @@ class xDeepFM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        mlp_params: dict,
-        cin_size: list[int] = [128, 128],
-        split_half: bool = True,
-        target: list[str] = [],
-        optimizer: str = "adam",
-        optimizer_params: dict = {},
-        loss: str | nn.Module | None = "bce",
-        device: str = "cpu",
-        model_id: str = "baseline",
-        embedding_l1_reg=1e-6,
-        dense_l1_reg=1e-5,
-        embedding_l2_reg=1e-5,
-        dense_l2_reg=1e-4,
-    ):
-
+
+    def __init__(self,
+                 dense_features: list[DenseFeature],
+                 sparse_features: list[SparseFeature],
+                 sequence_features: list[SequenceFeature],
+                 mlp_params: dict,
+                 cin_size: list[int] = [128, 128],
+                 split_half: bool = True,
+                 target: list[str] = [],
+                 optimizer: str = "adam",
+                 optimizer_params: dict = {},
+                 loss: str | nn.Module | None = "bce",
+                 device: str = 'cpu',
+                 model_id: str = "baseline",
+                 embedding_l1_reg=1e-6,
+                 dense_l1_reg=1e-5,
+                 embedding_l2_reg=1e-5,
+                 dense_l2_reg=1e-4):
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -58,16 +56,16 @@ class xDeepFM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id,
+            model_id=model_id
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -77,44 +75,39 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum(
-            [
-                f.embedding_dim
-                for f in self.deep_features
-                if not isinstance(f, DenseFeature)
-            ]
-        )
-        dense_input_dim = sum(
-            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
-        )
+        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
+            embedding_attr='embedding',
+            include_modules=['linear', 'cin', 'mlp']
        )
 
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss
+        )
 
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(
-            x=x, features=self.linear_features, squeeze_dim=False
-        )
-
+        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear) # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep) # [B, 1]