nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +220 -106
  7. nextrec/basic/loggers.py +62 -43
  8. nextrec/basic/metrics.py +268 -119
  9. nextrec/basic/model.py +1082 -400
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +498 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +272 -95
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +53 -37
  29. nextrec/models/multi_task/mmoe.py +64 -45
  30. nextrec/models/multi_task/ple.py +101 -48
  31. nextrec/models/multi_task/poso.py +113 -36
  32. nextrec/models/multi_task/share_bottom.py +48 -35
  33. nextrec/models/ranking/afm.py +72 -37
  34. nextrec/models/ranking/autoint.py +72 -55
  35. nextrec/models/ranking/dcn.py +55 -35
  36. nextrec/models/ranking/dcn_v2.py +64 -23
  37. nextrec/models/ranking/deepfm.py +32 -22
  38. nextrec/models/ranking/dien.py +155 -99
  39. nextrec/models/ranking/din.py +85 -57
  40. nextrec/models/ranking/fibinet.py +52 -32
  41. nextrec/models/ranking/fm.py +29 -23
  42. nextrec/models/ranking/masknet.py +91 -29
  43. nextrec/models/ranking/pnn.py +31 -28
  44. nextrec/models/ranking/widedeep.py +34 -26
  45. nextrec/models/ranking/xdeepfm.py +60 -38
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +490 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +32 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/optimizer.py +25 -9
  55. nextrec/utils/synthetic_data.py +283 -165
  56. nextrec/utils/tensor.py +24 -13
  57. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
  58. nextrec-0.4.2.dist-info/RECORD +69 -0
  59. nextrec-0.4.2.dist-info/entry_points.txt +2 -0
  60. nextrec-0.4.1.dist-info/RECORD +0 -66
  61. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/pnn.py CHANGED
@@ -22,27 +22,29 @@ class PNN(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 mlp_params: dict = {},
-                 product_type: str = "inner",
-                 outer_product_dim: int | None = None,
-                 target: list[str] | list = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        product_type: str = "inner",
+        outer_product_dim: int | None = None,
+        target: list[str] | list = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(PNN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -54,13 +56,13 @@ class PNN(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.field_features = sparse_features + sequence_features
         if len(self.field_features) < 2:
             raise ValueError("PNN requires at least two sparse/sequence features.")
@@ -69,7 +71,9 @@ class PNN(BaseModel):
         self.num_fields = len(self.field_features)
         self.embedding_dim = self.field_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
-            raise ValueError("All field features must share the same embedding_dim for PNN.")
+            raise ValueError(
+                "All field features must share the same embedding_dim for PNN."
+            )
 
         self.product_type = product_type.lower()
         if self.product_type not in {"inner", "outer"}:
@@ -88,12 +92,11 @@ class PNN(BaseModel):
         self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
-        modules = ['mlp']
+        modules = ["mlp"]
         if self.product_type == "outer":
-            modules.append('kernel')
+            modules.append("kernel")
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=modules
+            embedding_attr="embedding", include_modules=modules
         )
 
         self.compile(
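For orientation, a minimal sketch of what PNN's `inner` product type computes (illustrative code, not from the package): every unordered pair of field embeddings is scored with a dot product, so the `product_dim` added to `linear_dim` above is F*(F-1)/2 for F fields.

```python
import torch

def inner_products(field_emb: torch.Tensor) -> torch.Tensor:
    # field_emb: [B, F, D] field embeddings -> [B, F*(F-1)/2] pair scores.
    num_fields = field_emb.shape[1]
    rows, cols = torch.triu_indices(num_fields, num_fields, offset=1)
    return (field_emb[:, rows] * field_emb[:, cols]).sum(dim=-1)

emb = torch.randn(4, 3, 8)        # batch 4, 3 fields, embedding_dim 8
print(inner_products(emb).shape)  # torch.Size([4, 3]) -- 3 field pairs
```

The `outer` mode typically replaces the dot product with a kernel-weighted outer product, which is consistent with the extra `kernel` module registered for regularization above.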
nextrec/models/ranking/widedeep.py CHANGED
@@ -39,7 +39,6 @@ Wide & Deep uses a wide linear part (memorizing co-occurrence / hand-crafted crosses) together with a deep network
 - shared feature space, reduced engineering overhead
 """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
@@ -55,25 +54,27 @@ class WideDeep(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -85,13 +86,13 @@ class WideDeep(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
         # Deep part: use all features
@@ -103,7 +104,7 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
         input_dim = self.embedding.input_dim
         # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
@@ -111,14 +112,21 @@ class WideDeep(BaseModel):
         self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)
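The hunk stops before the final combination, but in the standard Wide & Deep formulation the two heads are trained jointly by summing their logits into a single output activation. A hedged sketch of that last step (names illustrative, not the package's code):

```python
import torch

def widedeep_head(y_wide: torch.Tensor, y_deep: torch.Tensor) -> torch.Tensor:
    # Joint Wide & Deep binary head: add the two [B, 1] logits, then squash.
    return torch.sigmoid(y_wide + y_deep)

print(widedeep_head(torch.zeros(2, 1), torch.zeros(2, 1)))  # 0.5 for each row
```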
nextrec/models/ranking/xdeepfm.py CHANGED
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-        for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
+        for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
         knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """
@@ -17,6 +17,7 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CIN(nn.Module):
     """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
 
@@ -28,7 +29,16 @@ class CIN(nn.Module):
         prev_dim, fc_input_dim = input_dim, 0
         for i in range(self.num_layers):
             cross_layer_size = cin_size[i]
-            self.conv_layers.append(torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
+            self.conv_layers.append(
+                torch.nn.Conv1d(
+                    input_dim * prev_dim,
+                    cross_layer_size,
+                    1,
+                    stride=1,
+                    dilation=1,
+                    bias=True,
+                )
+            )
             if self.split_half and i != self.num_layers - 1:
                 cross_layer_size //= 2
             prev_dim = cross_layer_size
@@ -49,7 +59,8 @@ class CIN(nn.Module):
             h = x
             xs.append(x)
         return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
-
+
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
@@ -58,27 +69,29 @@ class xDeepFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 cin_size: list[int] = [128, 128],
-                 split_half: bool = True,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        cin_size: list[int] = [128, 128],
+        split_half: bool = True,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -90,16 +103,16 @@ class xDeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -109,21 +122,28 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'cin', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )
 
         self.compile(
@@ -135,14 +155,16 @@ class xDeepFM(BaseModel):
 
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)
-
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=False
+        )
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear)  # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
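As background for the `CIN` changes above, a sketch of one layer of the published xDeepFM interaction (illustrative, not the package's exact code): the outer product of the previous layer's feature maps with the original field embeddings is taken along the embedding axis, and the resulting field-pair channels are compressed by a 1x1 `Conv1d` — which is why the diff constructs `torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, ...)`.

```python
import torch

B, F0, H_prev, D = 2, 4, 5, 8               # batch, fields, prev maps, emb dim
x0 = torch.randn(B, F0, D)                  # original field embeddings
xk = torch.randn(B, H_prev, D)              # previous CIN layer output
conv = torch.nn.Conv1d(F0 * H_prev, 6, 1)   # compress F0*H_prev channels to 6

# Elementwise outer product per embedding position, flattened into channels.
z = torch.einsum("bfd,bhd->bfhd", x0, xk).reshape(B, F0 * H_prev, D)
x_next = conv(z)                            # [B, 6, D]: next layer's feature maps
print(x_next.shape)
```

With `split_half=True`, half of each intermediate layer's feature maps are routed to the final sum-pooled `fc` while the other half feed the next layer, which matches the `cross_layer_size //= 2` bookkeeping in the constructor.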
nextrec/utils/__init__.py CHANGED
@@ -10,63 +10,88 @@ This package provides various utility functions organized by category:
 - file_utils: File I/O operations
 - model_utils: Model-related utilities
 - feature_utils: Feature processing utilities
+- config_utils: Configuration loading and processing utilities
 
 Date: create on 13/11/2025
 Last update: 06/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+
 from . import optimizer, initializer, embedding
 from .optimizer import get_optimizer, get_scheduler
 from .initializer import get_initializer
 from .embedding import get_auto_embedding_dim
 from .device import resolve_device, get_device_info
 from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
-from .file import resolve_file_paths, read_table, load_dataframes, iter_file_chunks, default_output_dir
+from .file import (
+    resolve_file_paths,
+    read_table,
+    load_dataframes,
+    iter_file_chunks,
+    default_output_dir,
+    read_yaml,
+)
 from .model import merge_features, get_mlp_output_dim
 from .feature import normalize_to_list
-from .synthetic_data import generate_ranking_data, generate_distributed_ranking_data, generate_match_data, generate_multitask_data
+from .synthetic_data import (
+    generate_match_data,
+    generate_ranking_data,
+    generate_multitask_data,
+    generate_distributed_ranking_data,
+)
+from .config import (
+    resolve_path,
+    select_features,
+    register_processor_features,
+    build_feature_objects,
+    extract_feature_groups,
+    load_model_class,
+    build_model_instance,
+)
 
 __all__ = [
     # Optimizer & Scheduler
-    'get_optimizer',
-    'get_scheduler',
-
+    "get_optimizer",
+    "get_scheduler",
     # Initializer
-    'get_initializer',
-
+    "get_initializer",
     # Embedding
-    'get_auto_embedding_dim',
-
+    "get_auto_embedding_dim",
     # Device utilities
-    'resolve_device',
-    'get_device_info',
-
+    "resolve_device",
+    "get_device_info",
     # Tensor utilities
-    'to_tensor',
-    'stack_tensors',
-    'concat_tensors',
-    'pad_sequence_tensors',
-
+    "to_tensor",
+    "stack_tensors",
+    "concat_tensors",
+    "pad_sequence_tensors",
     # File utilities
-    'resolve_file_paths',
-    'read_table',
-    'load_dataframes',
-    'iter_file_chunks',
-    'default_output_dir',
-
+    "resolve_file_paths",
+    "read_table",
+    "read_yaml",
+    "load_dataframes",
+    "iter_file_chunks",
+    "default_output_dir",
     # Model utilities
-    'merge_features',
-    'get_mlp_output_dim',
-
+    "merge_features",
+    "get_mlp_output_dim",
     # Feature utilities
-    'normalize_to_list',
-
+    "normalize_to_list",
+    # Config utilities
+    "resolve_path",
+    "select_features",
+    "register_processor_features",
+    "build_feature_objects",
+    "extract_feature_groups",
+    "load_model_class",
+    "build_model_instance",
     # Synthetic data utilities
-    'generate_ranking_data',
-    'generate_distributed_ranking_data',
-
+    "generate_ranking_data",
+    "generate_match_data",
+    "generate_multitask_data",
+    "generate_distributed_ranking_data",
     # Module exports
-    'optimizer',
-    'initializer',
-    'embedding',
+    "optimizer",
+    "initializer",
+    "embedding",
 ]
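Net effect of this hunk: 0.4.2 promotes the new config helpers and `read_yaml` to the package root's public API, so (assuming nextrec 0.4.2 is installed) they are importable directly:

```python
from nextrec.utils import read_yaml, build_feature_objects, build_model_instance
```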