nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +250 -112
  7. nextrec/basic/loggers.py +63 -44
  8. nextrec/basic/metrics.py +270 -120
  9. nextrec/basic/model.py +1084 -402
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +492 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +273 -96
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +69 -46
  29. nextrec/models/multi_task/mmoe.py +91 -53
  30. nextrec/models/multi_task/ple.py +117 -58
  31. nextrec/models/multi_task/poso.py +163 -55
  32. nextrec/models/multi_task/share_bottom.py +63 -36
  33. nextrec/models/ranking/afm.py +80 -45
  34. nextrec/models/ranking/autoint.py +74 -57
  35. nextrec/models/ranking/dcn.py +110 -48
  36. nextrec/models/ranking/dcn_v2.py +265 -45
  37. nextrec/models/ranking/deepfm.py +39 -24
  38. nextrec/models/ranking/dien.py +335 -146
  39. nextrec/models/ranking/din.py +158 -92
  40. nextrec/models/ranking/fibinet.py +134 -52
  41. nextrec/models/ranking/fm.py +68 -26
  42. nextrec/models/ranking/masknet.py +95 -33
  43. nextrec/models/ranking/pnn.py +128 -58
  44. nextrec/models/ranking/widedeep.py +40 -28
  45. nextrec/models/ranking/xdeepfm.py +67 -40
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +496 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +33 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/model.py +22 -0
  55. nextrec/utils/optimizer.py +25 -9
  56. nextrec/utils/synthetic_data.py +283 -165
  57. nextrec/utils/tensor.py +24 -13
  58. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
  59. nextrec-0.4.3.dist-info/RECORD +69 -0
  60. nextrec-0.4.3.dist-info/entry_points.txt +2 -0
  61. nextrec-0.4.1.dist-info/RECORD +0 -66
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
  63. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/widedeep.py CHANGED
@@ -39,7 +39,6 @@ Wide & Deep uses a wide linear part (memorizing co-occurrences / handcrafted crosses) together with a deep network
 - Shared feature space, reducing engineering overhead
 """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
@@ -55,25 +54,33 @@ class WideDeep(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        if target is None:
+            target = []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
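A note on the signature change above: `target: list[str] = []` and `optimizer_params: dict = {}` were mutable default arguments, which Python evaluates once at function-definition time and then shares across every call. The 0.4.3 signature switches to `None` defaults normalized inside the body. A minimal standalone sketch of the pitfall and the fix (a toy function, not nextrec code):

    def add_item(item, bucket=[]):      # bug: one list shared by every call
        bucket.append(item)
        return bucket

    add_item("a")   # ["a"]
    add_item("b")   # ["a", "b"] -- state leaks in from the first call

    def add_item_fixed(item, bucket=None):
        if bucket is None:              # the idiom adopted in 0.4.3
            bucket = []                 # fresh list per call
        bucket.append(item)
        return bucket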
@@ -85,13 +92,11 @@ class WideDeep(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
         # Deep part: use all features
@@ -103,7 +108,7 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
         input_dim = self.embedding.input_dim
         # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
@@ -111,14 +116,21 @@ class WideDeep(BaseModel):
         self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)
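The hunk ends before the two branch outputs are combined. In the standard Wide & Deep formulation the wide and deep logits are simply added and passed through the task head; the following is a self-contained sketch of that pattern with stand-in modules, assuming the conventional logit sum rather than quoting nextrec's `BaseModel` internals:

    import torch
    import torch.nn as nn

    class TinyWideDeep(nn.Module):
        """Wide & Deep skeleton: linear logit + MLP logit, summed."""

        def __init__(self, wide_dim: int, deep_dim: int, hidden: int = 16):
            super().__init__()
            self.wide = nn.Linear(wide_dim, 1)             # memorization branch
            self.deep = nn.Sequential(                     # generalization branch
                nn.Linear(deep_dim, hidden), nn.ReLU(), nn.Linear(hidden, 1)
            )

        def forward(self, x_wide, x_deep):
            logit = self.wide(x_wide) + self.deep(x_deep)  # [B, 1]
            return torch.sigmoid(logit)                    # binary task head

    model = TinyWideDeep(wide_dim=8, deep_dim=12)
    probs = model(torch.randn(4, 8), torch.randn(4, 12))   # [4, 1], values in (0, 1)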
nextrec/models/ranking/xdeepfm.py CHANGED
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions 
-    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on 
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
+    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
     knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """
@@ -17,6 +17,7 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CIN(nn.Module):
     """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
 
@@ -28,7 +29,16 @@ class CIN(nn.Module):
         prev_dim, fc_input_dim = input_dim, 0
         for i in range(self.num_layers):
             cross_layer_size = cin_size[i]
-            self.conv_layers.append(torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
+            self.conv_layers.append(
+                torch.nn.Conv1d(
+                    input_dim * prev_dim,
+                    cross_layer_size,
+                    1,
+                    stride=1,
+                    dilation=1,
+                    bias=True,
+                )
+            )
             if self.split_half and i != self.num_layers - 1:
                 cross_layer_size //= 2
             prev_dim = cross_layer_size
@@ -49,7 +59,8 @@ class CIN(nn.Module):
             h = x
             xs.append(x)
         return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
-
+
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
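Only fragments of `CIN.forward` are visible in this diff (the 1x1 conv construction above and the sum-pool/fc tail), so the following is a sketch of the computation those fragments imply under the standard xDeepFM formulation: each layer forms elementwise outer products between the base field matrix and the previous hidden state along the embedding axis, then compresses the field-pair axis with a 1x1 `Conv1d`:

    import torch
    import torch.nn as nn

    class MiniCIN(nn.Module):
        """Sketch: [B, num_fields, emb_dim] field embeddings -> scalar logit."""

        def __init__(self, num_fields: int, layer_sizes=(8, 8)):
            super().__init__()
            self.convs = nn.ModuleList()
            prev = num_fields
            for size in layer_sizes:
                # 1x1 conv mixes the num_fields*prev pairwise products into `size` maps
                self.convs.append(nn.Conv1d(num_fields * prev, size, kernel_size=1))
                prev = size
            self.fc = nn.Linear(sum(layer_sizes), 1)

        def forward(self, x0):
            batch, m, dim = x0.shape
            h, pooled = x0, []
            for conv in self.convs:
                # outer product over fields, elementwise along the embedding axis:
                # [B, m, 1, D] * [B, 1, prev, D] -> [B, m*prev, D]
                z = (x0.unsqueeze(2) * h.unsqueeze(1)).reshape(batch, -1, dim)
                h = torch.relu(conv(z))        # [B, size, D]
                pooled.append(h.sum(dim=2))    # sum-pool over the embedding axis
            return self.fc(torch.cat(pooled, dim=1))  # [B, 1]

    logit = MiniCIN(num_fields=5)(torch.randn(4, 5, 16))  # -> shape [4, 1]

The `split_half` flag of the real constructor (halving each intermediate layer's maps before feeding the next, per the paper) is omitted here for brevity.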
@@ -58,27 +69,36 @@ class xDeepFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 cin_size: list[int] = [128, 128],
-                 split_half: bool = True,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        cin_size: list[int] | None = None,
+        split_half: bool = True,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        cin_size = cin_size or [128, 128]
+        if target is None:
+            target = []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
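The normalization block above mixes two idioms: `cin_size = cin_size or [128, 128]` and `if target is None: target = []`. They are not equivalent. `or` falls back on any falsy value, so an explicitly passed empty list is silently replaced, while the `is None` check preserves it. A short illustration:

    def norm_or(sizes=None):
        return sizes or [128, 128]                      # falls back on ANY falsy value

    def norm_is_none(sizes=None):
        return [128, 128] if sizes is None else sizes   # falls back only on None

    norm_or([])        # [128, 128] -- caller's explicit empty list is overridden
    norm_is_none([])   # []         -- caller's empty list survives

An empty `cin_size` would be a degenerate CIN anyway, so the shortcut is harmless here, but the distinction is worth knowing when reading the other `or` normalizations in this release.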
@@ -90,16 +110,14 @@ class xDeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -109,21 +127,28 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'cin', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )
 
         self.compile(
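The rewrapped `dense_input_dim` keeps the expression `getattr(f, "embedding_dim", 1) or 1`, which guards two distinct cases: a dense feature with no `embedding_dim` attribute at all, and one whose attribute is present but set to `None` (or `0`). Both fall back to a width of 1 per feature:

    class DenseStub:
        embedding_dim = None                         # attribute present but unset

    getattr(DenseStub(), "embedding_dim", 1) or 1    # -> 1 (None is falsy)
    getattr(object(), "embedding_dim", 1) or 1       # -> 1 (attribute missing)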
@@ -135,14 +160,16 @@ class xDeepFM(BaseModel):
 
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)
-
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=False
+        )
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear)  # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
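Note how `forward` reuses a single embedding lookup for two branches: `squeeze_dim=False` keeps the field axis that CIN needs, and the linear branch flattens it away with `flatten(start_dim=1)`. A shape sketch, with dimensions invented for illustration:

    import torch

    B, m, D = 4, 5, 16                        # batch, sparse/sequence fields, emb dim
    input_linear = torch.randn(B, m, D)       # squeeze_dim=False keeps the field axis

    flat = input_linear.flatten(start_dim=1)  # [B, m*D], fed to the LR linear part
    assert flat.shape == (B, m * D)
    # CIN consumes the unflattened [B, m, D]; the deep branch uses a separate
    # squeeze_dim=True lookup that arrives already flattened.

Downstream of this hunk (not shown), xDeepFM conventionally sums `y_linear`, `y_cin`, and `y_deep` into a single logit before the prediction layer.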
nextrec/utils/__init__.py CHANGED
@@ -10,63 +10,88 @@ This package provides various utility functions organized by category:
 - file_utils: File I/O operations
 - model_utils: Model-related utilities
 - feature_utils: Feature processing utilities
+- config_utils: Configuration loading and processing utilities
 
 Date: create on 13/11/2025
 Last update: 06/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+
 from . import optimizer, initializer, embedding
 from .optimizer import get_optimizer, get_scheduler
 from .initializer import get_initializer
 from .embedding import get_auto_embedding_dim
 from .device import resolve_device, get_device_info
 from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
-from .file import resolve_file_paths, read_table, load_dataframes, iter_file_chunks, default_output_dir
+from .file import (
+    resolve_file_paths,
+    read_table,
+    load_dataframes,
+    iter_file_chunks,
+    default_output_dir,
+    read_yaml,
+)
 from .model import merge_features, get_mlp_output_dim
 from .feature import normalize_to_list
-from .synthetic_data import generate_ranking_data, generate_distributed_ranking_data, generate_match_data, generate_multitask_data
+from .synthetic_data import (
+    generate_match_data,
+    generate_ranking_data,
+    generate_multitask_data,
+    generate_distributed_ranking_data,
+)
+from .config import (
+    resolve_path,
+    select_features,
+    register_processor_features,
+    build_feature_objects,
+    extract_feature_groups,
+    load_model_class,
+    build_model_instance,
+)
 
 __all__ = [
     # Optimizer & Scheduler
-    'get_optimizer',
-    'get_scheduler',
-
+    "get_optimizer",
+    "get_scheduler",
     # Initializer
-    'get_initializer',
-
+    "get_initializer",
     # Embedding
-    'get_auto_embedding_dim',
-
+    "get_auto_embedding_dim",
     # Device utilities
-    'resolve_device',
-    'get_device_info',
-
+    "resolve_device",
+    "get_device_info",
     # Tensor utilities
-    'to_tensor',
-    'stack_tensors',
-    'concat_tensors',
-    'pad_sequence_tensors',
-
+    "to_tensor",
+    "stack_tensors",
+    "concat_tensors",
+    "pad_sequence_tensors",
     # File utilities
-    'resolve_file_paths',
-    'read_table',
-    'load_dataframes',
-    'iter_file_chunks',
-    'default_output_dir',
-
+    "resolve_file_paths",
+    "read_table",
+    "read_yaml",
+    "load_dataframes",
+    "iter_file_chunks",
+    "default_output_dir",
     # Model utilities
-    'merge_features',
-    'get_mlp_output_dim',
-
+    "merge_features",
+    "get_mlp_output_dim",
     # Feature utilities
-    'normalize_to_list',
-
+    "normalize_to_list",
+    # Config utilities
+    "resolve_path",
+    "select_features",
+    "register_processor_features",
+    "build_feature_objects",
+    "extract_feature_groups",
+    "load_model_class",
+    "build_model_instance",
     # Synthetic data utilities
-    'generate_ranking_data',
-    'generate_distributed_ranking_data',
-
+    "generate_ranking_data",
+    "generate_match_data",
+    "generate_multitask_data",
+    "generate_distributed_ranking_data",
     # Module exports
-    'optimizer',
-    'initializer',
-    'embedding',
+    "optimizer",
+    "initializer",
+    "embedding",
 ]
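The widened `__all__` matters because it defines the package's star-import surface: a helper imported at package level but omitted from the list (as `generate_match_data` and `generate_multitask_data` were in 0.4.1, despite being imported) remains reachable as an attribute but does not survive `from nextrec.utils import *`. A toy module showing the distinction (not the nextrec source):

    # pkg/__init__.py
    def public():
        return "exported"

    def _internal():
        return "hidden"

    __all__ = ["public"]        # only this name is bound by star-imports

    # client code:
    #   from pkg import *       # binds `public` only
    #   import pkg
    #   pkg._internal()         # still reachable when named explicitly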