nextrec 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/callback.py +30 -15
- nextrec/basic/features.py +1 -0
- nextrec/basic/layers.py +6 -8
- nextrec/basic/loggers.py +14 -7
- nextrec/basic/metrics.py +6 -76
- nextrec/basic/model.py +316 -321
- nextrec/cli.py +185 -43
- nextrec/data/__init__.py +13 -16
- nextrec/data/batch_utils.py +3 -2
- nextrec/data/data_processing.py +10 -2
- nextrec/data/data_utils.py +9 -14
- nextrec/data/dataloader.py +31 -33
- nextrec/data/preprocessor.py +328 -255
- nextrec/loss/__init__.py +1 -5
- nextrec/loss/loss_utils.py +2 -8
- nextrec/models/generative/__init__.py +1 -8
- nextrec/models/generative/hstu.py +6 -4
- nextrec/models/multi_task/esmm.py +2 -2
- nextrec/models/multi_task/mmoe.py +2 -2
- nextrec/models/multi_task/ple.py +2 -2
- nextrec/models/multi_task/poso.py +2 -3
- nextrec/models/multi_task/share_bottom.py +2 -2
- nextrec/models/ranking/afm.py +2 -2
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +2 -2
- nextrec/models/ranking/dcn_v2.py +2 -2
- nextrec/models/ranking/deepfm.py +6 -7
- nextrec/models/ranking/dien.py +3 -3
- nextrec/models/ranking/din.py +3 -3
- nextrec/models/ranking/eulernet.py +365 -0
- nextrec/models/ranking/fibinet.py +5 -5
- nextrec/models/ranking/fm.py +3 -7
- nextrec/models/ranking/lr.py +120 -0
- nextrec/models/ranking/masknet.py +2 -2
- nextrec/models/ranking/pnn.py +2 -2
- nextrec/models/ranking/widedeep.py +2 -2
- nextrec/models/ranking/xdeepfm.py +2 -2
- nextrec/models/representation/__init__.py +9 -0
- nextrec/models/{generative → representation}/rqvae.py +9 -9
- nextrec/models/retrieval/__init__.py +0 -0
- nextrec/models/{match → retrieval}/dssm.py +8 -3
- nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
- nextrec/models/{match → retrieval}/mind.py +4 -3
- nextrec/models/{match → retrieval}/sdm.py +4 -3
- nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
- nextrec/utils/__init__.py +60 -46
- nextrec/utils/config.py +8 -7
- nextrec/utils/console.py +371 -0
- nextrec/utils/{synthetic_data.py → data.py} +102 -15
- nextrec/utils/feature.py +15 -0
- nextrec/utils/torch_utils.py +411 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/METADATA +6 -7
- nextrec-0.4.10.dist-info/RECORD +70 -0
- nextrec/utils/cli_utils.py +0 -58
- nextrec/utils/device.py +0 -78
- nextrec/utils/distributed.py +0 -141
- nextrec/utils/file.py +0 -92
- nextrec/utils/initializer.py +0 -79
- nextrec/utils/optimizer.py +0 -75
- nextrec/utils/tensor.py +0 -72
- nextrec-0.4.8.dist-info/RECORD +0 -71
- /nextrec/models/{match/__init__.py → ranking/ffm.py} +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/WHEEL +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/fm.py
CHANGED
|
@@ -40,14 +40,10 @@ FM 是一种通过分解二阶特征交互矩阵、以线性复杂度建模特
|
|
|
40
40
|
|
|
41
41
|
import torch.nn as nn
|
|
42
42
|
|
|
43
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
44
|
+
from nextrec.basic.layers import FM as FMInteraction
|
|
45
|
+
from nextrec.basic.layers import LR, EmbeddingLayer, PredictionLayer
|
|
43
46
|
from nextrec.basic.model import BaseModel
|
|
44
|
-
from nextrec.basic.layers import (
|
|
45
|
-
EmbeddingLayer,
|
|
46
|
-
FM as FMInteraction,
|
|
47
|
-
LR,
|
|
48
|
-
PredictionLayer,
|
|
49
|
-
)
|
|
50
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
51
47
|
|
|
52
48
|
|
|
53
49
|
class FM(BaseModel):
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Checkpoint: edit on 09/12/2025
|
|
4
|
+
Author: Yang Zhou, zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Hosmer D W, Lemeshow S, Sturdivant R X. Applied Logistic Regression.
|
|
7
|
+
|
|
8
|
+
Logistic Regression (LR) is a classic linear baseline for CTR/ranking tasks.
|
|
9
|
+
It maps each feature (dense, sparse, or sequence) into a numeric vector and
|
|
10
|
+
learns a single linear logit. Despite its simplicity, LR is strong for
|
|
11
|
+
high-dimensional sparse data and is commonly used as a baseline or a "wide"
|
|
12
|
+
component in hybrid models.
|
|
13
|
+
|
|
14
|
+
Pipeline:
|
|
15
|
+
(1) Embed sparse/sequence fields; project dense fields if configured
|
|
16
|
+
(2) Concatenate all feature vectors into a single linear input
|
|
17
|
+
(3) Apply a linear layer to produce logits
|
|
18
|
+
(4) Use the prediction layer to output task-specific probabilities
|
|
19
|
+
|
|
20
|
+
Key Advantages:
|
|
21
|
+
- Fast and easy to train
|
|
22
|
+
- Strong baseline for sparse, high-dimensional features
|
|
23
|
+
- Interpretable linear weights
|
|
24
|
+
|
|
25
|
+
LR 是 CTR/排序任务中最经典的线性基线模型。它将稠密、稀疏以及序列特征
|
|
26
|
+
映射为数值向量后做线性组合,输出 logit。虽然结构简单,但在稀疏高维场景
|
|
27
|
+
依然具有很强的基线效果,并常作为 Wide 端与深模型组合。
|
|
28
|
+
|
|
29
|
+
处理流程:
|
|
30
|
+
(1) 稀疏/序列特征做 embedding,稠密特征按需投影
|
|
31
|
+
(2) 拼接所有特征向量形成线性输入
|
|
32
|
+
(3) 线性层输出 logit
|
|
33
|
+
(4) 通过预测层输出任务概率
|
|
34
|
+
|
|
35
|
+
主要优点:
|
|
36
|
+
- 训练与推理速度快
|
|
37
|
+
- 稀疏高维特征下表现稳定
|
|
38
|
+
- 权重可解释性强
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import torch.nn as nn
|
|
42
|
+
|
|
43
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
44
|
+
from nextrec.basic.layers import EmbeddingLayer, LR as LinearLayer, PredictionLayer
|
|
45
|
+
from nextrec.basic.model import BaseModel
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class LR(BaseModel):
|
|
49
|
+
@property
|
|
50
|
+
def model_name(self):
|
|
51
|
+
return "LR"
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def default_task(self):
|
|
55
|
+
return "binary"
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
dense_features: list[DenseFeature] | None = None,
|
|
60
|
+
sparse_features: list[SparseFeature] | None = None,
|
|
61
|
+
sequence_features: list[SequenceFeature] | None = None,
|
|
62
|
+
target: list[str] | str | None = None,
|
|
63
|
+
task: str | list[str] | None = None,
|
|
64
|
+
optimizer: str = "adam",
|
|
65
|
+
optimizer_params: dict | None = None,
|
|
66
|
+
loss: str | nn.Module | None = "bce",
|
|
67
|
+
loss_params: dict | list[dict] | None = None,
|
|
68
|
+
device: str = "cpu",
|
|
69
|
+
embedding_l1_reg=1e-6,
|
|
70
|
+
dense_l1_reg=1e-5,
|
|
71
|
+
embedding_l2_reg=1e-5,
|
|
72
|
+
dense_l2_reg=1e-4,
|
|
73
|
+
**kwargs,
|
|
74
|
+
):
|
|
75
|
+
|
|
76
|
+
dense_features = dense_features or []
|
|
77
|
+
sparse_features = sparse_features or []
|
|
78
|
+
sequence_features = sequence_features or []
|
|
79
|
+
optimizer_params = optimizer_params or {}
|
|
80
|
+
if loss is None:
|
|
81
|
+
loss = "bce"
|
|
82
|
+
|
|
83
|
+
super(LR, self).__init__(
|
|
84
|
+
dense_features=dense_features,
|
|
85
|
+
sparse_features=sparse_features,
|
|
86
|
+
sequence_features=sequence_features,
|
|
87
|
+
target=target,
|
|
88
|
+
task=task or self.default_task,
|
|
89
|
+
device=device,
|
|
90
|
+
embedding_l1_reg=embedding_l1_reg,
|
|
91
|
+
dense_l1_reg=dense_l1_reg,
|
|
92
|
+
embedding_l2_reg=embedding_l2_reg,
|
|
93
|
+
dense_l2_reg=dense_l2_reg,
|
|
94
|
+
**kwargs,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
self.loss = loss
|
|
98
|
+
|
|
99
|
+
self.embedding = EmbeddingLayer(features=self.all_features)
|
|
100
|
+
linear_input_dim = self.embedding.input_dim
|
|
101
|
+
self.linear = LinearLayer(linear_input_dim)
|
|
102
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
103
|
+
|
|
104
|
+
self.register_regularization_weights(
|
|
105
|
+
embedding_attr="embedding", include_modules=["linear"]
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
self.compile(
|
|
109
|
+
optimizer=optimizer,
|
|
110
|
+
optimizer_params=optimizer_params,
|
|
111
|
+
loss=loss,
|
|
112
|
+
loss_params=loss_params,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
def forward(self, x):
|
|
116
|
+
input_linear = self.embedding(
|
|
117
|
+
x=x, features=self.all_features, squeeze_dim=True
|
|
118
|
+
)
|
|
119
|
+
y = self.linear(input_linear)
|
|
120
|
+
return self.prediction_layer(y)
|
|
@@ -57,9 +57,9 @@ import torch
|
|
|
57
57
|
import torch.nn as nn
|
|
58
58
|
import torch.nn.functional as F
|
|
59
59
|
|
|
60
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
61
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
60
62
|
from nextrec.basic.model import BaseModel
|
|
61
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
62
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class InstanceGuidedMask(nn.Module):
|
nextrec/models/ranking/pnn.py
CHANGED
|
@@ -37,9 +37,9 @@ PNN 是一种 CTR 预估模型,通过将线性信号与乘积信号结合,
|
|
|
37
37
|
import torch
|
|
38
38
|
import torch.nn as nn
|
|
39
39
|
|
|
40
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
41
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
40
42
|
from nextrec.basic.model import BaseModel
|
|
41
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
42
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class PNN(BaseModel):
|
|
@@ -41,9 +41,9 @@ Wide & Deep 同时使用宽线性部分(记忆共现/手工交叉)与深网
|
|
|
41
41
|
|
|
42
42
|
import torch.nn as nn
|
|
43
43
|
|
|
44
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
45
|
+
from nextrec.basic.layers import LR, MLP, EmbeddingLayer, PredictionLayer
|
|
44
46
|
from nextrec.basic.model import BaseModel
|
|
45
|
-
from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
|
|
46
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
class WideDeep(BaseModel):
|
|
@@ -55,9 +55,9 @@ import torch
|
|
|
55
55
|
import torch.nn as nn
|
|
56
56
|
import torch.nn.functional as F
|
|
57
57
|
|
|
58
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
59
|
+
from nextrec.basic.layers import LR, MLP, EmbeddingLayer, PredictionLayer
|
|
58
60
|
from nextrec.basic.model import BaseModel
|
|
59
|
-
from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
|
|
60
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
class CIN(nn.Module):
|
|
@@ -46,21 +46,21 @@ RQ-VAE 通过残差量化学习分层离散表示,将连续嵌入(如物品/
|
|
|
46
46
|
|
|
47
47
|
from __future__ import annotations
|
|
48
48
|
|
|
49
|
+
import logging
|
|
49
50
|
import math
|
|
51
|
+
from typing import cast
|
|
52
|
+
|
|
50
53
|
import torch
|
|
51
54
|
import torch.nn as nn
|
|
52
55
|
import torch.nn.functional as F
|
|
53
56
|
from sklearn.cluster import KMeans
|
|
54
|
-
from typing import cast
|
|
55
|
-
import logging
|
|
56
|
-
import tqdm
|
|
57
|
-
|
|
58
57
|
from torch.utils.data import DataLoader
|
|
59
58
|
|
|
60
59
|
from nextrec.basic.features import DenseFeature
|
|
60
|
+
from nextrec.basic.loggers import colorize, setup_logger
|
|
61
61
|
from nextrec.basic.model import BaseModel
|
|
62
62
|
from nextrec.data.batch_utils import batch_to_dict
|
|
63
|
-
from nextrec.
|
|
63
|
+
from nextrec.utils.console import progress
|
|
64
64
|
|
|
65
65
|
|
|
66
66
|
def kmeans(
|
|
@@ -729,9 +729,9 @@ class RQVAE(BaseModel):
|
|
|
729
729
|
else:
|
|
730
730
|
tqdm_disable = not self.is_main_process
|
|
731
731
|
batch_iter = enumerate(
|
|
732
|
-
|
|
732
|
+
progress(
|
|
733
733
|
train_loader,
|
|
734
|
-
|
|
734
|
+
description=f"Epoch {epoch + 1}/{epochs}",
|
|
735
735
|
total=steps_per_epoch,
|
|
736
736
|
disable=tqdm_disable,
|
|
737
737
|
)
|
|
@@ -777,9 +777,9 @@ class RQVAE(BaseModel):
|
|
|
777
777
|
logging.info(colorize(train_log))
|
|
778
778
|
|
|
779
779
|
if self.is_main_process:
|
|
780
|
-
logging.info("
|
|
780
|
+
logging.info("")
|
|
781
781
|
logging.info(colorize("Training finished.", bold=True))
|
|
782
|
-
logging.info("
|
|
782
|
+
logging.info("")
|
|
783
783
|
return self
|
|
784
784
|
|
|
785
785
|
def predict(
|
|
File without changes
|
|
@@ -7,13 +7,14 @@ Reference:
|
|
|
7
7
|
//Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
10
12
|
import torch
|
|
11
13
|
import torch.nn as nn
|
|
12
|
-
from typing import Literal
|
|
13
14
|
|
|
14
|
-
from nextrec.basic.
|
|
15
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
15
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
16
16
|
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
17
|
+
from nextrec.basic.model import BaseMatchModel
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class DSSM(BaseMatchModel):
|
|
@@ -28,6 +29,10 @@ class DSSM(BaseMatchModel):
|
|
|
28
29
|
def model_name(self) -> str:
|
|
29
30
|
return "DSSM"
|
|
30
31
|
|
|
32
|
+
@property
|
|
33
|
+
def support_training_modes(self) -> list[str]:
|
|
34
|
+
return ["pointwise", "pairwise", "listwise"]
|
|
35
|
+
|
|
31
36
|
def __init__(
|
|
32
37
|
self,
|
|
33
38
|
user_dense_features: list[DenseFeature] | None = None,
|
|
@@ -6,13 +6,14 @@ Reference:
|
|
|
6
6
|
DSSM v2 - DSSM with pairwise training using BPR loss
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
9
11
|
import torch
|
|
10
12
|
import torch.nn as nn
|
|
11
|
-
from typing import Literal
|
|
12
13
|
|
|
13
|
-
from nextrec.basic.
|
|
14
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
14
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
15
15
|
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
16
|
+
from nextrec.basic.model import BaseMatchModel
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class DSSM_v2(BaseMatchModel):
|
|
@@ -24,6 +25,10 @@ class DSSM_v2(BaseMatchModel):
|
|
|
24
25
|
def model_name(self) -> str:
|
|
25
26
|
return "DSSM_v2"
|
|
26
27
|
|
|
28
|
+
@property
|
|
29
|
+
def support_training_modes(self) -> list[str]:
|
|
30
|
+
return ["pointwise", "pairwise", "listwise"]
|
|
31
|
+
|
|
27
32
|
def __init__(
|
|
28
33
|
self,
|
|
29
34
|
user_dense_features: list[DenseFeature] | None = None,
|
|
@@ -7,14 +7,15 @@ Reference:
|
|
|
7
7
|
//Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
10
12
|
import torch
|
|
11
13
|
import torch.nn as nn
|
|
12
14
|
import torch.nn.functional as F
|
|
13
|
-
from typing import Literal
|
|
14
15
|
|
|
15
|
-
from nextrec.basic.
|
|
16
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
16
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
17
17
|
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
18
|
+
from nextrec.basic.model import BaseMatchModel
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class MultiInterestSA(nn.Module):
|
|
@@ -7,14 +7,15 @@ Reference:
|
|
|
7
7
|
//IJCAI. 2018: 3926-3932.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
10
12
|
import torch
|
|
11
13
|
import torch.nn as nn
|
|
12
14
|
import torch.nn.functional as F
|
|
13
|
-
from typing import Literal
|
|
14
15
|
|
|
15
|
-
from nextrec.basic.
|
|
16
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
16
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
17
17
|
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
18
|
+
from nextrec.basic.model import BaseMatchModel
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class SDM(BaseMatchModel):
|
|
@@ -7,13 +7,14 @@ Reference:
|
|
|
7
7
|
//Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
from typing import Literal
|
|
11
|
+
|
|
10
12
|
import torch
|
|
11
13
|
import torch.nn as nn
|
|
12
|
-
from typing import Literal
|
|
13
14
|
|
|
14
|
-
from nextrec.basic.
|
|
15
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
15
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
16
16
|
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
17
|
+
from nextrec.basic.model import BaseMatchModel
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class YoutubeDNN(BaseMatchModel):
|
|
@@ -28,6 +29,10 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
28
29
|
def model_name(self) -> str:
|
|
29
30
|
return "YouTubeDNN"
|
|
30
31
|
|
|
32
|
+
@property
|
|
33
|
+
def support_training_modes(self) -> list[str]:
|
|
34
|
+
return ["pointwise", "pairwise", "listwise"]
|
|
35
|
+
|
|
31
36
|
def __init__(
|
|
32
37
|
self,
|
|
33
38
|
user_dense_features: list[DenseFeature] | None = None,
|
nextrec/utils/__init__.py
CHANGED
|
@@ -1,71 +1,84 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Utilities package for NextRec
|
|
3
3
|
|
|
4
|
-
This package provides various utility functions organized by category:
|
|
5
|
-
- optimizer: Optimizer and scheduler utilities
|
|
6
|
-
- initializer: Weight initialization utilities
|
|
7
|
-
- embedding: Embedding dimension calculation
|
|
8
|
-
- device_utils: Device management and selection
|
|
9
|
-
- tensor_utils: Tensor operations and conversions
|
|
10
|
-
- file_utils: File I/O operations
|
|
11
|
-
- model_utils: Model-related utilities
|
|
12
|
-
- feature_utils: Feature processing utilities
|
|
13
|
-
- config_utils: Configuration loading and processing utilities
|
|
14
|
-
|
|
15
4
|
Date: create on 13/11/2025
|
|
16
|
-
Last update:
|
|
5
|
+
Last update: 19/12/2025
|
|
17
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
18
7
|
"""
|
|
19
8
|
|
|
20
|
-
from . import
|
|
21
|
-
from .optimizer import get_optimizer, get_scheduler
|
|
22
|
-
from .initializer import get_initializer
|
|
23
|
-
from .embedding import get_auto_embedding_dim
|
|
24
|
-
from .device import resolve_device, get_device_info
|
|
25
|
-
from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
|
|
26
|
-
from .file import (
|
|
27
|
-
resolve_file_paths,
|
|
28
|
-
read_table,
|
|
29
|
-
load_dataframes,
|
|
30
|
-
iter_file_chunks,
|
|
31
|
-
default_output_dir,
|
|
32
|
-
read_yaml,
|
|
33
|
-
)
|
|
34
|
-
from .model import merge_features, get_mlp_output_dim
|
|
35
|
-
from .feature import normalize_to_list
|
|
36
|
-
from .synthetic_data import (
|
|
37
|
-
generate_match_data,
|
|
38
|
-
generate_ranking_data,
|
|
39
|
-
generate_multitask_data,
|
|
40
|
-
generate_distributed_ranking_data,
|
|
41
|
-
)
|
|
9
|
+
from . import console, data, embedding, torch_utils
|
|
42
10
|
from .config import (
|
|
43
|
-
resolve_path,
|
|
44
|
-
select_features,
|
|
45
|
-
register_processor_features,
|
|
46
11
|
build_feature_objects,
|
|
12
|
+
build_model_instance,
|
|
47
13
|
extract_feature_groups,
|
|
48
14
|
load_model_class,
|
|
49
|
-
|
|
15
|
+
register_processor_features,
|
|
16
|
+
resolve_path,
|
|
17
|
+
select_features,
|
|
18
|
+
)
|
|
19
|
+
from .console import (
|
|
20
|
+
display_metrics_table,
|
|
21
|
+
get_nextrec_version,
|
|
22
|
+
log_startup_info,
|
|
23
|
+
progress,
|
|
24
|
+
)
|
|
25
|
+
from .data import (
|
|
26
|
+
default_output_dir,
|
|
27
|
+
generate_distributed_ranking_data,
|
|
28
|
+
generate_match_data,
|
|
29
|
+
generate_multitask_data,
|
|
30
|
+
generate_ranking_data,
|
|
31
|
+
iter_file_chunks,
|
|
32
|
+
load_dataframes,
|
|
33
|
+
read_table,
|
|
34
|
+
read_yaml,
|
|
35
|
+
resolve_file_paths,
|
|
36
|
+
)
|
|
37
|
+
from .embedding import get_auto_embedding_dim
|
|
38
|
+
from .feature import normalize_to_list
|
|
39
|
+
from .model import get_mlp_output_dim, merge_features
|
|
40
|
+
from .torch_utils import (
|
|
41
|
+
add_distributed_sampler,
|
|
42
|
+
concat_tensors,
|
|
43
|
+
configure_device,
|
|
44
|
+
gather_numpy,
|
|
45
|
+
get_device_info,
|
|
46
|
+
get_initializer,
|
|
47
|
+
get_optimizer,
|
|
48
|
+
get_scheduler,
|
|
49
|
+
init_process_group,
|
|
50
|
+
pad_sequence_tensors,
|
|
51
|
+
resolve_device,
|
|
52
|
+
stack_tensors,
|
|
53
|
+
to_tensor,
|
|
50
54
|
)
|
|
51
55
|
|
|
52
56
|
__all__ = [
|
|
53
|
-
#
|
|
57
|
+
# Console utilities
|
|
58
|
+
"get_nextrec_version",
|
|
59
|
+
"log_startup_info",
|
|
60
|
+
"progress",
|
|
61
|
+
"display_metrics_table",
|
|
62
|
+
# Optimizer & Scheduler (torch utils)
|
|
54
63
|
"get_optimizer",
|
|
55
64
|
"get_scheduler",
|
|
56
|
-
# Initializer
|
|
65
|
+
# Initializer (torch utils)
|
|
57
66
|
"get_initializer",
|
|
58
|
-
# Embedding
|
|
67
|
+
# Embedding utilities
|
|
59
68
|
"get_auto_embedding_dim",
|
|
60
|
-
# Device utilities
|
|
69
|
+
# Device utilities (torch utils)
|
|
61
70
|
"resolve_device",
|
|
62
71
|
"get_device_info",
|
|
72
|
+
"configure_device",
|
|
73
|
+
"init_process_group",
|
|
74
|
+
"gather_numpy",
|
|
75
|
+
"add_distributed_sampler",
|
|
63
76
|
# Tensor utilities
|
|
64
77
|
"to_tensor",
|
|
65
78
|
"stack_tensors",
|
|
66
79
|
"concat_tensors",
|
|
67
80
|
"pad_sequence_tensors",
|
|
68
|
-
#
|
|
81
|
+
# Data utilities
|
|
69
82
|
"resolve_file_paths",
|
|
70
83
|
"read_table",
|
|
71
84
|
"read_yaml",
|
|
@@ -79,10 +92,10 @@ __all__ = [
|
|
|
79
92
|
"normalize_to_list",
|
|
80
93
|
# Config utilities
|
|
81
94
|
"resolve_path",
|
|
82
|
-
"select_features",
|
|
83
95
|
"register_processor_features",
|
|
84
96
|
"build_feature_objects",
|
|
85
97
|
"extract_feature_groups",
|
|
98
|
+
"select_features",
|
|
86
99
|
"load_model_class",
|
|
87
100
|
"build_model_instance",
|
|
88
101
|
# Synthetic data utilities
|
|
@@ -91,7 +104,8 @@ __all__ = [
|
|
|
91
104
|
"generate_multitask_data",
|
|
92
105
|
"generate_distributed_ranking_data",
|
|
93
106
|
# Module exports
|
|
94
|
-
"
|
|
95
|
-
"
|
|
107
|
+
"console",
|
|
108
|
+
"data",
|
|
96
109
|
"embedding",
|
|
110
|
+
"torch_utils",
|
|
97
111
|
]
|
nextrec/utils/config.py
CHANGED
|
@@ -4,7 +4,8 @@ Configuration utilities for NextRec
|
|
|
4
4
|
This module provides utilities for loading and processing configuration files,
|
|
5
5
|
including feature configuration, model configuration, and training configuration.
|
|
6
6
|
|
|
7
|
-
Date: create on
|
|
7
|
+
Date: create on 27/10/2025
|
|
8
|
+
Checkpoint: edit on 19/12/2025
|
|
8
9
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
9
10
|
"""
|
|
10
11
|
|
|
@@ -23,7 +24,7 @@ import torch
|
|
|
23
24
|
from nextrec.utils.feature import normalize_to_list
|
|
24
25
|
|
|
25
26
|
if TYPE_CHECKING:
|
|
26
|
-
from nextrec.basic.features import DenseFeature,
|
|
27
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
27
28
|
from nextrec.data.preprocessor import DataProcessor
|
|
28
29
|
|
|
29
30
|
|
|
@@ -52,7 +53,7 @@ def select_features(
|
|
|
52
53
|
names = [name for name in cfg.keys() if name in columns]
|
|
53
54
|
missing = [name for name in cfg.keys() if name not in columns]
|
|
54
55
|
if missing:
|
|
55
|
-
print(f"[
|
|
56
|
+
print(f"[Feature Config] skipped missing {group} columns: {missing}")
|
|
56
57
|
return names
|
|
57
58
|
|
|
58
59
|
dense_names = pick("dense")
|
|
@@ -129,7 +130,7 @@ def build_feature_objects(
|
|
|
129
130
|
sparse_names: List of sparse feature names
|
|
130
131
|
sequence_names: List of sequence feature names
|
|
131
132
|
"""
|
|
132
|
-
from nextrec.basic.features import DenseFeature,
|
|
133
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
133
134
|
|
|
134
135
|
dense_cfg = feature_cfg.get("dense", {}) or {}
|
|
135
136
|
sparse_cfg = feature_cfg.get("sparse", {}) or {}
|
|
@@ -236,7 +237,7 @@ def extract_feature_groups(
|
|
|
236
237
|
|
|
237
238
|
if missing_defined:
|
|
238
239
|
print(
|
|
239
|
-
f"[
|
|
240
|
+
f"[Feature Config] feature_groups.{group_name} contains features not defined in dense/sparse/sequence: {missing_defined}"
|
|
240
241
|
)
|
|
241
242
|
|
|
242
243
|
for n in name_list:
|
|
@@ -249,7 +250,7 @@ def extract_feature_groups(
|
|
|
249
250
|
|
|
250
251
|
if missing_cols:
|
|
251
252
|
print(
|
|
252
|
-
f"[
|
|
253
|
+
f"[Feature Config] feature_groups.{group_name} missing data columns: {missing_cols}"
|
|
253
254
|
)
|
|
254
255
|
|
|
255
256
|
resolved[group_name] = filtered
|
|
@@ -442,7 +443,7 @@ def build_model_instance(
|
|
|
442
443
|
|
|
443
444
|
if group_key not in feature_groups:
|
|
444
445
|
print(
|
|
445
|
-
f"[
|
|
446
|
+
f"[Feature Config] feature_bindings refers to unknown group '{group_key}', skipped"
|
|
446
447
|
)
|
|
447
448
|
continue
|
|
448
449
|
|