nextrec 0.4.8__py3-none-any.whl → 0.4.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/callback.py +30 -15
- nextrec/basic/features.py +1 -0
- nextrec/basic/layers.py +6 -8
- nextrec/basic/loggers.py +14 -7
- nextrec/basic/metrics.py +6 -76
- nextrec/basic/model.py +316 -321
- nextrec/cli.py +185 -43
- nextrec/data/__init__.py +13 -16
- nextrec/data/batch_utils.py +3 -2
- nextrec/data/data_processing.py +10 -2
- nextrec/data/data_utils.py +9 -14
- nextrec/data/dataloader.py +31 -33
- nextrec/data/preprocessor.py +328 -255
- nextrec/loss/__init__.py +1 -5
- nextrec/loss/loss_utils.py +2 -8
- nextrec/models/generative/__init__.py +1 -8
- nextrec/models/generative/hstu.py +6 -4
- nextrec/models/multi_task/esmm.py +2 -2
- nextrec/models/multi_task/mmoe.py +2 -2
- nextrec/models/multi_task/ple.py +2 -2
- nextrec/models/multi_task/poso.py +2 -3
- nextrec/models/multi_task/share_bottom.py +2 -2
- nextrec/models/ranking/afm.py +2 -2
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +2 -2
- nextrec/models/ranking/dcn_v2.py +2 -2
- nextrec/models/ranking/deepfm.py +6 -7
- nextrec/models/ranking/dien.py +3 -3
- nextrec/models/ranking/din.py +3 -3
- nextrec/models/ranking/eulernet.py +365 -0
- nextrec/models/ranking/fibinet.py +5 -5
- nextrec/models/ranking/fm.py +3 -7
- nextrec/models/ranking/lr.py +120 -0
- nextrec/models/ranking/masknet.py +2 -2
- nextrec/models/ranking/pnn.py +2 -2
- nextrec/models/ranking/widedeep.py +2 -2
- nextrec/models/ranking/xdeepfm.py +2 -2
- nextrec/models/representation/__init__.py +9 -0
- nextrec/models/{generative → representation}/rqvae.py +9 -9
- nextrec/models/retrieval/__init__.py +0 -0
- nextrec/models/{match → retrieval}/dssm.py +8 -3
- nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
- nextrec/models/{match → retrieval}/mind.py +4 -3
- nextrec/models/{match → retrieval}/sdm.py +4 -3
- nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
- nextrec/utils/__init__.py +60 -46
- nextrec/utils/config.py +8 -7
- nextrec/utils/console.py +371 -0
- nextrec/utils/{synthetic_data.py → data.py} +102 -15
- nextrec/utils/feature.py +15 -0
- nextrec/utils/torch_utils.py +411 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/METADATA +6 -7
- nextrec-0.4.10.dist-info/RECORD +70 -0
- nextrec/utils/cli_utils.py +0 -58
- nextrec/utils/device.py +0 -78
- nextrec/utils/distributed.py +0 -141
- nextrec/utils/file.py +0 -92
- nextrec/utils/initializer.py +0 -79
- nextrec/utils/optimizer.py +0 -75
- nextrec/utils/tensor.py +0 -72
- nextrec-0.4.8.dist-info/RECORD +0 -71
- /nextrec/models/{match/__init__.py → ranking/ffm.py} +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/WHEEL +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.10.dist-info}/licenses/LICENSE +0 -0
nextrec/loss/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ from nextrec.loss.listwise import (
|
|
|
5
5
|
ListNetLoss,
|
|
6
6
|
SampledSoftmaxLoss,
|
|
7
7
|
)
|
|
8
|
+
from nextrec.loss.loss_utils import VALID_TASK_TYPES, get_loss_fn, get_loss_kwargs
|
|
8
9
|
from nextrec.loss.pairwise import BPRLoss, HingeLoss, TripletLoss
|
|
9
10
|
from nextrec.loss.pointwise import (
|
|
10
11
|
ClassBalancedFocalLoss,
|
|
@@ -12,11 +13,6 @@ from nextrec.loss.pointwise import (
|
|
|
12
13
|
FocalLoss,
|
|
13
14
|
WeightedBCELoss,
|
|
14
15
|
)
|
|
15
|
-
from nextrec.loss.loss_utils import (
|
|
16
|
-
get_loss_fn,
|
|
17
|
-
get_loss_kwargs,
|
|
18
|
-
VALID_TASK_TYPES,
|
|
19
|
-
)
|
|
20
16
|
|
|
21
17
|
__all__ = [
|
|
22
18
|
# Pointwise
|
nextrec/loss/loss_utils.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Loss utilities for NextRec.
|
|
3
3
|
|
|
4
4
|
Date: create on 27/10/2025
|
|
5
|
-
Checkpoint: edit on
|
|
5
|
+
Checkpoint: edit on 19/12/2025
|
|
6
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -18,16 +18,10 @@ from nextrec.loss.listwise import (
|
|
|
18
18
|
SampledSoftmaxLoss,
|
|
19
19
|
)
|
|
20
20
|
from nextrec.loss.pairwise import BPRLoss, HingeLoss, TripletLoss
|
|
21
|
-
from nextrec.loss.pointwise import
|
|
22
|
-
ClassBalancedFocalLoss,
|
|
23
|
-
FocalLoss,
|
|
24
|
-
WeightedBCELoss,
|
|
25
|
-
)
|
|
26
|
-
|
|
21
|
+
from nextrec.loss.pointwise import ClassBalancedFocalLoss, FocalLoss, WeightedBCELoss
|
|
27
22
|
|
|
28
23
|
VALID_TASK_TYPES = [
|
|
29
24
|
"binary",
|
|
30
|
-
"multiclass",
|
|
31
25
|
"multilabel",
|
|
32
26
|
"regression",
|
|
33
27
|
]
|
|
@@ -5,12 +5,5 @@ This module contains generative models for recommendation tasks.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from nextrec.models.generative.hstu import HSTU
|
|
8
|
-
from nextrec.models.generative.rqvae import (
|
|
9
|
-
RQVAE,
|
|
10
|
-
RQ,
|
|
11
|
-
VQEmbedding,
|
|
12
|
-
BalancedKmeans,
|
|
13
|
-
kmeans,
|
|
14
|
-
)
|
|
15
8
|
|
|
16
|
-
__all__ = ["HSTU"
|
|
9
|
+
__all__ = ["HSTU"]
|
|
@@ -54,10 +54,9 @@ import torch
|
|
|
54
54
|
import torch.nn as nn
|
|
55
55
|
import torch.nn.functional as F
|
|
56
56
|
|
|
57
|
-
from nextrec.basic.model import BaseModel
|
|
58
|
-
from nextrec.basic.layers import RMSNorm, EmbeddingLayer
|
|
59
57
|
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
60
|
-
|
|
58
|
+
from nextrec.basic.layers import EmbeddingLayer, RMSNorm
|
|
59
|
+
from nextrec.basic.model import BaseModel
|
|
61
60
|
from nextrec.utils.model import select_features
|
|
62
61
|
|
|
63
62
|
|
|
@@ -302,7 +301,7 @@ class HSTU(BaseModel):
|
|
|
302
301
|
|
|
303
302
|
@property
|
|
304
303
|
def default_task(self) -> str:
|
|
305
|
-
return "
|
|
304
|
+
return "binary"
|
|
306
305
|
|
|
307
306
|
def __init__(
|
|
308
307
|
self,
|
|
@@ -336,6 +335,9 @@ class HSTU(BaseModel):
|
|
|
336
335
|
device: str = "cpu",
|
|
337
336
|
**kwargs,
|
|
338
337
|
):
|
|
338
|
+
raise NotImplementedError(
|
|
339
|
+
"[HSTU Error] NextRec no longer supports multiclass tasks; HSTU is disabled."
|
|
340
|
+
)
|
|
339
341
|
if not sequence_features:
|
|
340
342
|
raise ValueError(
|
|
341
343
|
"[HSTU Error] HSTU requires at least one SequenceFeature (user behavior history)."
|
|
@@ -44,9 +44,9 @@ CVR 预测 P(conversion|click),二者相乘得到 CTCVR 并在曝光标签上
|
|
|
44
44
|
import torch
|
|
45
45
|
import torch.nn as nn
|
|
46
46
|
|
|
47
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
48
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
47
49
|
from nextrec.basic.model import BaseModel
|
|
48
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
49
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
class ESMM(BaseModel):
|
|
@@ -45,9 +45,9 @@ MMoE(Multi-gate Mixture-of-Experts)是多任务学习框架,通过多个
|
|
|
45
45
|
import torch
|
|
46
46
|
import torch.nn as nn
|
|
47
47
|
|
|
48
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
49
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
48
50
|
from nextrec.basic.model import BaseModel
|
|
49
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
50
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
class MMOE(BaseModel):
|
nextrec/models/multi_task/ple.py
CHANGED
|
@@ -48,9 +48,9 @@ PLE(Progressive Layered Extraction)通过堆叠 CGC 模块,联合共享与
|
|
|
48
48
|
import torch
|
|
49
49
|
import torch.nn as nn
|
|
50
50
|
|
|
51
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
52
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
51
53
|
from nextrec.basic.model import BaseModel
|
|
52
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
53
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
54
54
|
from nextrec.utils.model import get_mlp_output_dim
|
|
55
55
|
|
|
56
56
|
|
|
@@ -42,11 +42,10 @@ import torch
|
|
|
42
42
|
import torch.nn as nn
|
|
43
43
|
import torch.nn.functional as F
|
|
44
44
|
|
|
45
|
-
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
46
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
47
45
|
from nextrec.basic.activation import activation_layer
|
|
46
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
47
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
48
48
|
from nextrec.basic.model import BaseModel
|
|
49
|
-
|
|
50
49
|
from nextrec.utils.model import select_features
|
|
51
50
|
|
|
52
51
|
|
|
@@ -42,9 +42,9 @@ Share-Bottom(硬共享底层)是多任务学习的经典基线:所有任
|
|
|
42
42
|
import torch
|
|
43
43
|
import torch.nn as nn
|
|
44
44
|
|
|
45
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
46
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
45
47
|
from nextrec.basic.model import BaseModel
|
|
46
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
47
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
48
48
|
|
|
49
49
|
|
|
50
50
|
class ShareBottom(BaseModel):
|
nextrec/models/ranking/afm.py
CHANGED
|
@@ -39,9 +39,9 @@ AFM 在 FM 的二阶交互上引入注意力,为每个特征对学习重要性
|
|
|
39
39
|
import torch
|
|
40
40
|
import torch.nn as nn
|
|
41
41
|
|
|
42
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
43
|
+
from nextrec.basic.layers import EmbeddingLayer, InputMask, PredictionLayer
|
|
42
44
|
from nextrec.basic.model import BaseModel
|
|
43
|
-
from nextrec.basic.layers import EmbeddingLayer, PredictionLayer, InputMask
|
|
44
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class AFM(BaseModel):
|
|
@@ -57,9 +57,9 @@ AutoInt 通过对所有特征 embedding 进行注意力计算,捕捉特征之
|
|
|
57
57
|
import torch
|
|
58
58
|
import torch.nn as nn
|
|
59
59
|
|
|
60
|
-
from nextrec.basic.
|
|
60
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
61
61
|
from nextrec.basic.layers import EmbeddingLayer, MultiHeadSelfAttention, PredictionLayer
|
|
62
|
-
from nextrec.basic.
|
|
62
|
+
from nextrec.basic.model import BaseModel
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class AutoInt(BaseModel):
|
nextrec/models/ranking/dcn.py
CHANGED
|
@@ -53,9 +53,9 @@ Deep 分支提升表达能力;最终将 Cross(及 Deep)结果送入线性
|
|
|
53
53
|
import torch
|
|
54
54
|
import torch.nn as nn
|
|
55
55
|
|
|
56
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
57
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
56
58
|
from nextrec.basic.model import BaseModel
|
|
57
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
58
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
class CrossNetwork(nn.Module):
|
nextrec/models/ranking/dcn_v2.py
CHANGED
|
@@ -46,9 +46,9 @@ DCN v2 在原始 DCN 基础上,将标量交叉权重升级为向量/矩阵参
|
|
|
46
46
|
import torch
|
|
47
47
|
import torch.nn as nn
|
|
48
48
|
|
|
49
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
50
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
49
51
|
from nextrec.basic.model import BaseModel
|
|
50
|
-
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
51
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
class CrossNetV2(nn.Module):
|
nextrec/models/ranking/deepfm.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Date: create on 27/10/2025
|
|
3
3
|
Checkpoint: edit on 24/11/2025
|
|
4
|
-
Author:
|
|
5
|
-
Yang Zhou,zyaztec@gmail.com
|
|
4
|
+
Author: Yang Zhou,zyaztec@gmail.com
|
|
6
5
|
Reference:
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
[1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network
|
|
7
|
+
for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
|
|
8
|
+
(https://arxiv.org/abs/1703.04247)
|
|
10
9
|
|
|
11
10
|
DeepFM combines a Factorization Machine (FM) for explicit second-order feature
|
|
12
11
|
interactions with a deep MLP for high-order nonlinear patterns. Both parts share
|
|
@@ -45,9 +44,9 @@ embedding,无需手工构造交叉特征即可端到端训练,常用于 CTR/
|
|
|
45
44
|
|
|
46
45
|
import torch.nn as nn
|
|
47
46
|
|
|
47
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
48
|
+
from nextrec.basic.layers import FM, LR, MLP, EmbeddingLayer, PredictionLayer
|
|
48
49
|
from nextrec.basic.model import BaseModel
|
|
49
|
-
from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
|
|
50
|
-
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
51
50
|
|
|
52
51
|
|
|
53
52
|
class DeepFM(BaseModel):
|
nextrec/models/ranking/dien.py
CHANGED
|
@@ -50,14 +50,14 @@ import torch
|
|
|
50
50
|
import torch.nn as nn
|
|
51
51
|
import torch.nn.functional as F
|
|
52
52
|
|
|
53
|
-
from nextrec.basic.
|
|
53
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
54
54
|
from nextrec.basic.layers import (
|
|
55
|
-
EmbeddingLayer,
|
|
56
55
|
MLP,
|
|
57
56
|
AttentionPoolingLayer,
|
|
57
|
+
EmbeddingLayer,
|
|
58
58
|
PredictionLayer,
|
|
59
59
|
)
|
|
60
|
-
from nextrec.basic.
|
|
60
|
+
from nextrec.basic.model import BaseModel
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
class AUGRU(nn.Module):
|
nextrec/models/ranking/din.py
CHANGED
|
@@ -50,14 +50,14 @@ DIN 是一个 CTR 预估模型,通过对用户历史行为序列进行目标
|
|
|
50
50
|
import torch
|
|
51
51
|
import torch.nn as nn
|
|
52
52
|
|
|
53
|
-
from nextrec.basic.
|
|
53
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
54
54
|
from nextrec.basic.layers import (
|
|
55
|
-
EmbeddingLayer,
|
|
56
55
|
MLP,
|
|
57
56
|
AttentionPoolingLayer,
|
|
57
|
+
EmbeddingLayer,
|
|
58
58
|
PredictionLayer,
|
|
59
59
|
)
|
|
60
|
-
from nextrec.basic.
|
|
60
|
+
from nextrec.basic.model import BaseModel
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
class DIN(BaseModel):
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Checkpoint: edit on 09/12/2025
|
|
4
|
+
Author: Yang Zhou, zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Zhao Z, Zhang H, Tang H, et al. EulerNet: Efficient and Effective Feature
|
|
7
|
+
Interaction Modeling with Euler's Formula. (SIGIR 2023)
|
|
8
|
+
|
|
9
|
+
EulerNet models feature interactions in the complex domain using Euler's
|
|
10
|
+
formula. Each field embedding is transformed into amplitude and phase,
|
|
11
|
+
then mapped to a complex vector. Feature interactions are captured by
|
|
12
|
+
multiplying complex vectors across fields, which corresponds to multiplying
|
|
13
|
+
amplitudes and summing phases. The resulting complex representation is
|
|
14
|
+
converted back to real-valued features for a linear readout, optionally
|
|
15
|
+
paired with a linear term for first-order signals.
|
|
16
|
+
|
|
17
|
+
Pipeline:
|
|
18
|
+
(1) Embed sparse/sequence features with a shared embedding dimension
|
|
19
|
+
(2) Map embeddings to complex vectors via amplitude/phase transforms
|
|
20
|
+
(3) Multiply complex vectors across fields (Euler interaction)
|
|
21
|
+
(4) Concatenate real & imaginary parts and apply a linear regression head
|
|
22
|
+
(5) Optionally add a linear term and apply the prediction layer
|
|
23
|
+
|
|
24
|
+
Key Advantages:
|
|
25
|
+
- Efficient higher-order interaction modeling via complex multiplication
|
|
26
|
+
- Compact representation without explicit cross-feature enumeration
|
|
27
|
+
- Works well on sparse high-dimensional feature spaces
|
|
28
|
+
|
|
29
|
+
EulerNet 使用欧拉公式将特征嵌入映射到复数域,通过复数相乘实现高效的
|
|
30
|
+
特征交互建模,再将复数表示转回实数向量做线性回归,并可选叠加线性项
|
|
31
|
+
以保留一阶信号。
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import torch
|
|
37
|
+
import torch.nn as nn
|
|
38
|
+
import torch.nn.functional as F
|
|
39
|
+
|
|
40
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
41
|
+
from nextrec.basic.layers import LR, EmbeddingLayer, PredictionLayer
|
|
42
|
+
from nextrec.basic.model import BaseModel
|
|
43
|
+
|
|
44
|
+
class EulerInteractionLayerPaper(nn.Module):
|
|
45
|
+
"""
|
|
46
|
+
Paper-aligned Euler Interaction Layer.
|
|
47
|
+
|
|
48
|
+
Input: r, p (rectangular form) as tensors with shape [B, m, d]
|
|
49
|
+
where each field j is complex feature: r_j + i p_j.
|
|
50
|
+
|
|
51
|
+
Output: r_out, p_out as tensors with shape [B, n, d]
|
|
52
|
+
representing {o_k}_{k=1..n} (Eq.15) which can be stacked.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
*,
|
|
58
|
+
embedding_dim: int,
|
|
59
|
+
num_fields: int,
|
|
60
|
+
num_orders: int,
|
|
61
|
+
use_implicit: bool = True,
|
|
62
|
+
norm: str | None = "ln", # None | "bn" | "ln"
|
|
63
|
+
eps: float = 1e-9,
|
|
64
|
+
):
|
|
65
|
+
super().__init__()
|
|
66
|
+
self.d = embedding_dim
|
|
67
|
+
self.m = num_fields
|
|
68
|
+
self.n = num_orders
|
|
69
|
+
self.use_implicit = use_implicit
|
|
70
|
+
self.eps = eps
|
|
71
|
+
|
|
72
|
+
# Explicit part parameters
|
|
73
|
+
# alpha_{k,j} : shape [n, m, d] (vector-wise coefficients)
|
|
74
|
+
self.alpha = nn.Parameter(torch.empty(self.n, self.m, self.d))
|
|
75
|
+
# delta_k, delta'_k : shape [n, d]
|
|
76
|
+
self.delta_phase = nn.Parameter(torch.zeros(self.n, self.d))
|
|
77
|
+
self.delta_logmod = nn.Parameter(torch.zeros(self.n, self.d))
|
|
78
|
+
nn.init.xavier_uniform_(self.alpha)
|
|
79
|
+
|
|
80
|
+
# Implicit part parameters
|
|
81
|
+
if self.use_implicit:
|
|
82
|
+
# W_k in R^{d x (m*d)} and bias b_k in R^d
|
|
83
|
+
self.W_r = nn.Parameter(torch.empty(self.n, self.d, self.m * self.d))
|
|
84
|
+
self.b_r = nn.Parameter(torch.zeros(self.n, self.d))
|
|
85
|
+
self.W_p = nn.Parameter(torch.empty(self.n, self.d, self.m * self.d))
|
|
86
|
+
self.b_p = nn.Parameter(torch.zeros(self.n, self.d))
|
|
87
|
+
nn.init.xavier_uniform_(self.W_r)
|
|
88
|
+
nn.init.xavier_uniform_(self.W_p)
|
|
89
|
+
else:
|
|
90
|
+
self.W, self.b = None, None
|
|
91
|
+
|
|
92
|
+
# Normalization
|
|
93
|
+
# Apply on concatenated [r_k, p_k] per k.
|
|
94
|
+
self.norm = norm
|
|
95
|
+
if norm == "bn":
|
|
96
|
+
self.bn = nn.BatchNorm1d(self.n * self.d * 2)
|
|
97
|
+
self.ln = None
|
|
98
|
+
elif norm == "ln":
|
|
99
|
+
self.ln = nn.LayerNorm(self.d * 2)
|
|
100
|
+
self.bn = None
|
|
101
|
+
else:
|
|
102
|
+
self.bn = None
|
|
103
|
+
self.ln = None
|
|
104
|
+
|
|
105
|
+
def forward(self, r: torch.Tensor, p: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
|
|
106
|
+
"""
|
|
107
|
+
r, p: [B, m, d]
|
|
108
|
+
return r_out, p_out: [B, n, d]
|
|
109
|
+
"""
|
|
110
|
+
B, m, d = r.shape
|
|
111
|
+
assert m == self.m and d == self.d, f"Expected [B,{self.m},{self.d}] got {r.shape}"
|
|
112
|
+
|
|
113
|
+
# Euler Transformation: rectangular -> polar
|
|
114
|
+
lam = torch.sqrt(r * r + p * p + self.eps) # [B,m,d]
|
|
115
|
+
theta = torch.atan2(p, r) # [B,m,d]
|
|
116
|
+
log_lam = torch.log(lam + self.eps) # [B,m,d]
|
|
117
|
+
|
|
118
|
+
# Generalized Multi-order Transformation
|
|
119
|
+
# psi_k = sum_j alpha_{k,j} * theta_j + delta_k
|
|
120
|
+
# l_k = exp(sum_j alpha_{k,j} * log(lam_j) + delta'_k)
|
|
121
|
+
psi = torch.einsum("bmd,nmd->bnd", theta, self.alpha) + self.delta_phase # [B,n,d]
|
|
122
|
+
log_l = torch.einsum("bmd,nmd->bnd", log_lam, self.alpha) + self.delta_logmod # [B,n,d]
|
|
123
|
+
l = torch.exp(log_l) # [B,n,d]
|
|
124
|
+
|
|
125
|
+
# Inverse Euler Transformation
|
|
126
|
+
r_hat = l * torch.cos(psi) # [B,n,d]
|
|
127
|
+
p_hat = l * torch.sin(psi) # [B,n,d]
|
|
128
|
+
|
|
129
|
+
# Implicit interactions + fusion
|
|
130
|
+
if self.use_implicit:
|
|
131
|
+
r_cat = r.reshape(B, self.m * self.d) # [B, m*d]
|
|
132
|
+
p_cat = p.reshape(B, self.m * self.d) # [B, m*d]
|
|
133
|
+
# For each k: W_k @ r_cat + b_k -> [B,d]
|
|
134
|
+
r_imp = torch.einsum("bq,ndq->bnd", r_cat, self.W_r) + self.b_r
|
|
135
|
+
p_imp = torch.einsum("bq,ndq->bnd", p_cat, self.W_p) + self.b_p
|
|
136
|
+
r_imp = F.relu(r_imp)
|
|
137
|
+
p_imp = F.relu(p_imp)
|
|
138
|
+
r_out = r_hat + r_imp
|
|
139
|
+
p_out = p_hat + p_imp
|
|
140
|
+
else:
|
|
141
|
+
r_out, p_out = r_hat, p_hat
|
|
142
|
+
|
|
143
|
+
# Optional normalization (paper says BN/LN can be used between layers)
|
|
144
|
+
if self.bn is not None:
|
|
145
|
+
x = torch.cat([r_out, p_out], dim=-1).reshape(B, self.n * self.d * 2)
|
|
146
|
+
x = self.bn(x).reshape(B, self.n, self.d * 2)
|
|
147
|
+
r_out, p_out = x[..., : self.d], x[..., self.d :]
|
|
148
|
+
elif self.ln is not None:
|
|
149
|
+
x = torch.cat([r_out, p_out], dim=-1) # [B,n,2d]
|
|
150
|
+
x = self.ln(x)
|
|
151
|
+
r_out, p_out = x[..., : self.d], x[..., self.d :]
|
|
152
|
+
|
|
153
|
+
return r_out, p_out
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class ComplexSpaceMappingPaper(nn.Module):
|
|
157
|
+
"""
|
|
158
|
+
Map real embeddings e_j to complex features via Euler's formula (Eq.6-7).
|
|
159
|
+
For each field j:
|
|
160
|
+
r_j = mu_j * cos(e_j)
|
|
161
|
+
p_j = mu_j * sin(e_j)
|
|
162
|
+
mu_j is field-specific learnable vector (positive via exp).
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
def __init__(self, embedding_dim: int, num_fields: int):
|
|
166
|
+
super().__init__()
|
|
167
|
+
self.d = embedding_dim
|
|
168
|
+
self.m = num_fields
|
|
169
|
+
self.log_mu = nn.Parameter(torch.zeros(self.m, self.d)) # mu = exp(log_mu)
|
|
170
|
+
|
|
171
|
+
def forward(self, e: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
|
|
172
|
+
# e: [B, m, d]
|
|
173
|
+
mu = torch.exp(self.log_mu).unsqueeze(0) # [1,m,d]
|
|
174
|
+
r = mu * torch.cos(e)
|
|
175
|
+
p = mu * torch.sin(e)
|
|
176
|
+
return r, p
|
|
177
|
+
|
|
178
|
+
class EulerNetPaper(nn.Module):
|
|
179
|
+
"""
|
|
180
|
+
Paper-aligned EulerNet core (embedding -> mapping -> L Euler layers -> linear regression).
|
|
181
|
+
"""
|
|
182
|
+
|
|
183
|
+
def __init__(
|
|
184
|
+
self,
|
|
185
|
+
*,
|
|
186
|
+
embedding_dim: int,
|
|
187
|
+
num_fields: int,
|
|
188
|
+
num_layers: int = 2,
|
|
189
|
+
num_orders: int = 8, # n in paper
|
|
190
|
+
use_implicit: bool = True,
|
|
191
|
+
norm: str | None = "ln", # None | "bn" | "ln"
|
|
192
|
+
):
|
|
193
|
+
super().__init__()
|
|
194
|
+
self.d = embedding_dim
|
|
195
|
+
self.m = num_fields
|
|
196
|
+
self.L = num_layers
|
|
197
|
+
self.n = num_orders
|
|
198
|
+
|
|
199
|
+
self.mapping = ComplexSpaceMappingPaper(embedding_dim, num_fields)
|
|
200
|
+
|
|
201
|
+
self.layers = nn.ModuleList([
|
|
202
|
+
EulerInteractionLayerPaper(
|
|
203
|
+
embedding_dim=embedding_dim,
|
|
204
|
+
num_fields=(num_fields if i == 0 else num_orders), # stack: m -> n -> n ...
|
|
205
|
+
num_orders=num_orders,
|
|
206
|
+
use_implicit=use_implicit,
|
|
207
|
+
norm=norm,
|
|
208
|
+
)
|
|
209
|
+
for i in range(num_layers)
|
|
210
|
+
])
|
|
211
|
+
|
|
212
|
+
# Output regression (Eq.16-17)
|
|
213
|
+
# After last layer: r,p are [B,n,d]. Concatenate to [B, n*d] each, then regress.
|
|
214
|
+
self.w = nn.Linear(self.n * self.d, 1, bias=False) # for real
|
|
215
|
+
self.w_im = nn.Linear(self.n * self.d, 1, bias=False) # for imag
|
|
216
|
+
|
|
217
|
+
def forward(self, field_emb: torch.Tensor) -> torch.Tensor:
|
|
218
|
+
"""
|
|
219
|
+
field_emb: [B, m, d] real embeddings e_j
|
|
220
|
+
return: logits, shape [B,1]
|
|
221
|
+
"""
|
|
222
|
+
r, p = self.mapping(field_emb) # [B,m,d]
|
|
223
|
+
|
|
224
|
+
# stack Euler interaction layers
|
|
225
|
+
for layer in self.layers:
|
|
226
|
+
r, p = layer(r, p) # -> [B,n,d]
|
|
227
|
+
|
|
228
|
+
r_flat = r.reshape(r.size(0), self.n * self.d)
|
|
229
|
+
p_flat = p.reshape(p.size(0), self.n * self.d)
|
|
230
|
+
|
|
231
|
+
z_re = self.w(r_flat)
|
|
232
|
+
z_im = self.w_im(p_flat)
|
|
233
|
+
return z_re + z_im # Eq.17 logits
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class EulerNet(BaseModel):
|
|
237
|
+
@property
|
|
238
|
+
def model_name(self):
|
|
239
|
+
return "EulerNet"
|
|
240
|
+
|
|
241
|
+
@property
|
|
242
|
+
def default_task(self):
|
|
243
|
+
return "binary"
|
|
244
|
+
|
|
245
|
+
def __init__(
|
|
246
|
+
self,
|
|
247
|
+
dense_features: list[DenseFeature] | None = None,
|
|
248
|
+
sparse_features: list[SparseFeature] | None = None,
|
|
249
|
+
sequence_features: list[SequenceFeature] | None = None,
|
|
250
|
+
num_layers: int = 2,
|
|
251
|
+
num_orders: int = 8,
|
|
252
|
+
use_implicit: bool = True,
|
|
253
|
+
norm: str | None = "ln",
|
|
254
|
+
use_linear: bool = False,
|
|
255
|
+
target: list[str] | str | None = None,
|
|
256
|
+
task: str | list[str] | None = None,
|
|
257
|
+
optimizer: str = "adam",
|
|
258
|
+
optimizer_params: dict | None = None,
|
|
259
|
+
loss: str | nn.Module | None = "bce",
|
|
260
|
+
loss_params: dict | list[dict] | None = None,
|
|
261
|
+
device: str = "cpu",
|
|
262
|
+
embedding_l1_reg=1e-6,
|
|
263
|
+
dense_l1_reg=1e-5,
|
|
264
|
+
embedding_l2_reg=1e-5,
|
|
265
|
+
dense_l2_reg=1e-4,
|
|
266
|
+
**kwargs,
|
|
267
|
+
):
|
|
268
|
+
|
|
269
|
+
dense_features = dense_features or []
|
|
270
|
+
sparse_features = sparse_features or []
|
|
271
|
+
sequence_features = sequence_features or []
|
|
272
|
+
optimizer_params = optimizer_params or {}
|
|
273
|
+
if loss is None:
|
|
274
|
+
loss = "bce"
|
|
275
|
+
|
|
276
|
+
super(EulerNet, self).__init__(
|
|
277
|
+
dense_features=dense_features,
|
|
278
|
+
sparse_features=sparse_features,
|
|
279
|
+
sequence_features=sequence_features,
|
|
280
|
+
target=target,
|
|
281
|
+
task=task or self.default_task,
|
|
282
|
+
device=device,
|
|
283
|
+
embedding_l1_reg=embedding_l1_reg,
|
|
284
|
+
dense_l1_reg=dense_l1_reg,
|
|
285
|
+
embedding_l2_reg=embedding_l2_reg,
|
|
286
|
+
dense_l2_reg=dense_l2_reg,
|
|
287
|
+
**kwargs,
|
|
288
|
+
)
|
|
289
|
+
|
|
290
|
+
self.loss = loss
|
|
291
|
+
self.use_linear = use_linear
|
|
292
|
+
|
|
293
|
+
self.linear_features = dense_features + sparse_features + sequence_features
|
|
294
|
+
self.interaction_features = (
|
|
295
|
+
[f for f in dense_features if getattr(f, "use_embedding", False)]
|
|
296
|
+
+ sparse_features
|
|
297
|
+
+ sequence_features
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
if len(self.interaction_features) < 2:
|
|
301
|
+
raise ValueError(
|
|
302
|
+
"EulerNet requires at least two embedded features for interactions."
|
|
303
|
+
)
|
|
304
|
+
|
|
305
|
+
self.embedding = EmbeddingLayer(features=self.all_features)
|
|
306
|
+
|
|
307
|
+
self.num_fields = len(self.interaction_features)
|
|
308
|
+
self.embedding_dim = self.interaction_features[0].embedding_dim
|
|
309
|
+
if any(
|
|
310
|
+
f.embedding_dim != self.embedding_dim for f in self.interaction_features
|
|
311
|
+
):
|
|
312
|
+
raise ValueError(
|
|
313
|
+
"All interaction features must share the same embedding_dim in EulerNet."
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
self.euler = EulerNetPaper(
|
|
317
|
+
embedding_dim=self.embedding_dim,
|
|
318
|
+
num_fields=self.num_fields,
|
|
319
|
+
num_layers=num_layers,
|
|
320
|
+
num_orders=num_orders,
|
|
321
|
+
use_implicit=use_implicit,
|
|
322
|
+
norm=norm,
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
if self.use_linear:
|
|
326
|
+
if len(self.linear_features) == 0:
|
|
327
|
+
raise ValueError(
|
|
328
|
+
"EulerNet linear term requires at least one input feature."
|
|
329
|
+
)
|
|
330
|
+
linear_dim = self.embedding.get_input_dim(self.linear_features)
|
|
331
|
+
if linear_dim <= 0:
|
|
332
|
+
raise ValueError("EulerNet linear input_dim must be positive.")
|
|
333
|
+
self.linear = LR(linear_dim)
|
|
334
|
+
else:
|
|
335
|
+
self.linear = None
|
|
336
|
+
|
|
337
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
338
|
+
|
|
339
|
+
modules = ["euler"]
|
|
340
|
+
if self.use_linear:
|
|
341
|
+
modules.append("linear")
|
|
342
|
+
self.register_regularization_weights(
|
|
343
|
+
embedding_attr="embedding", include_modules=modules
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
self.compile(
|
|
347
|
+
optimizer=optimizer,
|
|
348
|
+
optimizer_params=optimizer_params,
|
|
349
|
+
loss=loss,
|
|
350
|
+
loss_params=loss_params,
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
def forward(self, x):
|
|
354
|
+
field_emb = self.embedding(
|
|
355
|
+
x=x, features=self.interaction_features, squeeze_dim=False
|
|
356
|
+
)
|
|
357
|
+
y_euler = self.euler(field_emb)
|
|
358
|
+
|
|
359
|
+
if self.use_linear and self.linear is not None:
|
|
360
|
+
linear_input = self.embedding(
|
|
361
|
+
x=x, features=self.linear_features, squeeze_dim=True
|
|
362
|
+
)
|
|
363
|
+
y_euler = y_euler + self.linear(linear_input)
|
|
364
|
+
|
|
365
|
+
return self.prediction_layer(y_euler)
|
|
@@ -43,17 +43,17 @@ FiBiNET 是一个 CTR 预估模型,通过 SENET 重新分配特征字段的重
|
|
|
43
43
|
import torch
|
|
44
44
|
import torch.nn as nn
|
|
45
45
|
|
|
46
|
-
from nextrec.basic.
|
|
46
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
47
47
|
from nextrec.basic.layers import (
|
|
48
|
-
BiLinearInteractionLayer,
|
|
49
|
-
HadamardInteractionLayer,
|
|
50
|
-
EmbeddingLayer,
|
|
51
48
|
LR,
|
|
52
49
|
MLP,
|
|
50
|
+
BiLinearInteractionLayer,
|
|
51
|
+
EmbeddingLayer,
|
|
52
|
+
HadamardInteractionLayer,
|
|
53
53
|
PredictionLayer,
|
|
54
54
|
SENETLayer,
|
|
55
55
|
)
|
|
56
|
-
from nextrec.basic.
|
|
56
|
+
from nextrec.basic.model import BaseModel
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
class FiBiNET(BaseModel):
|