nextrec 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/callback.py +30 -15
- nextrec/basic/features.py +1 -0
- nextrec/basic/layers.py +6 -8
- nextrec/basic/loggers.py +14 -7
- nextrec/basic/metrics.py +6 -76
- nextrec/basic/model.py +337 -328
- nextrec/cli.py +25 -4
- nextrec/data/__init__.py +13 -16
- nextrec/data/batch_utils.py +3 -2
- nextrec/data/data_processing.py +10 -2
- nextrec/data/data_utils.py +9 -14
- nextrec/data/dataloader.py +12 -13
- nextrec/data/preprocessor.py +328 -255
- nextrec/loss/__init__.py +1 -5
- nextrec/loss/loss_utils.py +2 -8
- nextrec/models/generative/__init__.py +1 -8
- nextrec/models/generative/hstu.py +6 -4
- nextrec/models/multi_task/esmm.py +2 -2
- nextrec/models/multi_task/mmoe.py +2 -2
- nextrec/models/multi_task/ple.py +2 -2
- nextrec/models/multi_task/poso.py +2 -3
- nextrec/models/multi_task/share_bottom.py +2 -2
- nextrec/models/ranking/afm.py +2 -2
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +2 -2
- nextrec/models/ranking/dcn_v2.py +2 -2
- nextrec/models/ranking/deepfm.py +2 -2
- nextrec/models/ranking/dien.py +3 -3
- nextrec/models/ranking/din.py +3 -3
- nextrec/models/ranking/ffm.py +0 -0
- nextrec/models/ranking/fibinet.py +5 -5
- nextrec/models/ranking/fm.py +3 -7
- nextrec/models/ranking/lr.py +0 -0
- nextrec/models/ranking/masknet.py +2 -2
- nextrec/models/ranking/pnn.py +2 -2
- nextrec/models/ranking/widedeep.py +2 -2
- nextrec/models/ranking/xdeepfm.py +2 -2
- nextrec/models/representation/__init__.py +9 -0
- nextrec/models/{generative → representation}/rqvae.py +9 -9
- nextrec/models/retrieval/__init__.py +0 -0
- nextrec/models/{match → retrieval}/dssm.py +8 -3
- nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
- nextrec/models/{match → retrieval}/mind.py +4 -3
- nextrec/models/{match → retrieval}/sdm.py +4 -3
- nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
- nextrec/utils/__init__.py +60 -46
- nextrec/utils/config.py +12 -10
- nextrec/utils/console.py +371 -0
- nextrec/utils/{synthetic_data.py → data.py} +102 -15
- nextrec/utils/feature.py +15 -0
- nextrec/utils/torch_utils.py +411 -0
- {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/METADATA +8 -7
- nextrec-0.4.9.dist-info/RECORD +70 -0
- nextrec/utils/device.py +0 -78
- nextrec/utils/distributed.py +0 -141
- nextrec/utils/file.py +0 -92
- nextrec/utils/initializer.py +0 -79
- nextrec/utils/optimizer.py +0 -75
- nextrec/utils/tensor.py +0 -72
- nextrec-0.4.7.dist-info/RECORD +0 -70
- /nextrec/models/{match/__init__.py → ranking/eulernet.py} +0 -0
- {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/WHEEL +0 -0
- {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/licenses/LICENSE +0 -0
nextrec/cli.py
CHANGED
|
@@ -14,12 +14,14 @@ Examples:
|
|
|
14
14
|
nextrec --mode=predict --predict_config=nextrec_cli_preset/predict_config.yaml
|
|
15
15
|
|
|
16
16
|
Date: create on 06/12/2025
|
|
17
|
+
Checkpoint: edit on 18/12/2025
|
|
17
18
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
18
19
|
"""
|
|
19
20
|
|
|
20
21
|
import argparse
|
|
21
22
|
import logging
|
|
22
23
|
import pickle
|
|
24
|
+
import sys
|
|
23
25
|
import time
|
|
24
26
|
from pathlib import Path
|
|
25
27
|
from typing import Any, Dict, List
|
|
@@ -27,6 +29,7 @@ from typing import Any, Dict, List
|
|
|
27
29
|
import pandas as pd
|
|
28
30
|
|
|
29
31
|
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
32
|
+
from nextrec.basic.loggers import setup_logger
|
|
30
33
|
from nextrec.data.data_utils import split_dict_random
|
|
31
34
|
from nextrec.data.dataloader import RecDataLoader
|
|
32
35
|
from nextrec.data.preprocessor import DataProcessor
|
|
@@ -37,14 +40,14 @@ from nextrec.utils.config import (
|
|
|
37
40
|
resolve_path,
|
|
38
41
|
select_features,
|
|
39
42
|
)
|
|
40
|
-
from nextrec.utils.
|
|
41
|
-
from nextrec.utils.
|
|
43
|
+
from nextrec.utils.console import get_nextrec_version
|
|
44
|
+
from nextrec.utils.data import (
|
|
42
45
|
iter_file_chunks,
|
|
43
46
|
read_table,
|
|
44
47
|
read_yaml,
|
|
45
48
|
resolve_file_paths,
|
|
46
49
|
)
|
|
47
|
-
from nextrec.
|
|
50
|
+
from nextrec.utils.feature import normalize_to_list
|
|
48
51
|
|
|
49
52
|
logger = logging.getLogger(__name__)
|
|
50
53
|
|
|
@@ -71,6 +74,9 @@ def train_model(train_config_path: str) -> None:
|
|
|
71
74
|
artifact_root = Path(session_cfg.get("artifact_root", "nextrec_logs"))
|
|
72
75
|
session_dir = artifact_root / session_id
|
|
73
76
|
setup_logger(session_id=session_id)
|
|
77
|
+
logger.info(
|
|
78
|
+
f"[NextRec CLI] Training start | version={get_nextrec_version()} | session_id={session_id} | artifacts={session_dir.resolve()}"
|
|
79
|
+
)
|
|
74
80
|
|
|
75
81
|
processor_path = session_dir / "processor.pkl"
|
|
76
82
|
processor_path = Path(processor_path)
|
|
@@ -324,6 +330,9 @@ def predict_model(predict_config_path: str) -> None:
|
|
|
324
330
|
artifact_root = Path(session_cfg.get("artifact_root", "nextrec_logs"))
|
|
325
331
|
session_dir = Path(cfg.get("checkpoint_path") or (artifact_root / session_id))
|
|
326
332
|
setup_logger(session_id=session_id)
|
|
333
|
+
logger.info(
|
|
334
|
+
f"[NextRec CLI] Predict start | version={get_nextrec_version()} | session_id={session_id} | checkpoint={session_dir.resolve()}"
|
|
335
|
+
)
|
|
327
336
|
|
|
328
337
|
processor_path = Path(session_dir / "processor.pkl")
|
|
329
338
|
if not processor_path.exists():
|
|
@@ -454,6 +463,13 @@ def predict_model(predict_config_path: str) -> None:
|
|
|
454
463
|
|
|
455
464
|
def main() -> None:
|
|
456
465
|
"""Parse CLI arguments and dispatch to train or predict mode."""
|
|
466
|
+
|
|
467
|
+
root = logging.getLogger()
|
|
468
|
+
if not root.handlers:
|
|
469
|
+
handler = logging.StreamHandler(sys.stdout)
|
|
470
|
+
handler.setFormatter(logging.Formatter("%(message)s"))
|
|
471
|
+
root.addHandler(handler)
|
|
472
|
+
|
|
457
473
|
parser = argparse.ArgumentParser(
|
|
458
474
|
description="NextRec: Training and Prediction Pipeline",
|
|
459
475
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
@@ -466,16 +482,21 @@ Examples:
|
|
|
466
482
|
nextrec --mode=predict --predict_config=configs/predict_config.yaml
|
|
467
483
|
""",
|
|
468
484
|
)
|
|
485
|
+
|
|
469
486
|
parser.add_argument(
|
|
470
487
|
"--mode",
|
|
471
488
|
choices=["train", "predict"],
|
|
472
|
-
required=True,
|
|
473
489
|
help="Running mode: train or predict",
|
|
474
490
|
)
|
|
475
491
|
parser.add_argument("--train_config", help="Training configuration file path")
|
|
476
492
|
parser.add_argument("--predict_config", help="Prediction configuration file path")
|
|
477
493
|
args = parser.parse_args()
|
|
478
494
|
|
|
495
|
+
logger.info(get_nextrec_version())
|
|
496
|
+
|
|
497
|
+
if not args.mode:
|
|
498
|
+
parser.error("[NextRec CLI Error] --mode is required (train|predict)")
|
|
499
|
+
|
|
479
500
|
if args.mode == "train":
|
|
480
501
|
config_path = args.train_config
|
|
481
502
|
if not config_path:
|
nextrec/data/__init__.py
CHANGED
|
@@ -1,29 +1,26 @@
|
|
|
1
|
-
from nextrec.
|
|
1
|
+
from nextrec.basic.features import FeatureSet
|
|
2
|
+
from nextrec.data import data_utils
|
|
3
|
+
from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
|
|
2
4
|
from nextrec.data.data_processing import (
|
|
3
|
-
get_column_data,
|
|
4
|
-
split_dict_random,
|
|
5
5
|
build_eval_candidates,
|
|
6
|
+
get_column_data,
|
|
6
7
|
get_user_ids,
|
|
8
|
+
split_dict_random,
|
|
7
9
|
)
|
|
8
|
-
|
|
9
|
-
from nextrec.utils.file import (
|
|
10
|
-
resolve_file_paths,
|
|
11
|
-
iter_file_chunks,
|
|
12
|
-
read_table,
|
|
13
|
-
load_dataframes,
|
|
14
|
-
default_output_dir,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
10
|
from nextrec.data.dataloader import (
|
|
18
|
-
TensorDictDataset,
|
|
19
11
|
FileDataset,
|
|
20
12
|
RecDataLoader,
|
|
13
|
+
TensorDictDataset,
|
|
21
14
|
build_tensors_from_data,
|
|
22
15
|
)
|
|
23
|
-
|
|
24
16
|
from nextrec.data.preprocessor import DataProcessor
|
|
25
|
-
from nextrec.
|
|
26
|
-
|
|
17
|
+
from nextrec.utils.data import (
|
|
18
|
+
default_output_dir,
|
|
19
|
+
iter_file_chunks,
|
|
20
|
+
load_dataframes,
|
|
21
|
+
read_table,
|
|
22
|
+
resolve_file_paths,
|
|
23
|
+
)
|
|
27
24
|
|
|
28
25
|
__all__ = [
|
|
29
26
|
# Batch utilities
|
nextrec/data/batch_utils.py
CHANGED
|
@@ -5,10 +5,11 @@ Date: create on 03/12/2025
|
|
|
5
5
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import torch
|
|
9
|
-
import numpy as np
|
|
10
8
|
from typing import Any, Mapping
|
|
11
9
|
|
|
10
|
+
import numpy as np
|
|
11
|
+
import torch
|
|
12
|
+
|
|
12
13
|
|
|
13
14
|
def stack_section(batch: list[dict], section: str):
|
|
14
15
|
entries = [item.get(section) for item in batch if item.get(section) is not None]
|
nextrec/data/data_processing.py
CHANGED
|
@@ -2,13 +2,16 @@
|
|
|
2
2
|
Data processing utilities for NextRec
|
|
3
3
|
|
|
4
4
|
Date: create on 03/12/2025
|
|
5
|
+
Checkpoint: edit on 19/12/2025
|
|
5
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
6
7
|
"""
|
|
7
8
|
|
|
8
|
-
import
|
|
9
|
+
import hashlib
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
9
12
|
import numpy as np
|
|
10
13
|
import pandas as pd
|
|
11
|
-
|
|
14
|
+
import torch
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
def get_column_data(data: dict | pd.DataFrame, name: str):
|
|
@@ -166,3 +169,8 @@ def get_user_ids(
|
|
|
166
169
|
return arr.reshape(arr.shape[0])
|
|
167
170
|
|
|
168
171
|
return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def hash_md5_mod(value: str, hash_size: int) -> int:
|
|
175
|
+
digest = hashlib.md5(value.encode("utf-8")).digest()
|
|
176
|
+
return int.from_bytes(digest, byteorder="big", signed=False) % hash_size
|
nextrec/data/data_utils.py
CHANGED
|
@@ -1,30 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Data processing utilities for NextRec
|
|
3
|
-
|
|
4
|
-
This module now re-exports functions from specialized submodules:
|
|
5
|
-
- batch_utils: collate_fn, batch_to_dict
|
|
6
|
-
- data_processing: get_column_data, split_dict_random, build_eval_candidates, get_user_ids
|
|
7
|
-
- nextrec.utils.file_utils: resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
|
|
2
|
+
Data processing utilities for NextRec
|
|
8
3
|
|
|
9
4
|
Date: create on 27/10/2025
|
|
10
|
-
Last update:
|
|
5
|
+
Last update: 19/12/2025
|
|
11
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
12
7
|
"""
|
|
13
8
|
|
|
14
9
|
# Import from new organized modules
|
|
15
|
-
from nextrec.data.batch_utils import
|
|
10
|
+
from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
|
|
16
11
|
from nextrec.data.data_processing import (
|
|
17
|
-
get_column_data,
|
|
18
|
-
split_dict_random,
|
|
19
12
|
build_eval_candidates,
|
|
13
|
+
get_column_data,
|
|
20
14
|
get_user_ids,
|
|
15
|
+
split_dict_random,
|
|
21
16
|
)
|
|
22
|
-
from nextrec.utils.
|
|
23
|
-
|
|
17
|
+
from nextrec.utils.data import (
|
|
18
|
+
default_output_dir,
|
|
24
19
|
iter_file_chunks,
|
|
25
|
-
read_table,
|
|
26
20
|
load_dataframes,
|
|
27
|
-
|
|
21
|
+
read_table,
|
|
22
|
+
resolve_file_paths,
|
|
28
23
|
)
|
|
29
24
|
|
|
30
25
|
__all__ = [
|
nextrec/data/dataloader.py
CHANGED
|
@@ -2,33 +2,32 @@
|
|
|
2
2
|
Dataloader definitions
|
|
3
3
|
|
|
4
4
|
Date: create on 27/10/2025
|
|
5
|
-
Checkpoint: edit on
|
|
5
|
+
Checkpoint: edit on 19/12/2025
|
|
6
6
|
Author: Yang Zhou,zyaztec@gmail.com
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import os
|
|
10
|
-
import torch
|
|
11
9
|
import logging
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import cast
|
|
13
|
+
|
|
12
14
|
import numpy as np
|
|
13
15
|
import pandas as pd
|
|
14
16
|
import pyarrow.parquet as pq
|
|
15
|
-
|
|
16
|
-
from
|
|
17
|
-
from typing import cast
|
|
17
|
+
import torch
|
|
18
|
+
from torch.utils.data import DataLoader, Dataset, IterableDataset
|
|
18
19
|
|
|
19
20
|
from nextrec.basic.features import (
|
|
20
21
|
DenseFeature,
|
|
21
|
-
SparseFeature,
|
|
22
|
-
SequenceFeature,
|
|
23
22
|
FeatureSet,
|
|
23
|
+
SequenceFeature,
|
|
24
|
+
SparseFeature,
|
|
24
25
|
)
|
|
25
|
-
from nextrec.data.preprocessor import DataProcessor
|
|
26
|
-
from torch.utils.data import DataLoader, Dataset, IterableDataset
|
|
27
|
-
|
|
28
|
-
from nextrec.utils.tensor import to_tensor
|
|
29
|
-
from nextrec.utils.file import resolve_file_paths, read_table
|
|
30
26
|
from nextrec.data.batch_utils import collate_fn
|
|
31
27
|
from nextrec.data.data_processing import get_column_data
|
|
28
|
+
from nextrec.data.preprocessor import DataProcessor
|
|
29
|
+
from nextrec.utils.data import read_table, resolve_file_paths
|
|
30
|
+
from nextrec.utils.torch_utils import to_tensor
|
|
32
31
|
|
|
33
32
|
|
|
34
33
|
class TensorDictDataset(Dataset):
|