nextrec 0.4.8__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/callback.py +30 -15
- nextrec/basic/features.py +1 -0
- nextrec/basic/layers.py +6 -8
- nextrec/basic/loggers.py +14 -7
- nextrec/basic/metrics.py +6 -76
- nextrec/basic/model.py +312 -318
- nextrec/cli.py +5 -10
- nextrec/data/__init__.py +13 -16
- nextrec/data/batch_utils.py +3 -2
- nextrec/data/data_processing.py +10 -2
- nextrec/data/data_utils.py +9 -14
- nextrec/data/dataloader.py +12 -13
- nextrec/data/preprocessor.py +328 -255
- nextrec/loss/__init__.py +1 -5
- nextrec/loss/loss_utils.py +2 -8
- nextrec/models/generative/__init__.py +1 -8
- nextrec/models/generative/hstu.py +6 -4
- nextrec/models/multi_task/esmm.py +2 -2
- nextrec/models/multi_task/mmoe.py +2 -2
- nextrec/models/multi_task/ple.py +2 -2
- nextrec/models/multi_task/poso.py +2 -3
- nextrec/models/multi_task/share_bottom.py +2 -2
- nextrec/models/ranking/afm.py +2 -2
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +2 -2
- nextrec/models/ranking/dcn_v2.py +2 -2
- nextrec/models/ranking/deepfm.py +2 -2
- nextrec/models/ranking/dien.py +3 -3
- nextrec/models/ranking/din.py +3 -3
- nextrec/models/ranking/ffm.py +0 -0
- nextrec/models/ranking/fibinet.py +5 -5
- nextrec/models/ranking/fm.py +3 -7
- nextrec/models/ranking/lr.py +0 -0
- nextrec/models/ranking/masknet.py +2 -2
- nextrec/models/ranking/pnn.py +2 -2
- nextrec/models/ranking/widedeep.py +2 -2
- nextrec/models/ranking/xdeepfm.py +2 -2
- nextrec/models/representation/__init__.py +9 -0
- nextrec/models/{generative → representation}/rqvae.py +9 -9
- nextrec/models/retrieval/__init__.py +0 -0
- nextrec/models/{match → retrieval}/dssm.py +8 -3
- nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
- nextrec/models/{match → retrieval}/mind.py +4 -3
- nextrec/models/{match → retrieval}/sdm.py +4 -3
- nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
- nextrec/utils/__init__.py +60 -46
- nextrec/utils/config.py +8 -7
- nextrec/utils/console.py +371 -0
- nextrec/utils/{synthetic_data.py → data.py} +102 -15
- nextrec/utils/feature.py +15 -0
- nextrec/utils/torch_utils.py +411 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/METADATA +6 -6
- nextrec-0.4.9.dist-info/RECORD +70 -0
- nextrec/utils/cli_utils.py +0 -58
- nextrec/utils/device.py +0 -78
- nextrec/utils/distributed.py +0 -141
- nextrec/utils/file.py +0 -92
- nextrec/utils/initializer.py +0 -79
- nextrec/utils/optimizer.py +0 -75
- nextrec/utils/tensor.py +0 -72
- nextrec-0.4.8.dist-info/RECORD +0 -71
- /nextrec/models/{match/__init__.py → ranking/eulernet.py} +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/WHEEL +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/licenses/LICENSE +0 -0
nextrec/cli.py
CHANGED
|
@@ -18,10 +18,10 @@ Checkpoint: edit on 18/12/2025
|
|
|
18
18
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
import sys
|
|
22
21
|
import argparse
|
|
23
22
|
import logging
|
|
24
23
|
import pickle
|
|
24
|
+
import sys
|
|
25
25
|
import time
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
from typing import Any, Dict, List
|
|
@@ -29,6 +29,7 @@ from typing import Any, Dict, List
|
|
|
29
29
|
import pandas as pd
|
|
30
30
|
|
|
31
31
|
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
32
|
+
from nextrec.basic.loggers import setup_logger
|
|
32
33
|
from nextrec.data.data_utils import split_dict_random
|
|
33
34
|
from nextrec.data.dataloader import RecDataLoader
|
|
34
35
|
from nextrec.data.preprocessor import DataProcessor
|
|
@@ -39,18 +40,14 @@ from nextrec.utils.config import (
|
|
|
39
40
|
resolve_path,
|
|
40
41
|
select_features,
|
|
41
42
|
)
|
|
42
|
-
from nextrec.utils.
|
|
43
|
-
from nextrec.utils.
|
|
43
|
+
from nextrec.utils.console import get_nextrec_version
|
|
44
|
+
from nextrec.utils.data import (
|
|
44
45
|
iter_file_chunks,
|
|
45
46
|
read_table,
|
|
46
47
|
read_yaml,
|
|
47
48
|
resolve_file_paths,
|
|
48
49
|
)
|
|
49
|
-
from nextrec.utils.
|
|
50
|
-
get_nextrec_version,
|
|
51
|
-
log_startup_info,
|
|
52
|
-
)
|
|
53
|
-
from nextrec.basic.loggers import setup_logger
|
|
50
|
+
from nextrec.utils.feature import normalize_to_list
|
|
54
51
|
|
|
55
52
|
logger = logging.getLogger(__name__)
|
|
56
53
|
|
|
@@ -504,13 +501,11 @@ Examples:
|
|
|
504
501
|
config_path = args.train_config
|
|
505
502
|
if not config_path:
|
|
506
503
|
parser.error("[NextRec CLI Error] train mode requires --train_config")
|
|
507
|
-
log_startup_info(logger, mode="train", config_path=config_path)
|
|
508
504
|
train_model(config_path)
|
|
509
505
|
else:
|
|
510
506
|
config_path = args.predict_config
|
|
511
507
|
if not config_path:
|
|
512
508
|
parser.error("[NextRec CLI Error] predict mode requires --predict_config")
|
|
513
|
-
log_startup_info(logger, mode="predict", config_path=config_path)
|
|
514
509
|
predict_model(config_path)
|
|
515
510
|
|
|
516
511
|
|
nextrec/data/__init__.py
CHANGED
|
@@ -1,29 +1,26 @@
|
|
|
1
|
-
from nextrec.
|
|
1
|
+
from nextrec.basic.features import FeatureSet
|
|
2
|
+
from nextrec.data import data_utils
|
|
3
|
+
from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
|
|
2
4
|
from nextrec.data.data_processing import (
|
|
3
|
-
get_column_data,
|
|
4
|
-
split_dict_random,
|
|
5
5
|
build_eval_candidates,
|
|
6
|
+
get_column_data,
|
|
6
7
|
get_user_ids,
|
|
8
|
+
split_dict_random,
|
|
7
9
|
)
|
|
8
|
-
|
|
9
|
-
from nextrec.utils.file import (
|
|
10
|
-
resolve_file_paths,
|
|
11
|
-
iter_file_chunks,
|
|
12
|
-
read_table,
|
|
13
|
-
load_dataframes,
|
|
14
|
-
default_output_dir,
|
|
15
|
-
)
|
|
16
|
-
|
|
17
10
|
from nextrec.data.dataloader import (
|
|
18
|
-
TensorDictDataset,
|
|
19
11
|
FileDataset,
|
|
20
12
|
RecDataLoader,
|
|
13
|
+
TensorDictDataset,
|
|
21
14
|
build_tensors_from_data,
|
|
22
15
|
)
|
|
23
|
-
|
|
24
16
|
from nextrec.data.preprocessor import DataProcessor
|
|
25
|
-
from nextrec.
|
|
26
|
-
|
|
17
|
+
from nextrec.utils.data import (
|
|
18
|
+
default_output_dir,
|
|
19
|
+
iter_file_chunks,
|
|
20
|
+
load_dataframes,
|
|
21
|
+
read_table,
|
|
22
|
+
resolve_file_paths,
|
|
23
|
+
)
|
|
27
24
|
|
|
28
25
|
__all__ = [
|
|
29
26
|
# Batch utilities
|
nextrec/data/batch_utils.py
CHANGED
|
@@ -5,10 +5,11 @@ Date: create on 03/12/2025
|
|
|
5
5
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import torch
|
|
9
|
-
import numpy as np
|
|
10
8
|
from typing import Any, Mapping
|
|
11
9
|
|
|
10
|
+
import numpy as np
|
|
11
|
+
import torch
|
|
12
|
+
|
|
12
13
|
|
|
13
14
|
def stack_section(batch: list[dict], section: str):
|
|
14
15
|
entries = [item.get(section) for item in batch if item.get(section) is not None]
|
nextrec/data/data_processing.py
CHANGED
|
@@ -2,13 +2,16 @@
|
|
|
2
2
|
Data processing utilities for NextRec
|
|
3
3
|
|
|
4
4
|
Date: create on 03/12/2025
|
|
5
|
+
Checkpoint: edit on 19/12/2025
|
|
5
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
6
7
|
"""
|
|
7
8
|
|
|
8
|
-
import
|
|
9
|
+
import hashlib
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
9
12
|
import numpy as np
|
|
10
13
|
import pandas as pd
|
|
11
|
-
|
|
14
|
+
import torch
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
def get_column_data(data: dict | pd.DataFrame, name: str):
|
|
@@ -166,3 +169,8 @@ def get_user_ids(
|
|
|
166
169
|
return arr.reshape(arr.shape[0])
|
|
167
170
|
|
|
168
171
|
return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def hash_md5_mod(value: str, hash_size: int) -> int:
|
|
175
|
+
digest = hashlib.md5(value.encode("utf-8")).digest()
|
|
176
|
+
return int.from_bytes(digest, byteorder="big", signed=False) % hash_size
|
nextrec/data/data_utils.py
CHANGED
|
@@ -1,30 +1,25 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Data processing utilities for NextRec
|
|
3
|
-
|
|
4
|
-
This module now re-exports functions from specialized submodules:
|
|
5
|
-
- batch_utils: collate_fn, batch_to_dict
|
|
6
|
-
- data_processing: get_column_data, split_dict_random, build_eval_candidates, get_user_ids
|
|
7
|
-
- nextrec.utils.file_utils: resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
|
|
2
|
+
Data processing utilities for NextRec
|
|
8
3
|
|
|
9
4
|
Date: create on 27/10/2025
|
|
10
|
-
Last update:
|
|
5
|
+
Last update: 19/12/2025
|
|
11
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
12
7
|
"""
|
|
13
8
|
|
|
14
9
|
# Import from new organized modules
|
|
15
|
-
from nextrec.data.batch_utils import
|
|
10
|
+
from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
|
|
16
11
|
from nextrec.data.data_processing import (
|
|
17
|
-
get_column_data,
|
|
18
|
-
split_dict_random,
|
|
19
12
|
build_eval_candidates,
|
|
13
|
+
get_column_data,
|
|
20
14
|
get_user_ids,
|
|
15
|
+
split_dict_random,
|
|
21
16
|
)
|
|
22
|
-
from nextrec.utils.
|
|
23
|
-
|
|
17
|
+
from nextrec.utils.data import (
|
|
18
|
+
default_output_dir,
|
|
24
19
|
iter_file_chunks,
|
|
25
|
-
read_table,
|
|
26
20
|
load_dataframes,
|
|
27
|
-
|
|
21
|
+
read_table,
|
|
22
|
+
resolve_file_paths,
|
|
28
23
|
)
|
|
29
24
|
|
|
30
25
|
__all__ = [
|
nextrec/data/dataloader.py
CHANGED
|
@@ -2,33 +2,32 @@
|
|
|
2
2
|
Dataloader definitions
|
|
3
3
|
|
|
4
4
|
Date: create on 27/10/2025
|
|
5
|
-
Checkpoint: edit on
|
|
5
|
+
Checkpoint: edit on 19/12/2025
|
|
6
6
|
Author: Yang Zhou,zyaztec@gmail.com
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import os
|
|
10
|
-
import torch
|
|
11
9
|
import logging
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import cast
|
|
13
|
+
|
|
12
14
|
import numpy as np
|
|
13
15
|
import pandas as pd
|
|
14
16
|
import pyarrow.parquet as pq
|
|
15
|
-
|
|
16
|
-
from
|
|
17
|
-
from typing import cast
|
|
17
|
+
import torch
|
|
18
|
+
from torch.utils.data import DataLoader, Dataset, IterableDataset
|
|
18
19
|
|
|
19
20
|
from nextrec.basic.features import (
|
|
20
21
|
DenseFeature,
|
|
21
|
-
SparseFeature,
|
|
22
|
-
SequenceFeature,
|
|
23
22
|
FeatureSet,
|
|
23
|
+
SequenceFeature,
|
|
24
|
+
SparseFeature,
|
|
24
25
|
)
|
|
25
|
-
from nextrec.data.preprocessor import DataProcessor
|
|
26
|
-
from torch.utils.data import DataLoader, Dataset, IterableDataset
|
|
27
|
-
|
|
28
|
-
from nextrec.utils.tensor import to_tensor
|
|
29
|
-
from nextrec.utils.file import resolve_file_paths, read_table
|
|
30
26
|
from nextrec.data.batch_utils import collate_fn
|
|
31
27
|
from nextrec.data.data_processing import get_column_data
|
|
28
|
+
from nextrec.data.preprocessor import DataProcessor
|
|
29
|
+
from nextrec.utils.data import read_table, resolve_file_paths
|
|
30
|
+
from nextrec.utils.torch_utils import to_tensor
|
|
32
31
|
|
|
33
32
|
|
|
34
33
|
class TensorDictDataset(Dataset):
|