nextrec 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/callback.py +30 -15
  3. nextrec/basic/features.py +1 -0
  4. nextrec/basic/layers.py +6 -8
  5. nextrec/basic/loggers.py +14 -7
  6. nextrec/basic/metrics.py +6 -76
  7. nextrec/basic/model.py +337 -328
  8. nextrec/cli.py +25 -4
  9. nextrec/data/__init__.py +13 -16
  10. nextrec/data/batch_utils.py +3 -2
  11. nextrec/data/data_processing.py +10 -2
  12. nextrec/data/data_utils.py +9 -14
  13. nextrec/data/dataloader.py +12 -13
  14. nextrec/data/preprocessor.py +328 -255
  15. nextrec/loss/__init__.py +1 -5
  16. nextrec/loss/loss_utils.py +2 -8
  17. nextrec/models/generative/__init__.py +1 -8
  18. nextrec/models/generative/hstu.py +6 -4
  19. nextrec/models/multi_task/esmm.py +2 -2
  20. nextrec/models/multi_task/mmoe.py +2 -2
  21. nextrec/models/multi_task/ple.py +2 -2
  22. nextrec/models/multi_task/poso.py +2 -3
  23. nextrec/models/multi_task/share_bottom.py +2 -2
  24. nextrec/models/ranking/afm.py +2 -2
  25. nextrec/models/ranking/autoint.py +2 -2
  26. nextrec/models/ranking/dcn.py +2 -2
  27. nextrec/models/ranking/dcn_v2.py +2 -2
  28. nextrec/models/ranking/deepfm.py +2 -2
  29. nextrec/models/ranking/dien.py +3 -3
  30. nextrec/models/ranking/din.py +3 -3
  31. nextrec/models/ranking/ffm.py +0 -0
  32. nextrec/models/ranking/fibinet.py +5 -5
  33. nextrec/models/ranking/fm.py +3 -7
  34. nextrec/models/ranking/lr.py +0 -0
  35. nextrec/models/ranking/masknet.py +2 -2
  36. nextrec/models/ranking/pnn.py +2 -2
  37. nextrec/models/ranking/widedeep.py +2 -2
  38. nextrec/models/ranking/xdeepfm.py +2 -2
  39. nextrec/models/representation/__init__.py +9 -0
  40. nextrec/models/{generative → representation}/rqvae.py +9 -9
  41. nextrec/models/retrieval/__init__.py +0 -0
  42. nextrec/models/{match → retrieval}/dssm.py +8 -3
  43. nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
  44. nextrec/models/{match → retrieval}/mind.py +4 -3
  45. nextrec/models/{match → retrieval}/sdm.py +4 -3
  46. nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
  47. nextrec/utils/__init__.py +60 -46
  48. nextrec/utils/config.py +12 -10
  49. nextrec/utils/console.py +371 -0
  50. nextrec/utils/{synthetic_data.py → data.py} +102 -15
  51. nextrec/utils/feature.py +15 -0
  52. nextrec/utils/torch_utils.py +411 -0
  53. {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/METADATA +8 -7
  54. nextrec-0.4.9.dist-info/RECORD +70 -0
  55. nextrec/utils/device.py +0 -78
  56. nextrec/utils/distributed.py +0 -141
  57. nextrec/utils/file.py +0 -92
  58. nextrec/utils/initializer.py +0 -79
  59. nextrec/utils/optimizer.py +0 -75
  60. nextrec/utils/tensor.py +0 -72
  61. nextrec-0.4.7.dist-info/RECORD +0 -70
  62. /nextrec/models/{match/__init__.py → ranking/eulernet.py} +0 -0
  63. {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/WHEEL +0 -0
  64. {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/entry_points.txt +0 -0
  65. {nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/licenses/LICENSE +0 -0
nextrec/cli.py CHANGED
@@ -14,12 +14,14 @@ Examples:
14
14
  nextrec --mode=predict --predict_config=nextrec_cli_preset/predict_config.yaml
15
15
 
16
16
  Date: create on 06/12/2025
17
+ Checkpoint: edit on 18/12/2025
17
18
  Author: Yang Zhou, zyaztec@gmail.com
18
19
  """
19
20
 
20
21
  import argparse
21
22
  import logging
22
23
  import pickle
24
+ import sys
23
25
  import time
24
26
  from pathlib import Path
25
27
  from typing import Any, Dict, List
@@ -27,6 +29,7 @@ from typing import Any, Dict, List
27
29
  import pandas as pd
28
30
 
29
31
  from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
32
+ from nextrec.basic.loggers import setup_logger
30
33
  from nextrec.data.data_utils import split_dict_random
31
34
  from nextrec.data.dataloader import RecDataLoader
32
35
  from nextrec.data.preprocessor import DataProcessor
@@ -37,14 +40,14 @@ from nextrec.utils.config import (
37
40
  resolve_path,
38
41
  select_features,
39
42
  )
40
- from nextrec.utils.feature import normalize_to_list
41
- from nextrec.utils.file import (
43
+ from nextrec.utils.console import get_nextrec_version
44
+ from nextrec.utils.data import (
42
45
  iter_file_chunks,
43
46
  read_table,
44
47
  read_yaml,
45
48
  resolve_file_paths,
46
49
  )
47
- from nextrec.basic.loggers import setup_logger
50
+ from nextrec.utils.feature import normalize_to_list
48
51
 
49
52
  logger = logging.getLogger(__name__)
50
53
 
@@ -71,6 +74,9 @@ def train_model(train_config_path: str) -> None:
71
74
  artifact_root = Path(session_cfg.get("artifact_root", "nextrec_logs"))
72
75
  session_dir = artifact_root / session_id
73
76
  setup_logger(session_id=session_id)
77
+ logger.info(
78
+ f"[NextRec CLI] Training start | version={get_nextrec_version()} | session_id={session_id} | artifacts={session_dir.resolve()}"
79
+ )
74
80
 
75
81
  processor_path = session_dir / "processor.pkl"
76
82
  processor_path = Path(processor_path)
@@ -324,6 +330,9 @@ def predict_model(predict_config_path: str) -> None:
324
330
  artifact_root = Path(session_cfg.get("artifact_root", "nextrec_logs"))
325
331
  session_dir = Path(cfg.get("checkpoint_path") or (artifact_root / session_id))
326
332
  setup_logger(session_id=session_id)
333
+ logger.info(
334
+ f"[NextRec CLI] Predict start | version={get_nextrec_version()} | session_id={session_id} | checkpoint={session_dir.resolve()}"
335
+ )
327
336
 
328
337
  processor_path = Path(session_dir / "processor.pkl")
329
338
  if not processor_path.exists():
@@ -454,6 +463,13 @@ def predict_model(predict_config_path: str) -> None:
454
463
 
455
464
  def main() -> None:
456
465
  """Parse CLI arguments and dispatch to train or predict mode."""
466
+
467
+ root = logging.getLogger()
468
+ if not root.handlers:
469
+ handler = logging.StreamHandler(sys.stdout)
470
+ handler.setFormatter(logging.Formatter("%(message)s"))
471
+ root.addHandler(handler)
472
+
457
473
  parser = argparse.ArgumentParser(
458
474
  description="NextRec: Training and Prediction Pipeline",
459
475
  formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -466,16 +482,21 @@ Examples:
466
482
  nextrec --mode=predict --predict_config=configs/predict_config.yaml
467
483
  """,
468
484
  )
485
+
469
486
  parser.add_argument(
470
487
  "--mode",
471
488
  choices=["train", "predict"],
472
- required=True,
473
489
  help="Running mode: train or predict",
474
490
  )
475
491
  parser.add_argument("--train_config", help="Training configuration file path")
476
492
  parser.add_argument("--predict_config", help="Prediction configuration file path")
477
493
  args = parser.parse_args()
478
494
 
495
+ logger.info(get_nextrec_version())
496
+
497
+ if not args.mode:
498
+ parser.error("[NextRec CLI Error] --mode is required (train|predict)")
499
+
479
500
  if args.mode == "train":
480
501
  config_path = args.train_config
481
502
  if not config_path:
nextrec/data/__init__.py CHANGED
@@ -1,29 +1,26 @@
1
- from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
1
+ from nextrec.basic.features import FeatureSet
2
+ from nextrec.data import data_utils
3
+ from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
2
4
  from nextrec.data.data_processing import (
3
- get_column_data,
4
- split_dict_random,
5
5
  build_eval_candidates,
6
+ get_column_data,
6
7
  get_user_ids,
8
+ split_dict_random,
7
9
  )
8
-
9
- from nextrec.utils.file import (
10
- resolve_file_paths,
11
- iter_file_chunks,
12
- read_table,
13
- load_dataframes,
14
- default_output_dir,
15
- )
16
-
17
10
  from nextrec.data.dataloader import (
18
- TensorDictDataset,
19
11
  FileDataset,
20
12
  RecDataLoader,
13
+ TensorDictDataset,
21
14
  build_tensors_from_data,
22
15
  )
23
-
24
16
  from nextrec.data.preprocessor import DataProcessor
25
- from nextrec.basic.features import FeatureSet
26
- from nextrec.data import data_utils
17
+ from nextrec.utils.data import (
18
+ default_output_dir,
19
+ iter_file_chunks,
20
+ load_dataframes,
21
+ read_table,
22
+ resolve_file_paths,
23
+ )
27
24
 
28
25
  __all__ = [
29
26
  # Batch utilities
nextrec/data/batch_utils.py CHANGED
@@ -5,10 +5,11 @@ Date: create on 03/12/2025
5
5
  Author: Yang Zhou, zyaztec@gmail.com
6
6
  """
7
7
 
8
- import torch
9
- import numpy as np
10
8
  from typing import Any, Mapping
11
9
 
10
+ import numpy as np
11
+ import torch
12
+
12
13
 
13
14
  def stack_section(batch: list[dict], section: str):
14
15
  entries = [item.get(section) for item in batch if item.get(section) is not None]
nextrec/data/data_processing.py CHANGED
@@ -2,13 +2,16 @@
2
2
  Data processing utilities for NextRec
3
3
 
4
4
  Date: create on 03/12/2025
5
+ Checkpoint: edit on 19/12/2025
5
6
  Author: Yang Zhou, zyaztec@gmail.com
6
7
  """
7
8
 
8
- import torch
9
+ import hashlib
10
+ from typing import Any
11
+
9
12
  import numpy as np
10
13
  import pandas as pd
11
- from typing import Any
14
+ import torch
12
15
 
13
16
 
14
17
  def get_column_data(data: dict | pd.DataFrame, name: str):
@@ -166,3 +169,8 @@ def get_user_ids(
166
169
  return arr.reshape(arr.shape[0])
167
170
 
168
171
  return None
172
+
173
+
174
+ def hash_md5_mod(value: str, hash_size: int) -> int:
175
+ digest = hashlib.md5(value.encode("utf-8")).digest()
176
+ return int.from_bytes(digest, byteorder="big", signed=False) % hash_size
nextrec/data/data_utils.py CHANGED
@@ -1,30 +1,25 @@
1
1
  """
2
- Data processing utilities for NextRec (Refactored)
3
-
4
- This module now re-exports functions from specialized submodules:
5
- - batch_utils: collate_fn, batch_to_dict
6
- - data_processing: get_column_data, split_dict_random, build_eval_candidates, get_user_ids
7
- - nextrec.utils.file_utils: resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
2
+ Data processing utilities for NextRec
8
3
 
9
4
  Date: create on 27/10/2025
10
- Last update: 03/12/2025 (refactored)
5
+ Last update: 19/12/2025
11
6
  Author: Yang Zhou, zyaztec@gmail.com
12
7
  """
13
8
 
14
9
  # Import from new organized modules
15
- from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
10
+ from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
16
11
  from nextrec.data.data_processing import (
17
- get_column_data,
18
- split_dict_random,
19
12
  build_eval_candidates,
13
+ get_column_data,
20
14
  get_user_ids,
15
+ split_dict_random,
21
16
  )
22
- from nextrec.utils.file import (
23
- resolve_file_paths,
17
+ from nextrec.utils.data import (
18
+ default_output_dir,
24
19
  iter_file_chunks,
25
- read_table,
26
20
  load_dataframes,
27
- default_output_dir,
21
+ read_table,
22
+ resolve_file_paths,
28
23
  )
29
24
 
30
25
  __all__ = [
nextrec/data/dataloader.py CHANGED
@@ -2,33 +2,32 @@
2
2
  Dataloader definitions
3
3
 
4
4
  Date: create on 27/10/2025
5
- Checkpoint: edit on 02/12/2025
5
+ Checkpoint: edit on 19/12/2025
6
6
  Author: Yang Zhou,zyaztec@gmail.com
7
7
  """
8
8
 
9
- import os
10
- import torch
11
9
  import logging
10
+ import os
11
+ from pathlib import Path
12
+ from typing import cast
13
+
12
14
  import numpy as np
13
15
  import pandas as pd
14
16
  import pyarrow.parquet as pq
15
-
16
- from pathlib import Path
17
- from typing import cast
17
+ import torch
18
+ from torch.utils.data import DataLoader, Dataset, IterableDataset
18
19
 
19
20
  from nextrec.basic.features import (
20
21
  DenseFeature,
21
- SparseFeature,
22
- SequenceFeature,
23
22
  FeatureSet,
23
+ SequenceFeature,
24
+ SparseFeature,
24
25
  )
25
- from nextrec.data.preprocessor import DataProcessor
26
- from torch.utils.data import DataLoader, Dataset, IterableDataset
27
-
28
- from nextrec.utils.tensor import to_tensor
29
- from nextrec.utils.file import resolve_file_paths, read_table
30
26
  from nextrec.data.batch_utils import collate_fn
31
27
  from nextrec.data.data_processing import get_column_data
28
+ from nextrec.data.preprocessor import DataProcessor
29
+ from nextrec.utils.data import read_table, resolve_file_paths
30
+ from nextrec.utils.torch_utils import to_tensor
32
31
 
33
32
 
34
33
  class TensorDictDataset(Dataset):