nextrec 0.4.8__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/callback.py +30 -15
  3. nextrec/basic/features.py +1 -0
  4. nextrec/basic/layers.py +6 -8
  5. nextrec/basic/loggers.py +14 -7
  6. nextrec/basic/metrics.py +6 -76
  7. nextrec/basic/model.py +312 -318
  8. nextrec/cli.py +5 -10
  9. nextrec/data/__init__.py +13 -16
  10. nextrec/data/batch_utils.py +3 -2
  11. nextrec/data/data_processing.py +10 -2
  12. nextrec/data/data_utils.py +9 -14
  13. nextrec/data/dataloader.py +12 -13
  14. nextrec/data/preprocessor.py +328 -255
  15. nextrec/loss/__init__.py +1 -5
  16. nextrec/loss/loss_utils.py +2 -8
  17. nextrec/models/generative/__init__.py +1 -8
  18. nextrec/models/generative/hstu.py +6 -4
  19. nextrec/models/multi_task/esmm.py +2 -2
  20. nextrec/models/multi_task/mmoe.py +2 -2
  21. nextrec/models/multi_task/ple.py +2 -2
  22. nextrec/models/multi_task/poso.py +2 -3
  23. nextrec/models/multi_task/share_bottom.py +2 -2
  24. nextrec/models/ranking/afm.py +2 -2
  25. nextrec/models/ranking/autoint.py +2 -2
  26. nextrec/models/ranking/dcn.py +2 -2
  27. nextrec/models/ranking/dcn_v2.py +2 -2
  28. nextrec/models/ranking/deepfm.py +2 -2
  29. nextrec/models/ranking/dien.py +3 -3
  30. nextrec/models/ranking/din.py +3 -3
  31. nextrec/models/ranking/ffm.py +0 -0
  32. nextrec/models/ranking/fibinet.py +5 -5
  33. nextrec/models/ranking/fm.py +3 -7
  34. nextrec/models/ranking/lr.py +0 -0
  35. nextrec/models/ranking/masknet.py +2 -2
  36. nextrec/models/ranking/pnn.py +2 -2
  37. nextrec/models/ranking/widedeep.py +2 -2
  38. nextrec/models/ranking/xdeepfm.py +2 -2
  39. nextrec/models/representation/__init__.py +9 -0
  40. nextrec/models/{generative → representation}/rqvae.py +9 -9
  41. nextrec/models/retrieval/__init__.py +0 -0
  42. nextrec/models/{match → retrieval}/dssm.py +8 -3
  43. nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
  44. nextrec/models/{match → retrieval}/mind.py +4 -3
  45. nextrec/models/{match → retrieval}/sdm.py +4 -3
  46. nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
  47. nextrec/utils/__init__.py +60 -46
  48. nextrec/utils/config.py +8 -7
  49. nextrec/utils/console.py +371 -0
  50. nextrec/utils/{synthetic_data.py → data.py} +102 -15
  51. nextrec/utils/feature.py +15 -0
  52. nextrec/utils/torch_utils.py +411 -0
  53. {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/METADATA +6 -6
  54. nextrec-0.4.9.dist-info/RECORD +70 -0
  55. nextrec/utils/cli_utils.py +0 -58
  56. nextrec/utils/device.py +0 -78
  57. nextrec/utils/distributed.py +0 -141
  58. nextrec/utils/file.py +0 -92
  59. nextrec/utils/initializer.py +0 -79
  60. nextrec/utils/optimizer.py +0 -75
  61. nextrec/utils/tensor.py +0 -72
  62. nextrec-0.4.8.dist-info/RECORD +0 -71
  63. /nextrec/models/{match/__init__.py → ranking/eulernet.py} +0 -0
  64. {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/WHEEL +0 -0
  65. {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/entry_points.txt +0 -0
  66. {nextrec-0.4.8.dist-info → nextrec-0.4.9.dist-info}/licenses/LICENSE +0 -0
nextrec/cli.py CHANGED
@@ -18,10 +18,10 @@ Checkpoint: edit on 18/12/2025
18
18
  Author: Yang Zhou, zyaztec@gmail.com
19
19
  """
20
20
 
21
- import sys
22
21
  import argparse
23
22
  import logging
24
23
  import pickle
24
+ import sys
25
25
  import time
26
26
  from pathlib import Path
27
27
  from typing import Any, Dict, List
@@ -29,6 +29,7 @@ from typing import Any, Dict, List
29
29
  import pandas as pd
30
30
 
31
31
  from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
32
+ from nextrec.basic.loggers import setup_logger
32
33
  from nextrec.data.data_utils import split_dict_random
33
34
  from nextrec.data.dataloader import RecDataLoader
34
35
  from nextrec.data.preprocessor import DataProcessor
@@ -39,18 +40,14 @@ from nextrec.utils.config import (
39
40
  resolve_path,
40
41
  select_features,
41
42
  )
42
- from nextrec.utils.feature import normalize_to_list
43
- from nextrec.utils.file import (
43
+ from nextrec.utils.console import get_nextrec_version
44
+ from nextrec.utils.data import (
44
45
  iter_file_chunks,
45
46
  read_table,
46
47
  read_yaml,
47
48
  resolve_file_paths,
48
49
  )
49
- from nextrec.utils.cli_utils import (
50
- get_nextrec_version,
51
- log_startup_info,
52
- )
53
- from nextrec.basic.loggers import setup_logger
50
+ from nextrec.utils.feature import normalize_to_list
54
51
 
55
52
  logger = logging.getLogger(__name__)
56
53
 
@@ -504,13 +501,11 @@ Examples:
504
501
  config_path = args.train_config
505
502
  if not config_path:
506
503
  parser.error("[NextRec CLI Error] train mode requires --train_config")
507
- log_startup_info(logger, mode="train", config_path=config_path)
508
504
  train_model(config_path)
509
505
  else:
510
506
  config_path = args.predict_config
511
507
  if not config_path:
512
508
  parser.error("[NextRec CLI Error] predict mode requires --predict_config")
513
- log_startup_info(logger, mode="predict", config_path=config_path)
514
509
  predict_model(config_path)
515
510
 
516
511
 
nextrec/data/__init__.py CHANGED
@@ -1,29 +1,26 @@
1
- from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
1
+ from nextrec.basic.features import FeatureSet
2
+ from nextrec.data import data_utils
3
+ from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
2
4
  from nextrec.data.data_processing import (
3
- get_column_data,
4
- split_dict_random,
5
5
  build_eval_candidates,
6
+ get_column_data,
6
7
  get_user_ids,
8
+ split_dict_random,
7
9
  )
8
-
9
- from nextrec.utils.file import (
10
- resolve_file_paths,
11
- iter_file_chunks,
12
- read_table,
13
- load_dataframes,
14
- default_output_dir,
15
- )
16
-
17
10
  from nextrec.data.dataloader import (
18
- TensorDictDataset,
19
11
  FileDataset,
20
12
  RecDataLoader,
13
+ TensorDictDataset,
21
14
  build_tensors_from_data,
22
15
  )
23
-
24
16
  from nextrec.data.preprocessor import DataProcessor
25
- from nextrec.basic.features import FeatureSet
26
- from nextrec.data import data_utils
17
+ from nextrec.utils.data import (
18
+ default_output_dir,
19
+ iter_file_chunks,
20
+ load_dataframes,
21
+ read_table,
22
+ resolve_file_paths,
23
+ )
27
24
 
28
25
  __all__ = [
29
26
  # Batch utilities
@@ -5,10 +5,11 @@ Date: create on 03/12/2025
5
5
  Author: Yang Zhou, zyaztec@gmail.com
6
6
  """
7
7
 
8
- import torch
9
- import numpy as np
10
8
  from typing import Any, Mapping
11
9
 
10
+ import numpy as np
11
+ import torch
12
+
12
13
 
13
14
  def stack_section(batch: list[dict], section: str):
14
15
  entries = [item.get(section) for item in batch if item.get(section) is not None]
@@ -2,13 +2,16 @@
2
2
  Data processing utilities for NextRec
3
3
 
4
4
  Date: create on 03/12/2025
5
+ Checkpoint: edit on 19/12/2025
5
6
  Author: Yang Zhou, zyaztec@gmail.com
6
7
  """
7
8
 
8
- import torch
9
+ import hashlib
10
+ from typing import Any
11
+
9
12
  import numpy as np
10
13
  import pandas as pd
11
- from typing import Any
14
+ import torch
12
15
 
13
16
 
14
17
  def get_column_data(data: dict | pd.DataFrame, name: str):
@@ -166,3 +169,8 @@ def get_user_ids(
166
169
  return arr.reshape(arr.shape[0])
167
170
 
168
171
  return None
172
+
173
+
174
+ def hash_md5_mod(value: str, hash_size: int) -> int:
175
+ digest = hashlib.md5(value.encode("utf-8")).digest()
176
+ return int.from_bytes(digest, byteorder="big", signed=False) % hash_size
@@ -1,30 +1,25 @@
1
1
  """
2
- Data processing utilities for NextRec (Refactored)
3
-
4
- This module now re-exports functions from specialized submodules:
5
- - batch_utils: collate_fn, batch_to_dict
6
- - data_processing: get_column_data, split_dict_random, build_eval_candidates, get_user_ids
7
- - nextrec.utils.file_utils: resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
2
+ Data processing utilities for NextRec
8
3
 
9
4
  Date: create on 27/10/2025
10
- Last update: 03/12/2025 (refactored)
5
+ Last update: 19/12/2025
11
6
  Author: Yang Zhou, zyaztec@gmail.com
12
7
  """
13
8
 
14
9
  # Import from new organized modules
15
- from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
10
+ from nextrec.data.batch_utils import batch_to_dict, collate_fn, stack_section
16
11
  from nextrec.data.data_processing import (
17
- get_column_data,
18
- split_dict_random,
19
12
  build_eval_candidates,
13
+ get_column_data,
20
14
  get_user_ids,
15
+ split_dict_random,
21
16
  )
22
- from nextrec.utils.file import (
23
- resolve_file_paths,
17
+ from nextrec.utils.data import (
18
+ default_output_dir,
24
19
  iter_file_chunks,
25
- read_table,
26
20
  load_dataframes,
27
- default_output_dir,
21
+ read_table,
22
+ resolve_file_paths,
28
23
  )
29
24
 
30
25
  __all__ = [
@@ -2,33 +2,32 @@
2
2
  Dataloader definitions
3
3
 
4
4
  Date: create on 27/10/2025
5
- Checkpoint: edit on 02/12/2025
5
+ Checkpoint: edit on 19/12/2025
6
6
  Author: Yang Zhou,zyaztec@gmail.com
7
7
  """
8
8
 
9
- import os
10
- import torch
11
9
  import logging
10
+ import os
11
+ from pathlib import Path
12
+ from typing import cast
13
+
12
14
  import numpy as np
13
15
  import pandas as pd
14
16
  import pyarrow.parquet as pq
15
-
16
- from pathlib import Path
17
- from typing import cast
17
+ import torch
18
+ from torch.utils.data import DataLoader, Dataset, IterableDataset
18
19
 
19
20
  from nextrec.basic.features import (
20
21
  DenseFeature,
21
- SparseFeature,
22
- SequenceFeature,
23
22
  FeatureSet,
23
+ SequenceFeature,
24
+ SparseFeature,
24
25
  )
25
- from nextrec.data.preprocessor import DataProcessor
26
- from torch.utils.data import DataLoader, Dataset, IterableDataset
27
-
28
- from nextrec.utils.tensor import to_tensor
29
- from nextrec.utils.file import resolve_file_paths, read_table
30
26
  from nextrec.data.batch_utils import collate_fn
31
27
  from nextrec.data.data_processing import get_column_data
28
+ from nextrec.data.preprocessor import DataProcessor
29
+ from nextrec.utils.data import read_table, resolve_file_paths
30
+ from nextrec.utils.torch_utils import to_tensor
32
31
 
33
32
 
34
33
  class TensorDictDataset(Dataset):