nextrec 0.3.5.tar.gz → 0.3.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. {nextrec-0.3.5 → nextrec-0.3.6}/.gitignore +1 -0
  2. {nextrec-0.3.5 → nextrec-0.3.6}/PKG-INFO +3 -3
  3. {nextrec-0.3.5 → nextrec-0.3.6}/README.md +2 -2
  4. {nextrec-0.3.5 → nextrec-0.3.6}/README_zh.md +2 -2
  5. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/conf.py +1 -1
  6. nextrec-0.3.6/nextrec/__init__.py +11 -0
  7. nextrec-0.3.6/nextrec/__version__.py +1 -0
  8. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/loggers.py +1 -1
  9. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/model.py +16 -13
  10. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/session.py +4 -2
  11. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/__init__.py +0 -25
  12. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/dataloader.py +15 -10
  13. nextrec-0.3.6/nextrec/models/generative/tiger.py +0 -0
  14. nextrec-0.3.6/nextrec/models/match/__init__.py +0 -0
  15. nextrec-0.3.6/nextrec/models/multi_task/__init__.py +0 -0
  16. nextrec-0.3.6/nextrec/models/ranking/__init__.py +0 -0
  17. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/device.py +2 -1
  18. {nextrec-0.3.5 → nextrec-0.3.6}/pyproject.toml +1 -1
  19. nextrec-0.3.5/nextrec/__init__.py +0 -41
  20. nextrec-0.3.5/nextrec/__version__.py +0 -1
  21. nextrec-0.3.5/nextrec/models/generative/__init__.py +0 -5
  22. nextrec-0.3.5/nextrec/models/match/__init__.py +0 -13
  23. nextrec-0.3.5/nextrec/models/ranking/__init__.py +0 -27
  24. {nextrec-0.3.5 → nextrec-0.3.6}/.github/workflows/publish.yml +0 -0
  25. {nextrec-0.3.5 → nextrec-0.3.6}/.github/workflows/tests.yml +0 -0
  26. {nextrec-0.3.5 → nextrec-0.3.6}/.readthedocs.yaml +0 -0
  27. {nextrec-0.3.5 → nextrec-0.3.6}/CODE_OF_CONDUCT.md +0 -0
  28. {nextrec-0.3.5 → nextrec-0.3.6}/CONTRIBUTING.md +0 -0
  29. {nextrec-0.3.5 → nextrec-0.3.6}/LICENSE +0 -0
  30. {nextrec-0.3.5 → nextrec-0.3.6}/MANIFEST.in +0 -0
  31. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/Feature Configuration.png +0 -0
  32. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/Model Parameters.png +0 -0
  33. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/Training Configuration.png +0 -0
  34. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/Training logs.png +0 -0
  35. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/logo.png +0 -0
  36. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/mmoe_tutorial.png +0 -0
  37. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/nextrec_diagram_en.png +0 -0
  38. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/nextrec_diagram_zh.png +0 -0
  39. {nextrec-0.3.5 → nextrec-0.3.6}/asserts/test data.png +0 -0
  40. {nextrec-0.3.5 → nextrec-0.3.6}/dataset/ctcvr_task.csv +0 -0
  41. {nextrec-0.3.5 → nextrec-0.3.6}/dataset/match_task.csv +0 -0
  42. {nextrec-0.3.5 → nextrec-0.3.6}/dataset/movielens_100k.csv +0 -0
  43. {nextrec-0.3.5 → nextrec-0.3.6}/dataset/multitask_task.csv +0 -0
  44. {nextrec-0.3.5 → nextrec-0.3.6}/dataset/ranking_task.csv +0 -0
  45. {nextrec-0.3.5 → nextrec-0.3.6}/docs/en/Getting started guide.md +0 -0
  46. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/Makefile +0 -0
  47. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/index.md +0 -0
  48. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/make.bat +0 -0
  49. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/modules.rst +0 -0
  50. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/nextrec.basic.rst +0 -0
  51. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/nextrec.data.rst +0 -0
  52. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/nextrec.loss.rst +0 -0
  53. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/nextrec.rst +0 -0
  54. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/nextrec.utils.rst +0 -0
  55. {nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/requirements.txt +0 -0
  56. {nextrec-0.3.5 → nextrec-0.3.6}/docs/zh/快速上手.md +0 -0
  57. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/__init__.py +0 -0
  58. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/activation.py +0 -0
  59. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/callback.py +0 -0
  60. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/features.py +0 -0
  61. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/layers.py +0 -0
  62. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/metrics.py +0 -0
  63. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/batch_utils.py +0 -0
  64. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/data_processing.py +0 -0
  65. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/data_utils.py +0 -0
  66. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/preprocessor.py +0 -0
  67. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/loss/__init__.py +0 -0
  68. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/loss/listwise.py +0 -0
  69. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/loss/loss_utils.py +0 -0
  70. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/loss/pairwise.py +0 -0
  71. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/loss/pointwise.py +0 -0
  72. /nextrec-0.3.5/nextrec/models/generative/tiger.py → /nextrec-0.3.6/nextrec/models/generative/__init__.py +0 -0
  73. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/generative/hstu.py +0 -0
  74. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/match/dssm.py +0 -0
  75. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/match/dssm_v2.py +0 -0
  76. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/match/mind.py +0 -0
  77. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/match/sdm.py +0 -0
  78. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/match/youtube_dnn.py +0 -0
  79. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/multi_task/esmm.py +0 -0
  80. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/multi_task/mmoe.py +0 -0
  81. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/multi_task/ple.py +0 -0
  82. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/multi_task/poso.py +0 -0
  83. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/multi_task/share_bottom.py +0 -0
  84. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/afm.py +0 -0
  85. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/autoint.py +0 -0
  86. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/dcn.py +0 -0
  87. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/dcn_v2.py +0 -0
  88. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/deepfm.py +0 -0
  89. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/dien.py +0 -0
  90. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/din.py +0 -0
  91. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/fibinet.py +0 -0
  92. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/fm.py +0 -0
  93. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/masknet.py +0 -0
  94. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/pnn.py +0 -0
  95. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/widedeep.py +0 -0
  96. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/models/ranking/xdeepfm.py +0 -0
  97. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/__init__.py +0 -0
  98. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/embedding.py +0 -0
  99. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/feature.py +0 -0
  100. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/file.py +0 -0
  101. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/initializer.py +0 -0
  102. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/model.py +0 -0
  103. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/optimizer.py +0 -0
  104. {nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/tensor.py +0 -0
  105. {nextrec-0.3.5 → nextrec-0.3.6}/pytest.ini +0 -0
  106. {nextrec-0.3.5 → nextrec-0.3.6}/requirements.txt +0 -0
  107. {nextrec-0.3.5 → nextrec-0.3.6}/test/__init__.py +0 -0
  108. {nextrec-0.3.5 → nextrec-0.3.6}/test/conftest.py +0 -0
  109. {nextrec-0.3.5 → nextrec-0.3.6}/test/run_tests.py +0 -0
  110. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_layers.py +0 -0
  111. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_losses.py +0 -0
  112. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_match_models.py +0 -0
  113. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_multitask_models.py +0 -0
  114. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_preprocessor.py +0 -0
  115. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_ranking_models.py +0 -0
  116. {nextrec-0.3.5 → nextrec-0.3.6}/test/test_utils.py +0 -0
  117. {nextrec-0.3.5 → nextrec-0.3.6}/test_requirements.txt +0 -0
  118. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/example_match_dssm.py +0 -0
  119. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/example_multitask.py +0 -0
  120. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/example_ranking_din.py +0 -0
  121. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/movielen_match_dssm.py +0 -0
  122. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/movielen_ranking_deepfm.py +0 -0
  123. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/notebooks/en/Hands on dataprocessor.ipynb +0 -0
  124. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/notebooks/en/Hands on nextrec.ipynb +0 -0
  125. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -0
  126. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/notebooks/zh/Hands on nextrec.ipynb +0 -0
  127. {nextrec-0.3.5 → nextrec-0.3.6}/tutorials/run_all_tutorials.py +0 -0

{nextrec-0.3.5 → nextrec-0.3.6}/.gitignore
@@ -116,6 +116,7 @@ dmypy.json
 
 # test files
 *iflytek*
+generate_feature_config.py
 
 # DataProcessor saved files
 processor/

{nextrec-0.3.5 → nextrec-0.3.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nextrec
-Version: 0.3.5
+Version: 0.3.6
 Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
 Project-URL: Homepage, https://github.com/zerolovesea/NextRec
 Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.5-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.6-orange.svg)
 
 English | [中文文档](README_zh.md)
 
@@ -110,7 +110,7 @@ To dive deeper, Jupyter notebooks are available:
 - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
 - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
 
-> Current version [0.3.5]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+> Current version [0.3.6]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
 
 ## 5-Minute Quick Start
 

{nextrec-0.3.5 → nextrec-0.3.6}/README.md
@@ -7,7 +7,7 @@
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.5-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.6-orange.svg)
 
 English | [中文文档](README_zh.md)
 
@@ -54,7 +54,7 @@ To dive deeper, Jupyter notebooks are available:
 - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
 - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
 
-> Current version [0.3.5]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+> Current version [0.3.6]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
 
 ## 5-Minute Quick Start
 

{nextrec-0.3.5 → nextrec-0.3.6}/README_zh.md
@@ -7,7 +7,7 @@
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.5-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.6-orange.svg)
 
 [English Version](README.md) | 中文文档
 
@@ -54,7 +54,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
 - [如何上手NextRec框架](/tutorials/notebooks/zh/Hands%20on%20nextrec.ipynb)
 - [如何使用数据处理器进行数据预处理](/tutorials/notebooks/zh/Hands%20on%20dataprocessor.ipynb)
 
-> 当前版本[0.3.5],召回模型模块尚不完善,可能存在一些兼容性问题或意外报错,如果遇到问题,欢迎开发者在Issue区提出问题。
+> 当前版本[0.3.6],召回模型模块尚不完善,可能存在一些兼容性问题或意外报错,如果遇到问题,欢迎开发者在Issue区提出问题。
 
 ## 5分钟快速上手
 

{nextrec-0.3.5 → nextrec-0.3.6}/docs/rtd/conf.py
@@ -11,7 +11,7 @@ sys.path.insert(0, str(PROJECT_ROOT / "nextrec"))
 project = "NextRec"
 copyright = "2025, Yang Zhou"
 author = "Yang Zhou"
-release = "0.3.5"
+release = "0.3.6"
 
 extensions = [
     "myst_parser",

nextrec-0.3.6/nextrec/__init__.py (new file)
@@ -0,0 +1,11 @@
+from nextrec.__version__ import __version__
+
+__all__ = [
+    '__version__',
+]
+
+# Package metadata
+__author__ = "zerolovesea"
+__email__ = "zyaztec@gmail.com"
+__license__ = "Apache 2.0"
+__url__ = "https://github.com/zerolovesea/NextRec"

nextrec-0.3.6/nextrec/__version__.py (new file)
@@ -0,0 +1 @@
+__version__ = "0.3.6"

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/loggers.py
@@ -99,7 +99,7 @@ def setup_logger(session_id: str | os.PathLike | None = None):
    session = create_session(str(session_id) if session_id is not None else None)
    log_dir = session.logs_dir
    log_dir.mkdir(parents=True, exist_ok=True)
-   log_file = log_dir / f"{session.experiment_id}.log"
+   log_file = log_dir / f"{session.log_basename}.log"
 
    console_format = '%(message)s'
    file_format = '%(asctime)s - %(levelname)s - %(message)s'

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/model.py
@@ -155,7 +155,7 @@ class BaseModel(FeatureSet, nn.Module):
            raise ValueError("[BaseModel-input Error] Labels are required but none were found in the input batch.")
        return X_input, y
 
-   def handle_validation_split(self, train_data: dict | pd.DataFrame, validation_split: float, batch_size: int, shuffle: bool,) -> tuple[DataLoader, dict | pd.DataFrame]:
+   def handle_validation_split(self, train_data: dict | pd.DataFrame, validation_split: float, batch_size: int, shuffle: bool, num_workers: int = 0,) -> tuple[DataLoader, dict | pd.DataFrame]:
        """This function will split training data into training and validation sets when: 1. valid_data is None; 2. validation_split is provided."""
        if not (0 < validation_split < 1):
            raise ValueError(f"[BaseModel-validation Error] validation_split must be between 0 and 1, got {validation_split}")
@@ -184,7 +184,7 @@ class BaseModel(FeatureSet, nn.Module):
            arr = np.asarray(value)
            train_split[key] = arr[train_indices]
            valid_split[key] = arr[valid_indices]
-       train_loader = self.prepare_data_loader(train_split, batch_size=batch_size, shuffle=shuffle)
+       train_loader = self.prepare_data_loader(train_split, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
        logging.info(f"Split data: {len(train_indices)} training samples, {len(valid_indices)} validation samples")
        return train_loader, valid_split
 
@@ -265,14 +265,14 @@ class BaseModel(FeatureSet, nn.Module):
            task_losses.append(task_loss)
        return torch.stack(task_losses).sum()
 
-   def prepare_data_loader(self, data: dict | pd.DataFrame | DataLoader, batch_size: int = 32, shuffle: bool = True,):
+   def prepare_data_loader(self, data: dict | pd.DataFrame | DataLoader, batch_size: int = 32, shuffle: bool = True, num_workers: int = 0,) -> DataLoader:
        if isinstance(data, DataLoader):
            return data
        tensors = build_tensors_from_data(data=data, raw_data=data, features=self.all_features, target_columns=self.target_columns, id_columns=self.id_columns,)
        if tensors is None:
            raise ValueError("[BaseModel-prepare_data_loader Error] No data available to create DataLoader.")
        dataset = TensorDictDataset(tensors)
-       return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
+       return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn, num_workers=num_workers)
 
    def fit(self,
            train_data: dict | pd.DataFrame | DataLoader,
@@ -281,6 +281,7 @@
            epochs:int=1, shuffle:bool=True, batch_size:int=32,
            user_id_column: str | None = None,
            validation_split: float | None = None,
+           num_workers: int = 0,
            tensorboard: bool = True,):
        self.to(self.device)
        if not self.logger_initialized:
@@ -297,11 +298,11 @@
        self.best_metric = float('-inf') if self.best_metrics_mode == 'max' else float('inf')
 
        if validation_split is not None and valid_data is None:
-           train_loader, valid_data = self.handle_validation_split(train_data=train_data, validation_split=validation_split, batch_size=batch_size, shuffle=shuffle,) # type: ignore
+           train_loader, valid_data = self.handle_validation_split(train_data=train_data, validation_split=validation_split, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) # type: ignore
        else:
-           train_loader = (train_data if isinstance(train_data, DataLoader) else self.prepare_data_loader(train_data, batch_size=batch_size, shuffle=shuffle))
+           train_loader = (train_data if isinstance(train_data, DataLoader) else self.prepare_data_loader(train_data, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers))
 
-       valid_loader, valid_user_ids = self.prepare_validation_data(valid_data=valid_data, batch_size=batch_size, needs_user_ids=self.needs_user_ids, user_id_column=user_id_column)
+       valid_loader, valid_user_ids = self.prepare_validation_data(valid_data=valid_data, batch_size=batch_size, needs_user_ids=self.needs_user_ids, user_id_column=user_id_column, num_workers=num_workers)
        try:
            self.steps_per_epoch = len(train_loader)
            is_streaming = False
@@ -388,7 +389,7 @@
            self.training_logger.log_metrics(train_log_payload, step=epoch + 1, split="train")
            if valid_loader is not None:
                # pass user_ids only if needed for GAUC metric
-               val_metrics = self.evaluate(valid_loader, user_ids=valid_user_ids if self.needs_user_ids else None) # {'auc': 0.75, 'logloss': 0.45} or {'auc_target1': 0.75, 'logloss_target1': 0.45, 'mse_target2': 3.2}
+               val_metrics = self.evaluate(valid_loader, user_ids=valid_user_ids if self.needs_user_ids else None, num_workers=num_workers) # {'auc': 0.75, 'logloss': 0.45} or {'auc_target1': 0.75, 'logloss_target1': 0.45, 'mse_target2': 3.2}
                if self.nums_task == 1:
                    metrics_str = ", ".join([f"{k}={v:.4f}" for k, v in val_metrics.items()])
                    logging.info(colorize(f" Epoch {epoch + 1}/{epochs} - Valid: {metrics_str}", color="cyan"))
@@ -513,12 +514,12 @@
            return avg_loss, metrics_dict
        return avg_loss
 
-   def prepare_validation_data(self, valid_data: dict | pd.DataFrame | DataLoader | None, batch_size: int, needs_user_ids: bool, user_id_column: str | None = 'user_id') -> tuple[DataLoader | None, np.ndarray | None]:
+   def prepare_validation_data(self, valid_data: dict | pd.DataFrame | DataLoader | None, batch_size: int, needs_user_ids: bool, user_id_column: str | None = 'user_id', num_workers: int = 0,) -> tuple[DataLoader | None, np.ndarray | None]:
        if valid_data is None:
            return None, None
        if isinstance(valid_data, DataLoader):
            return valid_data, None
-       valid_loader = self.prepare_data_loader(valid_data, batch_size=batch_size, shuffle=False)
+       valid_loader = self.prepare_data_loader(valid_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        valid_user_ids = None
        if needs_user_ids:
            if user_id_column is None:
@@ -531,7 +532,8 @@
                metrics: list[str] | dict[str, list[str]] | None = None,
                batch_size: int = 32,
                user_ids: np.ndarray | None = None,
-               user_id_column: str = 'user_id') -> dict:
+               user_id_column: str = 'user_id',
+               num_workers: int = 0,) -> dict:
        self.eval()
        eval_metrics = metrics if metrics is not None else self.metrics
        if eval_metrics is None:
@@ -543,7 +545,7 @@
        else:
            if user_ids is None and needs_user_ids:
                user_ids = get_user_ids(data=data, id_columns=user_id_column)
-           data_loader = self.prepare_data_loader(data, batch_size=batch_size, shuffle=False)
+           data_loader = self.prepare_data_loader(data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        y_true_list = []
        y_pred_list = []
        collected_user_ids = []
@@ -603,6 +605,7 @@
                include_ids: bool | None = None,
                return_dataframe: bool = True,
                streaming_chunk_size: int = 10000,
+               num_workers: int = 0,
                ) -> pd.DataFrame | np.ndarray:
        self.eval()
        if include_ids is None:
@@ -615,7 +618,7 @@
            rec_loader = RecDataLoader(dense_features=self.dense_features, sparse_features=self.sparse_features, sequence_features=self.sequence_features, target=self.target_columns, id_columns=self.id_columns,)
            data_loader = rec_loader.create_dataloader(data=data, batch_size=batch_size, shuffle=False, load_full=False, chunk_size=streaming_chunk_size,)
        elif not isinstance(data, DataLoader):
-           data_loader = self.prepare_data_loader(data, batch_size=batch_size, shuffle=False,)
+           data_loader = self.prepare_data_loader(data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        else:
            data_loader = data
 

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/basic/session.py
@@ -22,6 +22,7 @@ class Session:
 
    experiment_id: str
    root: Path
+   log_basename: str  # The base name for log files, without path separators
 
    @property
    def logs_dir(self) -> Path:
@@ -60,7 +61,6 @@
        return path
 
 def create_session(experiment_id: str | Path | None = None) -> Session:
-   """Create a :class:`Session` instance with prepared directories."""
 
    if experiment_id is not None and str(experiment_id).strip():
        exp_id = str(experiment_id).strip()
@@ -68,6 +68,8 @@ def create_session(experiment_id: str | Path | None = None) -> Session:
        # Use local time for session naming
        exp_id = "nextrec_session_" + datetime.now().strftime("%Y%m%d")
 
+   log_basename = Path(exp_id).name if exp_id else exp_id
+
    if (
        os.getenv("PYTEST_CURRENT_TEST")
        or os.getenv("PYTEST_RUNNING")
@@ -82,7 +84,7 @@ def create_session(experiment_id: str | Path | None = None) -> Session:
    session_path.mkdir(parents=True, exist_ok=True)
    root = session_path.resolve()
 
-   return Session(experiment_id=exp_id, root=root)
+   return Session(experiment_id=exp_id, root=root, log_basename=log_basename)
 
 def resolve_save_path(
    path: str | os.PathLike | Path | None,
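
Note: the new Session.log_basename field exists so that an experiment_id containing path separators no longer leaks them into the log filename; loggers.py (above) now builds the log path from session.log_basename instead of session.experiment_id. A standard-library illustration of the derivation used in create_session (the example experiment_id is made up):

    from pathlib import Path

    exp_id = "experiments/2025/run_01"            # an experiment_id with path separators
    log_basename = Path(exp_id).name if exp_id else exp_id

    print(log_basename)            # run_01
    print(f"{log_basename}.log")   # run_01.log -> a plain filename under logs_dir
    print(f"{exp_id}.log")         # experiments/2025/run_01.log -> the old, nested behaviour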

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/__init__.py
@@ -1,22 +1,4 @@
-"""
-Data utilities package for NextRec
-
-This package provides data processing and manipulation utilities organized by category:
-- batch_utils: Batch collation and processing
-- data_processing: Data manipulation and user ID extraction
-- data_utils: Legacy module (re-exports from specialized modules)
-- dataloader: Dataset and DataLoader implementations
-- preprocessor: Data preprocessing pipeline
-
-Date: create on 13/11/2025
-Last update: 03/12/2025 (refactored)
-Author: Yang Zhou, zyaztec@gmail.com
-"""
-
-# Batch utilities
 from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
-
-# Data processing utilities
 from nextrec.data.data_processing import (
    get_column_data,
    split_dict_random,
@@ -24,7 +6,6 @@ from nextrec.data.data_processing import (
    get_user_ids,
 )
 
-# File utilities (from utils package)
 from nextrec.utils.file import (
    resolve_file_paths,
    iter_file_chunks,
@@ -33,7 +14,6 @@ from nextrec.utils.file import (
    default_output_dir,
 )
 
-# DataLoader components
 from nextrec.data.dataloader import (
    TensorDictDataset,
    FileDataset,
@@ -41,13 +21,8 @@ from nextrec.data.dataloader import (
    build_tensors_from_data,
 )
 
-# Preprocessor
 from nextrec.data.preprocessor import DataProcessor
-
-# Feature definitions
 from nextrec.basic.features import FeatureSet
-
-# Legacy module (for backward compatibility)
 from nextrec.data import data_utils
 
 __all__ = [
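
Note: only the module docstring and section comments were removed from nextrec/data/__init__.py; every re-export stays in place, so imports such as the following should behave the same in 0.3.6 (a non-exhaustive sketch):

    # these names are all still imported into nextrec.data by the trimmed __init__.py
    from nextrec.data import (
        DataProcessor,        # preprocessing pipeline
        TensorDictDataset,    # in-memory dataset wrapper
        collate_fn,           # batch collation helper
        get_user_ids,         # user-id extraction (used for GAUC-style metrics)
    )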

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/data/dataloader.py
@@ -126,20 +126,22 @@ class RecDataLoader(FeatureSet):
                          batch_size: int = 32,
                          shuffle: bool = True,
                          load_full: bool = True,
-                         chunk_size: int = 10000) -> DataLoader:
+                         chunk_size: int = 10000,
+                         num_workers: int = 0) -> DataLoader:
        if isinstance(data, DataLoader):
            return data
        elif isinstance(data, (str, os.PathLike)):
-           return self.create_from_path(path=data, batch_size=batch_size, shuffle=shuffle, load_full=load_full, chunk_size=chunk_size)
+           return self.create_from_path(path=data, batch_size=batch_size, shuffle=shuffle, load_full=load_full, chunk_size=chunk_size, num_workers=num_workers)
        elif isinstance(data, (dict, pd.DataFrame)):
-           return self.create_from_memory(data=data, batch_size=batch_size, shuffle=shuffle)
+           return self.create_from_memory(data=data, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
        else:
            raise ValueError(f"[RecDataLoader Error] Unsupported data type: {type(data)}")
 
    def create_from_memory(self,
                           data: dict | pd.DataFrame,
                           batch_size: int,
-                          shuffle: bool) -> DataLoader:
+                          shuffle: bool,
+                          num_workers: int = 0) -> DataLoader:
        raw_data = data
 
        if self.processor is not None:
@@ -150,14 +152,15 @@
        if tensors is None:
            raise ValueError("[RecDataLoader Error] No valid tensors could be built from the provided data.")
        dataset = TensorDictDataset(tensors)
-       return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
+       return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn, num_workers=num_workers)
 
    def create_from_path(self,
                         path: str,
                         batch_size: int,
                         shuffle: bool,
                         load_full: bool,
-                        chunk_size: int = 10000) -> DataLoader:
+                        chunk_size: int = 10000,
+                        num_workers: int = 0) -> DataLoader:
        file_paths, file_type = resolve_file_paths(str(Path(path)))
        # Load full data into memory
        if load_full:
@@ -169,6 +172,7 @@
            except OSError:
                pass
            try:
+               df = read_table(file_path, file_type=file_type)
                dfs.append(df)
            except MemoryError as exc:
                raise MemoryError(f"[RecDataLoader Error] Out of memory while reading {file_path}. Consider using load_full=False with streaming.") from exc
@@ -176,22 +180,23 @@
                combined_df = pd.concat(dfs, ignore_index=True)
            except MemoryError as exc:
                raise MemoryError(f"[RecDataLoader Error] Out of memory while concatenating loaded data (approx {total_bytes / (1024**3):.2f} GB). Use load_full=False to stream or reduce chunk_size.") from exc
-           return self.create_from_memory(combined_df, batch_size, shuffle,)
+           return self.create_from_memory(combined_df, batch_size, shuffle, num_workers=num_workers)
        else:
-           return self.load_files_streaming(file_paths, file_type, batch_size, chunk_size, shuffle)
+           return self.load_files_streaming(file_paths, file_type, batch_size, chunk_size, shuffle, num_workers=num_workers)
 
    def load_files_streaming(self,
                             file_paths: list[str],
                             file_type: str,
                             batch_size: int,
                             chunk_size: int,
-                            shuffle: bool) -> DataLoader:
+                            shuffle: bool,
+                            num_workers: int = 0) -> DataLoader:
        if shuffle:
            logging.info("[RecDataLoader Info] Shuffle is ignored in streaming mode (IterableDataset).")
        if batch_size != 1:
            logging.info("[RecDataLoader Info] Streaming mode enforces batch_size=1; tune chunk_size to control memory/throughput.")
        dataset = FileDataset(file_paths=file_paths, dense_features=self.dense_features, sparse_features=self.sparse_features, sequence_features=self.sequence_features, target_columns=self.target_columns, id_columns=self.id_columns, chunk_size=chunk_size, file_type=file_type, processor=self.processor)
-       return DataLoader(dataset, batch_size=1, collate_fn=collate_fn)
+       return DataLoader(dataset, batch_size=1, collate_fn=collate_fn, num_workers=num_workers)
 
 def normalize_sequence_column(column, feature: SequenceFeature) -> np.ndarray:
    if isinstance(column, pd.Series):
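
Note: RecDataLoader.create_dataloader and the helpers behind it (create_from_memory, create_from_path, load_files_streaming) now accept num_workers as well and forward it to torch's DataLoader. A hedged sketch of calling it directly; the constructor keywords mirror the call visible in the model.py predict hunk above, while the feature objects, column names, and CSV path are placeholders:

    from nextrec.basic.features import DenseFeature, SparseFeature
    from nextrec.data.dataloader import RecDataLoader

    rec_loader = RecDataLoader(
        dense_features=[DenseFeature("age")],
        sparse_features=[SparseFeature("category", vocab_size=100, embedding_dim=16)],
        sequence_features=[],          # placeholder: no sequence features in this sketch
        target=["label"],
        id_columns=None,               # placeholder: no id columns needed here
    )

    # load_full=True reads the file(s) into memory; num_workers (new in 0.3.6)
    # is passed straight through to the underlying torch DataLoader
    loader = rec_loader.create_dataloader(
        data="dataset/ranking_task.csv",
        batch_size=256,
        shuffle=True,
        load_full=True,
        num_workers=2,
    )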

{nextrec-0.3.5 → nextrec-0.3.6}/nextrec/utils/device.py
@@ -4,9 +4,10 @@ Device management utilities for NextRec
 Date: create on 03/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
-
+import os
 import torch
 import platform
+import multiprocessing
 
 
 def resolve_device() -> str:
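
Note: this hunk only adds os and multiprocessing imports to nextrec/utils/device.py; the code that uses them lies outside the shown context. Given the num_workers plumbing added elsewhere in this release, one plausible use would be deriving a default worker count from the available CPUs. The helper below is purely hypothetical (including the NEXTREC_NUM_WORKERS variable) and is not part of the package:

    import multiprocessing
    import os


    def default_num_workers() -> int:
        """Hypothetical helper: pick a conservative DataLoader worker count."""
        # honour an explicit override if one is set (hypothetical variable name)
        override = os.getenv("NEXTREC_NUM_WORKERS")
        if override is not None and override.isdigit():
            return int(override)
        # otherwise leave a couple of cores free for the training process itself
        return max(multiprocessing.cpu_count() - 2, 0)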

{nextrec-0.3.5 → nextrec-0.3.6}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "nextrec"
-version = "0.3.5"
+version = "0.3.6"
 description = "A comprehensive recommendation library with match, ranking, and multi-task learning models"
 readme = "README.md"
 requires-python = ">=3.10"

nextrec-0.3.5/nextrec/__init__.py (removed)
@@ -1,41 +0,0 @@
-"""
-NextRec - A Unified Deep Learning Framework for Recommender Systems
-===================================================================
-
-NextRec provides a comprehensive suite of recommendation models including:
-- Ranking models (CTR prediction)
-- Matching models (retrieval)
-- Multi-task learning models
-- Generative recommendation models
-
-Quick Start
------------
->>> from nextrec.basic.features import DenseFeature, SparseFeature
->>> from nextrec.models.ranking.deepfm import DeepFM
->>>
->>> # Define features
->>> dense_features = [DenseFeature('age')]
->>> sparse_features = [SparseFeature('category', vocab_size=100, embedding_dim=16)]
->>>
->>> # Build model
->>> model = DeepFM(
-...     dense_features=dense_features,
-...     sparse_features=sparse_features,
-...     targets=['label']
-... )
->>>
->>> # Train model
->>> model.fit(train_data=df_train, valid_data=df_valid)
-"""
-
-from nextrec.__version__ import __version__
-
-__all__ = [
-    '__version__',
-]
-
-# Package metadata
-__author__ = "zerolovesea"
-__email__ = "zyaztec@gmail.com"
-__license__ = "Apache 2.0"
-__url__ = "https://github.com/zerolovesea/NextRec"

nextrec-0.3.5/nextrec/__version__.py (removed)
@@ -1 +0,0 @@
-__version__ = "0.3.5"

nextrec-0.3.5/nextrec/models/generative/__init__.py (removed)
@@ -1,5 +0,0 @@
-from .hstu import HSTU
-
-__all__ = [
-    "HSTU",
-]

nextrec-0.3.5/nextrec/models/match/__init__.py (removed)
@@ -1,13 +0,0 @@
-from .dssm import DSSM
-from .dssm_v2 import DSSM_v2
-from .youtube_dnn import YoutubeDNN
-from .mind import MIND
-from .sdm import SDM
-
-__all__ = [
-    'DSSM',
-    'DSSM_v2',
-    'YoutubeDNN',
-    'MIND',
-    'SDM',
-]

nextrec-0.3.5/nextrec/models/ranking/__init__.py (removed)
@@ -1,27 +0,0 @@
-from .fm import FM
-from .afm import AFM
-from .masknet import MaskNet
-from .pnn import PNN
-from .deepfm import DeepFM
-from .autoint import AutoInt
-from .widedeep import WideDeep
-from .xdeepfm import xDeepFM
-from .dcn import DCN
-from .fibinet import FiBiNET
-from .din import DIN
-from .dien import DIEN
-
-__all__ = [
-    'DeepFM',
-    'AutoInt',
-    'WideDeep',
-    'xDeepFM',
-    'DCN',
-    'DIN',
-    'DIEN',
-    'FM',
-    'AFM',
-    'MaskNet',
-    'PNN',
-    'FiBiNET',
-]