nextrec 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextrec/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.14"
1
+ __version__ = "0.4.16"
nextrec/basic/metrics.py CHANGED
@@ -77,6 +77,8 @@ def check_user_id(*metric_sources: Any) -> bool:
77
77
 
78
78
  def compute_ks(y_true: np.ndarray, y_pred: np.ndarray) -> float:
79
79
  """Compute Kolmogorov-Smirnov statistic."""
80
+ y_true = np.asarray(y_true).reshape(-1)
81
+ y_pred = np.asarray(y_pred).reshape(-1)
80
82
  sorted_indices = np.argsort(y_pred)[::-1]
81
83
  y_true_sorted = y_true[sorted_indices]
82
84
 
nextrec/basic/model.py CHANGED
@@ -481,7 +481,7 @@ class BaseModel(FeatureSet, nn.Module):
481
481
  "[BaseModel-compile Error] loss_weights list must have exactly one element for single-task setup."
482
482
  )
483
483
  loss_weights = loss_weights[0]
484
- self.loss_weights = [float(loss_weights)] # type: ignore
484
+ self.loss_weights = [float(loss_weights)] # type: ignore
485
485
  else:
486
486
  if isinstance(loss_weights, (int, float)):
487
487
  weights = [float(loss_weights)] * self.nums_task
@@ -591,8 +591,8 @@ class BaseModel(FeatureSet, nn.Module):
591
591
 
592
592
  def fit(
593
593
  self,
594
- train_data = None,
595
- valid_data = None,
594
+ train_data=None,
595
+ valid_data=None,
596
596
  metrics: (
597
597
  list[str] | dict[str, list[str]] | None
598
598
  ) = None, # ['auc', 'logloss'] or {'target1': ['auc', 'logloss'], 'target2': ['mse']}
@@ -1420,6 +1420,11 @@ class BaseModel(FeatureSet, nn.Module):
1420
1420
  # Create DataLoader based on data type
1421
1421
  if isinstance(data, DataLoader):
1422
1422
  data_loader = data
1423
+ if num_workers != 0:
1424
+ logging.warning(
1425
+ "[Predict Warning] num_workers parameter is ignored when data is already a DataLoader. "
1426
+ "The DataLoader's existing num_workers configuration will be used."
1427
+ )
1423
1428
  elif isinstance(data, (str, os.PathLike)):
1424
1429
  rec_loader = RecDataLoader(
1425
1430
  dense_features=self.dense_features,
@@ -1578,6 +1583,17 @@ class BaseModel(FeatureSet, nn.Module):
1578
1583
  else:
1579
1584
  data_loader = data
1580
1585
 
1586
+ if hasattr(data_loader, "num_workers") and data_loader.num_workers > 0:
1587
+ if (
1588
+ hasattr(data_loader.dataset, "__class__")
1589
+ and "Streaming" in data_loader.dataset.__class__.__name__
1590
+ ):
1591
+ logging.warning(
1592
+ f"[Predict Streaming Warning] Detected DataLoader with num_workers={data_loader.num_workers} "
1593
+ "and streaming dataset. This may cause data duplication! "
1594
+ "When using streaming mode, set num_workers=0 to avoid reading data multiple times."
1595
+ )
1596
+
1581
1597
  suffix = ".csv" if save_format == "csv" else ".parquet"
1582
1598
  target_path = resolve_save_path(
1583
1599
  path=save_path,
nextrec/cli.py CHANGED
@@ -21,6 +21,7 @@ Author: Yang Zhou, zyaztec@gmail.com
21
21
  import argparse
22
22
  import logging
23
23
  import pickle
24
+ import resource
24
25
  import sys
25
26
  import time
26
27
  from pathlib import Path
@@ -625,6 +626,17 @@ def predict_model(predict_config_path: str) -> None:
625
626
  def main() -> None:
626
627
  """Parse CLI arguments and dispatch to train or predict mode."""
627
628
 
629
+ # Increase file descriptor limit to avoid "Too many open files" error
630
+ # when using DataLoader with multiple workers
631
+ try:
632
+ soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
633
+ target_limit = 65535
634
+ if soft < target_limit:
635
+ resource.setrlimit(resource.RLIMIT_NOFILE, (min(target_limit, hard), hard))
636
+ except (ValueError, OSError):
637
+ # If we can't set the limit, continue anyway
638
+ pass
639
+
628
640
  root = logging.getLogger()
629
641
  if not root.handlers:
630
642
  handler = logging.StreamHandler(sys.stdout)
@@ -210,6 +210,15 @@ class RecDataLoader(FeatureSet):
210
210
  DataLoader instance.
211
211
  """
212
212
 
213
+ # Enforce num_workers=0 for streaming mode to prevent data duplication
214
+ if streaming and num_workers > 0:
215
+ logging.warning(
216
+ f"[RecDataLoader Warning] num_workers={num_workers} is not compatible with streaming=True. "
217
+ "Each worker would create its own data stream, causing data duplication. "
218
+ "Forcing num_workers=0."
219
+ )
220
+ num_workers = 0
221
+
213
222
  if isinstance(data, DataLoader):
214
223
  return data
215
224
  elif isinstance(data, (str, os.PathLike)):
@@ -363,6 +372,13 @@ class RecDataLoader(FeatureSet):
363
372
  logging.info(
364
373
  "[RecDataLoader Info] Streaming mode enforces batch_size=1; tune chunk_size to control memory/throughput."
365
374
  )
375
+ if num_workers > 0:
376
+ logging.warning(
377
+ f"[RecDataLoader Warning] num_workers={num_workers} is not compatible with streaming mode. "
378
+ "Each worker would create its own data stream, causing data duplication. "
379
+ "Forcing num_workers=0."
380
+ )
381
+ num_workers = 0
366
382
  dataset = FileDataset(
367
383
  file_paths=file_paths,
368
384
  dense_features=self.dense_features,
nextrec/utils/config.py CHANGED
@@ -28,7 +28,9 @@ if TYPE_CHECKING:
28
28
  from nextrec.data.preprocessor import DataProcessor
29
29
 
30
30
 
31
- def resolve_path(path_str: str | Path | None = None, base_dir: Path | None = None) -> Path:
31
+ def resolve_path(
32
+ path_str: str | Path | None = None, base_dir: Path | None = None
33
+ ) -> Path:
32
34
  if path_str is None:
33
35
  return Path.cwd()
34
36
  path = Path(path_str).expanduser()
@@ -36,9 +38,16 @@ def resolve_path(path_str: str | Path | None = None, base_dir: Path | None = Non
36
38
  return path
37
39
  # Prefer resolving relative to current working directory when the path (or its parent)
38
40
  # already exists there; otherwise fall back to the config file's directory.
39
- candidates = ((Path.cwd() / path).resolve(), ((base_dir or Path.cwd()) / path).resolve())
41
+ candidates = (
42
+ (Path.cwd() / path).resolve(),
43
+ ((base_dir or Path.cwd()) / path).resolve(),
44
+ )
40
45
  return next(
41
- (candidate for candidate in candidates if candidate.exists() or candidate.parent.exists()),
46
+ (
47
+ candidate
48
+ for candidate in candidates
49
+ if candidate.exists() or candidate.parent.exists()
50
+ ),
42
51
  candidates[0],
43
52
  )
44
53
 
nextrec/utils/console.py CHANGED
@@ -16,8 +16,9 @@ import numbers
16
16
  import os
17
17
  import platform
18
18
  import sys
19
+ import time
19
20
  from datetime import datetime, timedelta
20
- from typing import Any, Callable, Iterable, Mapping, TypeVar
21
+ from typing import Any, Callable, Mapping, TypeVar
21
22
 
22
23
  import numpy as np
23
24
  from rich import box
@@ -128,45 +129,85 @@ class BlackMofNCompleteColumn(MofNCompleteColumn):
128
129
  )
129
130
 
130
131
 
131
- def progress(
132
- iterable: Iterable[T],
133
- *,
134
- description: str | None = None,
135
- total: int | None = None,
136
- disable: bool = False,
137
- ) -> Iterable[T]:
132
+ def progress(iterable, *, description=None, total=None, disable=False):
138
133
  if disable:
139
- for item in iterable:
140
- yield item
134
+ yield from iterable
141
135
  return
136
+
142
137
  resolved_total = total
143
138
  if resolved_total is None:
144
139
  try:
145
- resolved_total = len(iterable) # type: ignore[arg-type]
140
+ resolved_total = len(iterable)
146
141
  except TypeError:
147
142
  resolved_total = None
148
143
 
144
+ stream = sys.stderr
145
+
146
+ if not stream.isatty():
147
+ start_time = time.monotonic()
148
+ last_tick = start_time
149
+ min_interval_seconds = 10.0
150
+ max_interval_seconds = 300.0
151
+ target_steps = (
152
+ max(1, resolved_total // 20) if resolved_total is not None else 500
153
+ )
154
+ interval_seconds = min_interval_seconds
155
+ completed = 0
156
+
157
+ def emit(now: float):
158
+ elapsed = max(0.0, now - start_time)
159
+ speed = completed / elapsed if elapsed > 0 else 0.0
160
+ if resolved_total is not None and speed > 0:
161
+ remaining = max(0.0, resolved_total - completed)
162
+ eta_seconds = remaining / speed
163
+ eta_text = str(timedelta(seconds=int(eta_seconds)))
164
+ else:
165
+ eta_text = "--:--:--"
166
+ total_text = str(resolved_total) if resolved_total is not None else "?"
167
+ stream.write(
168
+ f"{description or 'Working'}: {completed}/{total_text} "
169
+ f"elapsed={timedelta(seconds=int(elapsed))} "
170
+ f"speed={speed:.2f}/s ETA={eta_text}\n"
171
+ )
172
+ stream.flush()
173
+ return speed
174
+
175
+ for item in iterable:
176
+ yield item
177
+ completed += 1
178
+ now = time.monotonic()
179
+ if now - last_tick >= interval_seconds:
180
+ speed = emit(now)
181
+ last_tick = now
182
+ if speed > 0:
183
+ interval_seconds = min(
184
+ max_interval_seconds,
185
+ max(min_interval_seconds, target_steps / speed),
186
+ )
187
+ end_now = time.monotonic()
188
+ if end_now - last_tick >= 1e-6:
189
+ emit(end_now)
190
+ return
191
+
192
+ # TTY: rich
193
+ console = Console(file=stream, force_terminal=True)
149
194
  progress_bar = Progress(
150
- SpinnerColumn(style="black"),
151
- TextColumn("{task.description}", style="black"),
152
- BarColumn(
153
- bar_width=36, style="black", complete_style="black", finished_style="black"
154
- ),
155
- TaskProgressColumn(style="black"),
156
- BlackMofNCompleteColumn(),
157
- BlackTimeElapsedColumn(),
158
- BlackTimeRemainingColumn(),
195
+ SpinnerColumn(),
196
+ TextColumn("{task.description}"),
197
+ BarColumn(bar_width=36),
198
+ TaskProgressColumn(),
199
+ MofNCompleteColumn(),
200
+ TimeElapsedColumn(),
201
+ TimeRemainingColumn(),
159
202
  refresh_per_second=12,
203
+ console=console,
160
204
  )
161
205
 
162
- task_id = progress_bar.add_task(description or "Working", total=resolved_total)
163
- progress_bar.start()
164
- try:
206
+ with progress_bar:
207
+ task_id = progress_bar.add_task(description or "Working", total=resolved_total)
165
208
  for item in iterable:
166
209
  yield item
167
210
  progress_bar.advance(task_id, 1)
168
- finally:
169
- progress_bar.stop()
170
211
 
171
212
 
172
213
  def group_metrics_by_task(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nextrec
3
- Version: 0.4.14
3
+ Version: 0.4.16
4
4
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
5
5
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
6
6
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,10 +63,13 @@ Description-Content-Type: text/markdown
63
63
 
64
64
  <div align="center">
65
65
 
66
+ [![PyPI Downloads](https://static.pepy.tech/personalized-badge/nextrec?period=total&units=NONE&left_color=BLACK&right_color=GREEN&left_text=PyPI-downloads)](https://pepy.tech/projects/nextrec)
66
67
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
67
68
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
69
+
68
70
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
69
- ![Version](https://img.shields.io/badge/Version-0.4.14-orange.svg)
71
+ ![Version](https://img.shields.io/badge/Version-0.4.16-orange.svg)
72
+
70
73
 
71
74
  中文文档 | [English Version](README_en.md)
72
75
 
@@ -99,7 +102,7 @@ NextRec是一个基于PyTorch的现代推荐系统框架,旨在为研究工程
99
102
 
100
103
  ## NextRec近期进展
101
104
 
102
- - **21/12/2025** 在v0.4.14中加入了对[GradNorm](/nextrec/loss/grad_norm.py)的支持,通过compile的`loss_weight='grad_norm'`进行配置
105
+ - **21/12/2025** 在v0.4.16中加入了对[GradNorm](/nextrec/loss/grad_norm.py)的支持,通过compile的`loss_weight='grad_norm'`进行配置
103
106
  - **12/12/2025** 在v0.4.9中加入了[RQ-VAE](/nextrec/models/representation/rqvae.py)模块。配套的[数据集](/dataset/ecommerce_task.csv)和[代码](tutorials/notebooks/zh/使用RQ-VAE构建语义ID.ipynb)已经同步在仓库中
104
107
  - **07/12/2025** 发布了NextRec CLI命令行工具,它允许用户根据配置文件进行一键训练和推理,我们提供了相关的[教程](/nextrec_cli_preset/NextRec-CLI_zh.md)和[教学代码](/nextrec_cli_preset)
105
108
  - **03/12/2025** NextRec获得了100颗🌟!感谢大家的支持
@@ -241,11 +244,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
241
244
  nextrec --mode=predict --predict_config=path/to/predict_config.yaml
242
245
  ```
243
246
 
244
- > 截止当前版本0.4.14,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
247
+ > 截止当前版本0.4.16,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
245
248
 
246
249
  ## 兼容平台
247
250
 
248
- 当前最新版本为0.4.14,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
251
+ 当前最新版本为0.4.16,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
249
252
 
250
253
  | 平台 | 配置 |
251
254
  |------|------|
@@ -1,20 +1,20 @@
1
1
  nextrec/__init__.py,sha256=_M3oUqyuvQ5k8Th_3wId6hQ_caclh7M5ad51XN09m98,235
2
- nextrec/__version__.py,sha256=kBEbn8dkCFa3vKochkZqeCl78cbsUbutSFlOYZrn__w,23
3
- nextrec/cli.py,sha256=6nBY8O8-0931h428eQS8CALkKn1FmizovJme7Q1c_O0,23978
2
+ nextrec/__version__.py,sha256=Qgm7I6hoEastyorRf_J_YW-gsesKcI737L731iY3jes,23
3
+ nextrec/cli.py,sha256=JUprwpoVbT4tXsGgMpj9Y_5yYByQXYMliMdWd38ReKo,24441
4
4
  nextrec/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  nextrec/basic/activation.py,sha256=uzTWfCOtBSkbu_Gk9XBNTj8__s241CaYLJk6l8nGX9I,2885
6
6
  nextrec/basic/callback.py,sha256=nn1f8FG9c52vJ-gvwteqPbk3-1QuNS1vmhBlkENdb0I,14636
7
7
  nextrec/basic/features.py,sha256=GyCUzGPuizUofrZSSOdqHK84YhnX4MGTdu7Cx2OGhUA,4654
8
8
  nextrec/basic/layers.py,sha256=ZM3Nka3e2cit3e3peL0ukJCMgKZK1ovNFfAWvVOwlos,28556
9
9
  nextrec/basic/loggers.py,sha256=Zh1A5DVAFqlGglyaQ4_IMgvFbWAcXX5H3aHbCWA82nE,6524
10
- nextrec/basic/metrics.py,sha256=saNgM7kuHk9xqDxZF6x33irTaxeXCU-hxYTUQauuGgg,23074
11
- nextrec/basic/model.py,sha256=wzz2yMnzls7zxJmCNt2z51k0ZNTKRJ1HQdk3HQ61ObU,102854
10
+ nextrec/basic/metrics.py,sha256=1r6efTc9TpARNBt5X9ISoppTZflej6EdFkjPYHV-YZI,23162
11
+ nextrec/basic/model.py,sha256=eC4qV6wLHsIiYr2HZ4dOpliiD4mjTJhr4XdiezMO690,103767
12
12
  nextrec/basic/session.py,sha256=UOG_-EgCOxvqZwCkiEd8sgNV2G1sm_HbzKYVQw8yYDI,4483
13
13
  nextrec/data/__init__.py,sha256=YZQjpty1pDCM7q_YNmiA2sa5kbujUw26ObLHWjMPjKY,1194
14
14
  nextrec/data/batch_utils.py,sha256=0bYGVX7RlhnHv_ZBaUngjDIpBNw-igCk98DgOsF7T6o,2879
15
15
  nextrec/data/data_processing.py,sha256=lKXDBszrO5fJMAQetgSPr2mSQuzOluuz1eHV4jp0TDU,5538
16
16
  nextrec/data/data_utils.py,sha256=0Ls1cnG9lBz0ovtyedw5vwp7WegGK_iF-F8e_3DEddo,880
17
- nextrec/data/dataloader.py,sha256=As2AvO2IGc-ofVl98HM7CIwSuzIvQtxJrSfvpJ2gamA,18787
17
+ nextrec/data/dataloader.py,sha256=0JYoQk5MXCwtbj-h8X38SOsb4YPJQawASCatuPNgTw4,19561
18
18
  nextrec/data/preprocessor.py,sha256=K-cUP-YdlQx1VJ2m1CXuprncpjDJe2ERVO5xCSoxHKI,44470
19
19
  nextrec/loss/__init__.py,sha256=ZCgsfyR5YAecv6MdOsnUjkfacvZg2coQVjuKAfPvmRo,923
20
20
  nextrec/loss/grad_norm.py,sha256=91Grspx95Xu_639TkL_WZRX1xt5QOTZCzBeJWbUGPiE,8385
@@ -64,15 +64,15 @@ nextrec/models/retrieval/youtube_dnn.py,sha256=xtGPV6_5LeSZBKkrTaU1CmtxlhgYLvZmj
64
64
  nextrec/models/sequential/hstu.py,sha256=P2Kl7HEL3afwiCApGKQ6UbUNO9eNXXrB10H7iiF8cI0,19735
65
65
  nextrec/models/sequential/sasrec.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
66
  nextrec/utils/__init__.py,sha256=C-1l-suSsN_MlPlj_5LApyCRQLOao5l7bO0SccwKHw4,2598
67
- nextrec/utils/config.py,sha256=2zcjK4TeN8ow-JSXbWpqyh9C1vFeKnEsYHPg1x564KU,19969
68
- nextrec/utils/console.py,sha256=e94SiwA0gKn2pfpP94mY_jl-kFok3TCjxo298KdFuP4,11696
67
+ nextrec/utils/config.py,sha256=VgCh5fto8HGodwXPJacenqjxre3Aw6tw-mntW9n3OYA,20044
68
+ nextrec/utils/console.py,sha256=u-hq_VcA62B0HPok12pnLOf9aI8B3Hjyg0ysMrOBOxI,13220
69
69
  nextrec/utils/data.py,sha256=alruiWZFbmwy3kO12q42VXmtHmXFFjVULpHa43fx_mI,21098
70
70
  nextrec/utils/embedding.py,sha256=akAEc062MG2cD7VIOllHaqtwzAirQR2gq5iW7oKpGAU,1449
71
71
  nextrec/utils/feature.py,sha256=rsUAv3ELyDpehVw8nPEEsLCCIjuKGTJJZuFaWB_wrPk,633
72
72
  nextrec/utils/model.py,sha256=3B85a0IJCggI26dxv25IX8R_5yQPo7wXI0JIAns6bkQ,1727
73
73
  nextrec/utils/torch_utils.py,sha256=AKfYbSOJjEw874xsDB5IO3Ote4X7vnqzt_E0jJny0o8,13468
74
- nextrec-0.4.14.dist-info/METADATA,sha256=HDpc5CcuuuVeBhmTwZTCEJw1T_w0GgA7K5CaL-N1Kq8,21103
75
- nextrec-0.4.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
76
- nextrec-0.4.14.dist-info/entry_points.txt,sha256=NN-dNSdfMRTv86bNXM7d3ZEPW2BQC6bRi7QP7i9cIps,45
77
- nextrec-0.4.14.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
78
- nextrec-0.4.14.dist-info/RECORD,,
74
+ nextrec-0.4.16.dist-info/METADATA,sha256=21yLJcEh_Y70_gsTysLVt6X8l-YN15x9AX99x0wVB-o,21298
75
+ nextrec-0.4.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
76
+ nextrec-0.4.16.dist-info/entry_points.txt,sha256=NN-dNSdfMRTv86bNXM7d3ZEPW2BQC6bRi7QP7i9cIps,45
77
+ nextrec-0.4.16.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
78
+ nextrec-0.4.16.dist-info/RECORD,,