PyPI - nextrec - Versions diffs - 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl - Mend

nextrec 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

nextrec/__version__.py +1 -1
nextrec/basic/callback.py +30 -15
nextrec/basic/features.py +1 -0
nextrec/basic/layers.py +6 -8
nextrec/basic/loggers.py +14 -7
nextrec/basic/metrics.py +6 -76
nextrec/basic/model.py +337 -328
nextrec/cli.py +25 -4
nextrec/data/__init__.py +13 -16
nextrec/data/batch_utils.py +3 -2
nextrec/data/data_processing.py +10 -2
nextrec/data/data_utils.py +9 -14
nextrec/data/dataloader.py +12 -13
nextrec/data/preprocessor.py +328 -255
nextrec/loss/__init__.py +1 -5
nextrec/loss/loss_utils.py +2 -8
nextrec/models/generative/__init__.py +1 -8
nextrec/models/generative/hstu.py +6 -4
nextrec/models/multi_task/esmm.py +2 -2
nextrec/models/multi_task/mmoe.py +2 -2
nextrec/models/multi_task/ple.py +2 -2
nextrec/models/multi_task/poso.py +2 -3
nextrec/models/multi_task/share_bottom.py +2 -2
nextrec/models/ranking/afm.py +2 -2
nextrec/models/ranking/autoint.py +2 -2
nextrec/models/ranking/dcn.py +2 -2
nextrec/models/ranking/dcn_v2.py +2 -2
nextrec/models/ranking/deepfm.py +2 -2
nextrec/models/ranking/dien.py +3 -3
nextrec/models/ranking/din.py +3 -3
nextrec/models/ranking/ffm.py +0 -0
nextrec/models/ranking/fibinet.py +5 -5
nextrec/models/ranking/fm.py +3 -7
nextrec/models/ranking/lr.py +0 -0
nextrec/models/ranking/masknet.py +2 -2
nextrec/models/ranking/pnn.py +2 -2
nextrec/models/ranking/widedeep.py +2 -2
nextrec/models/ranking/xdeepfm.py +2 -2
nextrec/models/representation/__init__.py +9 -0
nextrec/models/{generative → representation}/rqvae.py +9 -9
nextrec/models/retrieval/__init__.py +0 -0
nextrec/models/{match → retrieval}/dssm.py +8 -3
nextrec/models/{match → retrieval}/dssm_v2.py +8 -3
nextrec/models/{match → retrieval}/mind.py +4 -3
nextrec/models/{match → retrieval}/sdm.py +4 -3
nextrec/models/{match → retrieval}/youtube_dnn.py +8 -3
nextrec/utils/__init__.py +60 -46
nextrec/utils/config.py +12 -10
nextrec/utils/console.py +371 -0
nextrec/utils/{synthetic_data.py → data.py} +102 -15
nextrec/utils/feature.py +15 -0
nextrec/utils/torch_utils.py +411 -0
{nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/METADATA +8 -7
nextrec-0.4.9.dist-info/RECORD +70 -0
nextrec/utils/device.py +0 -78
nextrec/utils/distributed.py +0 -141
nextrec/utils/file.py +0 -92
nextrec/utils/initializer.py +0 -79
nextrec/utils/optimizer.py +0 -75
nextrec/utils/tensor.py +0 -72
nextrec-0.4.7.dist-info/RECORD +0 -70
/nextrec/models/{match/__init__.py → ranking/eulernet.py} +0 -0
{nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/WHEEL +0 -0
{nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/entry_points.txt +0 -0
{nextrec-0.4.7.dist-info → nextrec-0.4.9.dist-info}/licenses/LICENSE +0 -0

nextrec/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.4.7"
1	+ __version__ = "0.4.9"

nextrec/basic/callback.py CHANGED Viewed

@@ -2,17 +2,20 @@
 Callback System for Training Process
 Date: create on 27/10/2025
-Checkpoint: edit on 17/12/2025
+Checkpoint: edit on 19/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 import copy
 import logging
-from typing import Optional
+import pickle
 from pathlib import Path
+from typing import Optional
 import torch
-import pickle
 from nextrec import __version__
+from nextrec.basic.loggers import colorize, format_kv
 class Callback:
@@ -209,8 +212,13 @@ class EarlyStopper(Callback):
         if self.restore_best_weights and self.best_weights is not None:
             if self.verbose > 0:
                 logging.info(
-                    f"Restoring model weights from epoch {self.best_epoch + 1} "
-                    f"with best {self.monitor}: {self.best_value:.6f}"
+                    colorize(
+                        format_kv(
+                            "Restoring model weights from epoch",
+                            f"{self.best_epoch + 1} with best {self.monitor}: {self.best_value:.6f}",
+                        ),
+                        color="bright_blue",
+                    )
                 )
             self.model.load_state_dict(self.best_weights)
@@ -229,7 +237,8 @@ class CheckpointSaver(Callback):
     def __init__(
         self,
-        save_path: str | Path,
+        best_path: str | Path,
+        checkpoint_path: str | Path,
         monitor: str = "val_auc",
         mode: str = "max",
         save_best_only: bool = False,
@@ -239,7 +248,8 @@ class CheckpointSaver(Callback):
     ):
         super().__init__()
         self.run_on_main_process_only = run_on_main_process_only
-        self.save_path = Path(save_path)
+        self.best_path = Path(best_path)
+        self.checkpoint_path = Path(checkpoint_path)
         self.monitor = monitor
         self.mode = mode
         self.save_best_only = save_best_only
@@ -260,14 +270,13 @@ class CheckpointSaver(Callback):
             self.best_value = float("inf")
         else:
             self.best_value = float("-inf")
-        # Create directory if it doesn't exist
-        self.save_path.parent.mkdir(parents=True, exist_ok=True)
+        self.best_path.parent.mkdir(parents=True, exist_ok=True)
+        self.checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
     def on_epoch_end(self, epoch: int, logs: Optional[dict] = None):
+        logging.info("")
         logs = logs or {}
-        # Check if we should save this epoch
         should_save = False
         if self.save_freq == "epoch":
             should_save = True
@@ -289,17 +298,23 @@ class CheckpointSaver(Callback):
         if should_save:
             if not self.save_best_only or is_best:
                 checkpoint_path = (
-                    self.save_path.parent
-                    / f"{self.save_path.stem}_epoch_{epoch + 1}{self.save_path.suffix}"
+                    self.checkpoint_path.parent
+                    / f"{self.checkpoint_path.stem}{self.checkpoint_path.suffix}"
                 )
                 self.save_checkpoint(checkpoint_path, epoch, logs)
                 if is_best:
                     # Use save_path directly without adding _best suffix since it may already contain it
-                    self.save_checkpoint(self.save_path, epoch, logs)
+                    self.save_checkpoint(self.best_path, epoch, logs)
                     if self.verbose > 0:
                         logging.info(
-                            f"Saved best model to {self.save_path} with {self.monitor}: {current:.6f}"
+                            colorize(
+                                format_kv(
+                                    "Saved best model to",
+                                    f"{self.best_path} with {self.monitor}: {current:.6f}",
+                                ),
+                                color="bright_blue",
+                            )
                         )
     def save_checkpoint(self, path: Path, epoch: int, logs: dict):

nextrec/basic/features.py CHANGED Viewed

@@ -7,6 +7,7 @@ Author: Yang Zhou, zyaztec@gmail.com
 """
 import torch
 from nextrec.utils.embedding import get_auto_embedding_dim
 from nextrec.utils.feature import normalize_to_list

nextrec/basic/layers.py CHANGED Viewed

@@ -2,22 +2,22 @@
 Layer implementations used across NextRec models.
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 19/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 from __future__ import annotations
+from collections import OrderedDict
+from itertools import combinations
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from itertools import combinations
-from collections import OrderedDict
-from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
-from nextrec.utils.initializer import get_initializer
 from nextrec.basic.activation import activation_layer
+from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
+from nextrec.utils.torch_utils import get_initializer
 class PredictionLayer(nn.Module):
@@ -81,8 +81,6 @@ class PredictionLayer(nn.Module):
                 outputs.append(torch.sigmoid(task_logits))
             elif task == "regression":
                 outputs.append(task_logits)
-            elif task == "multiclass":
-                outputs.append(torch.softmax(task_logits, dim=-1))
             else:
                 raise ValueError(
                     f"[PredictionLayer Error]: Unsupported task_type '{task_type}'."

nextrec/basic/loggers.py CHANGED Viewed

@@ -2,20 +2,20 @@
 NextRec Basic Loggers
 Date: create on 27/10/2025
-Checkpoint: edit on 03/12/2025
+Checkpoint: edit on 19/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
-import os
-import re
-import sys
-import json
 import copy
+import json
 import logging
 import numbers
+import os
+import re
+import sys
+from typing import Any, Mapping
-from typing import Mapping, Any
-from nextrec.basic.session import create_session, Session
+from nextrec.basic.session import Session, create_session
 ANSI_CODES = {
     "black": "\033[30m",
@@ -91,6 +91,13 @@ def colorize(text: str, color: str | None = None, bold: bool = False) -> str:
     return result
+def format_kv(label: str, value: Any, width: int = 34, indent: int = 0) -> str:
+    """Format key-value lines with consistent alignment."""
+    label_text = label if label.endswith(":") else f"{label}:"
+    prefix = " " * indent
+    return f"{prefix}{label_text:<{width}} {value}"
 def setup_logger(session_id: str | os.PathLike | None = None):
     """Set up a logger that logs to both console and a file with ANSI formatting.
     Only console output has colors; file output is stripped of ANSI codes.

nextrec/basic/metrics.py CHANGED Viewed

@@ -2,7 +2,7 @@
 Metrics computation and configuration for model evaluation.
 Date: create on 27/10/2025
-Checkpoint: edit on 02/12/2025
+Checkpoint: edit on 19/12/2025
 Author: Yang Zhou,zyaztec@gmail.com
 """
@@ -11,15 +11,15 @@ from typing import Any
 import numpy as np
 from sklearn.metrics import (
-    roc_auc_score,
+    accuracy_score,
+    f1_score,
     log_loss,
-    mean_squared_error,
     mean_absolute_error,
-    accuracy_score,
+    mean_squared_error,
     precision_score,
-    recall_score,
-    f1_score,
     r2_score,
+    recall_score,
+    roc_auc_score,
 )
 CLASSIFICATION_METRICS = {
@@ -44,11 +44,6 @@ TASK_DEFAULT_METRICS = {
     + [f"recall@{k}" for k in (5, 10, 20)]
     + [f"ndcg@{k}" for k in (5, 10, 20)]
     + [f"mrr@{k}" for k in (5, 10, 20)],
-    # generative/multiclass next-item prediction defaults
-    "multiclass": ["accuracy"]
-    + [f"hitrate@{k}" for k in (1, 5, 10)]
-    + [f"recall@{k}" for k in (1, 5, 10)]
-    + [f"mrr@{k}" for k in (1, 5, 10)],
 }
@@ -163,51 +158,6 @@ def group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndarr
     return groups
-def normalize_multiclass_inputs(
-    y_true: np.ndarray, y_pred: np.ndarray
-) -> tuple[np.ndarray, np.ndarray]:
-    """
-    Normalize multiclass inputs to consistent shapes.
-    y_true: [N] of class ids
-    y_pred: [N, C] of logits/probabilities
-    """
-    labels = np.asarray(y_true).reshape(-1)
-    scores = np.asarray(y_pred)
-    if scores.ndim == 1:
-        scores = scores.reshape(scores.shape[0], -1)
-    if scores.shape[0] != labels.shape[0]:
-        raise ValueError(
-            f"[Metric Warning] y_true length {labels.shape[0]} != y_pred batch {scores.shape[0]} for multiclass metrics."
-        )
-    return labels.astype(int), scores
-def multiclass_topk_hit_rate(y_true: np.ndarray, y_pred: np.ndarray, k: int) -> float:
-    labels, scores = normalize_multiclass_inputs(y_true, y_pred)
-    if scores.shape[1] == 0:
-        return 0.0
-    k = min(k, scores.shape[1])
-    topk_idx = np.argpartition(-scores, kth=k - 1, axis=1)[:, :k]
-    hits = (topk_idx == labels[:, None]).any(axis=1)
-    return float(hits.mean()) if hits.size > 0 else 0.0
-def multiclass_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, k: int) -> float:
-    labels, scores = normalize_multiclass_inputs(y_true, y_pred)
-    if scores.shape[1] == 0:
-        return 0.0
-    k = min(k, scores.shape[1])
-    # full sort for stable ranks
-    topk_idx = np.argsort(-scores, axis=1)[:, :k]
-    ranks = np.full(labels.shape, fill_value=k + 1, dtype=np.float32)
-    for idx in range(k):
-        match = topk_idx[:, idx] == labels
-        ranks[match] = idx + 1
-    reciprocals = np.where(ranks <= k, 1.0 / ranks, 0.0)
-    return float(reciprocals.mean()) if reciprocals.size > 0 else 0.0
 def compute_precision_at_k(
     y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int
 ) -> float:
@@ -514,26 +464,6 @@ def compute_single_metric(
     """Compute a single metric given true and predicted values."""
     y_p_binary = (y_pred > 0.5).astype(int)
     metric_lower = metric.lower()
-    is_multiclass = task_type == "multiclass" and y_pred.ndim >= 2
-    if is_multiclass:
-        # Dedicated path for multiclass logits (e.g., next-item prediction)
-        labels, scores = normalize_multiclass_inputs(y_true, y_pred)
-        if metric_lower in ("accuracy", "acc"):
-            preds = scores.argmax(axis=1)
-            return float((preds == labels).mean())
-        if metric_lower.startswith("hitrate@") or metric_lower.startswith("hr@"):
-            k_str = metric_lower.split("@")[1]
-            k = int(k_str)
-            return multiclass_topk_hit_rate(labels, scores, k)
-        if metric_lower.startswith("recall@"):
-            k = int(metric_lower.split("@")[1])
-            return multiclass_topk_hit_rate(labels, scores, k)
-        if metric_lower.startswith("mrr@"):
-            k = int(metric_lower.split("@")[1])
-            return multiclass_mrr_at_k(labels, scores, k)
-        # fall back to accuracy if unsupported metric is requested
-        preds = scores.argmax(axis=1)
-        return float((preds == labels).mean())
     try:
         if metric_lower.startswith("recall@"):
             k = int(metric_lower.split("@")[1])

nextrec 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

nextrec 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl