PyPI - nextrec - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

nextrec 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

nextrec/__version__.py +1 -1
nextrec/basic/features.py +10 -23
nextrec/basic/layers.py +18 -61
nextrec/basic/loggers.py +71 -8
nextrec/basic/metrics.py +55 -33
nextrec/basic/model.py +287 -397
nextrec/data/__init__.py +2 -2
nextrec/data/data_utils.py +80 -4
nextrec/data/dataloader.py +38 -59
nextrec/data/preprocessor.py +38 -73
nextrec/models/generative/hstu.py +1 -1
nextrec/models/match/dssm.py +2 -2
nextrec/models/match/dssm_v2.py +2 -2
nextrec/models/match/mind.py +2 -2
nextrec/models/match/sdm.py +2 -2
nextrec/models/match/youtube_dnn.py +2 -2
nextrec/models/multi_task/esmm.py +1 -1
nextrec/models/multi_task/mmoe.py +1 -1
nextrec/models/multi_task/ple.py +1 -1
nextrec/models/multi_task/poso.py +1 -1
nextrec/models/multi_task/share_bottom.py +1 -1
nextrec/models/ranking/afm.py +1 -1
nextrec/models/ranking/autoint.py +1 -1
nextrec/models/ranking/dcn.py +1 -1
nextrec/models/ranking/deepfm.py +1 -1
nextrec/models/ranking/dien.py +1 -1
nextrec/models/ranking/din.py +1 -1
nextrec/models/ranking/fibinet.py +1 -1
nextrec/models/ranking/fm.py +1 -1
nextrec/models/ranking/masknet.py +2 -2
nextrec/models/ranking/pnn.py +1 -1
nextrec/models/ranking/widedeep.py +1 -1
nextrec/models/ranking/xdeepfm.py +1 -1
nextrec/utils/__init__.py +2 -1
nextrec/utils/common.py +21 -2
{nextrec-0.3.2.dist-info → nextrec-0.3.4.dist-info}/METADATA +3 -3
nextrec-0.3.4.dist-info/RECORD +57 -0
nextrec-0.3.2.dist-info/RECORD +0 -57
{nextrec-0.3.2.dist-info → nextrec-0.3.4.dist-info}/WHEEL +0 -0
{nextrec-0.3.2.dist-info → nextrec-0.3.4.dist-info}/licenses/LICENSE +0 -0

nextrec/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.3.2"
1	+ __version__ = "0.3.4"

nextrec/basic/features.py CHANGED Viewed

@@ -2,19 +2,16 @@
 Feature definitions
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 02/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 import torch
 from nextrec.utils.embedding import get_auto_embedding_dim
+from nextrec.utils.common import normalize_to_list
 class BaseFeature(object):
     def __repr__(self):
-        params = {
-            k: v
-            for k, v in self.__dict__.items()
-            if not k.startswith("_")
-        }
+        params = {k: v for k, v in self.__dict__.items() if not k.startswith("_") }
         param_str = ", ".join(f"{k}={v!r}" for k, v in params.items())
         return f"{self.__class__.__name__}({param_str})"
@@ -93,11 +90,8 @@ class DenseFeature(BaseFeature):
         else:
             self.use_embedding = use_embedding  # user decides for dim <= 1
-class FeatureSpecMixin:
-    """
-    Mixin that normalizes dense/sparse/sequence feature lists and target/id columns.
-    """
-    def _set_feature_config(
+class FeatureSet:
+    def set_all_features(
         self,
         dense_features: list[DenseFeature] | None = None,
         sparse_features: list[SparseFeature] | None = None,
@@ -111,21 +105,14 @@ class FeatureSpecMixin:
         self.all_features = self.dense_features + self.sparse_features + self.sequence_features
         self.feature_names = [feat.name for feat in self.all_features]
-        self.target_columns = self._normalize_to_list(target)
-        self.id_columns = self._normalize_to_list(id_columns)
+        self.target_columns = normalize_to_list(target)
+        self.id_columns = normalize_to_list(id_columns)
-    def _set_target_id_config(
+    def set_target_id(
         self,
         target: str | list[str] | None = None,
         id_columns: str | list[str] | None = None,
     ) -> None:
-        self.target_columns = self._normalize_to_list(target)
-        self.id_columns = self._normalize_to_list(id_columns)
+        self.target_columns = normalize_to_list(target)
+        self.id_columns = normalize_to_list(id_columns)
-    @staticmethod
-    def _normalize_to_list(value: str | list[str] | None) -> list[str]:
-        if value is None:
-            return []
-        if isinstance(value, str):
-            return [value]
-        return list(value)

nextrec/basic/layers.py CHANGED Viewed

@@ -18,23 +18,6 @@ from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.utils.initializer import get_initializer
 from nextrec.basic.activation import activation_layer
-__all__ = [
-    "PredictionLayer",
-    "EmbeddingLayer",
-    "InputMask",
-    "LR",
-    "ConcatPooling",
-    "AveragePooling",
-    "SumPooling",
-    "MLP",
-    "FM",
-    "CrossLayer",
-    "SENETLayer",
-    "BiLinearInteractionLayer",
-    "MultiHeadSelfAttention",
-    "AttentionPoolingLayer",
-]
 class PredictionLayer(nn.Module):
     def __init__(
         self,
@@ -44,12 +27,10 @@ class PredictionLayer(nn.Module):
         return_logits: bool = False,
     ):
         super().__init__()
-        if isinstance(task_type, str):
-            self.task_types = [task_type]
-        else:
-            self.task_types = list(task_type)
+        self.task_types = [task_type] if isinstance(task_type, str) else list(task_type)
         if len(self.task_types) == 0:
             raise ValueError("At least one task_type must be specified.")
         if task_dims is None:
             dims = [1] * len(self.task_types)
         elif isinstance(task_dims, int):
@@ -64,7 +45,7 @@ class PredictionLayer(nn.Module):
         self.total_dim = sum(self.task_dims)
         self.return_logits = return_logits
-        # Keep slice offsets per task
+        # slice offsets per task
         start = 0
         self._task_slices: list[tuple[int, int]] = []
         for dim in self.task_dims:
@@ -85,27 +66,25 @@ class PredictionLayer(nn.Module):
         logits = x if self.bias is None else x + self.bias
         outputs = []
         for task_type, (start, end) in zip(self.task_types, self._task_slices):
-            task_logits = logits[..., start:end] # Extract logits for the current task
+            task_logits = logits[..., start:end] # logits for the current task
             if self.return_logits:
                 outputs.append(task_logits)
                 continue
-            activation = self._get_activation(task_type)
+            task = task_type.lower()
+            if task == 'binary':
+                activation = torch.sigmoid
+            elif task == 'regression':
+                activation = lambda x: x
+            elif task == 'multiclass':
+                activation = lambda x: torch.softmax(x, dim=-1)
+            else:
+                raise ValueError(f"[PredictionLayer Error]: Unsupported task_type '{task_type}'.")
             outputs.append(activation(task_logits))
         result = torch.cat(outputs, dim=-1)
         if result.shape[-1] == 1:
             result = result.squeeze(-1)
         return result
-    def _get_activation(self, task_type: str):
-        task = task_type.lower()
-        if task == 'binary':
-            return torch.sigmoid
-        if task == 'regression':
-            return lambda x: x
-        if task == 'multiclass':
-            return lambda x: torch.softmax(x, dim=-1)
-        raise ValueError(f"[PredictionLayer Error]: Unsupported task_type '{task_type}'.")
 class EmbeddingLayer(nn.Module):
     def __init__(self, features: list):
         super().__init__()
@@ -145,7 +124,7 @@ class EmbeddingLayer(nn.Module):
                 self.dense_input_dims[feature.name] = in_dim
             else:
                 raise TypeError(f"[EmbeddingLayer Error]: Unsupported feature type: {type(feature)}")
-        self.output_dim = self._compute_output_dim()
+        self.output_dim = self.compute_output_dim()
     def forward(
         self,
@@ -181,7 +160,7 @@ class EmbeddingLayer(nn.Module):
                 sparse_embeds.append(pooling_layer(seq_emb, feature_mask).unsqueeze(1))
             elif isinstance(feature, DenseFeature):
-                dense_embeds.append(self._project_dense(feature, x))
+                dense_embeds.append(self.project_dense(feature, x))
         if squeeze_dim:
             flattened_sparse = [emb.flatten(start_dim=1) for emb in sparse_embeds]
@@ -212,7 +191,7 @@ class EmbeddingLayer(nn.Module):
             raise ValueError("[EmbeddingLayer Error]: squeeze_dim=False requires at least one sparse/sequence feature or dense features with identical projected dimensions.")
         return torch.cat(output_embeddings, dim=1)
-    def _project_dense(self, feature: DenseFeature, x: dict[str, torch.Tensor]) -> torch.Tensor:
+    def project_dense(self, feature: DenseFeature, x: dict[str, torch.Tensor]) -> torch.Tensor:
         if feature.name not in x:
             raise KeyError(f"[EmbeddingLayer Error]:Dense feature '{feature.name}' is missing from input.")
         value = x[feature.name].float()
@@ -228,11 +207,7 @@ class EmbeddingLayer(nn.Module):
         dense_layer = self.dense_transforms[feature.name]
         return dense_layer(value)
-    def _compute_output_dim(self, features: list[DenseFeature | SequenceFeature | SparseFeature] | None = None) -> int:
-        """
-        Compute flattened embedding dimension for provided features or all tracked features.
-        Deduplicates by feature name to avoid double-counting shared embeddings.
-        """
+    def compute_output_dim(self, features: list[DenseFeature | SequenceFeature | SparseFeature] | None = None) -> int:
         candidates = list(features) if features is not None else self.features
         unique_feats = OrderedDict((feat.name, feat) for feat in candidates) # type: ignore[assignment]
         dim = 0
@@ -249,14 +224,13 @@ class EmbeddingLayer(nn.Module):
         return dim
     def get_input_dim(self, features: list[object] | None = None) -> int:
-        return self._compute_output_dim(features) # type: ignore[assignment]
+        return self.compute_output_dim(features) # type: ignore[assignment]
     @property
     def input_dim(self) -> int:
         return self.output_dim
 class InputMask(nn.Module):
-    """Utility module to build sequence masks for pooling layers."""
     def __init__(self):
         super().__init__()
@@ -271,7 +245,6 @@ class InputMask(nn.Module):
         return mask.unsqueeze(1).float()
 class LR(nn.Module):
-    """Wide component from Wide&Deep (Cheng et al., 2016)."""
     def __init__(
             self,
             input_dim: int,
@@ -287,7 +260,6 @@ class LR(nn.Module):
             return self.fc(x)
 class ConcatPooling(nn.Module):
-    """Concatenates sequence embeddings along the temporal dimension."""
     def __init__(self):
         super().__init__()
@@ -295,7 +267,6 @@ class ConcatPooling(nn.Module):
         return x.flatten(start_dim=1, end_dim=2)
 class AveragePooling(nn.Module):
-    """Mean pooling with optional padding mask."""
     def __init__(self):
         super().__init__()
@@ -308,7 +279,6 @@ class AveragePooling(nn.Module):
             return sum_pooling_matrix / (non_padding_length.float() + 1e-16)
 class SumPooling(nn.Module):
-    """Sum pooling with optional padding mask."""
     def __init__(self):
         super().__init__()
@@ -319,7 +289,6 @@ class SumPooling(nn.Module):
             return torch.bmm(mask, x).squeeze(1)
 class MLP(nn.Module):
-    """Stacked fully connected layers used in the deep component."""
     def __init__(
             self,
             input_dim: int,
@@ -345,7 +314,6 @@ class MLP(nn.Module):
         return self.mlp(x)
 class FM(nn.Module):
-    """Factorization Machine (Rendle, 2010) second-order interaction term."""
     def __init__(self, reduce_sum: bool = True):
         super().__init__()
         self.reduce_sum = reduce_sum
@@ -359,7 +327,6 @@ class FM(nn.Module):
         return 0.5 * ix
 class CrossLayer(nn.Module):
-    """Single cross layer used in DCN (Wang et al., 2017)."""
     def __init__(self, input_dim: int):
         super(CrossLayer, self).__init__()
         self.w = torch.nn.Linear(input_dim, 1, bias=False)
@@ -370,7 +337,6 @@ class CrossLayer(nn.Module):
         return x
 class SENETLayer(nn.Module):
-    """Squeeze-and-Excitation block adopted by FiBiNET (Huang et al., 2019)."""
     def __init__(
             self,
             num_fields: int,
@@ -388,7 +354,6 @@ class SENETLayer(nn.Module):
         return v
 class BiLinearInteractionLayer(nn.Module):
-    """Bilinear feature interaction from FiBiNET (Huang et al., 2019)."""
     def __init__(
             self,
             input_dim: int,
@@ -416,7 +381,6 @@ class BiLinearInteractionLayer(nn.Module):
         return torch.cat(bilinear_list, dim=1)
 class MultiHeadSelfAttention(nn.Module):
-    """Multi-head self-attention layer from AutoInt (Song et al., 2019)."""
     def __init__(
             self,
             embedding_dim: int,
@@ -438,13 +402,6 @@ class MultiHeadSelfAttention(nn.Module):
         self.dropout = nn.Dropout(dropout)
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Args:
-            x (torch.Tensor): Tensor of shape (batch_size, num_fields, embedding_dim)
-        Returns:
-            torch.Tensor: Output tensor of shape (batch_size, num_fields, embedding_dim)
-        """
         batch_size, num_fields, _ = x.shape
         Q = self.W_Q(x)  # [batch_size, num_fields, embedding_dim]
         K = self.W_K(x)

nextrec/basic/loggers.py CHANGED Viewed

@@ -2,17 +2,19 @@
 NextRec Basic Loggers
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 03/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 import os
 import re
 import sys
+import json
 import copy
 import logging
-from nextrec.basic.session import create_session
+import numbers
+from typing import Mapping, Any
+from nextrec.basic.session import create_session, Session
 ANSI_CODES = {
     'black': '\033[30m',
@@ -77,17 +79,12 @@ def colorize(text: str, color: str | None = None, bold: bool = False) -> str:
     """Apply ANSI color and bold formatting to the given text."""
     if not color and not bold:
         return text
     result = ""
     if bold:
         result += ANSI_BOLD
     if color and color in ANSI_CODES:
         result += ANSI_CODES[color]
     result += text + ANSI_RESET
     return result
 def setup_logger(session_id: str | os.PathLike | None = None):
@@ -126,3 +123,69 @@ def setup_logger(session_id: str | os.PathLike | None = None):
     logger.addHandler(console_handler)
     return logger
+class TrainingLogger:
+    def __init__(
+        self,
+        session: Session,
+        enable_tensorboard: bool,
+        log_name: str = "training_metrics.jsonl",
+    ) -> None:
+        self.session = session
+        self.enable_tensorboard = enable_tensorboard
+        self.log_path = session.metrics_dir / log_name
+        self.log_path.parent.mkdir(parents=True, exist_ok=True)
+        self.tb_writer = None
+        self.tb_dir = None
+        if self.enable_tensorboard:
+            self._init_tensorboard()
+    def _init_tensorboard(self) -> None:
+        try:
+            from torch.utils.tensorboard import SummaryWriter  # type: ignore
+        except ImportError:
+            logging.warning("[TrainingLogger] tensorboard not installed, disable tensorboard logging.")
+            self.enable_tensorboard = False
+            return
+        tb_dir = self.session.logs_dir / "tensorboard"
+        tb_dir.mkdir(parents=True, exist_ok=True)
+        self.tb_dir = tb_dir
+        self.tb_writer = SummaryWriter(log_dir=str(tb_dir))
+    @property
+    def tensorboard_logdir(self):
+        return self.tb_dir
+    def format_metrics(self, metrics: Mapping[str, Any], split: str) -> dict[str, float]:
+        formatted: dict[str, float] = {}
+        for key, value in metrics.items():
+            if isinstance(value, numbers.Number):
+                formatted[f"{split}/{key}"] = float(value)
+            elif hasattr(value, "item"):
+                try:
+                    formatted[f"{split}/{key}"] = float(value.item())
+                except Exception:
+                    continue
+        return formatted
+    def log_metrics(self, metrics: Mapping[str, Any], step: int, split: str = "train") -> None:
+        payload = self.format_metrics(metrics, split)
+        payload["step"] = int(step)
+        with self.log_path.open("a", encoding="utf-8") as f:
+            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
+        if not self.tb_writer:
+            return
+        step = int(payload.get("step", 0))
+        for key, value in payload.items():
+            if key == "step":
+                continue
+            self.tb_writer.add_scalar(key, value, global_step=step)
+    def close(self) -> None:
+        if self.tb_writer:
+            self.tb_writer.flush()
+            self.tb_writer.close()
+            self.tb_writer = None

nextrec/basic/metrics.py CHANGED Viewed

@@ -2,10 +2,12 @@
 Metrics computation and configuration for model evaluation.
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 02/12/2025
 Author: Yang Zhou,zyaztec@gmail.com
 """
 import logging
+from typing import Any
 import numpy as np
 from sklearn.metrics import (
     roc_auc_score, log_loss, mean_squared_error, mean_absolute_error,
@@ -21,6 +23,32 @@ TASK_DEFAULT_METRICS = {
     'matching': ['auc', 'gauc', 'precision@10', 'hitrate@10', 'map@10','cosine']+ [f'recall@{k}' for k in (5,10,20)] + [f'ndcg@{k}' for k in (5,10,20)] + [f'mrr@{k}' for k in (5,10,20)]
 }
+def check_user_id(*metric_sources: Any) -> bool:
+    """Return True when GAUC or ranking@K metrics appear in the provided sources."""
+    metric_names: set[str] = set()
+    stack: list[Any] = list(metric_sources)
+    while stack:
+        item = stack.pop()
+        if not item:
+            continue
+        if isinstance(item, dict):
+            stack.extend(item.values())
+            continue
+        if isinstance(item, str):
+            metric_names.add(item.lower())
+            continue
+        try:
+            stack.extend(item)
+        except TypeError:
+            continue
+    for name in metric_names:
+        if name == "gauc":
+            return True
+        if name.startswith(("recall@", "precision@", "hitrate@", "hr@", "mrr@", "ndcg@", "map@")):
+            return True
+    return False
 def compute_ks(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Compute Kolmogorov-Smirnov statistic."""
     sorted_indices = np.argsort(y_pred)[::-1]
@@ -80,7 +108,7 @@ def compute_gauc(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray |
     gauc = float(np.sum(user_aucs * user_weights) / np.sum(user_weights))
     return gauc
-def _group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndarray]:
+def group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndarray]:
     """Group sample indices by user_id. If user_ids is None, treat all as one group."""
     if user_ids is None:
         return [np.arange(n_samples)]
@@ -92,13 +120,13 @@ def _group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndar
     groups = [np.where(user_ids == u)[0] for u in unique_users]
     return groups
-def _compute_precision_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_precision_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute Precision@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: Precision@K] user_ids must be provided for Precision@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     precisions = []
     for idx in groups:
         if idx.size == 0:
@@ -112,13 +140,13 @@ def _compute_precision_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np
         precisions.append(hits / float(k_user))
     return float(np.mean(precisions)) if precisions else 0.0
-def _compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute Recall@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: Recall@K] user_ids must be provided for Recall@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     recalls = []
     for idx in groups:
         if idx.size == 0:
@@ -135,13 +163,13 @@ def _compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.nd
         recalls.append(hits / float(num_pos))
     return float(np.mean(recalls)) if recalls else 0.0
-def _compute_hitrate_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_hitrate_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute HitRate@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: HitRate@K] user_ids must be provided for HitRate@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     hits_per_user = []
     for idx in groups:
         if idx.size == 0:
@@ -157,13 +185,13 @@ def _compute_hitrate_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.n
         hits_per_user.append(1.0 if hits > 0 else 0.0)
     return float(np.mean(hits_per_user)) if hits_per_user else 0.0
-def _compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute MRR@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: MRR@K] user_ids must be provided for MRR@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     mrrs = []
     for idx in groups:
         if idx.size == 0:
@@ -184,7 +212,7 @@ def _compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarr
         mrrs.append(rr)
     return float(np.mean(mrrs)) if mrrs else 0.0
-def _compute_dcg_at_k(labels: np.ndarray, k: int) -> float:
+def compute_dcg_at_k(labels: np.ndarray, k: int) -> float:
     k_user = min(k, labels.size)
     if k_user == 0:
         return 0.0
@@ -192,13 +220,13 @@ def _compute_dcg_at_k(labels: np.ndarray, k: int) -> float:
     discounts = np.log2(np.arange(2, k_user + 2))
     return float(np.sum(gains / discounts))
-def _compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute NDCG@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: NDCG@K] user_ids must be provided for NDCG@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     ndcgs = []
     for idx in groups:
         if idx.size == 0:
@@ -209,23 +237,23 @@ def _compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndar
         scores = y_pred[idx]
         order = np.argsort(scores)[::-1]
         ranked_labels = labels[order]
-        dcg = _compute_dcg_at_k(ranked_labels, k)
+        dcg = compute_dcg_at_k(ranked_labels, k)
         # ideal DCG
         ideal_labels = np.sort(labels)[::-1]
-        idcg = _compute_dcg_at_k(ideal_labels, k)
+        idcg = compute_dcg_at_k(ideal_labels, k)
         if idcg == 0.0:
             continue
         ndcgs.append(dcg / idcg)
     return float(np.mean(ndcgs)) if ndcgs else 0.0
-def _compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+def compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Mean Average Precision@K."""
     if user_ids is None:
         raise ValueError("[Metrics Error: MAP@K] user_ids must be provided for MAP@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
-    groups = _group_indices_by_user(user_ids, n)
+    groups = group_indices_by_user(user_ids, n)
     aps = []
     for idx in groups:
         if idx.size == 0:
@@ -250,7 +278,7 @@ def _compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarr
     return float(np.mean(aps)) if aps else 0.0
-def _compute_cosine_separation(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def compute_cosine_separation(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Compute Cosine Separation."""
     y_true = (y_true > 0).astype(int)
     pos_mask = y_true == 1
@@ -310,10 +338,10 @@ def configure_metrics(
         if primary_task not in TASK_DEFAULT_METRICS:
             raise ValueError(f"Unsupported task type: {primary_task}")
         metrics_list = TASK_DEFAULT_METRICS[primary_task]
-    best_metrics_mode = get_best_metric_mode(metrics_list[0], primary_task)
+    best_metrics_mode = getbest_metric_mode(metrics_list[0], primary_task)
     return metrics_list, task_specific_metrics, best_metrics_mode
-def get_best_metric_mode(first_metric: str, primary_task: str) -> str:
+def getbest_metric_mode(first_metric: str, primary_task: str) -> str:
     """Determine if metric should be maximized or minimized."""
     first_metric_lower = first_metric.lower()
     # Metrics that should be maximized
@@ -350,34 +378,28 @@ def compute_single_metric(
     y_p_binary = (y_pred > 0.5).astype(int)
     try:
         metric_lower = metric.lower()
-        # recall@K
         if metric_lower.startswith('recall@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_recall_at_k(y_true, y_pred, user_ids, k) # type: ignore
-        # precision@K
+            return compute_recall_at_k(y_true, y_pred, user_ids, k) # type: ignore
         if metric_lower.startswith('precision@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_precision_at_k(y_true, y_pred, user_ids, k) # type: ignore
-        # hitrate@K / hr@K
+            return compute_precision_at_k(y_true, y_pred, user_ids, k) # type: ignore
         if metric_lower.startswith('hitrate@') or metric_lower.startswith('hr@'):
             k_str = metric_lower.split('@')[1]
             k = int(k_str)
-            return _compute_hitrate_at_k(y_true, y_pred, user_ids, k) # type: ignore
-        # mrr@K
+            return compute_hitrate_at_k(y_true, y_pred, user_ids, k) # type: ignore
         if metric_lower.startswith('mrr@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_mrr_at_k(y_true, y_pred, user_ids, k) # type: ignore
-        # ndcg@K
+            return compute_mrr_at_k(y_true, y_pred, user_ids, k) # type: ignore
         if metric_lower.startswith('ndcg@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_ndcg_at_k(y_true, y_pred, user_ids, k) # type: ignore
-        # map@K
+            return compute_ndcg_at_k(y_true, y_pred, user_ids, k) # type: ignore
         if metric_lower.startswith('map@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_map_at_k(y_true, y_pred, user_ids, k) # type: ignore
+            return compute_map_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # cosine for matching task
         if metric_lower == 'cosine':
-            return _compute_cosine_separation(y_true, y_pred)
+            return compute_cosine_separation(y_true, y_pred)
         if metric == 'auc':
             value = float(roc_auc_score(y_true, y_pred, average='macro' if task_type == 'multilabel' else None))
         elif metric == 'gauc':

nextrec 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

nextrec 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl