nextrec 0.2.7__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +4 -8
- nextrec/basic/callback.py +1 -1
- nextrec/basic/features.py +33 -25
- nextrec/basic/layers.py +164 -601
- nextrec/basic/loggers.py +4 -5
- nextrec/basic/metrics.py +39 -115
- nextrec/basic/model.py +257 -177
- nextrec/basic/session.py +1 -5
- nextrec/data/__init__.py +12 -0
- nextrec/data/data_utils.py +3 -27
- nextrec/data/dataloader.py +26 -34
- nextrec/data/preprocessor.py +2 -1
- nextrec/loss/listwise.py +6 -4
- nextrec/loss/loss_utils.py +10 -6
- nextrec/loss/pairwise.py +5 -3
- nextrec/loss/pointwise.py +7 -13
- nextrec/models/generative/__init__.py +5 -0
- nextrec/models/generative/hstu.py +399 -0
- nextrec/models/match/mind.py +110 -1
- nextrec/models/multi_task/esmm.py +46 -27
- nextrec/models/multi_task/mmoe.py +48 -30
- nextrec/models/multi_task/ple.py +156 -141
- nextrec/models/multi_task/poso.py +413 -0
- nextrec/models/multi_task/share_bottom.py +43 -26
- nextrec/models/ranking/__init__.py +2 -0
- nextrec/models/ranking/dcn.py +20 -1
- nextrec/models/ranking/dcn_v2.py +84 -0
- nextrec/models/ranking/deepfm.py +44 -18
- nextrec/models/ranking/dien.py +130 -27
- nextrec/models/ranking/masknet.py +13 -67
- nextrec/models/ranking/widedeep.py +39 -18
- nextrec/models/ranking/xdeepfm.py +34 -1
- nextrec/utils/common.py +26 -1
- nextrec/utils/optimizer.py +7 -3
- nextrec-0.3.2.dist-info/METADATA +312 -0
- nextrec-0.3.2.dist-info/RECORD +57 -0
- nextrec-0.2.7.dist-info/METADATA +0 -281
- nextrec-0.2.7.dist-info/RECORD +0 -54
- {nextrec-0.2.7.dist-info → nextrec-0.3.2.dist-info}/WHEEL +0 -0
- {nextrec-0.2.7.dist-info → nextrec-0.3.2.dist-info}/licenses/LICENSE +0 -0
nextrec/basic/loggers.py
CHANGED
@@ -2,7 +2,8 @@
 NextRec Basic Loggers

 Date: create on 27/10/2025
-
+Checkpoint: edit on 29/11/2025
+Author: Yang Zhou, zyaztec@gmail.com
 """


@@ -10,10 +11,8 @@ import os
 import re
 import sys
 import copy
-import datetime
 import logging
-from
-from nextrec.basic.session import resolve_save_path, create_session
+from nextrec.basic.session import create_session

 ANSI_CODES = {
     'black': '\033[30m',
@@ -107,7 +106,7 @@ def setup_logger(session_id: str | os.PathLike | None = None):

     console_format = '%(message)s'
     file_format = '%(asctime)s - %(levelname)s - %(message)s'
-    date_format = '%H:%M:%S'
+    date_format = '%Y-%m-%d %H:%M:%S'

     logger = logging.getLogger()
     logger.setLevel(logging.INFO)
nextrec/basic/metrics.py
CHANGED
@@ -2,6 +2,7 @@
 Metrics computation and configuration for model evaluation.

 Date: create on 27/10/2025
+Checkpoint: edit on 29/11/2025
 Author: Yang Zhou,zyaztec@gmail.com
 """
 import logging
@@ -11,7 +12,6 @@ from sklearn.metrics import (
     accuracy_score, precision_score, recall_score, f1_score, r2_score,
 )

-
 CLASSIFICATION_METRICS = {'auc', 'gauc', 'ks', 'logloss', 'accuracy', 'acc', 'precision', 'recall', 'f1', 'micro_f1', 'macro_f1'}
 REGRESSION_METRICS = {'mse', 'mae', 'rmse', 'r2', 'mape', 'msle'}
 TASK_DEFAULT_METRICS = {
@@ -21,8 +21,6 @@ TASK_DEFAULT_METRICS = {
     'matching': ['auc', 'gauc', 'precision@10', 'hitrate@10', 'map@10','cosine']+ [f'recall@{k}' for k in (5,10,20)] + [f'ndcg@{k}' for k in (5,10,20)] + [f'mrr@{k}' for k in (5,10,20)]
 }

-
-
 def compute_ks(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Compute Kolmogorov-Smirnov statistic."""
     sorted_indices = np.argsort(y_pred)[::-1]
@@ -38,7 +36,6 @@ def compute_ks(y_true: np.ndarray, y_pred: np.ndarray) -> float:
         return float(ks_value)
     return 0.0

-
 def compute_mape(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Compute Mean Absolute Percentage Error."""
     mask = y_true != 0
@@ -46,83 +43,62 @@ def compute_mape(y_true: np.ndarray, y_pred: np.ndarray) -> float:
         return float(np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100)
     return 0.0

-
 def compute_msle(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Compute Mean Squared Log Error."""
     y_pred_pos = np.maximum(y_pred, 0)
     return float(mean_squared_error(np.log1p(y_true), np.log1p(y_pred_pos)))

-
-def compute_gauc(
-    y_true: np.ndarray,
-    y_pred: np.ndarray,
-    user_ids: np.ndarray | None = None
-) -> float:
+def compute_gauc(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray | None = None) -> float:
     if user_ids is None:
         # If no user_ids provided, fall back to regular AUC
         try:
             return float(roc_auc_score(y_true, y_pred))
         except:
             return 0.0
-
     # Group by user_id and calculate AUC for each user
     user_aucs = []
     user_weights = []
-
     unique_users = np.unique(user_ids)
-
     for user_id in unique_users:
         mask = user_ids == user_id
         user_y_true = y_true[mask]
         user_y_pred = y_pred[mask]
-
         # Skip users with only one class (cannot compute AUC)
         if len(np.unique(user_y_true)) < 2:
             continue
-
         try:
             user_auc = roc_auc_score(user_y_true, user_y_pred)
             user_aucs.append(user_auc)
             user_weights.append(len(user_y_true))
         except:
             continue
-
     if len(user_aucs) == 0:
         return 0.0
-
     # Weighted average
     user_aucs = np.array(user_aucs)
     user_weights = np.array(user_weights)
     gauc = float(np.sum(user_aucs * user_weights) / np.sum(user_weights))
-
     return gauc

-
 def _group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndarray]:
     """Group sample indices by user_id. If user_ids is None, treat all as one group."""
     if user_ids is None:
         return [np.arange(n_samples)]
-
     user_ids = np.asarray(user_ids)
     if user_ids.shape[0] != n_samples:
-        logging.warning(
-            "user_ids length (%d) != number of samples (%d), "
-            "treating all samples as a single group for ranking metrics.",
-            user_ids.shape[0],
-            n_samples,
-        )
+        logging.warning(f"[Metrics Warning: GAUC] user_ids length {user_ids.shape[0]} != number of samples {n_samples}, treating all samples as a single group for ranking metrics.")
         return [np.arange(n_samples)]
-
     unique_users = np.unique(user_ids)
     groups = [np.where(user_ids == u)[0] for u in unique_users]
     return groups

-
-
+def _compute_precision_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
+    """Compute Precision@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: Precision@K] user_ids must be provided for Precision@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     precisions = []
     for idx in groups:
         if idx.size == 0:
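The compute_gauc change above collapses the signature onto one line but keeps the same logic: an AUC per user, weighted by each user's sample count, skipping users whose labels contain only one class. A self-contained sketch of that computation on invented data:

# Minimal sketch of the GAUC computation shown above (toy values).
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([1, 0, 1, 0, 0, 1])
y_pred = np.array([0.9, 0.2, 0.6, 0.4, 0.1, 0.8])
user_ids = np.array([1, 1, 1, 2, 2, 2])

aucs, weights = [], []
for user in np.unique(user_ids):
    mask = user_ids == user
    if len(np.unique(y_true[mask])) < 2:
        continue  # single-class users are skipped, as in compute_gauc
    aucs.append(roc_auc_score(y_true[mask], y_pred[mask]))
    weights.append(mask.sum())

gauc = float(np.sum(np.array(aucs) * np.array(weights)) / np.sum(weights))
print(gauc)  # 1.0 for this toy data: both users rank their positives first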
@@ -134,16 +110,15 @@ def _compute_precision_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np
         topk = order[:k_user]
         hits = labels[topk].sum()
         precisions.append(hits / float(k_user))
-
     return float(np.mean(precisions)) if precisions else 0.0

-
-def _compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray | None, k: int) -> float:
+def _compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute Recall@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: Recall@K] user_ids must be provided for Recall@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     recalls = []
     for idx in groups:
         if idx.size == 0:
@@ -151,46 +126,44 @@ def _compute_recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.nd
         labels = y_true[idx]
         num_pos = labels.sum()
         if num_pos == 0:
-            continue #
+            continue # dont count users with no positive labels
         scores = y_pred[idx]
         order = np.argsort(scores)[::-1]
         k_user = min(k, idx.size)
         topk = order[:k_user]
         hits = labels[topk].sum()
         recalls.append(hits / float(num_pos))
-
     return float(np.mean(recalls)) if recalls else 0.0

-
-def _compute_hitrate_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray | None, k: int) -> float:
+def _compute_hitrate_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute HitRate@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: HitRate@K] user_ids must be provided for HitRate@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     hits_per_user = []
     for idx in groups:
         if idx.size == 0:
             continue
         labels = y_true[idx]
         if labels.sum() == 0:
-            continue #
+            continue # dont count users with no positive labels
         scores = y_pred[idx]
         order = np.argsort(scores)[::-1]
         k_user = min(k, idx.size)
         topk = order[:k_user]
         hits = labels[topk].sum()
         hits_per_user.append(1.0 if hits > 0 else 0.0)
-
     return float(np.mean(hits_per_user)) if hits_per_user else 0.0

-
-def _compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray | None, k: int) -> float:
+def _compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute MRR@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: MRR@K] user_ids must be provided for MRR@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     mrrs = []
     for idx in groups:
         if idx.size == 0:
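In 0.3.2 these @K helpers require user_ids and raise a ValueError otherwise; scores are ranked per user and users with no positive labels are skipped. A toy Recall@K computation mirroring that logic, with invented data:

# Toy illustration of the per-user Recall@K logic above.
import numpy as np

y_true = np.array([1, 0, 1, 0, 1, 0])
y_pred = np.array([0.9, 0.8, 0.1, 0.7, 0.6, 0.2])
user_ids = np.array(['a', 'a', 'a', 'b', 'b', 'b'])

k = 2
recalls = []
for user in np.unique(user_ids):
    idx = np.where(user_ids == user)[0]
    labels = (y_true[idx] > 0).astype(int)
    if labels.sum() == 0:
        continue  # as in 0.3.2, users without positives are not counted
    topk = np.argsort(y_pred[idx])[::-1][:k]
    recalls.append(labels[topk].sum() / labels.sum())

print(float(np.mean(recalls)))  # user a: 1 of 2 positives in top-2; user b: 1 of 1 -> 0.75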
@@ -203,17 +176,14 @@ def _compute_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarr
         k_user = min(k, idx.size)
         topk = order[:k_user]
         ranked_labels = labels[order]
-
         rr = 0.0
         for rank, lab in enumerate(ranked_labels[:k_user], start=1):
             if lab > 0:
                 rr = 1.0 / rank
                 break
         mrrs.append(rr)
-
     return float(np.mean(mrrs)) if mrrs else 0.0

-
 def _compute_dcg_at_k(labels: np.ndarray, k: int) -> float:
     k_user = min(k, labels.size)
     if k_user == 0:
@@ -222,13 +192,13 @@ def _compute_dcg_at_k(labels: np.ndarray, k: int) -> float:
     discounts = np.log2(np.arange(2, k_user + 2))
     return float(np.sum(gains / discounts))

-
-def _compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray | None, k: int) -> float:
+def _compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Compute NDCG@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: NDCG@K] user_ids must be provided for NDCG@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     ndcgs = []
     for idx in groups:
         if idx.size == 0:
@@ -237,27 +207,25 @@ def _compute_ndcg_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndar
         if labels.sum() == 0:
             continue
         scores = y_pred[idx]
-
         order = np.argsort(scores)[::-1]
         ranked_labels = labels[order]
         dcg = _compute_dcg_at_k(ranked_labels, k)
-
         # ideal DCG
         ideal_labels = np.sort(labels)[::-1]
         idcg = _compute_dcg_at_k(ideal_labels, k)
         if idcg == 0.0:
             continue
         ndcgs.append(dcg / idcg)
-
     return float(np.mean(ndcgs)) if ndcgs else 0.0


-def _compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray
+def _compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int) -> float:
     """Mean Average Precision@K."""
+    if user_ids is None:
+        raise ValueError("[Metrics Error: MAP@K] user_ids must be provided for MAP@K computation.")
     y_true = (y_true > 0).astype(int)
     n = len(y_true)
     groups = _group_indices_by_user(user_ids, n)
-
     aps = []
     for idx in groups:
         if idx.size == 0:
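A worked NDCG@K example following the DCG formula above (gain at rank r discounted by log2(r+1)), assuming the gains are the top-K binarized labels, which matches the binarization performed in _compute_ndcg_at_k:

# Worked NDCG@3 example with invented ranked relevance [1, 0, 1].
import numpy as np

def dcg_at_k(labels, k):
    k_user = min(k, labels.size)
    gains = labels[:k_user]
    discounts = np.log2(np.arange(2, k_user + 2))
    return float(np.sum(gains / discounts))

ranked = np.array([1, 0, 1])      # labels in predicted order
ideal = np.sort(ranked)[::-1]     # [1, 1, 0]
ndcg = dcg_at_k(ranked, 3) / dcg_at_k(ideal, 3)
print(round(ndcg, 4))  # (1/log2(2) + 1/log2(4)) / (1/log2(2) + 1/log2(3)) ≈ 0.9197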
@@ -266,23 +234,19 @@ def _compute_map_at_k(y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarr
         num_pos = labels.sum()
         if num_pos == 0:
             continue
-
         scores = y_pred[idx]
         order = np.argsort(scores)[::-1]
         k_user = min(k, idx.size)
-
         hits = 0
         sum_precisions = 0.0
         for rank, i in enumerate(order[:k_user], start=1):
             if labels[i] > 0:
                 hits += 1
                 sum_precisions += hits / float(rank)
-
         if hits == 0:
             aps.append(0.0)
         else:
             aps.append(sum_precisions / float(num_pos))
-
     return float(np.mean(aps)) if aps else 0.0


@@ -308,31 +272,22 @@ configure_metrics(
     """Configure metrics based on task and user input."""
     primary_task = task[0] if isinstance(task, list) else task
     nums_task = len(task) if isinstance(task, list) else 1
-
     metrics_list: list[str] = []
     task_specific_metrics: dict[str, list[str]] | None = None
-
     if isinstance(metrics, dict):
         metrics_list = []
         task_specific_metrics = {}
         for task_name, task_metrics in metrics.items():
             if task_name not in target_names:
-                logging.warning(
-                    "Task '%s' not found in targets %s, skipping its metrics",
-                    task_name,
-                    target_names,
-                )
+                logging.warning(f"[Metrics Warning] Task {task_name} not found in targets {target_names}, skipping its metrics")
                 continue
-
             lowered = [m.lower() for m in task_metrics]
             task_specific_metrics[task_name] = lowered
             for metric in lowered:
                 if metric not in metrics_list:
                     metrics_list.append(metric)
-
     elif metrics:
         metrics_list = [m.lower() for m in metrics]
-
     else:
         # No user provided metrics, derive per task type
         if nums_task > 1 and isinstance(task, list):
@@ -350,26 +305,20 @@ configure_metrics(
             if primary_task not in TASK_DEFAULT_METRICS:
                 raise ValueError(f"Unsupported task type: {primary_task}")
             metrics_list = TASK_DEFAULT_METRICS[primary_task]
-
     if not metrics_list:
         # Inline get_default_metrics_for_task logic
         if primary_task not in TASK_DEFAULT_METRICS:
             raise ValueError(f"Unsupported task type: {primary_task}")
         metrics_list = TASK_DEFAULT_METRICS[primary_task]
-
     best_metrics_mode = get_best_metric_mode(metrics_list[0], primary_task)
-
     return metrics_list, task_specific_metrics, best_metrics_mode

-
 def get_best_metric_mode(first_metric: str, primary_task: str) -> str:
     """Determine if metric should be maximized or minimized."""
     first_metric_lower = first_metric.lower()
-
     # Metrics that should be maximized
     if first_metric_lower in {'auc', 'gauc', 'ks', 'accuracy', 'acc', 'precision', 'recall', 'f1', 'r2', 'micro_f1', 'macro_f1'}:
         return 'max'
-
     # Ranking metrics that should be maximized (with @K suffix)
     if (first_metric_lower.startswith('recall@') or
         first_metric_lower.startswith('precision@') or
@@ -379,21 +328,17 @@ def get_best_metric_mode(first_metric: str, primary_task: str) -> str:
         first_metric_lower.startswith('ndcg@') or
         first_metric_lower.startswith('map@')):
         return 'max'
-
     # Cosine separation should be maximized
     if first_metric_lower == 'cosine':
         return 'max'
-
     # Metrics that should be minimized
     if first_metric_lower in {'logloss', 'mse', 'mae', 'rmse', 'mape', 'msle'}:
         return 'min'
-
     # Default based on task type
     if primary_task == 'regression':
         return 'min'
     return 'max'

-
 def compute_single_metric(
     metric: str,
     y_true: np.ndarray,
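Illustrative calls showing how get_best_metric_mode resolves the optimization direction for the first configured metric; the import path simply mirrors the file shown in this diff.

from nextrec.basic.metrics import get_best_metric_mode

print(get_best_metric_mode('auc', 'binary'))        # 'max'
print(get_best_metric_mode('ndcg@10', 'matching'))  # 'max' via the '@K' prefix rules
print(get_best_metric_mode('mse', 'regression'))    # 'min'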
@@ -403,45 +348,36 @@ def compute_single_metric(
 ) -> float:
     """Compute a single metric given true and predicted values."""
     y_p_binary = (y_pred > 0.5).astype(int)
-
     try:
         metric_lower = metric.lower()
-
         # recall@K
         if metric_lower.startswith('recall@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_recall_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_recall_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # precision@K
         if metric_lower.startswith('precision@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_precision_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_precision_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # hitrate@K / hr@K
         if metric_lower.startswith('hitrate@') or metric_lower.startswith('hr@'):
             k_str = metric_lower.split('@')[1]
             k = int(k_str)
-            return _compute_hitrate_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_hitrate_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # mrr@K
         if metric_lower.startswith('mrr@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_mrr_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_mrr_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # ndcg@K
         if metric_lower.startswith('ndcg@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_ndcg_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_ndcg_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # map@K
         if metric_lower.startswith('map@'):
             k = int(metric_lower.split('@')[1])
-            return _compute_map_at_k(y_true, y_pred, user_ids, k)
-
+            return _compute_map_at_k(y_true, y_pred, user_ids, k) # type: ignore
         # cosine for matching task
         if metric_lower == 'cosine':
             return _compute_cosine_separation(y_true, y_pred)
-
         if metric == 'auc':
             value = float(roc_auc_score(y_true, y_pred, average='macro' if task_type == 'multilabel' else None))
         elif metric == 'gauc':
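How the dispatch above parses metric strings: the text after '@' becomes K, and ranking metrics are routed to the per-user helpers, so user_ids must accompany them. The positional argument order follows the call used in evaluate_metrics; the arrays and import path are illustrative.

import numpy as np
from nextrec.basic.metrics import compute_single_metric

y_true = np.array([1, 0, 1, 0])
y_pred = np.array([0.9, 0.3, 0.4, 0.8])
user_ids = np.array([1, 1, 2, 2])

print(compute_single_metric('recall@1', y_true, y_pred, 'matching', user_ids))  # 0.5
print(compute_single_metric('auc', y_true, y_pred, 'binary', user_ids))         # 0.75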
@@ -475,12 +411,11 @@
         elif metric == 'msle':
             value = float(compute_msle(y_true, y_pred))
         else:
-            logging.warning(f"Metric '{metric}' is not supported, returning 0.0")
+            logging.warning(f"[Metric Warning] Metric '{metric}' is not supported, returning 0.0")
             value = 0.0
     except Exception as exception:
-        logging.warning(f"Failed to compute metric {metric}: {exception}")
+        logging.warning(f"[Metric Warning] Failed to compute metric {metric}: {exception}")
         value = 0.0
-
     return value

 def evaluate_metrics(
@@ -494,21 +429,17 @@
 ) -> dict: # {'auc': 0.75, 'logloss': 0.45, 'mse_target2': 3.2}
     """Evaluate specified metrics for given true and predicted values."""
     result = {}
-
     if y_true is None or y_pred is None:
         return result
-
     # Main evaluation logic
     primary_task = task[0] if isinstance(task, list) else task
     nums_task = len(task) if isinstance(task, list) else 1
-
     # Single task evaluation
     if nums_task == 1:
         for metric in metrics:
             metric_lower = metric.lower()
             value = compute_single_metric(metric_lower, y_true, y_pred, primary_task, user_ids)
             result[metric_lower] = value
-
     # Multi-task evaluation
     else:
         for metric in metrics:
@@ -526,31 +457,24 @@
                 elif isinstance(task, str):
                     task_type = task
                 else:
-                    task_type = 'binary'
-
+                    task_type = 'binary'
                 if task_type in ['binary', 'multilabel']:
                     should_compute = metric_lower in {'auc', 'ks', 'logloss', 'accuracy', 'acc', 'precision', 'recall', 'f1', 'micro_f1', 'macro_f1'}
                 elif task_type == 'regression':
-                    should_compute = metric_lower in {'mse', 'mae', 'rmse', 'r2', 'mape', 'msle'}
-
+                    should_compute = metric_lower in {'mse', 'mae', 'rmse', 'r2', 'mape', 'msle'}
                 if not should_compute:
-                    continue
-
-                target_name = target_names[task_idx]
-
+                    continue
+                target_name = target_names[task_idx]
                 # Get task type for specific index
                 if isinstance(task, list) and task_idx < len(task):
                     task_type = task[task_idx]
                 elif isinstance(task, str):
                     task_type = task
                 else:
-                    task_type = 'binary'
-
+                    task_type = 'binary'
                 y_true_task = y_true[:, task_idx]
-                y_pred_task = y_pred[:, task_idx]
-
+                y_pred_task = y_pred[:, task_idx]
                 # Compute metric
                 value = compute_single_metric(metric_lower, y_true_task, y_pred_task, task_type, user_ids)
                 result[f'{metric_lower}_{target_name}'] = value
-
     return result
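A hedged sketch of a multi-task evaluate_metrics call producing the '<metric>_<target>' keys built above. The keyword names mirror the variables used in the function body; the exact signature is not shown in this diff, so treat the call as illustrative only.

import numpy as np
from nextrec.basic.metrics import evaluate_metrics

y_true = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])              # one column per target
y_pred = np.array([[0.8, 0.3], [0.2, 0.7], [0.9, 0.6], [0.4, 0.1]])

result = evaluate_metrics(
    metrics=['auc', 'logloss'],
    y_true=y_true,
    y_pred=y_pred,
    task=['binary', 'binary'],
    target_names=['ctr', 'cvr'],
    user_ids=None,
)
# Expected key pattern: {'auc_ctr': ..., 'logloss_ctr': ..., 'auc_cvr': ..., 'logloss_cvr': ...}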