nextrec 0.4.10__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
- nextrec/__version__.py +1 -1
- nextrec/basic/callback.py +44 -54
- nextrec/basic/features.py +35 -22
- nextrec/basic/layers.py +64 -68
- nextrec/basic/loggers.py +2 -2
- nextrec/basic/metrics.py +9 -5
- nextrec/basic/model.py +162 -106
- nextrec/cli.py +16 -5
- nextrec/data/preprocessor.py +4 -4
- nextrec/loss/loss_utils.py +1 -1
- nextrec/models/generative/__init__.py +1 -1
- nextrec/models/ranking/eulernet.py +44 -75
- nextrec/models/ranking/ffm.py +275 -0
- nextrec/models/ranking/lr.py +1 -3
- nextrec/models/representation/autorec.py +0 -0
- nextrec/models/representation/bpr.py +0 -0
- nextrec/models/representation/cl4srec.py +0 -0
- nextrec/models/representation/lightgcn.py +0 -0
- nextrec/models/representation/mf.py +0 -0
- nextrec/models/representation/s3rec.py +0 -0
- nextrec/models/sequential/sasrec.py +0 -0
- nextrec/utils/__init__.py +2 -1
- nextrec/utils/console.py +9 -1
- nextrec/utils/model.py +14 -0
- {nextrec-0.4.10.dist-info → nextrec-0.4.12.dist-info}/METADATA +32 -11
- {nextrec-0.4.10.dist-info → nextrec-0.4.12.dist-info}/RECORD +30 -23
- /nextrec/models/{generative → sequential}/hstu.py +0 -0
- {nextrec-0.4.10.dist-info → nextrec-0.4.12.dist-info}/WHEEL +0 -0
- {nextrec-0.4.10.dist-info → nextrec-0.4.12.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.10.dist-info → nextrec-0.4.12.dist-info}/licenses/LICENSE +0 -0
nextrec/__version__.py
CHANGED
```diff
@@ -1 +1 @@
-__version__ = "0.4.10"
+__version__ = "0.4.12"
```
nextrec/basic/callback.py
CHANGED
```diff
@@ -22,10 +22,10 @@ class Callback:
     """
     Base callback.
 
-    Notes
-
-
-
+    Notes for DDP training:
+    In distributed training, the training loop runs on every rank.
+    For callbacks with side effects (saving, logging, etc.), set
+    ``run_on_main_process_only=True`` to avoid multi-rank duplication.
     """
 
    run_on_main_process_only: bool = False
```
```diff
@@ -70,7 +70,7 @@ class Callback:
 
 
 class CallbackList:
-    """
+    """Generates a list of callbacks"""
 
     def __init__(self, callbacks: Optional[list[Callback]] = None):
         self.callbacks = callbacks or []
@@ -78,61 +78,41 @@ class CallbackList:
     def append(self, callback: Callback):
         self.callbacks.append(callback)
 
-    def set_model(self, model):
+    def call(self, fn_name: str, *args, **kwargs):
         for callback in self.callbacks:
-            callback.set_model(model)
+            if not callback.should_run():
+                continue
+            getattr(callback, fn_name)(*args, **kwargs)
+
+    def set_model(self, model):
+        self.call("set_model", model)
 
     def set_params(self, params: dict):
-        for callback in self.callbacks:
-            callback.set_params(params)
+        self.call("set_params", params)
 
     def on_train_begin(self, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_train_begin(logs)
+        self.call("on_train_begin", logs)
 
     def on_train_end(self, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_train_end(logs)
+        self.call("on_train_end", logs)
 
     def on_epoch_begin(self, epoch: int, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_epoch_begin(epoch, logs)
+        self.call("on_epoch_begin", epoch, logs)
 
     def on_epoch_end(self, epoch: int, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_epoch_end(epoch, logs)
+        self.call("on_epoch_end", epoch, logs)
 
     def on_batch_begin(self, batch: int, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_batch_begin(batch, logs)
+        self.call("on_batch_begin", batch, logs)
 
     def on_batch_end(self, batch: int, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_batch_end(batch, logs)
+        self.call("on_batch_end", batch, logs)
 
     def on_validation_begin(self, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_validation_begin(logs)
+        self.call("on_validation_begin", logs)
 
     def on_validation_end(self, logs: Optional[dict] = None):
-        for callback in self.callbacks:
-            if not callback.should_run():
-                continue
-            callback.on_validation_end(logs)
+        self.call("on_validation_end", logs)
 
 
 class EarlyStopper(Callback):
```
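The change above collapses ten near-identical per-event loops into a single `call` dispatcher, so the `should_run()` gate (used for DDP main-process-only callbacks) lives in one place. A minimal standalone sketch of the pattern, with simplified names rather than the package's exact classes:

```python
class Handler:
    def should_run(self) -> bool:
        return True

    def on_train_begin(self, logs=None):
        print(f"{self.__class__.__name__}: train begin, logs={logs}")


class HandlerList:
    def __init__(self, handlers=None):
        self.handlers = handlers or []

    def call(self, fn_name: str, *args, **kwargs):
        # One gate + one dispatch replaces a copy of this loop per event.
        for h in self.handlers:
            if not h.should_run():
                continue
            getattr(h, fn_name)(*args, **kwargs)

    def on_train_begin(self, logs=None):
        self.call("on_train_begin", logs)


HandlerList([Handler()]).on_train_begin({"epoch": 0})
```

Dispatching by method name with `getattr` keeps the hook list extensible: adding a new event only requires a one-line forwarding method.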
```diff
@@ -146,6 +126,20 @@ class EarlyStopper(Callback):
         restore_best_weights: bool = True,
         verbose: int = 1,
     ):
+        """
+        Callback to stop training early if no improvement.
+
+        Args:
+            monitor: Metric name to monitor.
+            patience: Number of epochs with no improvement after which training will be stopped.
+            mode: One of {'min', 'max'}. In 'min' mode, training will stop when the
+                monitored metric has stopped decreasing; in 'max' mode it will stop
+                when the monitored metric has stopped increasing.
+            min_delta: Minimum change in the monitored metric to qualify as an improvement.
+            restore_best_weights: Whether to restore model weights from the epoch with the best value
+                of the monitored metric.
+            verbose: Verbosity mode. 1: messages will be printed. 0: silent.
+        """
         super().__init__()
         self.monitor = monitor
         self.patience = patience
```
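With the contract documented, typical usage would look like the sketch below; the constructor arguments come from the docstring above, while the trainer integration line is an assumption:

```python
from nextrec.basic.callback import EarlyStopper

# Stop when validation AUC fails to improve by at least 1e-4
# for 3 consecutive epochs, then roll back to the best weights.
stopper = EarlyStopper(
    monitor="val_auc",
    patience=3,
    mode="max",
    min_delta=1e-4,
    restore_best_weights=True,
    verbose=1,
)
# model.fit(..., callbacks=[stopper])  # assumed trainer integration
```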
```diff
@@ -233,6 +227,7 @@ class CheckpointSaver(Callback):
        save_best_only: If True, only save when the model is considered the "best".
        save_freq: Frequency of checkpoint saving ('epoch' or integer for every N epochs).
        verbose: Verbosity mode.
+       run_on_main_process_only: Whether to run this callback only on the main process in DDP.
    """
 
    def __init__(
@@ -274,7 +269,6 @@ class CheckpointSaver(Callback):
         self.checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
 
     def on_epoch_end(self, epoch: int, logs: Optional[dict] = None):
-        logging.info("")
         logs = logs or {}
 
         should_save = False
@@ -283,9 +277,6 @@ class CheckpointSaver(Callback):
         elif isinstance(self.save_freq, int) and (epoch + 1) % self.save_freq == 0:
             should_save = True
 
-        if not should_save and self.save_best_only:
-            should_save = False
-
         # Check if this is the best model
         current = logs.get(self.monitor)
         is_best = False
@@ -297,11 +288,7 @@ class CheckpointSaver(Callback):
 
         if should_save:
             if not self.save_best_only or is_best:
-                checkpoint_path = (
-                    self.checkpoint_path.parent
-                    / f"{self.checkpoint_path.stem}{self.checkpoint_path.suffix}"
-                )
-                self.save_checkpoint(checkpoint_path, epoch, logs)
+                self.save_checkpoint(self.checkpoint_path, epoch, logs)
 
         if is_best:
             # Use save_path directly without adding _best suffix since it may already contain it
```
```diff
@@ -371,7 +358,9 @@ class LearningRateScheduler(Callback):
         # Step the scheduler
         if hasattr(self.scheduler, "step"):
             # Some schedulers need metrics
-            if
+            if logs is None:
+                logs = {}
+            if "val_loss" in logs and hasattr(self.scheduler, "mode"):
                 self.scheduler.step(logs["val_loss"])
             else:
                 self.scheduler.step()
```
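The added guard normalizes `logs` before probing it; the `hasattr(self.scheduler, "mode")` test then distinguishes metric-driven schedulers (PyTorch's ReduceLROnPlateau exposes a `mode` attribute and expects the monitored value in `step()`) from epoch-driven ones. The same dispatch rule, demonstrated outside the callback:

```python
import torch

model = torch.nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
opt.step()  # a real loop would call this every batch before scheduler.step()

plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode="min", patience=2)
step_lr = torch.optim.lr_scheduler.StepLR(opt, step_size=5)

logs = {"val_loss": 0.42}
for sched in (plateau, step_lr):
    # Metric-driven schedulers expose `mode` and take the monitored value.
    if "val_loss" in logs and hasattr(sched, "mode"):
        sched.step(logs["val_loss"])
    else:
        sched.step()
```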
```diff
@@ -399,7 +388,6 @@ class MetricsLogger(Callback):
         self.run_on_main_process_only = True
         self.log_freq = log_freq
         self.verbose = verbose
-        self.batch_count = 0
 
     def on_epoch_end(self, epoch: int, logs: Optional[dict] = None):
         if self.verbose > 0 and (
@@ -416,8 +404,10 @@ class MetricsLogger(Callback):
             logging.info(f"Epoch {epoch + 1}: {metrics_str}")
 
     def on_batch_end(self, batch: int, logs: Optional[dict] = None):
-        self.batch_count += 1
-
+        if self.verbose > 1 and (
+            self.log_freq == "batch"
+            or (isinstance(self.log_freq, int) and (batch + 1) % self.log_freq == 0)
+        ):
             logs = logs or {}
             metrics_str = " - ".join(
                 [
```
nextrec/basic/features.py
CHANGED
```diff
@@ -2,7 +2,7 @@
 Feature definitions
 
 Date: create on 27/10/2025
-Checkpoint: edit on
+Checkpoint: edit on 20/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
```
```diff
@@ -12,22 +12,20 @@ from nextrec.utils.embedding import get_auto_embedding_dim
 from nextrec.utils.feature import normalize_to_list
 
 
-class BaseFeature
+class BaseFeature:
     def __repr__(self):
         params = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
         param_str = ", ".join(f"{k}={v!r}" for k, v in params.items())
         return f"{self.__class__.__name__}({param_str})"
 
 
-class SequenceFeature(BaseFeature):
+class EmbeddingFeature(BaseFeature):
     def __init__(
         self,
         name: str,
         vocab_size: int,
-        max_len: int = 20,
         embedding_name: str = "",
         embedding_dim: int | None = 4,
-        combiner: str = "mean",
         padding_idx: int | None = None,
         init_type: str = "normal",
         init_params: dict | None = None,
```
```diff
@@ -39,13 +37,15 @@ class SequenceFeature(BaseFeature):
     ):
         self.name = name
         self.vocab_size = vocab_size
-        self.max_len = max_len
         self.embedding_name = embedding_name or name
-        self.embedding_dim =
+        self.embedding_dim = (
+            get_auto_embedding_dim(vocab_size)
+            if embedding_dim is None
+            else embedding_dim
+        )
 
         self.init_type = init_type
         self.init_params = init_params or {}
-        self.combiner = combiner
         self.padding_idx = padding_idx
         self.l1_reg = l1_reg
         self.l2_reg = l2_reg
```
```diff
@@ -54,13 +54,15 @@ class SequenceFeature(BaseFeature):
         self.freeze_pretrained = freeze_pretrained
 
 
-class SparseFeature(BaseFeature):
+class SequenceFeature(EmbeddingFeature):
     def __init__(
         self,
         name: str,
         vocab_size: int,
+        max_len: int = 20,
         embedding_name: str = "",
         embedding_dim: int | None = 4,
+        combiner: str = "mean",
         padding_idx: int | None = None,
         init_type: str = "normal",
         init_params: dict | None = None,
```
```diff
@@ -70,19 +72,26 @@ class SparseFeature(BaseFeature):
         pretrained_weight: torch.Tensor | None = None,
         freeze_pretrained: bool = False,
     ):
-        self.name = name
-        self.vocab_size = vocab_size
-        self.embedding_name = embedding_name or name
-        self.embedding_dim =
+        super().__init__(
+            name=name,
+            vocab_size=vocab_size,
+            embedding_name=embedding_name,
+            embedding_dim=embedding_dim,
+            padding_idx=padding_idx,
+            init_type=init_type,
+            init_params=init_params,
+            l1_reg=l1_reg,
+            l2_reg=l2_reg,
+            trainable=trainable,
+            pretrained_weight=pretrained_weight,
+            freeze_pretrained=freeze_pretrained,
+        )
+        self.max_len = max_len
+        self.combiner = combiner
 
-        self.init_type = init_type
-        self.init_params = init_params or {}
-        self.padding_idx = padding_idx
-        self.l1_reg = l1_reg
-        self.l2_reg = l2_reg
-        self.trainable = trainable
-        self.pretrained_weight = pretrained_weight
-        self.freeze_pretrained = freeze_pretrained
+
+class SparseFeature(EmbeddingFeature):
+    pass
 
 
 class DenseFeature(BaseFeature):
```
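features.py now factors the shared embedding fields into an `EmbeddingFeature` base: `SparseFeature` becomes a bare alias, and `SequenceFeature` adds only `max_len` and `combiner` on top. A usage sketch of the resulting hierarchy (field values are illustrative, and constructor defaults are assumed where not shown in the diff):

```python
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature

user_id = SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=16)
hist = SequenceFeature(
    name="click_history",
    vocab_size=50_000,
    max_len=50,          # sequence-only: padding/truncation length
    combiner="mean",     # sequence-only: how token embeddings are pooled
    embedding_dim=None,  # None -> get_auto_embedding_dim(vocab_size)
)
age = DenseFeature(name="age", input_dim=1)

print(user_id)  # BaseFeature.__repr__ prints all public fields
```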
```diff
@@ -95,7 +104,11 @@ class DenseFeature(BaseFeature):
     ):
         self.name = name
         self.input_dim = max(int(input_dim or 1), 1)
-        self.embedding_dim = embedding_dim
+        self.embedding_dim = self.input_dim if embedding_dim is None else embedding_dim
+        if use_embedding and self.embedding_dim == 0:
+            raise ValueError(
+                "[Features Error] DenseFeature: use_embedding=True is incompatible with embedding_dim=0"
+            )
         if embedding_dim is not None and embedding_dim > 1:
             self.use_embedding = True
         else:
```
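For `DenseFeature`, `embedding_dim=None` now falls back to the feature's own `input_dim`, with an explicit error when an embedding is requested at width zero. The rule in isolation, as a standalone mirror of the changed lines:

```python
def resolve_dense_dim(input_dim: int | None, embedding_dim: int | None,
                      use_embedding: bool) -> int:
    # Mirrors DenseFeature: clamp input_dim, default embedding_dim to it.
    in_dim = max(int(input_dim or 1), 1)
    emb_dim = in_dim if embedding_dim is None else embedding_dim
    if use_embedding and emb_dim == 0:
        raise ValueError("use_embedding=True is incompatible with embedding_dim=0")
    return emb_dim

assert resolve_dense_dim(8, None, False) == 8  # None -> pass-through width
assert resolve_dense_dim(None, 4, True) == 4   # explicit dim wins
```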
nextrec/basic/layers.py
CHANGED
```diff
@@ -2,7 +2,7 @@
 Layer implementations used across NextRec models.
 
 Date: create on 27/10/2025
-Checkpoint: edit on
+Checkpoint: edit on 20/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
```
```diff
@@ -28,6 +28,16 @@ class PredictionLayer(nn.Module):
         use_bias: bool = True,
         return_logits: bool = False,
     ):
+        """
+        Prediction layer supporting binary and regression outputs.
+
+        Args:
+            task_type: A string or list of strings specifying the type of each task. supported types are "binary" and "regression".
+            task_dims: An integer or list of integers specifying the output dimension for each task.
+                If None, defaults to 1 for each task. If a single integer is provided, it is shared across all tasks.
+            use_bias: Whether to include a bias term in the prediction layer.
+            return_logits: If True, returns raw logits without applying activation functions.
+        """
         super().__init__()
         self.task_types = [task_type] if isinstance(task_type, str) else list(task_type)
         if len(self.task_types) == 0:
```
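The new docstring pins down the `PredictionLayer` constructor contract; a construction sketch based on it (surrounding model wiring omitted):

```python
from nextrec.basic.layers import PredictionLayer

# Single binary task, activated probabilities out.
ctr_head = PredictionLayer(task_type="binary")

# Two tasks in one head: CTR (binary) + watch time (regression).
# task_dims defaults to 1 output per task when left as None.
multi_head = PredictionLayer(
    task_type=["binary", "regression"],
    return_logits=False,
)
```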
```diff
@@ -253,8 +263,11 @@ class EmbeddingLayer(nn.Module):
         for feat in unique_feats.values():
             if isinstance(feat, DenseFeature):
                 in_dim = max(int(getattr(feat, "input_dim", 1)), 1)
-                emb_dim = getattr(feat, "embedding_dim", None)
-                out_dim = max(int(emb_dim), 1) if emb_dim else in_dim
+                if getattr(feat, "use_embedding", False):
+                    emb_dim = getattr(feat, "embedding_dim", None)
+                    out_dim = max(int(emb_dim), 1) if emb_dim else in_dim
+                else:
+                    out_dim = in_dim
                 dim += out_dim
             elif isinstance(feat, SequenceFeature) and feat.combiner == "concat":
                 dim += feat.embedding_dim * feat.max_len
```
```diff
@@ -518,13 +531,17 @@ class MultiHeadSelfAttention(nn.Module):
         self.use_residual = use_residual
         self.dropout_rate = dropout
 
-        self.W_Q = nn.Linear(embedding_dim, embedding_dim, bias=False)
-        self.W_K = nn.Linear(embedding_dim, embedding_dim, bias=False)
-        self.W_V = nn.Linear(embedding_dim, embedding_dim, bias=False)
-        self.W_O = nn.Linear(embedding_dim, embedding_dim, bias=False)
+        self.W_Q = nn.Linear(
+            embedding_dim, embedding_dim, bias=False
+        )  # Query projection
+        self.W_K = nn.Linear(embedding_dim, embedding_dim, bias=False)  # Key projection
+        self.W_V = nn.Linear(
+            embedding_dim, embedding_dim, bias=False
+        )  # Value projection
+        self.W_O = nn.Linear(
+            embedding_dim, embedding_dim, bias=False
+        )  # Output projection
 
-        if self.use_residual:
-            self.W_Res = nn.Linear(embedding_dim, embedding_dim, bias=False)
         if use_layer_norm:
             self.layer_norm = nn.LayerNorm(embedding_dim)
         else:
```
```diff
@@ -537,81 +554,60 @@ class MultiHeadSelfAttention(nn.Module):
     def forward(
         self, x: torch.Tensor, attention_mask: torch.Tensor | None = None
     ) -> torch.Tensor:
-        """
-
-
-
-        Returns:
-            output: [batch_size, seq_len, embedding_dim]
-        """
-        batch_size, seq_len, _ = x.shape
-        Q = self.W_Q(x)  # [batch_size, seq_len, embedding_dim]
+        # x: [Batch, Length, Dim]
+        B, L, D = x.shape
+
+        Q = self.W_Q(x)
         K = self.W_K(x)
         V = self.W_V(x)
 
-        # Split into multiple heads
-        Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
-        K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
-        V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
+        Q = Q.view(B, L, self.num_heads, self.head_dim).transpose(
+            1, 2
+        )  # [Batch, Heads, Length, head_dim]
+        K = K.view(B, L, self.num_heads, self.head_dim).transpose(1, 2)
+        V = V.view(B, L, self.num_heads, self.head_dim).transpose(1, 2)
+
+        key_padding_mask = None
+        if attention_mask is not None:
+            if attention_mask.dim() == 2:  # [B,L], 1=valid, 0=pad
+                key_padding_mask = ~attention_mask.bool()
+                attn_mask = key_padding_mask[:, None, None, :]
+                attn_mask = attn_mask.expand(B, 1, L, L)
+            elif attention_mask.dim() == 3:  # [B,L,L], 1=allowed, 0=masked
+                attn_mask = (~attention_mask.bool()).view(B, 1, L, L)
+            else:
+                raise ValueError("attention_mask must be [B,L] or [B,L,L]")
+        else:
+            attn_mask = None
 
         if self.use_flash_attention:
-
-            if attention_mask is not None:
-                # Convert mask to [batch_size, 1, seq_len, seq_len] format
-                if attention_mask.dim() == 2:
-                    # [B, L] -> [B, 1, 1, L]
-                    attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
-                elif attention_mask.dim() == 3:
-                    # [B, L, L] -> [B, 1, L, L]
-                    attention_mask = attention_mask.unsqueeze(1)
-            attention_output = F.scaled_dot_product_attention(
+            attn = F.scaled_dot_product_attention(
                 Q,
                 K,
                 V,
-                attn_mask=
+                attn_mask=attn_mask,
                 dropout_p=self.dropout_rate if self.training else 0.0,
-            )
-            # Handle potential NaN values
-            attention_output = torch.nan_to_num(attention_output, nan=0.0)
+            )  # [B,H,L,dh]
         else:
-            # Fallback to standard attention
             scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim**0.5)
+            if attn_mask is not None:
+                scores = scores.masked_fill(attn_mask, float("-inf"))
+            attn_weights = torch.softmax(scores, dim=-1)
+            attn_weights = self.dropout(attn_weights)
+            attn = torch.matmul(attn_weights, V)  # [B,H,L,dh]
 
-
-            if attention_mask is not None:
-                if attention_mask.dim() == 2:
-                    # [B, L] -> [B, 1, 1, L]
-                    attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
-                elif attention_mask.dim() == 3:
-                    # [B, L, L] -> [B, 1, L, L]
-                    attention_mask = attention_mask.unsqueeze(1)
-            scores = scores.masked_fill(~attention_mask, float("-1e9"))
-
-            attention_weights = F.softmax(scores, dim=-1)
-            attention_weights = self.dropout(attention_weights)
-            attention_output = torch.matmul(
-                attention_weights, V
-            )  # [batch_size, num_heads, seq_len, head_dim]
-
-            # Concatenate heads
-            attention_output = attention_output.transpose(1, 2).contiguous()
-            attention_output = attention_output.view(
-                batch_size, seq_len, self.embedding_dim
-            )
+        attn = attn.transpose(1, 2).contiguous().view(B, L, D)
+        out = self.W_O(attn)
 
-        # Output projection
-        output = self.W_O(attention_output)
-
-        # Residual connection
         if self.use_residual:
-            output = output + self.W_Res(x)
-
-        # Layer normalization
+            out = out + x
         if self.layer_norm is not None:
-            output = self.layer_norm(output)
+            out = self.layer_norm(out)
 
-
-        return output
+        if key_padding_mask is not None:
+            out = out * (~key_padding_mask).unsqueeze(-1)
+
+        return out
 
 
 class AttentionPoolingLayer(nn.Module):
```
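The rewritten `forward` normalizes both mask shapes to a boolean `[B, 1, L, L]` tensor in which `True` marks pairs to block, applies it with `masked_fill` in the fallback branch, and re-zeroes padded positions on the way out. A standalone sketch of that fallback convention (note that, per the PyTorch docs, a boolean `attn_mask` passed to `F.scaled_dot_product_attention` uses the opposite convention, `True` = may attend, so the two branches treat the same tensor differently):

```python
import torch

B, H, L, dh = 2, 4, 6, 8
Q = torch.randn(B, H, L, dh)
K = torch.randn(B, H, L, dh)
V = torch.randn(B, H, L, dh)

# [B, L] padding mask: 1 = valid token, 0 = padding.
valid = torch.tensor([[1, 1, 1, 1, 0, 0],
                      [1, 1, 1, 0, 0, 0]])
key_padding_mask = ~valid.bool()                   # True = pad
block = key_padding_mask[:, None, None, :].expand(B, 1, L, L)

scores = Q @ K.transpose(-2, -1) / dh**0.5         # [B, H, L, L]
scores = scores.masked_fill(block, float("-inf"))  # True = blocked
attn = torch.softmax(scores, dim=-1) @ V           # [B, H, L, dh]

# Zero out outputs at padded query positions, as the new forward does.
out = attn.transpose(1, 2).reshape(B, L, H * dh)
out = out * valid.unsqueeze(-1)
```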
nextrec/basic/loggers.py
CHANGED
```diff
@@ -2,7 +2,7 @@
 NextRec Basic Loggers
 
 Date: create on 27/10/2025
-Checkpoint: edit on
+Checkpoint: edit on 20/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
```
```diff
@@ -185,7 +185,7 @@ class TrainingLogger:
     ) -> dict[str, float]:
         formatted: dict[str, float] = {}
         for key, value in metrics.items():
-            if isinstance(value, numbers.Number):
+            if isinstance(value, numbers.Real):
                 formatted[f"{split}/{key}"] = float(value)
             elif hasattr(value, "item"):
                 try:
```
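The one-token change to `numbers.Real` (the diff truncates the original check; `numbers.Number` is assumed here) matters because complex values are `Number`s but reject `float()`; `Real` admits ints, floats, and NumPy scalars while filtering complex out:

```python
import numbers

import numpy as np

for v in (3, 2.5, np.float32(0.7), complex(1, 2)):
    print(
        type(v).__name__,
        isinstance(v, numbers.Number),  # True for all four, complex included
        isinstance(v, numbers.Real),    # False for complex -> float(v) never runs
    )
```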
nextrec/basic/metrics.py
CHANGED
```diff
@@ -2,7 +2,7 @@
 Metrics computation and configuration for model evaluation.
 
 Date: create on 27/10/2025
-Checkpoint: edit on
+Checkpoint: edit on 20/12/2025
 Author: Yang Zhou,zyaztec@gmail.com
 """
 
```
```diff
@@ -49,8 +49,8 @@ TASK_DEFAULT_METRICS = {
 
 def check_user_id(*metric_sources: Any) -> bool:
     """Return True when GAUC or ranking@K metrics appear in the provided sources."""
-    metric_names
-    stack
+    metric_names = set()
+    stack = list(metric_sources)
     while stack:
         item = stack.pop()
         if not item:
```
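`check_user_id` walks arbitrarily nested metric specs (flat lists, per-task dicts) looking for names such as `gauc` or `hit@10` that require per-user grouping. A simplified reimplementation of that stack-based walk, not the package's exact code:

```python
def needs_user_id(*metric_sources) -> bool:
    names = set()
    stack = list(metric_sources)
    while stack:
        item = stack.pop()
        if not item:
            continue
        if isinstance(item, str):
            names.add(item.lower())
        elif isinstance(item, dict):
            stack.extend(item.values())   # per-task metric lists
        elif isinstance(item, (list, tuple, set)):
            stack.extend(item)            # flatten nested lists
    return any(n == "gauc" or "@" in n for n in names)

assert needs_user_id(["auc", "logloss"]) is False
assert needs_user_id({"ctr": ["gauc"], "cvr": ["mse"]}) is True
assert needs_user_id("recall@20") is True
```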
```diff
@@ -367,10 +367,12 @@ def configure_metrics(
     target_names: list[str],  # ['target1', 'target2']
 ) -> tuple[list[str], dict[str, list[str]] | None, str]:
     """Configure metrics based on task and user input."""
+
     primary_task = task[0] if isinstance(task, list) else task
     nums_task = len(task) if isinstance(task, list) else 1
-    metrics_list
-    task_specific_metrics
+    metrics_list = []
+    task_specific_metrics = None
+
     if isinstance(metrics, dict):
         metrics_list = []
         task_specific_metrics = {}
```
```diff
@@ -462,6 +464,7 @@ def compute_single_metric(
     user_ids: np.ndarray | None = None,
 ) -> float:
     """Compute a single metric given true and predicted values."""
+
     y_p_binary = (y_pred > 0.5).astype(int)
     metric_lower = metric.lower()
     try:
```
```diff
@@ -575,6 +578,7 @@ def evaluate_metrics(
     user_ids: np.ndarray | None = None,  # example: User IDs for GAUC computation
 ) -> dict:  # {'auc': 0.75, 'logloss': 0.45, 'mse_target2': 3.2}
     """Evaluate specified metrics for given true and predicted values."""
+
     result = {}
     if y_true is None or y_pred is None:
         return result
```