nextrec 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextrec/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.4.5"
+ __version__ = "0.4.6"
nextrec/basic/features.py CHANGED
@@ -33,6 +33,8 @@ class SequenceFeature(BaseFeature):
  l1_reg: float = 0.0,
  l2_reg: float = 1e-5,
  trainable: bool = True,
+ pretrained_weight: torch.Tensor | None = None,
+ freeze_pretrained: bool = False,
  ):
  self.name = name
  self.vocab_size = vocab_size
@@ -47,6 +49,8 @@ class SequenceFeature(BaseFeature):
  self.l1_reg = l1_reg
  self.l2_reg = l2_reg
  self.trainable = trainable
+ self.pretrained_weight = pretrained_weight
+ self.freeze_pretrained = freeze_pretrained


  class SparseFeature(BaseFeature):
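
Note: the two new SequenceFeature arguments above let callers inject a pretrained embedding table. A minimal usage sketch — only name, vocab_size, pretrained_weight and freeze_pretrained are visible in this hunk; embedding_dim is an assumed parameter name:

    import torch
    from nextrec.basic.features import SequenceFeature

    pretrained = torch.randn(10000, 64)          # e.g. item vectors trained elsewhere
    hist_items = SequenceFeature(
        name="hist_item_ids",
        vocab_size=10000,
        embedding_dim=64,                        # assumed parameter name, not shown in this hunk
        pretrained_weight=pretrained,            # new in 0.4.6
        freeze_pretrained=True,                  # new in 0.4.6: keep the pretrained table fixed
    )
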
nextrec/basic/layers.py CHANGED
@@ -496,12 +496,18 @@ class HadamardInteractionLayer(nn.Module):


  class MultiHeadSelfAttention(nn.Module):
+ """
+ Multi-Head Self-Attention layer with Flash Attention support.
+ Uses PyTorch 2.0+ scaled_dot_product_attention when available for better performance.
+ """
+
  def __init__(
  self,
  embedding_dim: int,
  num_heads: int = 2,
  dropout: float = 0.0,
  use_residual: bool = True,
+ use_layer_norm: bool = False,
  ):
  super().__init__()
  if embedding_dim % num_heads != 0:
@@ -512,45 +518,100 @@ class MultiHeadSelfAttention(nn.Module):
  self.num_heads = num_heads
  self.head_dim = embedding_dim // num_heads
  self.use_residual = use_residual
+ self.dropout_rate = dropout
+
  self.W_Q = nn.Linear(embedding_dim, embedding_dim, bias=False)
  self.W_K = nn.Linear(embedding_dim, embedding_dim, bias=False)
  self.W_V = nn.Linear(embedding_dim, embedding_dim, bias=False)
+ self.W_O = nn.Linear(embedding_dim, embedding_dim, bias=False)
+
  if self.use_residual:
  self.W_Res = nn.Linear(embedding_dim, embedding_dim, bias=False)
+ if use_layer_norm:
+ self.layer_norm = nn.LayerNorm(embedding_dim)
+ else:
+ self.layer_norm = None
+
  self.dropout = nn.Dropout(dropout)
+ # Check if Flash Attention is available
+ self.use_flash_attention = hasattr(F, "scaled_dot_product_attention")

- def forward(self, x: torch.Tensor) -> torch.Tensor:
- batch_size, num_fields, _ = x.shape
- Q = self.W_Q(x) # [batch_size, num_fields, embedding_dim]
+ def forward(
+ self, x: torch.Tensor, attention_mask: torch.Tensor | None = None
+ ) -> torch.Tensor:
+ """
+ Args:
+ x: [batch_size, seq_len, embedding_dim]
+ attention_mask: [batch_size, seq_len] or [batch_size, seq_len, seq_len], boolean mask where True indicates valid positions
+ Returns:
+ output: [batch_size, seq_len, embedding_dim]
+ """
+ batch_size, seq_len, _ = x.shape
+ Q = self.W_Q(x) # [batch_size, seq_len, embedding_dim]
  K = self.W_K(x)
  V = self.W_V(x)
- # Split into multiple heads: [batch_size, num_heads, num_fields, head_dim]
- Q = Q.view(batch_size, num_fields, self.num_heads, self.head_dim).transpose(
- 1, 2
- )
- K = K.view(batch_size, num_fields, self.num_heads, self.head_dim).transpose(
- 1, 2
- )
- V = V.view(batch_size, num_fields, self.num_heads, self.head_dim).transpose(
- 1, 2
- )
- # Attention scores
- scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim**0.5)
- attention_weights = F.softmax(scores, dim=-1)
- attention_weights = self.dropout(attention_weights)
- attention_output = torch.matmul(
- attention_weights, V
- ) # [batch_size, num_heads, num_fields, head_dim]
+
+ # Split into multiple heads: [batch_size, num_heads, seq_len, head_dim]
+ Q = Q.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
+ K = K.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
+ V = V.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
+
+ if self.use_flash_attention:
+ # Use PyTorch 2.0+ Flash Attention
+ if attention_mask is not None:
+ # Convert mask to [batch_size, 1, seq_len, seq_len] format
+ if attention_mask.dim() == 2:
+ # [B, L] -> [B, 1, 1, L]
+ attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+ elif attention_mask.dim() == 3:
+ # [B, L, L] -> [B, 1, L, L]
+ attention_mask = attention_mask.unsqueeze(1)
+ attention_output = F.scaled_dot_product_attention(
+ Q,
+ K,
+ V,
+ attn_mask=attention_mask,
+ dropout_p=self.dropout_rate if self.training else 0.0,
+ )
+ # Handle potential NaN values
+ attention_output = torch.nan_to_num(attention_output, nan=0.0)
+ else:
+ # Fallback to standard attention
+ scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim**0.5)
+
+ if attention_mask is not None:
+ # Process mask for standard attention
+ if attention_mask.dim() == 2:
+ # [B, L] -> [B, 1, 1, L]
+ attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
+ elif attention_mask.dim() == 3:
+ # [B, L, L] -> [B, 1, L, L]
+ attention_mask = attention_mask.unsqueeze(1)
+ scores = scores.masked_fill(~attention_mask, float("-1e9"))
+
+ attention_weights = F.softmax(scores, dim=-1)
+ attention_weights = self.dropout(attention_weights)
+ attention_output = torch.matmul(
+ attention_weights, V
+ ) # [batch_size, num_heads, seq_len, head_dim]
+
  # Concatenate heads
  attention_output = attention_output.transpose(1, 2).contiguous()
  attention_output = attention_output.view(
- batch_size, num_fields, self.embedding_dim
+ batch_size, seq_len, self.embedding_dim
  )
+
+ # Output projection
+ output = self.W_O(attention_output)
+
  # Residual connection
  if self.use_residual:
- output = attention_output + self.W_Res(x)
- else:
- output = attention_output
+ output = output + self.W_Res(x)
+
+ # Layer normalization
+ if self.layer_norm is not None:
+ output = self.layer_norm(output)
+
  output = F.relu(output)
  return output

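Usage sketch for the reworked layer: forward now accepts an optional boolean attention_mask (True marks valid positions, per the new docstring) and routes through F.scaled_dot_product_attention when PyTorch provides it. The import path follows this file (nextrec/basic/layers.py); shapes are illustrative.

    import torch
    from nextrec.basic.layers import MultiHeadSelfAttention

    attn = MultiHeadSelfAttention(embedding_dim=64, num_heads=2, use_layer_norm=True)
    x = torch.randn(8, 20, 64)                   # [batch, seq_len, embedding_dim]
    mask = torch.ones(8, 20, dtype=torch.bool)   # [batch, seq_len], True = keep
    mask[:, 15:] = False                         # last 5 positions are padding
    out = attn(x, attention_mask=mask)           # [8, 20, 64]
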
@@ -653,3 +714,21 @@ class AttentionPoolingLayer(nn.Module):
  # Weighted sum over keys: (B, L, 1) * (B, L, D) -> (B, D)
  output = torch.sum(attention_weights * keys, dim=1)
  return output
+
+
+ class RMSNorm(torch.nn.Module):
+ """
+ Root Mean Square Layer Normalization.
+ Reference: https://arxiv.org/abs/1910.07467
+ """
+
+ def __init__(self, hidden_size: int, eps: float = 1e-6):
+ super().__init__()
+ self.eps = eps
+ self.weight = torch.nn.Parameter(torch.ones(hidden_size))
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ # RMS(x) = sqrt(mean(x^2) + eps)
+ variance = torch.mean(x**2, dim=-1, keepdim=True)
+ x_normalized = x * torch.rsqrt(variance + self.eps)
+ return self.weight * x_normalized
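
A short sketch of the added RMSNorm (import path assumed to be nextrec.basic.layers, since the class is added to that file): it rescales each vector by its root mean square and applies a learnable per-dimension gain, with no mean-centering and no bias.

    import torch
    from nextrec.basic.layers import RMSNorm

    norm = RMSNorm(hidden_size=64)
    x = torch.randn(4, 64)
    y = norm(x)
    # With the default weight of ones, every row of y has approximately unit RMS:
    print(y.pow(2).mean(dim=-1).sqrt())          # ~1.0 per row
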
nextrec/basic/metrics.py CHANGED
@@ -44,6 +44,11 @@ TASK_DEFAULT_METRICS = {
  + [f"recall@{k}" for k in (5, 10, 20)]
  + [f"ndcg@{k}" for k in (5, 10, 20)]
  + [f"mrr@{k}" for k in (5, 10, 20)],
+ # generative/multiclass next-item prediction defaults
+ "multiclass": ["accuracy"]
+ + [f"hitrate@{k}" for k in (1, 5, 10)]
+ + [f"recall@{k}" for k in (1, 5, 10)]
+ + [f"mrr@{k}" for k in (1, 5, 10)],
  }


@@ -158,6 +163,51 @@ def group_indices_by_user(user_ids: np.ndarray, n_samples: int) -> list[np.ndarr
  return groups


+ def normalize_multiclass_inputs(
+ y_true: np.ndarray, y_pred: np.ndarray
+ ) -> tuple[np.ndarray, np.ndarray]:
+ """
+ Normalize multiclass inputs to consistent shapes.
+
+ y_true: [N] of class ids
+ y_pred: [N, C] of logits/probabilities
+ """
+ labels = np.asarray(y_true).reshape(-1)
+ scores = np.asarray(y_pred)
+ if scores.ndim == 1:
+ scores = scores.reshape(scores.shape[0], -1)
+ if scores.shape[0] != labels.shape[0]:
+ raise ValueError(
+ f"[Metric Warning] y_true length {labels.shape[0]} != y_pred batch {scores.shape[0]} for multiclass metrics."
+ )
+ return labels.astype(int), scores
+
+
+ def multiclass_topk_hit_rate(y_true: np.ndarray, y_pred: np.ndarray, k: int) -> float:
+ labels, scores = normalize_multiclass_inputs(y_true, y_pred)
+ if scores.shape[1] == 0:
+ return 0.0
+ k = min(k, scores.shape[1])
+ topk_idx = np.argpartition(-scores, kth=k - 1, axis=1)[:, :k]
+ hits = (topk_idx == labels[:, None]).any(axis=1)
+ return float(hits.mean()) if hits.size > 0 else 0.0
+
+
+ def multiclass_mrr_at_k(y_true: np.ndarray, y_pred: np.ndarray, k: int) -> float:
+ labels, scores = normalize_multiclass_inputs(y_true, y_pred)
+ if scores.shape[1] == 0:
+ return 0.0
+ k = min(k, scores.shape[1])
+ # full sort for stable ranks
+ topk_idx = np.argsort(-scores, axis=1)[:, :k]
+ ranks = np.full(labels.shape, fill_value=k + 1, dtype=np.float32)
+ for idx in range(k):
+ match = topk_idx[:, idx] == labels
+ ranks[match] = idx + 1
+ reciprocals = np.where(ranks <= k, 1.0 / ranks, 0.0)
+ return float(reciprocals.mean()) if reciprocals.size > 0 else 0.0
+
+
  def compute_precision_at_k(
  y_true: np.ndarray, y_pred: np.ndarray, user_ids: np.ndarray, k: int
  ) -> float:
@@ -463,8 +513,28 @@ def compute_single_metric(
  ) -> float:
  """Compute a single metric given true and predicted values."""
  y_p_binary = (y_pred > 0.5).astype(int)
+ metric_lower = metric.lower()
+ is_multiclass = task_type == "multiclass" and y_pred.ndim >= 2
+ if is_multiclass:
+ # Dedicated path for multiclass logits (e.g., next-item prediction)
+ labels, scores = normalize_multiclass_inputs(y_true, y_pred)
+ if metric_lower in ("accuracy", "acc"):
+ preds = scores.argmax(axis=1)
+ return float((preds == labels).mean())
+ if metric_lower.startswith("hitrate@") or metric_lower.startswith("hr@"):
+ k_str = metric_lower.split("@")[1]
+ k = int(k_str)
+ return multiclass_topk_hit_rate(labels, scores, k)
+ if metric_lower.startswith("recall@"):
+ k = int(metric_lower.split("@")[1])
+ return multiclass_topk_hit_rate(labels, scores, k)
+ if metric_lower.startswith("mrr@"):
+ k = int(metric_lower.split("@")[1])
+ return multiclass_mrr_at_k(labels, scores, k)
+ # fall back to accuracy if unsupported metric is requested
+ preds = scores.argmax(axis=1)
+ return float((preds == labels).mean())
  try:
- metric_lower = metric.lower()
  if metric_lower.startswith("recall@"):
  k = int(metric_lower.split("@")[1])
  return compute_recall_at_k(y_true, y_pred, user_ids, k) # type: ignore
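
Worked example for the new multiclass helpers (function names come from this diff; the numbers are illustrative only):

    import numpy as np
    from nextrec.basic.metrics import multiclass_topk_hit_rate, multiclass_mrr_at_k

    y_true = np.array([2, 0, 1])                 # target class ids
    y_pred = np.array([
        [0.1, 0.2, 0.6, 0.1],                    # class 2 ranked 1st -> hit, rr = 1
        [0.3, 0.5, 0.1, 0.1],                    # class 0 ranked 2nd -> hit, rr = 1/2
        [0.2, 0.1, 0.4, 0.3],                    # class 1 ranked 4th -> miss, rr = 0
    ])
    multiclass_topk_hit_rate(y_true, y_pred, k=2)   # 2/3 ≈ 0.667
    multiclass_mrr_at_k(y_true, y_pred, k=2)        # (1 + 0.5 + 0) / 3 = 0.5
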
nextrec/basic/model.py CHANGED
@@ -126,11 +126,9 @@ class BaseModel(FeatureSet, nn.Module):
  self.session = create_session(session_id)
  self.session_path = self.session.root # pwd/session_id, path for this session
  self.checkpoint_path = os.path.join(
- self.session_path, self.model_name + "_checkpoint.model"
- ) # example: pwd/session_id/DeepFM_checkpoint.model
- self.best_path = os.path.join(
- self.session_path, self.model_name + "_best.model"
- )
+ self.session_path, self.model_name + "_checkpoint.pt"
+ ) # example: pwd/session_id/DeepFM_checkpoint.pt
+ self.best_path = os.path.join(self.session_path, self.model_name + "_best.pt")
  self.features_config_path = os.path.join(
  self.session_path, "features_config.pkl"
  )
@@ -1563,7 +1561,7 @@ class BaseModel(FeatureSet, nn.Module):
  path=save_path,
  default_dir=self.session_path,
  default_name=self.model_name,
- suffix=".model",
+ suffix=".pt",
  add_timestamp=add_timestamp,
  )
  model_path = Path(target_path)
@@ -1603,16 +1601,16 @@ class BaseModel(FeatureSet, nn.Module):
  self.to(self.device)
  base_path = Path(save_path)
  if base_path.is_dir():
- model_files = sorted(base_path.glob("*.model"))
+ model_files = sorted(base_path.glob("*.pt"))
  if not model_files:
  raise FileNotFoundError(
- f"[BaseModel-load-model Error] No *.model file found in directory: {base_path}"
+ f"[BaseModel-load-model Error] No *.pt file found in directory: {base_path}"
  )
  model_path = model_files[-1]
  config_dir = base_path
  else:
  model_path = (
- base_path.with_suffix(".model") if base_path.suffix == "" else base_path
+ base_path.with_suffix(".pt") if base_path.suffix == "" else base_path
  )
  config_dir = model_path.parent
  if not model_path.exists():
@@ -1665,21 +1663,21 @@ class BaseModel(FeatureSet, nn.Module):
  ) -> "BaseModel":
  """
  Load a model from a checkpoint path. The checkpoint path should contain:
- a .model file and a features_config.pkl file.
+ a .pt file and a features_config.pkl file.
  """
  base_path = Path(checkpoint_path)
  verbose = kwargs.pop("verbose", True)
  if base_path.is_dir():
- model_candidates = sorted(base_path.glob("*.model"))
+ model_candidates = sorted(base_path.glob("*.pt"))
  if not model_candidates:
  raise FileNotFoundError(
- f"[BaseModel-from-checkpoint Error] No *.model file found under: {base_path}"
+ f"[BaseModel-from-checkpoint Error] No *.pt file found under: {base_path}"
  )
  model_file = model_candidates[-1]
  config_dir = base_path
  else:
  model_file = (
- base_path.with_suffix(".model") if base_path.suffix == "" else base_path
+ base_path.with_suffix(".pt") if base_path.suffix == "" else base_path
  )
  config_dir = model_file.parent
  features_config_path = config_dir / "features_config.pkl"
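
With the suffix change, 0.4.6 sessions contain <ModelName>_checkpoint.pt and <ModelName>_best.pt next to features_config.pkl, and directories saved by 0.4.5 (*.model) will no longer be matched by the new *.pt glob. A hedged loading sketch — DeepFM is taken from the comment above, but its exact import location is an assumption:

    from nextrec.models import DeepFM            # assumed import path

    # Directory holding a *.pt checkpoint plus features_config.pkl
    model = DeepFM.from_checkpoint("session_id/")
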
@@ -25,9 +25,7 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
  raise KeyError(f"Unsupported data type for extracting column {name}")


- def split_dict_random(
- data_dict: dict, test_size: float = 0.2, random_state: int | None = None
- ):
+ def split_dict_random(data_dict, test_size=0.2, random_state=None):

  lengths = [len(v) for v in data_dict.values()]
  if len(set(lengths)) != 1:
@@ -0,0 +1,16 @@
+ """
+ Generative Recommendation Models
+
+ This module contains generative models for recommendation tasks.
+ """
+
+ from nextrec.models.generative.hstu import HSTU
+ from nextrec.models.generative.rqvae import (
+ RQVAE,
+ RQ,
+ VQEmbedding,
+ BalancedKmeans,
+ kmeans,
+ )
+
+ __all__ = ["HSTU", "RQVAE", "RQ", "VQEmbedding", "BalancedKmeans", "kmeans"]
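
Assuming this new file is the sub-package's __init__.py (its imports suggest nextrec/models/generative/__init__.py), the re-exports mean the public names can be imported directly; only the names in __all__ above are assumed to exist:

    from nextrec.models.generative import HSTU, RQVAE, BalancedKmeans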