PyPI - nextrec - Versions diffs - 0.3.2__tar.gz → 0.3.4__tar.gz - Mend

nextrec 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

{nextrec-0.3.2 → nextrec-0.3.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nextrec
-Version: 0.3.2
+Version: 0.3.4
 Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
 Project-URL: Homepage, https://github.com/zerolovesea/NextRec
 Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.2-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.4-orange.svg)
 English | [中文文档](README_zh.md)
@@ -110,7 +110,7 @@ To dive deeper, Jupyter notebooks are available:
 - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
 - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
-> Current version [0.3.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+> Current version [0.3.4]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
 ## 5-Minute Quick Start

{nextrec-0.3.2 → nextrec-0.3.4}/README.md RENAMED Viewed

@@ -7,7 +7,7 @@
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.2-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.4-orange.svg)
 English | [中文文档](README_zh.md)
@@ -54,7 +54,7 @@ To dive deeper, Jupyter notebooks are available:
 - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
 - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
-> Current version [0.3.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+> Current version [0.3.4]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
 ## 5-Minute Quick Start

{nextrec-0.3.2 → nextrec-0.3.4}/README_zh.md RENAMED Viewed

@@ -7,7 +7,7 @@
 ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
 ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
 ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
-![Version](https://img.shields.io/badge/Version-0.3.2-orange.svg)
+![Version](https://img.shields.io/badge/Version-0.3.4-orange.svg)
 [English Version](README.md) | 中文文档
@@ -54,7 +54,7 @@ NextRec采用模块化、低耦合的工程设计，使得推荐系统从数据
 - [如何上手NextRec框架](/tutorials/notebooks/zh/Hands%20on%20nextrec.ipynb)
 - [如何使用数据处理器进行数据预处理](/tutorials/notebooks/zh/Hands%20on%20dataprocessor.ipynb)
-> 当前版本[0.3.2]，召回模型模块尚不完善，可能存在一些兼容性问题或意外报错，如果遇到问题，欢迎开发者在Issue区提出问题。
+> 当前版本[0.3.4]，召回模型模块尚不完善，可能存在一些兼容性问题或意外报错，如果遇到问题，欢迎开发者在Issue区提出问题。
 ## 5分钟快速上手

{nextrec-0.3.2 → nextrec-0.3.4}/docs/rtd/conf.py RENAMED Viewed

@@ -11,7 +11,7 @@ sys.path.insert(0, str(PROJECT_ROOT / "nextrec"))
 project = "NextRec"
 copyright = "2025, Yang Zhou"
 author = "Yang Zhou"
-release = "0.3.2"
+release = "0.3.4"
 extensions = [
     "myst_parser",

nextrec-0.3.4/nextrec/__version__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.3.4"

{nextrec-0.3.2 → nextrec-0.3.4}/nextrec/basic/features.py RENAMED Viewed

@@ -2,19 +2,16 @@
 Feature definitions
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 02/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 import torch
 from nextrec.utils.embedding import get_auto_embedding_dim
+from nextrec.utils.common import normalize_to_list
 class BaseFeature(object):
     def __repr__(self):
-        params = {
-            k: v
-            for k, v in self.__dict__.items()
-            if not k.startswith("_")
-        }
+        params = {k: v for k, v in self.__dict__.items() if not k.startswith("_") }
         param_str = ", ".join(f"{k}={v!r}" for k, v in params.items())
         return f"{self.__class__.__name__}({param_str})"
@@ -93,11 +90,8 @@ class DenseFeature(BaseFeature):
         else:
             self.use_embedding = use_embedding  # user decides for dim <= 1
-class FeatureSpecMixin:
-    """
-    Mixin that normalizes dense/sparse/sequence feature lists and target/id columns.
-    """
-    def _set_feature_config(
+class FeatureSet:
+    def set_all_features(
         self,
         dense_features: list[DenseFeature] | None = None,
         sparse_features: list[SparseFeature] | None = None,
@@ -111,21 +105,14 @@ class FeatureSpecMixin:
         self.all_features = self.dense_features + self.sparse_features + self.sequence_features
         self.feature_names = [feat.name for feat in self.all_features]
-        self.target_columns = self._normalize_to_list(target)
-        self.id_columns = self._normalize_to_list(id_columns)
+        self.target_columns = normalize_to_list(target)
+        self.id_columns = normalize_to_list(id_columns)
-    def _set_target_id_config(
+    def set_target_id(
         self,
         target: str | list[str] | None = None,
         id_columns: str | list[str] | None = None,
     ) -> None:
-        self.target_columns = self._normalize_to_list(target)
-        self.id_columns = self._normalize_to_list(id_columns)
+        self.target_columns = normalize_to_list(target)
+        self.id_columns = normalize_to_list(id_columns)
-    @staticmethod
-    def _normalize_to_list(value: str | list[str] | None) -> list[str]:
-        if value is None:
-            return []
-        if isinstance(value, str):
-            return [value]
-        return list(value)

{nextrec-0.3.2 → nextrec-0.3.4}/nextrec/basic/layers.py RENAMED Viewed

@@ -18,23 +18,6 @@ from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.utils.initializer import get_initializer
 from nextrec.basic.activation import activation_layer
-__all__ = [
-    "PredictionLayer",
-    "EmbeddingLayer",
-    "InputMask",
-    "LR",
-    "ConcatPooling",
-    "AveragePooling",
-    "SumPooling",
-    "MLP",
-    "FM",
-    "CrossLayer",
-    "SENETLayer",
-    "BiLinearInteractionLayer",
-    "MultiHeadSelfAttention",
-    "AttentionPoolingLayer",
-]
 class PredictionLayer(nn.Module):
     def __init__(
         self,
@@ -44,12 +27,10 @@ class PredictionLayer(nn.Module):
         return_logits: bool = False,
     ):
         super().__init__()
-        if isinstance(task_type, str):
-            self.task_types = [task_type]
-        else:
-            self.task_types = list(task_type)
+        self.task_types = [task_type] if isinstance(task_type, str) else list(task_type)
         if len(self.task_types) == 0:
             raise ValueError("At least one task_type must be specified.")
         if task_dims is None:
             dims = [1] * len(self.task_types)
         elif isinstance(task_dims, int):
@@ -64,7 +45,7 @@ class PredictionLayer(nn.Module):
         self.total_dim = sum(self.task_dims)
         self.return_logits = return_logits
-        # Keep slice offsets per task
+        # slice offsets per task
         start = 0
         self._task_slices: list[tuple[int, int]] = []
         for dim in self.task_dims:
@@ -85,27 +66,25 @@ class PredictionLayer(nn.Module):
         logits = x if self.bias is None else x + self.bias
         outputs = []
         for task_type, (start, end) in zip(self.task_types, self._task_slices):
-            task_logits = logits[..., start:end] # Extract logits for the current task
+            task_logits = logits[..., start:end] # logits for the current task
             if self.return_logits:
                 outputs.append(task_logits)
                 continue
-            activation = self._get_activation(task_type)
+            task = task_type.lower()
+            if task == 'binary':
+                activation = torch.sigmoid
+            elif task == 'regression':
+                activation = lambda x: x
+            elif task == 'multiclass':
+                activation = lambda x: torch.softmax(x, dim=-1)
+            else:
+                raise ValueError(f"[PredictionLayer Error]: Unsupported task_type '{task_type}'.")
             outputs.append(activation(task_logits))
         result = torch.cat(outputs, dim=-1)
         if result.shape[-1] == 1:
             result = result.squeeze(-1)
         return result
-    def _get_activation(self, task_type: str):
-        task = task_type.lower()
-        if task == 'binary':
-            return torch.sigmoid
-        if task == 'regression':
-            return lambda x: x
-        if task == 'multiclass':
-            return lambda x: torch.softmax(x, dim=-1)
-        raise ValueError(f"[PredictionLayer Error]: Unsupported task_type '{task_type}'.")
 class EmbeddingLayer(nn.Module):
     def __init__(self, features: list):
         super().__init__()
@@ -145,7 +124,7 @@ class EmbeddingLayer(nn.Module):
                 self.dense_input_dims[feature.name] = in_dim
             else:
                 raise TypeError(f"[EmbeddingLayer Error]: Unsupported feature type: {type(feature)}")
-        self.output_dim = self._compute_output_dim()
+        self.output_dim = self.compute_output_dim()
     def forward(
         self,
@@ -181,7 +160,7 @@ class EmbeddingLayer(nn.Module):
                 sparse_embeds.append(pooling_layer(seq_emb, feature_mask).unsqueeze(1))
             elif isinstance(feature, DenseFeature):
-                dense_embeds.append(self._project_dense(feature, x))
+                dense_embeds.append(self.project_dense(feature, x))
         if squeeze_dim:
             flattened_sparse = [emb.flatten(start_dim=1) for emb in sparse_embeds]
@@ -212,7 +191,7 @@ class EmbeddingLayer(nn.Module):
             raise ValueError("[EmbeddingLayer Error]: squeeze_dim=False requires at least one sparse/sequence feature or dense features with identical projected dimensions.")
         return torch.cat(output_embeddings, dim=1)
-    def _project_dense(self, feature: DenseFeature, x: dict[str, torch.Tensor]) -> torch.Tensor:
+    def project_dense(self, feature: DenseFeature, x: dict[str, torch.Tensor]) -> torch.Tensor:
         if feature.name not in x:
             raise KeyError(f"[EmbeddingLayer Error]:Dense feature '{feature.name}' is missing from input.")
         value = x[feature.name].float()
@@ -228,11 +207,7 @@ class EmbeddingLayer(nn.Module):
         dense_layer = self.dense_transforms[feature.name]
         return dense_layer(value)
-    def _compute_output_dim(self, features: list[DenseFeature | SequenceFeature | SparseFeature] | None = None) -> int:
-        """
-        Compute flattened embedding dimension for provided features or all tracked features.
-        Deduplicates by feature name to avoid double-counting shared embeddings.
-        """
+    def compute_output_dim(self, features: list[DenseFeature | SequenceFeature | SparseFeature] | None = None) -> int:
         candidates = list(features) if features is not None else self.features
         unique_feats = OrderedDict((feat.name, feat) for feat in candidates) # type: ignore[assignment]
         dim = 0
@@ -249,14 +224,13 @@ class EmbeddingLayer(nn.Module):
         return dim
     def get_input_dim(self, features: list[object] | None = None) -> int:
-        return self._compute_output_dim(features) # type: ignore[assignment]
+        return self.compute_output_dim(features) # type: ignore[assignment]
     @property
     def input_dim(self) -> int:
         return self.output_dim
 class InputMask(nn.Module):
-    """Utility module to build sequence masks for pooling layers."""
     def __init__(self):
         super().__init__()
@@ -271,7 +245,6 @@ class InputMask(nn.Module):
         return mask.unsqueeze(1).float()
 class LR(nn.Module):
-    """Wide component from Wide&Deep (Cheng et al., 2016)."""
     def __init__(
             self,
             input_dim: int,
@@ -287,7 +260,6 @@ class LR(nn.Module):
             return self.fc(x)
 class ConcatPooling(nn.Module):
-    """Concatenates sequence embeddings along the temporal dimension."""
     def __init__(self):
         super().__init__()
@@ -295,7 +267,6 @@ class ConcatPooling(nn.Module):
         return x.flatten(start_dim=1, end_dim=2)
 class AveragePooling(nn.Module):
-    """Mean pooling with optional padding mask."""
     def __init__(self):
         super().__init__()
@@ -308,7 +279,6 @@ class AveragePooling(nn.Module):
             return sum_pooling_matrix / (non_padding_length.float() + 1e-16)
 class SumPooling(nn.Module):
-    """Sum pooling with optional padding mask."""
     def __init__(self):
         super().__init__()
@@ -319,7 +289,6 @@ class SumPooling(nn.Module):
             return torch.bmm(mask, x).squeeze(1)
 class MLP(nn.Module):
-    """Stacked fully connected layers used in the deep component."""
     def __init__(
             self,
             input_dim: int,
@@ -345,7 +314,6 @@ class MLP(nn.Module):
         return self.mlp(x)
 class FM(nn.Module):
-    """Factorization Machine (Rendle, 2010) second-order interaction term."""
     def __init__(self, reduce_sum: bool = True):
         super().__init__()
         self.reduce_sum = reduce_sum
@@ -359,7 +327,6 @@ class FM(nn.Module):
         return 0.5 * ix
 class CrossLayer(nn.Module):
-    """Single cross layer used in DCN (Wang et al., 2017)."""
     def __init__(self, input_dim: int):
         super(CrossLayer, self).__init__()
         self.w = torch.nn.Linear(input_dim, 1, bias=False)
@@ -370,7 +337,6 @@ class CrossLayer(nn.Module):
         return x
 class SENETLayer(nn.Module):
-    """Squeeze-and-Excitation block adopted by FiBiNET (Huang et al., 2019)."""
     def __init__(
             self,
             num_fields: int,
@@ -388,7 +354,6 @@ class SENETLayer(nn.Module):
         return v
 class BiLinearInteractionLayer(nn.Module):
-    """Bilinear feature interaction from FiBiNET (Huang et al., 2019)."""
     def __init__(
             self,
             input_dim: int,
@@ -416,7 +381,6 @@ class BiLinearInteractionLayer(nn.Module):
         return torch.cat(bilinear_list, dim=1)
 class MultiHeadSelfAttention(nn.Module):
-    """Multi-head self-attention layer from AutoInt (Song et al., 2019)."""
     def __init__(
             self,
             embedding_dim: int,
@@ -438,13 +402,6 @@ class MultiHeadSelfAttention(nn.Module):
         self.dropout = nn.Dropout(dropout)
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Args:
-            x (torch.Tensor): Tensor of shape (batch_size, num_fields, embedding_dim)
-        Returns:
-            torch.Tensor: Output tensor of shape (batch_size, num_fields, embedding_dim)
-        """
         batch_size, num_fields, _ = x.shape
         Q = self.W_Q(x)  # [batch_size, num_fields, embedding_dim]
         K = self.W_K(x)

{nextrec-0.3.2 → nextrec-0.3.4}/nextrec/basic/loggers.py RENAMED Viewed

@@ -2,17 +2,19 @@
 NextRec Basic Loggers
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 03/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 import os
 import re
 import sys
+import json
 import copy
 import logging
-from nextrec.basic.session import create_session
+import numbers
+from typing import Mapping, Any
+from nextrec.basic.session import create_session, Session
 ANSI_CODES = {
     'black': '\033[30m',
@@ -77,17 +79,12 @@ def colorize(text: str, color: str | None = None, bold: bool = False) -> str:
     """Apply ANSI color and bold formatting to the given text."""
     if not color and not bold:
         return text
     result = ""
     if bold:
         result += ANSI_BOLD
     if color and color in ANSI_CODES:
         result += ANSI_CODES[color]
     result += text + ANSI_RESET
     return result
 def setup_logger(session_id: str | os.PathLike | None = None):
@@ -126,3 +123,69 @@ def setup_logger(session_id: str | os.PathLike | None = None):
     logger.addHandler(console_handler)
     return logger
+class TrainingLogger:
+    def __init__(
+        self,
+        session: Session,
+        enable_tensorboard: bool,
+        log_name: str = "training_metrics.jsonl",
+    ) -> None:
+        self.session = session
+        self.enable_tensorboard = enable_tensorboard
+        self.log_path = session.metrics_dir / log_name
+        self.log_path.parent.mkdir(parents=True, exist_ok=True)
+        self.tb_writer = None
+        self.tb_dir = None
+        if self.enable_tensorboard:
+            self._init_tensorboard()
+    def _init_tensorboard(self) -> None:
+        try:
+            from torch.utils.tensorboard import SummaryWriter  # type: ignore
+        except ImportError:
+            logging.warning("[TrainingLogger] tensorboard not installed, disable tensorboard logging.")
+            self.enable_tensorboard = False
+            return
+        tb_dir = self.session.logs_dir / "tensorboard"
+        tb_dir.mkdir(parents=True, exist_ok=True)
+        self.tb_dir = tb_dir
+        self.tb_writer = SummaryWriter(log_dir=str(tb_dir))
+    @property
+    def tensorboard_logdir(self):
+        return self.tb_dir
+    def format_metrics(self, metrics: Mapping[str, Any], split: str) -> dict[str, float]:
+        formatted: dict[str, float] = {}
+        for key, value in metrics.items():
+            if isinstance(value, numbers.Number):
+                formatted[f"{split}/{key}"] = float(value)
+            elif hasattr(value, "item"):
+                try:
+                    formatted[f"{split}/{key}"] = float(value.item())
+                except Exception:
+                    continue
+        return formatted
+    def log_metrics(self, metrics: Mapping[str, Any], step: int, split: str = "train") -> None:
+        payload = self.format_metrics(metrics, split)
+        payload["step"] = int(step)
+        with self.log_path.open("a", encoding="utf-8") as f:
+            f.write(json.dumps(payload, ensure_ascii=False) + "\n")
+        if not self.tb_writer:
+            return
+        step = int(payload.get("step", 0))
+        for key, value in payload.items():
+            if key == "step":
+                continue
+            self.tb_writer.add_scalar(key, value, global_step=step)
+    def close(self) -> None:
+        if self.tb_writer:
+            self.tb_writer.flush()
+            self.tb_writer.close()
+            self.tb_writer = None

nextrec 0.3.2__tar.gz → 0.3.4__tar.gz

nextrec 0.3.2__tar.gz → 0.3.4__tar.gz