PyPI - nextrec - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

nextrec/__init__.py +1 -1
nextrec/__version__.py +1 -1
nextrec/basic/activation.py +10 -5
nextrec/basic/callback.py +1 -0
nextrec/basic/features.py +30 -22
nextrec/basic/layers.py +220 -106
nextrec/basic/loggers.py +62 -43
nextrec/basic/metrics.py +268 -119
nextrec/basic/model.py +1082 -400
nextrec/basic/session.py +10 -3
nextrec/cli.py +498 -0
nextrec/data/__init__.py +19 -25
nextrec/data/batch_utils.py +11 -3
nextrec/data/data_processing.py +51 -45
nextrec/data/data_utils.py +26 -15
nextrec/data/dataloader.py +272 -95
nextrec/data/preprocessor.py +320 -199
nextrec/loss/listwise.py +17 -9
nextrec/loss/loss_utils.py +7 -8
nextrec/loss/pairwise.py +2 -0
nextrec/loss/pointwise.py +30 -12
nextrec/models/generative/hstu.py +103 -38
nextrec/models/match/dssm.py +82 -68
nextrec/models/match/dssm_v2.py +72 -57
nextrec/models/match/mind.py +175 -107
nextrec/models/match/sdm.py +104 -87
nextrec/models/match/youtube_dnn.py +73 -59
nextrec/models/multi_task/esmm.py +53 -37
nextrec/models/multi_task/mmoe.py +64 -45
nextrec/models/multi_task/ple.py +101 -48
nextrec/models/multi_task/poso.py +113 -36
nextrec/models/multi_task/share_bottom.py +48 -35
nextrec/models/ranking/afm.py +72 -37
nextrec/models/ranking/autoint.py +72 -55
nextrec/models/ranking/dcn.py +55 -35
nextrec/models/ranking/dcn_v2.py +64 -23
nextrec/models/ranking/deepfm.py +32 -22
nextrec/models/ranking/dien.py +155 -99
nextrec/models/ranking/din.py +85 -57
nextrec/models/ranking/fibinet.py +52 -32
nextrec/models/ranking/fm.py +29 -23
nextrec/models/ranking/masknet.py +91 -29
nextrec/models/ranking/pnn.py +31 -28
nextrec/models/ranking/widedeep.py +34 -26
nextrec/models/ranking/xdeepfm.py +60 -38
nextrec/utils/__init__.py +59 -34
nextrec/utils/config.py +490 -0
nextrec/utils/device.py +30 -20
nextrec/utils/distributed.py +36 -9
nextrec/utils/embedding.py +1 -0
nextrec/utils/feature.py +1 -0
nextrec/utils/file.py +32 -11
nextrec/utils/initializer.py +61 -16
nextrec/utils/optimizer.py +25 -9
nextrec/utils/synthetic_data.py +283 -165
nextrec/utils/tensor.py +24 -13
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
nextrec-0.4.2.dist-info/RECORD +69 -0
nextrec-0.4.2.dist-info/entry_points.txt +2 -0
nextrec-0.4.1.dist-info/RECORD +0 -66
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0

nextrec/utils/file.py CHANGED

@@ -2,11 +2,14 @@
 File I/O utilities for NextRec
 Date: create on 03/12/2025
+Checkpoint: edit on 06/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+import yaml
 import pandas as pd
 import pyarrow.parquet as pq
 from pathlib import Path
 from typing import Generator
@@ -14,7 +17,7 @@ from typing import Generator
 def resolve_file_paths(path: str) -> tuple[list[str], str]:
     """
     Resolve file or directory path into a sorted list of files and file type.
     Args: path: Path to a file or directory
     Returns: tuple: (list of file paths, file type)
     """
@@ -22,16 +25,23 @@ def resolve_file_paths(path: str) -> tuple[list[str], str]:
     if path_obj.is_file():
         file_type = path_obj.suffix.lower().lstrip(".")
-        assert file_type in ["csv", "parquet"], f"Unsupported file extension: {file_type}"
+        assert file_type in [
+            "csv",
+            "parquet",
+        ], f"Unsupported file extension: {file_type}"
         return [str(path_obj)], file_type
     if path_obj.is_dir():
         collected_files = [p for p in path_obj.iterdir() if p.is_file()]
         csv_files = [str(p) for p in collected_files if p.suffix.lower() == ".csv"]
-        parquet_files = [str(p) for p in collected_files if p.suffix.lower() == ".parquet"]
+        parquet_files = [
+            str(p) for p in collected_files if p.suffix.lower() == ".parquet"
+        ]
         if csv_files and parquet_files:
-            raise ValueError("Directory contains both CSV and Parquet files. Please keep a single format.")
+            raise ValueError(
+                "Directory contains both CSV and Parquet files. Please keep a single format."
+            )
         file_paths = csv_files if csv_files else parquet_files
         if not file_paths:
             raise ValueError(f"No CSV or Parquet files found in directory: {path}")
@@ -42,18 +52,24 @@ def resolve_file_paths(path: str) -> tuple[list[str], str]:
     raise ValueError(f"Invalid path: {path}")
-def read_table(file_path: str, file_type: str) -> pd.DataFrame:
-    if file_type == "csv":
-        return pd.read_csv(file_path)
-    return pd.read_parquet(file_path)
+def read_table(path: str | Path, data_format: str | None = None) -> pd.DataFrame:
+    data_path = Path(path)
+    fmt = data_format.lower() if data_format else data_path.suffix.lower().lstrip(".")
+    if data_path.is_dir() and not fmt:
+        fmt = "parquet"
+    if fmt in {"parquet", ""}:
+        return pd.read_parquet(data_path)
+    if fmt in {"csv", "txt"}:
+        return pd.read_csv(data_path)
+    raise ValueError(f"Unsupported data format: {data_path}")
 def load_dataframes(file_paths: list[str], file_type: str) -> list[pd.DataFrame]:
     return [read_table(fp, file_type) for fp in file_paths]
 def iter_file_chunks(
-    file_path: str,
-    file_type: str,
-    chunk_size: int
+    file_path: str, file_type: str, chunk_size: int
 ) -> Generator[pd.DataFrame, None, None]:
     if file_type == "csv":
         yield from pd.read_csv(file_path, chunksize=chunk_size)
@@ -68,3 +84,8 @@ def default_output_dir(path: str) -> Path:
     if path_obj.is_file():
         return path_obj.parent / f"{path_obj.stem}_preprocessed"
     return path_obj.with_name(f"{path_obj.name}_preprocessed")
+def read_yaml(path: str | Path):
+    with open(path, "r", encoding="utf-8") as file:
+        return yaml.safe_load(file) or {}

nextrec/utils/initializer.py CHANGED

@@ -5,32 +5,77 @@ Date: create on 13/11/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+from typing import Any, Dict, Set, cast
 import torch.nn as nn
+from torch.nn.init import _NonlinearityType
+KNOWN_NONLINEARITIES: Set[str] = {
+    "linear",
+    "conv1d",
+    "conv2d",
+    "conv3d",
+    "conv_transpose1d",
+    "conv_transpose2d",
+    "conv_transpose3d",
+    "sigmoid",
+    "tanh",
+    "relu",
+    "leaky_relu",
+    "selu",
+    "gelu",
+}
+def resolve_nonlinearity(activation: str | _NonlinearityType) -> _NonlinearityType:
+    if isinstance(activation, str):
+        if activation in KNOWN_NONLINEARITIES:
+            return cast(_NonlinearityType, activation)
+        # Fall back to linear for custom activations (gain handled separately).
+        return "linear"
+    return activation
-def get_initializer(init_type='normal', activation='linear', param=None):
-    param = param or {}
+def resolve_gain(activation: str | _NonlinearityType, param: Dict[str, Any]) -> float:
+    if "gain" in param:
+        return param["gain"]
+    nonlinearity = resolve_nonlinearity(activation)
     try:
-        gain = param.get('gain', nn.init.calculate_gain(activation, param.get('param', None)))
+        return nn.init.calculate_gain(nonlinearity, param.get("param"))
     except ValueError:
-        gain = 1.0  # for custom activations like 'dice'
+        return 1.0  # custom activation with no gain estimate available
+def get_initializer(
+    init_type: str = "normal",
+    activation: str | _NonlinearityType = "linear",
+    param: Dict[str, Any] | None = None,
+):
+    param = param or {}
+    nonlinearity = resolve_nonlinearity(activation)
+    gain = resolve_gain(activation, param)
     def initializer_fn(tensor):
-        if init_type == 'xavier_uniform':
+        if init_type == "xavier_uniform":
             nn.init.xavier_uniform_(tensor, gain=gain)
-        elif init_type == 'xavier_normal':
+        elif init_type == "xavier_normal":
             nn.init.xavier_normal_(tensor, gain=gain)
-        elif init_type == 'kaiming_uniform':
-            nn.init.kaiming_uniform_(tensor, a=param.get('a', 0), nonlinearity=activation)
-        elif init_type == 'kaiming_normal':
-            nn.init.kaiming_normal_(tensor, a=param.get('a', 0), nonlinearity=activation)
-        elif init_type == 'orthogonal':
+        elif init_type == "kaiming_uniform":
+            nn.init.kaiming_uniform_(
+                tensor, a=param.get("a", 0), nonlinearity=nonlinearity
+            )
+        elif init_type == "kaiming_normal":
+            nn.init.kaiming_normal_(
+                tensor, a=param.get("a", 0), nonlinearity=nonlinearity
+            )
+        elif init_type == "orthogonal":
             nn.init.orthogonal_(tensor, gain=gain)
-        elif init_type == 'normal':
-            nn.init.normal_(tensor, mean=param.get('mean', 0.0), std=param.get('std', 0.0001))
-        elif init_type == 'uniform':
-            nn.init.uniform_(tensor, a=param.get('a', -0.05), b=param.get('b', 0.05))
+        elif init_type == "normal":
+            nn.init.normal_(
+                tensor, mean=param.get("mean", 0.0), std=param.get("std", 0.0001)
+            )
+        elif init_type == "uniform":
+            nn.init.uniform_(tensor, a=param.get("a", -0.05), b=param.get("b", 0.05))
         else:
             raise ValueError(f"Unknown init_type: {init_type}")
         return tensor

nextrec/utils/optimizer.py CHANGED

@@ -8,16 +8,17 @@ Author: Yang Zhou, zyaztec@gmail.com
 import torch
 from typing import Iterable
 def get_optimizer(
     optimizer: str | torch.optim.Optimizer = "adam",
     params: Iterable[torch.nn.Parameter] | None = None,
-    **optimizer_params
+    **optimizer_params,
 ):
     if params is None:
         raise ValueError("params cannot be None. Please provide model parameters.")
-    if 'lr' not in optimizer_params:
-        optimizer_params['lr'] = 1e-3
+    if "lr" not in optimizer_params:
+        optimizer_params["lr"] = 1e-3
     if isinstance(optimizer, str):
         opt_name = optimizer.lower()
         if opt_name == "adam":
@@ -39,21 +40,36 @@ def get_optimizer(
         raise TypeError(f"Invalid optimizer type: {type(optimizer)}")
     return optimizer_fn
 def get_scheduler(
-    scheduler: str | torch.optim.lr_scheduler._LRScheduler | torch.optim.lr_scheduler.LRScheduler | type[torch.optim.lr_scheduler._LRScheduler] | type[torch.optim.lr_scheduler.LRScheduler] | None,
+    scheduler: (
+        str
+        | torch.optim.lr_scheduler._LRScheduler
+        | torch.optim.lr_scheduler.LRScheduler
+        | type[torch.optim.lr_scheduler._LRScheduler]
+        | type[torch.optim.lr_scheduler.LRScheduler]
+        | None
+    ),
     optimizer,
-    **scheduler_params
+    **scheduler_params,
 ):
     if isinstance(scheduler, str):
         if scheduler == "step":
-            scheduler_fn = torch.optim.lr_scheduler.StepLR(optimizer, **scheduler_params)
+            scheduler_fn = torch.optim.lr_scheduler.StepLR(
+                optimizer, **scheduler_params
+            )
         elif scheduler == "cosine":
-            scheduler_fn = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, **scheduler_params)
+            scheduler_fn = torch.optim.lr_scheduler.CosineAnnealingLR(
+                optimizer, **scheduler_params
+            )
         else:
             raise NotImplementedError(f"Unsupported scheduler: {scheduler}")
-    elif isinstance(scheduler, (torch.optim.lr_scheduler._LRScheduler, torch.optim.lr_scheduler.LRScheduler)):
+    elif isinstance(
+        scheduler,
+        (torch.optim.lr_scheduler._LRScheduler, torch.optim.lr_scheduler.LRScheduler),
+    ):
         scheduler_fn = scheduler
     else:
         raise TypeError(f"Invalid scheduler type: {type(scheduler)}")
     return scheduler_fn

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl