nextrec 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nextrec/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.3.4"
+ __version__ = "0.3.5"
nextrec/basic/features.py CHANGED
@@ -7,7 +7,7 @@ Author: Yang Zhou, zyaztec@gmail.com
  """
  import torch
  from nextrec.utils.embedding import get_auto_embedding_dim
- from nextrec.utils.common import normalize_to_list
+ from nextrec.utils.feature import normalize_to_list

  class BaseFeature(object):
      def __repr__(self):
nextrec/basic/model.py CHANGED
@@ -31,10 +31,12 @@ from nextrec.basic.session import resolve_save_path, create_session
  from nextrec.basic.metrics import configure_metrics, evaluate_metrics, check_user_id

  from nextrec.data.dataloader import build_tensors_from_data
- from nextrec.data.data_utils import get_column_data, collate_fn, batch_to_dict, get_user_ids
+ from nextrec.data.data_processing import get_column_data, get_user_ids
+ from nextrec.data.batch_utils import collate_fn, batch_to_dict

  from nextrec.loss import get_loss_fn, get_loss_kwargs
- from nextrec.utils import get_optimizer, get_scheduler, to_tensor
+ from nextrec.utils import get_optimizer, get_scheduler
+ from nextrec.utils.tensor import to_tensor

  from nextrec import __version__

nextrec/basic/session.py CHANGED
@@ -1,14 +1,5 @@
  """Session and experiment utilities.

- This module centralizes session/experiment management so the rest of the
- framework writes all artifacts to a consistent location:: <pwd>/log/<experiment_id>/
-
- Within that folder we keep model parameters, checkpoints, training metrics,
- evaluation metrics, and consolidated log output. When users do not provide an
- ``experiment_id`` a timestamp-based identifier is generated once per process to
- avoid scattering files across multiple directories. Test runs are redirected to
- temporary folders so local trees are not polluted.
-
  Date: create on 23/11/2025
  Author: Yang Zhou,zyaztec@gmail.com
  """
@@ -16,7 +7,7 @@ Author: Yang Zhou,zyaztec@gmail.com
  import os
  import tempfile
  from dataclasses import dataclass
- from datetime import datetime
+ from datetime import datetime, timezone
  from pathlib import Path

  __all__ = [
@@ -74,6 +65,7 @@ def create_session(experiment_id: str | Path | None = None) -> Session:
      if experiment_id is not None and str(experiment_id).strip():
          exp_id = str(experiment_id).strip()
      else:
+         # Use local time for session naming
          exp_id = "nextrec_session_" + datetime.now().strftime("%Y%m%d")

      if (
@@ -111,6 +103,7 @@ def resolve_save_path(
        timestamp.
      - Parent directories are created.
      """
+     # Use local time for file timestamps
      timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") if add_timestamp else None

      normalized_suffix = suffix if suffix.startswith(".") else f".{suffix}"
nextrec/data/__init__.py CHANGED
@@ -1,48 +1,86 @@
  """
  Data utilities package for NextRec

- This package provides data processing and manipulation utilities.
+ This package provides data processing and manipulation utilities organized by category:
+ - batch_utils: Batch collation and processing
+ - data_processing: Data manipulation and user ID extraction
+ - data_utils: Legacy module (re-exports from specialized modules)
+ - dataloader: Dataset and DataLoader implementations
+ - preprocessor: Data preprocessing pipeline

  Date: create on 13/11/2025
+ Last update: 03/12/2025 (refactored)
  Author: Yang Zhou, zyaztec@gmail.com
  """

- from nextrec.data.data_utils import (
-     collate_fn,
+ # Batch utilities
+ from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
+
+ # Data processing utilities
+ from nextrec.data.data_processing import (
      get_column_data,
-     default_output_dir,
      split_dict_random,
      build_eval_candidates,
+     get_user_ids,
+ )
+
+ # File utilities (from utils package)
+ from nextrec.utils.file import (
      resolve_file_paths,
      iter_file_chunks,
      read_table,
      load_dataframes,
+     default_output_dir,
  )
- from nextrec.basic.features import FeatureSet
- from nextrec.data import data_utils
+
+ # DataLoader components
  from nextrec.data.dataloader import (
      TensorDictDataset,
      FileDataset,
      RecDataLoader,
      build_tensors_from_data,
  )
+
+ # Preprocessor
  from nextrec.data.preprocessor import DataProcessor

+ # Feature definitions
+ from nextrec.basic.features import FeatureSet
+
+ # Legacy module (for backward compatibility)
+ from nextrec.data import data_utils
+
  __all__ = [
+     # Batch utilities
      'collate_fn',
+     'batch_to_dict',
+     'stack_section',
+
+     # Data processing
      'get_column_data',
-     'default_output_dir',
      'split_dict_random',
      'build_eval_candidates',
+     'get_user_ids',
+
+     # File utilities
      'resolve_file_paths',
      'iter_file_chunks',
      'read_table',
      'load_dataframes',
-     'FeatureSet',
-     'data_utils',
+     'default_output_dir',
+
+     # DataLoader
      'TensorDictDataset',
      'FileDataset',
      'RecDataLoader',
      'build_tensors_from_data',
+
+     # Preprocessor
      'DataProcessor',
+
+     # Features
      'FeatureSet',
+
+     # Legacy module
      'data_utils',
  ]
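For orientation, a minimal sketch of how the reorganized exports above can be imported (paths taken from this diff; the package root and the data_utils shim keep working for existing code):

    # category-specific modules introduced in 0.3.5
    from nextrec.data.batch_utils import collate_fn, batch_to_dict
    from nextrec.data.data_processing import split_dict_random, get_user_ids
    from nextrec.utils.file import resolve_file_paths, read_table

    # still re-exported from the package root and the legacy shim
    from nextrec.data import collate_fn, split_dict_random
    from nextrec.data.data_utils import get_column_data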
nextrec/data/batch_utils.py ADDED
@@ -0,0 +1,80 @@
+ """
+ Batch collation utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import torch
+ import numpy as np
+ from typing import Any, Mapping
+
+ def stack_section(batch: list[dict], section: str):
+     entries = [item.get(section) for item in batch if item.get(section) is not None]
+     if not entries:
+         return None
+     merged: dict = {}
+     for name in entries[0]:  # type: ignore
+         tensors = [item[section][name] for item in batch if item.get(section) is not None and name in item[section]]
+         merged[name] = torch.stack(tensors, dim=0)
+     return merged
+
+ def collate_fn(batch):
+     """
+     Collate a list of sample dicts into the unified batch format:
+     {
+         "features": {name: Tensor(B, ...)},
+         "labels": {target: Tensor(B, ...)} or None,
+         "ids": {id_name: Tensor(B, ...)} or None,
+     }
+     Args: batch: List of samples from DataLoader
+
+     Returns: dict: Batched data in unified format
+     """
+     if not batch:
+         return {"features": {}, "labels": None, "ids": None}
+
+     first = batch[0]
+     if isinstance(first, dict) and "features" in first:
+         # Streaming dataset yields already-batched chunks; avoid adding an extra dim.
+         if first.get("_already_batched") and len(batch) == 1:
+             return {
+                 "features": first.get("features", {}),
+                 "labels": first.get("labels"),
+                 "ids": first.get("ids"),
+             }
+         return {
+             "features": stack_section(batch, "features") or {},
+             "labels": stack_section(batch, "labels"),
+             "ids": stack_section(batch, "ids"),
+         }
+
+     # Fallback: stack tuples/lists of tensors
+     num_tensors = len(first)
+     result = []
+     for i in range(num_tensors):
+         tensor_list = [item[i] for item in batch]
+         first_item = tensor_list[0]
+         if isinstance(first_item, torch.Tensor):
+             stacked = torch.cat(tensor_list, dim=0)
+         elif isinstance(first_item, np.ndarray):
+             stacked = np.concatenate(tensor_list, axis=0)
+         elif isinstance(first_item, list):
+             combined = []
+             for entry in tensor_list:
+                 combined.extend(entry)
+             stacked = combined
+         else:
+             stacked = tensor_list
+         result.append(stacked)
+     return tuple(result)
+
+
+ def batch_to_dict(batch_data: Any, include_ids: bool = True) -> dict:
+     if not (isinstance(batch_data, Mapping) and "features" in batch_data):
+         raise TypeError("[BaseModel-batch_to_dict Error] Batch data must be a dict with 'features' produced by the current DataLoader.")
+     return {
+         "features": batch_data.get("features", {}),
+         "labels": batch_data.get("labels"),
+         "ids": batch_data.get("ids") if include_ids else None,
+     }
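A small usage sketch of collate_fn with the unified batch format it documents (toy tensors; the feature, label, and id names are illustrative):

    import torch
    from nextrec.data.batch_utils import collate_fn

    samples = [
        {"features": {"age": torch.tensor(25.0)}, "labels": {"click": torch.tensor(1.0)}, "ids": {"user_id": torch.tensor(7)}},
        {"features": {"age": torch.tensor(31.0)}, "labels": {"click": torch.tensor(0.0)}, "ids": {"user_id": torch.tensor(8)}},
    ]
    batch = collate_fn(samples)
    # batch["features"]["age"], batch["labels"]["click"], batch["ids"]["user_id"] are each stacked to shape (2,)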
nextrec/data/data_processing.py ADDED
@@ -0,0 +1,152 @@
+ """
+ Data processing utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import torch
+ import numpy as np
+ import pandas as pd
+ from typing import Any, Mapping
+
+
+ def get_column_data(data: dict | pd.DataFrame, name: str):
+     if isinstance(data, dict):
+         return data[name] if name in data else None
+     elif isinstance(data, pd.DataFrame):
+         if name not in data.columns:
+             return None
+         return data[name].values
+     else:
+         if hasattr(data, name):
+             return getattr(data, name)
+         raise KeyError(f"Unsupported data type for extracting column {name}")
+
+ def split_dict_random(
+     data_dict: dict,
+     test_size: float = 0.2,
+     random_state: int | None = None
+ ):
+     lengths = [len(v) for v in data_dict.values()]
+     if len(set(lengths)) != 1:
+         raise ValueError(f"Length mismatch: {lengths}")
+
+     n = lengths[0]
+     rng = np.random.default_rng(random_state)
+     perm = rng.permutation(n)
+     cut = int(round(n * (1 - test_size)))
+     train_idx, test_idx = perm[:cut], perm[cut:]
+
+     def take(v, idx):
+         if isinstance(v, np.ndarray):
+             return v[idx]
+         elif isinstance(v, pd.Series):
+             return v.iloc[idx].to_numpy()
+         else:
+             v_arr = np.asarray(v, dtype=object)
+             return v_arr[idx]
+
+     train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
+     test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
+     return train_dict, test_dict
+
+
+ def build_eval_candidates(
+     df_all: pd.DataFrame,
+     user_col: str,
+     item_col: str,
+     label_col: str,
+     user_features: pd.DataFrame,
+     item_features: pd.DataFrame,
+     num_pos_per_user: int = 5,
+     num_neg_per_pos: int = 50,
+     random_seed: int = 2025,
+ ) -> pd.DataFrame:
+     """
+     Build evaluation candidates with positive and negative samples for each user.
+
+     Args:
+         df_all: Full interaction DataFrame
+         user_col: Name of the user ID column
+         item_col: Name of the item ID column
+         label_col: Name of the label column
+         user_features: DataFrame containing user features
+         item_features: DataFrame containing item features
+         num_pos_per_user: Number of positive samples per user (default: 5)
+         num_neg_per_pos: Number of negative samples per positive (default: 50)
+         random_seed: Random seed for reproducibility (default: 2025)
+
+     Returns:
+         pd.DataFrame: Evaluation candidates with features
+     """
+     rng = np.random.default_rng(random_seed)
+
+     users = df_all[user_col].unique()
+     all_items = item_features[item_col].unique()
+     rows = []
+     user_hist_items = {u: df_all[df_all[user_col] == u][item_col].unique() for u in users}
+
+     for u in users:
+         df_user = df_all[df_all[user_col] == u]
+         pos_items = df_user[df_user[label_col] == 1][item_col].unique()
+         if len(pos_items) == 0:
+             continue
+         pos_items = pos_items[:num_pos_per_user]
+         seen_items = set(user_hist_items[u])
+         neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
+         if len(neg_pool) == 0:
+             continue
+         for pos in pos_items:
+             if len(neg_pool) <= num_neg_per_pos:
+                 neg_items = neg_pool
+             else:
+                 neg_items = rng.choice(neg_pool, size=num_neg_per_pos, replace=False)
+             rows.append((u, pos, 1))
+             for ni in neg_items:
+                 rows.append((u, ni, 0))
+
+     eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
+     eval_df = eval_df.merge(user_features, on=user_col, how='left')
+     eval_df = eval_df.merge(item_features, on=item_col, how='left')
+     return eval_df
+
+
+ def get_user_ids(
+     data: Any,
+     id_columns: list[str] | str | None = None
+ ) -> np.ndarray | None:
+     """
+     Extract user IDs from various data structures.
+
+     Args:
+         data: Data source (DataFrame, dict, or batch dict)
+         id_columns: List or single ID column name(s) (default: None)
+
+     Returns:
+         np.ndarray | None: User IDs as numpy array, or None if not found
+     """
+     id_columns = (
+         id_columns if isinstance(id_columns, list)
+         else [id_columns] if isinstance(id_columns, str)
+         else []
+     )
+     if not id_columns:
+         return None
+
+     main_id = id_columns[0]
+     if isinstance(data, pd.DataFrame) and main_id in data.columns:
+         arr = np.asarray(data[main_id].values)
+         return arr.reshape(arr.shape[0])
+
+     if isinstance(data, dict):
+         ids_container = data.get("ids")
+         if isinstance(ids_container, dict) and main_id in ids_container:
+             val = ids_container[main_id]
+             val = val.detach().cpu().numpy() if isinstance(val, torch.Tensor) else np.asarray(val)
+             return val.reshape(val.shape[0])
+         if main_id in data:
+             arr = np.asarray(data[main_id])
+             return arr.reshape(arr.shape[0])
+
+     return None
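A quick sketch of split_dict_random and get_user_ids as defined above (toy arrays; the column names are illustrative):

    import numpy as np
    from nextrec.data.data_processing import split_dict_random, get_user_ids

    data = {"user_id": np.arange(10), "label": np.ones(10)}
    train, test = split_dict_random(data, test_size=0.2, random_state=42)  # 8/2 split, same permutation per column
    ids = get_user_ids(data, id_columns="user_id")                         # -> ndarray of shape (10,)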
nextrec/data/data_utils.py CHANGED
@@ -1,268 +1,35 @@
- """Data processing utilities for NextRec."""
-
- import torch
- import numpy as np
- import pandas as pd
- import pyarrow.parquet as pq
- from pathlib import Path
- from typing import Any, Mapping, Sequence
-
- def stack_section(batch: list[dict], section: str):
-     """Stack one section of the batch (features/labels/ids)."""
-     entries = [item.get(section) for item in batch if item.get(section) is not None]
-     if not entries:
-         return None
-     merged: dict = {}
-     for name in entries[0]:  # type: ignore
-         tensors = [item[section][name] for item in batch if item.get(section) is not None and name in item[section]]
-         merged[name] = torch.stack(tensors, dim=0)
-     return merged
-
- def collate_fn(batch):
-     """
-     Collate a list of sample dicts into the unified batch format:
-     {
-         "features": {name: Tensor(B, ...)},
-         "labels": {target: Tensor(B, ...)} or None,
-         "ids": {id_name: Tensor(B, ...)} or None,
-     }
-     """
-     if not batch:
-         return {"features": {}, "labels": None, "ids": None}
-
-     first = batch[0]
-     if isinstance(first, dict) and "features" in first:
-         # Streaming dataset yields already-batched chunks; avoid adding an extra dim.
-         if first.get("_already_batched") and len(batch) == 1:
-             return {
-                 "features": first.get("features", {}),
-                 "labels": first.get("labels"),
-                 "ids": first.get("ids"),
-             }
-         return {
-             "features": stack_section(batch, "features") or {},
-             "labels": stack_section(batch, "labels"),
-             "ids": stack_section(batch, "ids"),
-         }
-
-     # Fallback: stack tuples/lists of tensors
-     num_tensors = len(first)
-     result = []
-     for i in range(num_tensors):
-         tensor_list = [item[i] for item in batch]
-         first_item = tensor_list[0]
-         if isinstance(first_item, torch.Tensor):
-             stacked = torch.cat(tensor_list, dim=0)
-         elif isinstance(first_item, np.ndarray):
-             stacked = np.concatenate(tensor_list, axis=0)
-         elif isinstance(first_item, list):
-             combined = []
-             for entry in tensor_list:
-                 combined.extend(entry)
-             stacked = combined
-         else:
-             stacked = tensor_list
-         result.append(stacked)
-     return tuple(result)
-
- def get_column_data(data: dict | pd.DataFrame, name: str):
-     """Extract column data from various data structures."""
-     if isinstance(data, dict):
-         return data[name] if name in data else None
-     elif isinstance(data, pd.DataFrame):
-         if name not in data.columns:
-             return None
-         return data[name].values
-     else:
-         if hasattr(data, name):
-             return getattr(data, name)
-         raise KeyError(f"Unsupported data type for extracting column {name}")
-
- def resolve_file_paths(path: str) -> tuple[list[str], str]:
-     """Resolve file or directory path into a sorted list of files and file type."""
-     path_obj = Path(path)
-
-     if path_obj.is_file():
-         file_type = path_obj.suffix.lower().lstrip(".")
-         assert file_type in ["csv", "parquet"], f"Unsupported file extension: {file_type}"
-         return [str(path_obj)], file_type
-
-     if path_obj.is_dir():
-         collected_files = [p for p in path_obj.iterdir() if p.is_file()]
-         csv_files = [str(p) for p in collected_files if p.suffix.lower() == ".csv"]
-         parquet_files = [str(p) for p in collected_files if p.suffix.lower() == ".parquet"]
-
-         if csv_files and parquet_files:
-             raise ValueError("Directory contains both CSV and Parquet files. Please keep a single format.")
-         file_paths = csv_files if csv_files else parquet_files
-         if not file_paths:
-             raise ValueError(f"No CSV or Parquet files found in directory: {path}")
-         file_paths.sort()
-         file_type = "csv" if csv_files else "parquet"
-         return file_paths, file_type
-
-     raise ValueError(f"Invalid path: {path}")
-
- def iter_file_chunks(file_path: str, file_type: str, chunk_size: int):
-     """Yield DataFrame chunks for CSV/Parquet without loading the whole file."""
-     if file_type == "csv":
-         yield from pd.read_csv(file_path, chunksize=chunk_size)
-         return
-     parquet_file = pq.ParquetFile(file_path)
-     for batch in parquet_file.iter_batches(batch_size=chunk_size):
-         yield batch.to_pandas()
-
- def read_table(file_path: str, file_type: str) -> pd.DataFrame:
-     """Read a single CSV/Parquet file."""
-     if file_type == "csv":
-         return pd.read_csv(file_path)
-     return pd.read_parquet(file_path)
-
- def load_dataframes(file_paths: list[str], file_type: str) -> list[pd.DataFrame]:
-     """Load multiple files of the same type into DataFrames."""
-     return [read_table(fp, file_type) for fp in file_paths]
-
- def default_output_dir(path: str) -> Path:
-     """Generate a default output directory path based on the input path."""
-     path_obj = Path(path)
-     if path_obj.is_file():
-         return path_obj.parent / f"{path_obj.stem}_preprocessed"
-     return path_obj.with_name(f"{path_obj.name}_preprocessed")
-
- def split_dict_random(data_dict: dict, test_size: float = 0.2, random_state: int | None = None):
-     """Randomly split a dictionary of data into training and testing sets."""
-     lengths = [len(v) for v in data_dict.values()]
-     if len(set(lengths)) != 1:
-         raise ValueError(f"Length mismatch: {lengths}")
-     n = lengths[0]
-     rng = np.random.default_rng(random_state)
-     perm = rng.permutation(n)
-     cut = int(round(n * (1 - test_size)))
-     train_idx, test_idx = perm[:cut], perm[cut:]
-     def take(v, idx):
-         if isinstance(v, np.ndarray):
-             return v[idx]
-         elif isinstance(v, pd.Series):
-             return v.iloc[idx].to_numpy()
-         else:
-             v_arr = np.asarray(v, dtype=object)
-             return v_arr[idx]
-     train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
-     test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
-     return train_dict, test_dict
-
- def build_eval_candidates(
-     df_all: pd.DataFrame,
-     user_col: str,
-     item_col: str,
-     label_col: str,
-     user_features: pd.DataFrame,
-     item_features: pd.DataFrame,
-     num_pos_per_user: int = 5,
-     num_neg_per_pos: int = 50,
-     random_seed: int = 2025,
- ) -> pd.DataFrame:
-     """Build evaluation candidates with positive and negative samples for each user. """
-     rng = np.random.default_rng(random_seed)
-
-     users = df_all[user_col].unique()
-     all_items = item_features[item_col].unique()
-     rows = []
-     user_hist_items = {u: df_all[df_all[user_col] == u][item_col].unique() for u in users}
-     for u in users:
-         df_user = df_all[df_all[user_col] == u]
-         pos_items = df_user[df_user[label_col] == 1][item_col].unique()
-         if len(pos_items) == 0:
-             continue
-         pos_items = pos_items[:num_pos_per_user]
-         seen_items = set(user_hist_items[u])
-         neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
-         if len(neg_pool) == 0:
-             continue
-         for pos in pos_items:
-             if len(neg_pool) <= num_neg_per_pos:
-                 neg_items = neg_pool
-             else:
-                 neg_items = rng.choice(neg_pool, size=num_neg_per_pos, replace=False)
-             rows.append((u, pos, 1))
-             for ni in neg_items:
-                 rows.append((u, ni, 0))
-     eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
-     eval_df = eval_df.merge(user_features, on=user_col, how='left')
-     eval_df = eval_df.merge(item_features, on=item_col, how='left')
-     return eval_df
-
- def batch_to_dict(batch_data: Any, include_ids: bool = True) -> dict:
-     """Standardize a dataloader batch into a dict of features, labels, and ids."""
-     if not (isinstance(batch_data, Mapping) and "features" in batch_data):
-         raise TypeError(
-             "[BaseModel-batch_to_dict Error] Batch data must be a dict with 'features' produced by the current DataLoader."
-         )
-     return {
-         "features": batch_data.get("features", {}),
-         "labels": batch_data.get("labels"),
-         "ids": batch_data.get("ids") if include_ids else None,
-     }
-
-
- # def get_user_ids(
- #     data: dict | pd.DataFrame | None, user_id_column: str = "user_id"
- # ) -> np.ndarray | None:
- #     """Extract user IDs from a dataset dict or DataFrame."""
- #     if data is None:
- #         return None
- #     if isinstance(data, pd.DataFrame) and user_id_column in data.columns:
- #         return np.asarray(data[user_id_column].values)
- #     if isinstance(data, dict) and user_id_column in data:
- #         return np.asarray(data[user_id_column])
- #     return None
-
-
- # def get_user_ids_from_batch(
- #     batch_dict: Mapping[str, Any], id_columns: Sequence[str] | None = None
- # ) -> np.ndarray | None:
- #     """Extract the prioritized user id column from a batch dict."""
- #     ids_container = batch_dict.get("ids") if isinstance(batch_dict, Mapping) else None
- #     if not ids_container:
- #         return None
-
- #     batch_user_id = None
- #     if id_columns:
- #         for id_name in id_columns:
- #             if id_name in ids_container:
- #                 batch_user_id = ids_container[id_name]
- #                 break
- #     if batch_user_id is None:
- #         batch_user_id = next(iter(ids_container.values()), None)
- #     if batch_user_id is None:
- #         return None
-
- #     if isinstance(batch_user_id, torch.Tensor):
- #         ids_np = batch_user_id.detach().cpu().numpy()
- #     else:
- #         ids_np = np.asarray(batch_user_id)
- #     if ids_np.ndim == 0:
- #         ids_np = ids_np.reshape(1)
- #     return ids_np.reshape(ids_np.shape[0])
-
-
- def get_user_ids(data, id_columns: list[str] | str | None = None) -> np.ndarray | None:
-     id_columns = id_columns if isinstance(id_columns, list) else [id_columns] if isinstance(id_columns, str) else []
-     if not id_columns:
-         return None
-
-     main_id = id_columns[0]
-     if isinstance(data, pd.DataFrame) and main_id in data.columns:
-         arr = np.asarray(data[main_id].values)
-         return arr.reshape(arr.shape[0])
-     if isinstance(data, dict):
-         ids_container = data.get("ids")
-         if isinstance(ids_container, dict) and main_id in ids_container:
-             val = ids_container[main_id]
-             val = val.detach().cpu().numpy() if isinstance(val, torch.Tensor) else np.asarray(val)
-             return val.reshape(val.shape[0])
-         if main_id in data:
-             arr = np.asarray(data[main_id])
-             return arr.reshape(arr.shape[0])
-
-     return None
+ """
+ Data processing utilities for NextRec (Refactored)
+
+ This module now re-exports functions from specialized submodules:
+ - batch_utils: collate_fn, batch_to_dict
+ - data_processing: get_column_data, split_dict_random, build_eval_candidates, get_user_ids
+ - nextrec.utils.file_utils: resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
+
+ Date: create on 27/10/2025
+ Last update: 03/12/2025 (refactored)
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ # Import from new organized modules
+ from nextrec.data.batch_utils import collate_fn, batch_to_dict, stack_section
+ from nextrec.data.data_processing import get_column_data, split_dict_random, build_eval_candidates, get_user_ids
+ from nextrec.utils.file import resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
+
+ __all__ = [
+     # Batch utilities
+     'collate_fn',
+     'batch_to_dict',
+     'stack_section',
+     # Data processing
+     'get_column_data',
+     'split_dict_random',
+     'build_eval_candidates',
+     'get_user_ids',
+     # File utilities
+     'resolve_file_paths',
+     'iter_file_chunks',
+     'read_table',
+     'load_dataframes',
+     'default_output_dir',
+ ]
nextrec/data/dataloader.py CHANGED
@@ -20,8 +20,10 @@ from nextrec.data.preprocessor import DataProcessor
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature, FeatureSet

  from nextrec.basic.loggers import colorize
- from nextrec.data import get_column_data, collate_fn, resolve_file_paths, read_table
- from nextrec.utils import to_tensor
+ from nextrec.data.data_processing import get_column_data
+ from nextrec.data.batch_utils import collate_fn
+ from nextrec.utils.file import resolve_file_paths, read_table
+ from nextrec.utils.tensor import to_tensor

  class TensorDictDataset(Dataset):
      """Dataset returning sample-level dicts matching the unified batch schema."""
nextrec/data/preprocessor.py CHANGED
@@ -16,24 +16,14 @@ import pandas as pd
  import tqdm
  from pathlib import Path
  from typing import Dict, Union, Optional, Literal, Any
- from sklearn.preprocessing import (
-     StandardScaler,
-     MinMaxScaler,
-     RobustScaler,
-     MaxAbsScaler,
-     LabelEncoder
- )
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler, LabelEncoder
+

- from nextrec.basic.loggers import setup_logger, colorize
- from nextrec.data.data_utils import (
-     resolve_file_paths,
-     iter_file_chunks,
-     read_table,
-     load_dataframes,
-     default_output_dir,
- )
- from nextrec.basic.session import resolve_save_path
  from nextrec.basic.features import FeatureSet
+ from nextrec.basic.loggers import colorize
+ from nextrec.basic.session import resolve_save_path
+ from nextrec.utils.file import resolve_file_paths, iter_file_chunks, read_table, load_dataframes, default_output_dir
+
  from nextrec.__version__ import __version__


nextrec/models/multi_task/poso.py CHANGED
@@ -46,7 +46,7 @@ from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
  from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
  from nextrec.basic.activation import activation_layer
  from nextrec.basic.model import BaseModel
- from nextrec.utils.common import merge_features
+ from nextrec.utils.model import merge_features


  class POSOGate(nn.Module):
nextrec/utils/__init__.py CHANGED
@@ -1,18 +1,68 @@
+ """
+ Utilities package for NextRec
+
+ This package provides various utility functions organized by category:
+ - optimizer: Optimizer and scheduler utilities
+ - initializer: Weight initialization utilities
+ - embedding: Embedding dimension calculation
+ - device_utils: Device management and selection
+ - tensor_utils: Tensor operations and conversions
+ - file_utils: File I/O operations
+ - model_utils: Model-related utilities
+ - feature_utils: Feature processing utilities
+
+ Date: create on 13/11/2025
+ Last update: 03/12/2025 (refactored)
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
  from .optimizer import get_optimizer, get_scheduler
  from .initializer import get_initializer
  from .embedding import get_auto_embedding_dim
- from .common import resolve_device, to_tensor
- from . import optimizer, initializer, embedding, common
+ from .device import resolve_device, get_device_info
+ from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
+ from .file import resolve_file_paths, read_table, load_dataframes, iter_file_chunks, default_output_dir
+ from .model import merge_features, get_mlp_output_dim
+ from .feature import normalize_to_list
+ from . import optimizer, initializer, embedding

  __all__ = [
+     # Optimizer & Scheduler
      'get_optimizer',
      'get_scheduler',
+
+     # Initializer
      'get_initializer',
+
+     # Embedding
      'get_auto_embedding_dim',
+
+     # Device utilities
      'resolve_device',
+     'get_device_info',
+
+     # Tensor utilities
      'to_tensor',
+     'stack_tensors',
+     'concat_tensors',
+     'pad_sequence_tensors',
+
+     # File utilities
+     'resolve_file_paths',
+     'read_table',
+     'load_dataframes',
+     'iter_file_chunks',
+     'default_output_dir',
+
+     # Model utilities
+     'merge_features',
+     'get_mlp_output_dim',
+
+     # Feature utilities
+     'normalize_to_list',
+
+     # Module exports
      'optimizer',
      'initializer',
      'embedding',
-     'common',
  ]
nextrec/utils/device.py ADDED
@@ -0,0 +1,37 @@
+ """
+ Device management utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import torch
+ import platform
+
+
+ def resolve_device() -> str:
+     if torch.cuda.is_available():
+         return "cuda"
+     if torch.backends.mps.is_available():
+         mac_ver = platform.mac_ver()[0]
+         try:
+             major, minor = (int(x) for x in mac_ver.split(".")[:2])
+         except Exception:
+             major, minor = 0, 0
+         if major >= 14:
+             return "mps"
+     return "cpu"
+
+ def get_device_info() -> dict:
+     info = {
+         'cuda_available': torch.cuda.is_available(),
+         'cuda_device_count': torch.cuda.device_count() if torch.cuda.is_available() else 0,
+         'mps_available': torch.backends.mps.is_available(),
+         'current_device': resolve_device(),
+     }
+
+     if torch.cuda.is_available():
+         info['cuda_device_name'] = torch.cuda.get_device_name(0)
+         info['cuda_capability'] = torch.cuda.get_device_capability(0)
+
+     return info
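A brief sketch of the two device helpers added above (resolve_device falls back to CPU unless CUDA, or MPS on macOS 14+, is available):

    from nextrec.utils.device import resolve_device, get_device_info

    device = resolve_device()   # "cuda", "mps", or "cpu"
    print(get_device_info())    # dict with cuda/mps availability and the resolved device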
nextrec/utils/feature.py ADDED
@@ -0,0 +1,13 @@
+ """
+ Feature processing utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ def normalize_to_list(value: str | list[str] | None) -> list[str]:
+     if value is None:
+         return []
+     if isinstance(value, str):
+         return [value]
+     return list(value)
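For reference, the behaviour of normalize_to_list on the three accepted input shapes:

    from nextrec.utils.feature import normalize_to_list

    normalize_to_list(None)             # -> []
    normalize_to_list("user_id")        # -> ["user_id"]
    normalize_to_list(["uid", "iid"])   # -> ["uid", "iid"]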
nextrec/utils/file.py ADDED
@@ -0,0 +1,70 @@
+ """
+ File I/O utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import pandas as pd
+ import pyarrow.parquet as pq
+ from pathlib import Path
+ from typing import Generator
+
+
+ def resolve_file_paths(path: str) -> tuple[list[str], str]:
+     """
+     Resolve file or directory path into a sorted list of files and file type.
+
+     Args: path: Path to a file or directory
+     Returns: tuple: (list of file paths, file type)
+     """
+     path_obj = Path(path)
+
+     if path_obj.is_file():
+         file_type = path_obj.suffix.lower().lstrip(".")
+         assert file_type in ["csv", "parquet"], f"Unsupported file extension: {file_type}"
+         return [str(path_obj)], file_type
+
+     if path_obj.is_dir():
+         collected_files = [p for p in path_obj.iterdir() if p.is_file()]
+         csv_files = [str(p) for p in collected_files if p.suffix.lower() == ".csv"]
+         parquet_files = [str(p) for p in collected_files if p.suffix.lower() == ".parquet"]
+
+         if csv_files and parquet_files:
+             raise ValueError("Directory contains both CSV and Parquet files. Please keep a single format.")
+         file_paths = csv_files if csv_files else parquet_files
+         if not file_paths:
+             raise ValueError(f"No CSV or Parquet files found in directory: {path}")
+         file_paths.sort()
+         file_type = "csv" if csv_files else "parquet"
+         return file_paths, file_type
+
+     raise ValueError(f"Invalid path: {path}")
+
+
+ def read_table(file_path: str, file_type: str) -> pd.DataFrame:
+     if file_type == "csv":
+         return pd.read_csv(file_path)
+     return pd.read_parquet(file_path)
+
+ def load_dataframes(file_paths: list[str], file_type: str) -> list[pd.DataFrame]:
+     return [read_table(fp, file_type) for fp in file_paths]
+
+ def iter_file_chunks(
+     file_path: str,
+     file_type: str,
+     chunk_size: int
+ ) -> Generator[pd.DataFrame, None, None]:
+     if file_type == "csv":
+         yield from pd.read_csv(file_path, chunksize=chunk_size)
+         return
+     parquet_file = pq.ParquetFile(file_path)
+     for batch in parquet_file.iter_batches(batch_size=chunk_size):
+         yield batch.to_pandas()
+
+
+ def default_output_dir(path: str) -> Path:
+     path_obj = Path(path)
+     if path_obj.is_file():
+         return path_obj.parent / f"{path_obj.stem}_preprocessed"
+     return path_obj.with_name(f"{path_obj.name}_preprocessed")
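A short sketch of the file helpers above (assuming a local directory of CSV files; the path is illustrative):

    from nextrec.utils.file import resolve_file_paths, iter_file_chunks, default_output_dir

    paths, ftype = resolve_file_paths("./data/interactions")        # sorted CSV or Parquet paths plus the detected type
    for chunk in iter_file_chunks(paths[0], ftype, chunk_size=10_000):
        print(len(chunk))                                           # pandas DataFrame chunks
    out_dir = default_output_dir("./data/interactions")             # -> data/interactions_preprocessed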
nextrec/utils/initializer.py CHANGED
@@ -9,14 +9,6 @@ import torch.nn as nn


  def get_initializer(init_type='normal', activation='linear', param=None):
-     """
-     Get parameter initialization function.
-
-     Examples:
-         >>> init_fn = get_initializer('xavier_uniform', 'relu')
-         >>> init_fn(tensor)
-         >>> init_fn = get_initializer('normal', param={'mean': 0.0, 'std': 0.01})
-     """
      param = param or {}

      try:
nextrec/utils/model.py ADDED
@@ -0,0 +1,22 @@
+ """
+ Model-related utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ from collections import OrderedDict
+
+
+ def merge_features(primary, secondary) -> list:
+     merged: OrderedDict[str, object] = OrderedDict()
+     for feat in list(primary or []) + list(secondary or []):
+         merged.setdefault(feat.name, feat)
+     return list(merged.values())
+
+
+ def get_mlp_output_dim(params: dict, fallback: int) -> int:
+     dims = params.get("dims")
+     if dims:
+         return dims[-1]
+     return fallback
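A minimal sketch of the two helpers moved here from utils.common (feature objects only need a .name attribute for merge_features; the variable names are illustrative):

    from nextrec.utils.model import merge_features, get_mlp_output_dim

    merged = merge_features(user_features, item_features)                 # order-preserving, de-duplicated by feature name
    out_dim = get_mlp_output_dim({"dims": [256, 128, 64]}, fallback=32)   # -> 64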
nextrec/utils/optimizer.py CHANGED
@@ -8,25 +8,16 @@ Author: Yang Zhou, zyaztec@gmail.com
  import torch
  from typing import Iterable

-
  def get_optimizer(
      optimizer: str | torch.optim.Optimizer = "adam",
      params: Iterable[torch.nn.Parameter] | None = None,
      **optimizer_params
  ):
-     """
-     Get optimizer function based on optimizer name or instance.
-
-     Examples:
-         >>> optimizer = get_optimizer("adam", model.parameters(), lr=1e-3)
-         >>> optimizer = get_optimizer("sgd", model.parameters(), lr=0.01, momentum=0.9)
-     """
      if params is None:
          raise ValueError("params cannot be None. Please provide model parameters.")

      if 'lr' not in optimizer_params:
          optimizer_params['lr'] = 1e-3
-
      if isinstance(optimizer, str):
          opt_name = optimizer.lower()
          if opt_name == "adam":
@@ -42,27 +33,17 @@ def get_optimizer(
          else:
              raise NotImplementedError(f"Unsupported optimizer: {optimizer}")
          optimizer_fn = opt_class(params=params, **optimizer_params)
-
      elif isinstance(optimizer, torch.optim.Optimizer):
          optimizer_fn = optimizer
      else:
          raise TypeError(f"Invalid optimizer type: {type(optimizer)}")
-
      return optimizer_fn

-
  def get_scheduler(
      scheduler: str | torch.optim.lr_scheduler._LRScheduler | torch.optim.lr_scheduler.LRScheduler | type[torch.optim.lr_scheduler._LRScheduler] | type[torch.optim.lr_scheduler.LRScheduler] | None,
      optimizer,
      **scheduler_params
  ):
-     """
-     Get learning rate scheduler function.
-
-     Examples:
-         >>> scheduler = get_scheduler("step", optimizer, step_size=10, gamma=0.1)
-         >>> scheduler = get_scheduler("cosine", optimizer, T_max=100)
-     """
      if isinstance(scheduler, str):
          if scheduler == "step":
              scheduler_fn = torch.optim.lr_scheduler.StepLR(optimizer, **scheduler_params)
nextrec/utils/tensor.py ADDED
@@ -0,0 +1,61 @@
+ """
+ Tensor manipulation utilities for NextRec
+
+ Date: create on 03/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import torch
+ import numpy as np
+ from typing import Any
+
+
+ def to_tensor(
+     value: Any,
+     dtype: torch.dtype,
+     device: torch.device | str | None = None
+ ) -> torch.Tensor:
+     if value is None:
+         raise ValueError("[Tensor Utils Error] Cannot convert None to tensor.")
+     tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
+     if tensor.dtype != dtype:
+         tensor = tensor.to(dtype=dtype)
+
+     if device is not None:
+         target_device = device if isinstance(device, torch.device) else torch.device(device)
+         if tensor.device != target_device:
+             tensor = tensor.to(target_device)
+     return tensor
+
+ def stack_tensors(tensors: list[torch.Tensor], dim: int = 0) -> torch.Tensor:
+     if not tensors:
+         raise ValueError("[Tensor Utils Error] Cannot stack empty list of tensors.")
+     return torch.stack(tensors, dim=dim)
+
+ def concat_tensors(tensors: list[torch.Tensor], dim: int = 0) -> torch.Tensor:
+     if not tensors:
+         raise ValueError("[Tensor Utils Error] Cannot concatenate empty list of tensors.")
+     return torch.cat(tensors, dim=dim)
+
+ def pad_sequence_tensors(
+     tensors: list[torch.Tensor],
+     max_len: int | None = None,
+     padding_value: float = 0.0,
+     padding_side: str = 'right'
+ ) -> torch.Tensor:
+     if not tensors:
+         raise ValueError("[Tensor Utils Error] Cannot pad empty list of tensors.")
+     if max_len is None:
+         max_len = max(t.size(0) for t in tensors)
+     batch_size = len(tensors)
+     padded = torch.full((batch_size, max_len), padding_value, dtype=tensors[0].dtype, device=tensors[0].device)
+
+     for i, tensor in enumerate(tensors):
+         length = min(tensor.size(0), max_len)
+         if padding_side == 'right':
+             padded[i, :length] = tensor[:length]
+         elif padding_side == 'left':
+             padded[i, -length:] = tensor[:length]
+         else:
+             raise ValueError(f"[Tensor Utils Error] padding_side must be 'right' or 'left', got {padding_side}")
+     return padded
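A small sketch of to_tensor and pad_sequence_tensors as defined above (toy sequences of different lengths):

    import torch
    from nextrec.utils.tensor import to_tensor, pad_sequence_tensors

    x = to_tensor([1, 2, 3], dtype=torch.float32, device="cpu")   # tensor([1., 2., 3.])
    padded = pad_sequence_tensors([torch.tensor([1, 2, 3]), torch.tensor([4])], padding_value=0)
    # -> tensor([[1, 2, 3], [4, 0, 0]])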
{nextrec-0.3.4.dist-info → nextrec-0.3.5.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nextrec
- Version: 0.3.4
+ Version: 0.3.5
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
- ![Version](https://img.shields.io/badge/Version-0.3.4-orange.svg)
+ ![Version](https://img.shields.io/badge/Version-0.3.5-orange.svg)

  English | [中文文档](README_zh.md)

@@ -110,7 +110,7 @@ To dive deeper, Jupyter notebooks are available:
  - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
  - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)

- > Current version [0.3.4]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+ > Current version [0.3.5]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.

  ## 5-Minute Quick Start

{nextrec-0.3.4.dist-info → nextrec-0.3.5.dist-info}/RECORD RENAMED
@@ -1,18 +1,20 @@
  nextrec/__init__.py,sha256=CvocnY2uBp0cjNkhrT6ogw0q2bN9s1GNp754FLO-7lo,1117
- nextrec/__version__.py,sha256=oYLGMpySamd16KLiaBTfRyrAS7_oyp-TOEHmzmeumwg,22
+ nextrec/__version__.py,sha256=ThnCuF3X7rsQSd5PAea_jfYA70ZmhLvkFcLBxBPwZnY,22
  nextrec/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nextrec/basic/activation.py,sha256=1qs9pq4hT3BUxIiYdYs57axMCm4-JyOBFQ6x7xkHTwM,2849
  nextrec/basic/callback.py,sha256=wwh0I2kKYyywCB-sG9eQXShlpXFJIo75qApJmnI5p6c,1036
- nextrec/basic/features.py,sha256=-RRRbEPU-SFI-GtppflW6O0bKShUsV-Hg_lTGpo3AIE,4262
+ nextrec/basic/features.py,sha256=DFwYjG13GYHOujS_CMKa7Qrux9faF7MQNoaoRDF_Eks,4263
  nextrec/basic/layers.py,sha256=zzEseKYVnMVs1Tg5EGrFimugId15jI6HumgzjFyRqgw,23127
  nextrec/basic/loggers.py,sha256=hh9tRMmaCTaJ_sfRHIlbcqd6BcpK63vpZ_21TFCiKLI,6148
  nextrec/basic/metrics.py,sha256=8-hMZJXU5L4F8GnToxMZey5dlBrtFyRtTuI_zoQCtIo,21579
- nextrec/basic/model.py,sha256=afnvicyxXMgWdvhrIUaoNnZ7S-QYRYr7fTY5bdM1u_s,68829
- nextrec/basic/session.py,sha256=oaATn-nzbJ9A6SGbMut9xLV_NSh9_1KmVDeNauS06Ps,4767
- nextrec/data/__init__.py,sha256=6WgXZafzzXcv5kuxKNi67O8BJZVl_P_HM2IZCDIIhPA,1052
- nextrec/data/data_utils.py,sha256=aOyja3Yu7O2c8eIeL3P8MyUlUR5EerOUT9UeF4ATq8o,10574
- nextrec/data/dataloader.py,sha256=2MLe69y0E1cTZyzMNgyLUCxa6lllGd1ntvwpXzxdX10,14199
- nextrec/data/preprocessor.py,sha256=lhigpjvkEqsjTRfbBBOjgGOxoPyOifwq2LoswgyIVqc,40488
+ nextrec/basic/model.py,sha256=THzpEb6uIRp4xNjAQz0Xdwsqbh3jewN97L5_Ps6qyeo,68902
+ nextrec/basic/session.py,sha256=o-O7QMDAGjPiRBZaiYDy629xppfpiGqCWXpPrC4Y-_c,4337
+ nextrec/data/__init__.py,sha256=XBEOUH4EbVgGjBgxPSw15nSR7vtB_1qCxge5Lt7uJ7o,1924
+ nextrec/data/batch_utils.py,sha256=6G-E85H-PqYJ20EYVLnC3MqC8xYrXzZ1XYe82MhRPck,2816
+ nextrec/data/data_processing.py,sha256=N3Uk4NsUCyLeoMDV1zeLmH-dP02I-cRWDo-vvQgLqjo,5006
+ nextrec/data/data_utils.py,sha256=-3xLPW3csOiGNmj0kzzpOkCxZyu09RNBgfPkwX7nDAc,1172
+ nextrec/data/dataloader.py,sha256=sXyUv8rRE7P2bsoTZebLBTLErPWBJw5OacZ106m9Unk,14288
+ nextrec/data/preprocessor.py,sha256=_A3eEc1MpUGDEpno1TToA-dyJ_k707Mr3GilTi_9j5I,40419
  nextrec/loss/__init__.py,sha256=mO5t417BneZ8Ysa51GyjDaffjWyjzFgPXIQrrggasaQ,827
  nextrec/loss/listwise.py,sha256=gxDbO1td5IeS28jKzdE35o1KAYBRdCYoMzyZzfNLhc0,5689
  nextrec/loss/loss_utils.py,sha256=uZ4m9ChLr-UgIc5Yxm1LjwXDDepApQ-Fas8njweZ9qg,2641
@@ -30,7 +32,7 @@ nextrec/models/match/youtube_dnn.py,sha256=Wa5JWrlIpMuBoyXpnBrdnm1nQ8ZO_XcR517zf
  nextrec/models/multi_task/esmm.py,sha256=Ho5UN2H9H9-ZYML6eqpBYTVdTO4Ja9AoYP5SSgsgQaw,6442
  nextrec/models/multi_task/mmoe.py,sha256=zfBAUoQijHCuat962dZI0MCAy8C6PZqZ-zOd16JznF8,7803
  nextrec/models/multi_task/ple.py,sha256=zNBea0sfJska36RVH1N9O92m7rPmbaWYqoPbnGoy1RE,11949
- nextrec/models/multi_task/poso.py,sha256=_yLiCkD3NhOZEOWx-jP4MJxSEdNCu3mqeo_XRt8CWts,16652
+ nextrec/models/multi_task/poso.py,sha256=_Pq-cl7HB1uQVO8HXreNeVpQso250ouxBNTsdTjyFos,16651
  nextrec/models/multi_task/share_bottom.py,sha256=kvrkXQSTDPEwwmBvXw3xryBm3gT8Uq4_Hb3TenwRj9w,5920
  nextrec/models/ranking/__init__.py,sha256=AY806x-2BtltQdlR4wu23-keL9YUe3An92OJshS4t9Y,472
  nextrec/models/ranking/afm.py,sha256=uFSUIv9d6NQkCiM2epmSdMy4kxjFuCRVbrZOv3nebGE,4539
@@ -46,12 +48,16 @@ nextrec/models/ranking/masknet.py,sha256=9K6XKcr8f8PcVhLfgFd8l4tq78lcclAQAXZKlVE
  nextrec/models/ranking/pnn.py,sha256=eEyBnALuzaNx27iGJ0ZqNcf0u7dKN8SiO03lkcv1hiE,4956
  nextrec/models/ranking/widedeep.py,sha256=AJPkoThUTSBGPNBjD-aiWsMH2hSiSnGLjIPy_2neNhc,5034
  nextrec/models/ranking/xdeepfm.py,sha256=wn6YnX78EyBzil7IRBcqyDqsnysERVJ5-lWGuRMCpxE,5681
- nextrec/utils/__init__.py,sha256=ciw6B9SXffjSb4cwco-WXpKSE7M9D6ILpLZ2oftwj6A,457
- nextrec/utils/common.py,sha256=NYXnBVtUCtm8epT2ZxJHn_m1SIBBI_PEjZ5VpL465ls,2009
+ nextrec/utils/__init__.py,sha256=lAVpHsGe_WgGf7R-K1wr0DeVLvskG0Bj1L12N6kEPwM,1810
+ nextrec/utils/device.py,sha256=nos-J5VTe2hyaqiZ7D8q1k8l1KwORQ0bISI485Jdqnw,1012
  nextrec/utils/embedding.py,sha256=yxYSdFx0cJITh3Gf-K4SdhwRtKGcI0jOsyBgZ0NLa_c,465
- nextrec/utils/initializer.py,sha256=ffYOs5QuIns_d_-5e40iNtg6s1ftgREJN-ueq_NbDQE,1647
- nextrec/utils/optimizer.py,sha256=EUjAGFPeyou_Cv-_2HRvjzut8y_qpAQudc8L2T0k8zw,2706
- nextrec-0.3.4.dist-info/METADATA,sha256=X5fo5gymQdPXLgM1N03E58uFSQyuQOmdbUp8vXvKl0g,16319
- nextrec-0.3.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- nextrec-0.3.4.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
- nextrec-0.3.4.dist-info/RECORD,,
+ nextrec/utils/feature.py,sha256=s0eMEuvbOsotjll7eSYjb0b-1cXnvVy1mSI1Syg_7n4,299
+ nextrec/utils/file.py,sha256=wxKvd1_U9ugFDP7EzLNG6-3PBInA0QhxoHzBWKfe_B8,2384
+ nextrec/utils/initializer.py,sha256=BkP6-vJdsc0A-8ya-AVEs7W24dPXyxIilNnckwXgPEc,1391
+ nextrec/utils/model.py,sha256=FB7QbatO0uEvghBEfByJtRS0waaBEB1UI0YzfA_2k04,535
+ nextrec/utils/optimizer.py,sha256=cVkDrEkxwig17UAEhL8p9v3iVNiXI8B067Yf_6LqUp8,2198
+ nextrec/utils/tensor.py,sha256=_RibR6BMPizhzRLVdnJqwUgzA0zpzkZuKfTrdSjbL60,2136
+ nextrec-0.3.5.dist-info/METADATA,sha256=uZAs7fg2m4UtVWWoxlqecC8a7KzfqSdQbVExo88L1kM,16319
+ nextrec-0.3.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ nextrec-0.3.5.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
+ nextrec-0.3.5.dist-info/RECORD,,
nextrec/utils/common.py DELETED
@@ -1,60 +0,0 @@
- import torch
- import platform
- from collections import OrderedDict
-
-
- def resolve_device() -> str:
-     """Select a usable device with graceful fallback."""
-     if torch.cuda.is_available():
-         return "cuda"
-     if torch.backends.mps.is_available():
-         mac_ver = platform.mac_ver()[0]
-         try:
-             major, minor = (int(x) for x in mac_ver.split(".")[:2])
-         except Exception:
-             major, minor = 0, 0
-         if major >= 14:
-             return "mps"
-     return "cpu"
-
-
- def normalize_to_list(value: str | list[str] | None) -> list[str]:
-     if value is None:
-         return []
-     if isinstance(value, str):
-         return [value]
-     return list(value)
-
-
- def merge_features(primary, secondary) -> list:
-     """
-     Merge two feature lists while preserving order and deduplicating by feature name.
-     Later duplicates are skipped.
-     """
-     merged: OrderedDict[str, object] = OrderedDict()
-     for feat in list(primary or []) + list(secondary or []):
-         merged.setdefault(feat.name, feat)
-     return list(merged.values())
-
- def get_mlp_output_dim(params: dict, fallback: int) -> int:
-     """
-     Get the output dimension of an MLP-like config.
-     If dims are provided, use the last dim; otherwise fall back to input dim.
-     """
-     dims = params.get("dims")
-     if dims:
-         return dims[-1]
-     return fallback
-
- def to_tensor(value, dtype: torch.dtype, device: torch.device | str | None = None) -> torch.Tensor:
-     """Convert any value to a tensor with the desired dtype/device."""
-     if value is None:
-         raise ValueError("[Tensor Utils Error] Cannot convert None to tensor.")
-     tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
-     if tensor.dtype != dtype:
-         tensor = tensor.to(dtype=dtype)
-     if device is not None:
-         target_device = device if isinstance(device, torch.device) else torch.device(device)
-         if tensor.device != target_device:
-             tensor = tensor.to(target_device)
-     return tensor
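Note on the removal above: code that previously imported from nextrec.utils.common needs to switch to the split modules in 0.3.5, roughly as follows (all target paths appear elsewhere in this diff):

    # before (0.3.4)
    # from nextrec.utils.common import resolve_device, to_tensor, merge_features, normalize_to_list
    # after (0.3.5)
    from nextrec.utils.device import resolve_device
    from nextrec.utils.tensor import to_tensor
    from nextrec.utils.model import merge_features
    from nextrec.utils.feature import normalize_to_list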