PyPI - nshtrainer - Versions diffs - 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

nshtrainer 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (27)

nshtrainer/__init__.py +1 -17
nshtrainer/callbacks/__init__.py +3 -2
nshtrainer/callbacks/base.py +3 -4
nshtrainer/config.py +3 -288
nshtrainer/lr_scheduler/__init__.py +3 -2
nshtrainer/lr_scheduler/_base.py +3 -6
nshtrainer/lr_scheduler/linear_warmup_cosine.py +5 -5
nshtrainer/lr_scheduler/reduce_lr_on_plateau.py +5 -4
nshtrainer/model/__init__.py +0 -4
nshtrainer/model/base.py +9 -71
nshtrainer/model/config.py +39 -141
nshtrainer/nn/nonlinearity.py +3 -4
nshtrainer/optimizer.py +3 -7
nshtrainer/runner.py +18 -8
nshtrainer/trainer/signal_connector.py +22 -11
nshtrainer/trainer/trainer.py +1 -1
nshtrainer/typecheck.py +1 -0
{nshtrainer-0.1.0.dist-info → nshtrainer-0.2.0.dist-info}/METADATA +13 -2
{nshtrainer-0.1.0.dist-info → nshtrainer-0.2.0.dist-info}/RECORD +20 -27
nshtrainer/_submit/print_environment_info.py +0 -31
nshtrainer/_submit/session/_output.py +0 -12
nshtrainer/_submit/session/_script.py +0 -109
nshtrainer/_submit/session/lsf.py +0 -467
nshtrainer/_submit/session/slurm.py +0 -573
nshtrainer/_submit/session/unified.py +0 -350
nshtrainer/util/singleton.py +0 -89
{nshtrainer-0.1.0.dist-info → nshtrainer-0.2.0.dist-info}/WHEEL +0 -0

nshtrainer/_submit/session/unified.py DELETED Viewed

@@ -1,350 +0,0 @@
-import copy
-import logging
-import os
-import signal
-import subprocess
-from collections.abc import Callable, Mapping, Sequence
-from datetime import timedelta
-from pathlib import Path
-from typing import Any, Literal
-from typing_extensions import (
-    TypeAlias,
-    TypedDict,
-    TypeVar,
-    TypeVarTuple,
-    Unpack,
-    assert_never,
-)
-from . import lsf, slurm
-from ._output import SubmitOutput
-TArgs = TypeVarTuple("TArgs")
-_Path: TypeAlias = str | Path | os.PathLike
-log = logging.getLogger(__name__)
-class GenericJobKwargs(TypedDict, total=False):
-    name: str
-    """The name of the job."""
-    partition: str | Sequence[str]
-    """The partition or queue to submit the job to. Same as `queue`."""
-    queue: str | Sequence[str]
-    """The queue to submit the job to. Same as `partition`."""
-    qos: str
-    """
-    The quality of service to submit the job to.
-    This corresponds to the "--qos" option in sbatch (only for Slurm).
-    """
-    account: str
-    """The account (or project) to charge the job to. Same as `project`."""
-    project: str
-    """The project (or account) to charge the job to. Same as `account`."""
-    output_file: _Path
-    """
-    The file to write the job output to.
-    This corresponds to the "-o" option in bsub. If not specified, the output will be written to the default output file.
-    """
-    error_file: _Path
-    """
-    The file to write the job errors to.
-    This corresponds to the "-e" option in bsub. If not specified, the errors will be written to the default error file.
-    """
-    nodes: int
-    """The number of nodes to request."""
-    tasks_per_node: int
-    """The number of tasks to request per node."""
-    cpus_per_task: int
-    """The number of CPUs to request per task."""
-    gpus_per_task: int
-    """The number of GPUs to request per task."""
-    memory_mb: int
-    """The maximum memory for the job in MB."""
-    walltime: timedelta
-    """The maximum walltime for the job."""
-    email: str
-    """The email address to send notifications to."""
-    notifications: set[Literal["begin", "end"]]
-    """The notifications to send via email."""
-    setup_commands: Sequence[str]
-    """
-    The setup commands to run before the job.
-    These commands will be executed prior to everything else in the job script.
-    """
-    environment: Mapping[str, str]
-    """
-    The environment variables to set for the job.
-    These variables will be set prior to executing any commands in the job script.
-    """
-    command_prefix: str
-    """
-    A command to prefix the job command with.
-    This is used to add commands like `srun` or `jsrun` to the job command.
-    """
-    constraint: str | Sequence[str]
-    """
-    The constraint to request for the job. For SLRUM, this corresponds to the `--constraint` option. For LSF, this is unused.
-    """
-    signal: signal.Signals
-    """The signal that will be sent to the job when it is time to stop it."""
-    command_template: str
-    """
-    The template for the command to execute the helper script.
-    Default: `bash {script}`.
-    """
-    requeue_on_preempt: bool
-    """
-    Whether to requeue the job if it is preempted.
-    This corresponds to the "--requeue" option in sbatch (only for Slurm).
-    """
-    slurm_options: slurm.SlurmJobKwargs
-    """Additional keyword arguments for Slurm jobs."""
-    lsf_options: lsf.LSFJobKwargs
-    """Additional keyword arguments for LSF jobs."""
-Scheduler: TypeAlias = Literal["slurm", "lsf"]
-T = TypeVar("T", infer_variance=True)
-def _one_of(*fns: Callable[[], T | None]) -> T | None:
-    values = [value for fn in fns if (value := fn()) is not None]
-    # Only one (or zero) value should be set. If not, raise an error.
-    if len(set(values)) > 1:
-        raise ValueError(f"Multiple values set: {values}")
-    return next((value for value in values if value is not None), None)
-def _to_slurm(kwargs: GenericJobKwargs) -> slurm.SlurmJobKwargs:
-    slurm_kwargs: slurm.SlurmJobKwargs = {}
-    if (name := kwargs.get("name")) is not None:
-        slurm_kwargs["name"] = name
-    if (
-        account := _one_of(
-            lambda: kwargs.get("account"),
-            lambda: kwargs.get("project"),
-        )
-    ) is not None:
-        slurm_kwargs["account"] = account
-    if (
-        partition := _one_of(
-            lambda: kwargs.get("partition"),
-            lambda: kwargs.get("queue"),
-        )
-    ) is not None:
-        slurm_kwargs["partition"] = partition
-    if (qos := kwargs.get("qos")) is not None:
-        slurm_kwargs["qos"] = qos
-    if (output_file := kwargs.get("output_file")) is not None:
-        slurm_kwargs["output_file"] = output_file
-    if (error_file := kwargs.get("error_file")) is not None:
-        slurm_kwargs["error_file"] = error_file
-    if (walltime := kwargs.get("walltime")) is not None:
-        slurm_kwargs["time"] = walltime
-    if (memory_mb := kwargs.get("memory_mb")) is not None:
-        slurm_kwargs["memory_mb"] = memory_mb
-    if (nodes := kwargs.get("nodes")) is not None:
-        slurm_kwargs["nodes"] = nodes
-    if (tasks_per_node := kwargs.get("tasks_per_node")) is not None:
-        slurm_kwargs["ntasks_per_node"] = tasks_per_node
-    if (cpus_per_task := kwargs.get("cpus_per_task")) is not None:
-        slurm_kwargs["cpus_per_task"] = cpus_per_task
-    if (gpus_per_task := kwargs.get("gpus_per_task")) is not None:
-        slurm_kwargs["gpus_per_task"] = gpus_per_task
-    if (constraint := kwargs.get("constraint")) is not None:
-        slurm_kwargs["constraint"] = constraint
-    if (signal := kwargs.get("signal")) is not None:
-        slurm_kwargs["signal"] = signal
-    if (email := kwargs.get("email")) is not None:
-        slurm_kwargs["mail_user"] = email
-    if (notifications := kwargs.get("notifications")) is not None:
-        mail_type: list[slurm.MailType] = []
-        for notification in notifications:
-            match notification:
-                case "begin":
-                    mail_type.append("BEGIN")
-                case "end":
-                    mail_type.append("END")
-                case _:
-                    raise ValueError(f"Unknown notification type: {notification}")
-        slurm_kwargs["mail_type"] = mail_type
-    if (setup_commands := kwargs.get("setup_commands")) is not None:
-        slurm_kwargs["setup_commands"] = setup_commands
-    if (environment := kwargs.get("environment")) is not None:
-        slurm_kwargs["environment"] = environment
-    if (command_prefix := kwargs.get("command_prefix")) is not None:
-        slurm_kwargs["command_prefix"] = command_prefix
-    if (requeue_on_preempt := kwargs.get("requeue_on_preempt")) is not None:
-        slurm_kwargs["requeue"] = requeue_on_preempt
-    if (additional_kwargs := kwargs.get("slurm_options")) is not None:
-        slurm_kwargs.update(additional_kwargs)
-    return slurm_kwargs
-def _to_lsf(kwargs: GenericJobKwargs) -> lsf.LSFJobKwargs:
-    lsf_kwargs: lsf.LSFJobKwargs = {}
-    if (name := kwargs.get("name")) is not None:
-        lsf_kwargs["name"] = name
-    if (
-        account := _one_of(
-            lambda: kwargs.get("account"),
-            lambda: kwargs.get("project"),
-        )
-    ) is not None:
-        lsf_kwargs["project"] = account
-    if (
-        partition := _one_of(
-            lambda: kwargs.get("partition"),
-            lambda: kwargs.get("queue"),
-        )
-    ) is not None:
-        lsf_kwargs["queue"] = partition
-    if (output_file := kwargs.get("output_file")) is not None:
-        lsf_kwargs["output_file"] = output_file
-    if (error_file := kwargs.get("error_file")) is not None:
-        lsf_kwargs["error_file"] = error_file
-    if (walltime := kwargs.get("walltime")) is not None:
-        lsf_kwargs["walltime"] = walltime
-    if (memory_mb := kwargs.get("memory_mb")) is not None:
-        lsf_kwargs["memory_mb"] = memory_mb
-    if (nodes := kwargs.get("nodes")) is not None:
-        lsf_kwargs["nodes"] = nodes
-    if (tasks_per_node := kwargs.get("tasks_per_node")) is not None:
-        lsf_kwargs["rs_per_node"] = tasks_per_node
-    if (cpus_per_task := kwargs.get("cpus_per_task")) is not None:
-        lsf_kwargs["cpus_per_rs"] = cpus_per_task
-    if (gpus_per_task := kwargs.get("gpus_per_task")) is not None:
-        lsf_kwargs["gpus_per_rs"] = gpus_per_task
-    if (constraint := kwargs.get("constraint")) is not None:
-        log.warning(f'LSF does not support constraints, ignoring "{constraint=}".')
-    if (email := kwargs.get("email")) is not None:
-        lsf_kwargs["email"] = email
-    if (notifications := kwargs.get("notifications")) is not None:
-        if "begin" in notifications:
-            lsf_kwargs["notify_begin"] = True
-        if "end" in notifications:
-            lsf_kwargs["notify_end"] = True
-    if (setup_commands := kwargs.get("setup_commands")) is not None:
-        lsf_kwargs["setup_commands"] = setup_commands
-    if (environment := kwargs.get("environment")) is not None:
-        lsf_kwargs["environment"] = environment
-    if (command_prefix := kwargs.get("command_prefix")) is not None:
-        lsf_kwargs["command_prefix"] = command_prefix
-    if (signal := kwargs.get("signal")) is not None:
-        lsf_kwargs["signal"] = signal
-    if (requeue_on_preempt := kwargs.get("requeue_on_preempt")) is not None:
-        log.warning(
-            f'LSF does not support requeueing, ignoring "{requeue_on_preempt=}".'
-        )
-    if (additional_kwargs := kwargs.get("lsf_options")) is not None:
-        lsf_kwargs.update(additional_kwargs)
-    return lsf_kwargs
-def validate_kwargs(scheduler: Scheduler, kwargs: GenericJobKwargs) -> None:
-    match scheduler:
-        case "slurm":
-            _to_slurm(copy.deepcopy(kwargs))
-        case "lsf":
-            _to_lsf(copy.deepcopy(kwargs))
-        case _:
-            assert_never(scheduler)
-def to_array_batch_script(
-    scheduler: Scheduler,
-    dest: Path,
-    callable: Callable[[Unpack[TArgs]], Any],
-    args_list: Sequence[tuple[Unpack[TArgs]]],
-    /,
-    job_index_variable: str | None = None,
-    print_environment_info: bool = False,
-    python_command_prefix: str | None = None,
-    **kwargs: Unpack[GenericJobKwargs],
-) -> SubmitOutput:
-    job_index_variable_kwargs = {}
-    if job_index_variable is not None:
-        job_index_variable_kwargs["job_index_variable"] = job_index_variable
-    match scheduler:
-        case "slurm":
-            slurm_kwargs = _to_slurm(kwargs)
-            return slurm.to_array_batch_script(
-                dest,
-                callable,
-                args_list,
-                **job_index_variable_kwargs,
-                print_environment_info=print_environment_info,
-                python_command_prefix=python_command_prefix,
-                **slurm_kwargs,
-            )
-        case "lsf":
-            lsf_kwargs = _to_lsf(kwargs)
-            return lsf.to_array_batch_script(
-                dest,
-                callable,
-                args_list,
-                **job_index_variable_kwargs,
-                print_environment_info=print_environment_info,
-                python_command_prefix=python_command_prefix,
-                **lsf_kwargs,
-            )
-        case _:
-            assert_never(scheduler)
-def infer_current_scheduler() -> Scheduler:
-    # First, we check for `bsub` as it's much less common than `sbatch`.
-    try:
-        subprocess.check_output(["bsub", "-V"])
-        return "lsf"
-    except BaseException:
-        pass
-    # Next, we check for `sbatch` as it's the most common scheduler.
-    try:
-        subprocess.check_output(["sbatch", "--version"])
-        return "slurm"
-    except BaseException:
-        pass
-    raise RuntimeError("Could not determine the current scheduler.")

nshtrainer/util/singleton.py DELETED Viewed

@@ -1,89 +0,0 @@
-from logging import getLogger
-from typing import Any
-from typing_extensions import Self, TypeVar, override
-log = getLogger(__name__)
-class Singleton:
-    singleton_key = "_singleton_instance"
-    @classmethod
-    def get(cls) -> Self | None:
-        return getattr(cls, cls.singleton_key, None)
-    @classmethod
-    def set(cls, instance: Self) -> None:
-        if cls.get() is not None:
-            log.warning(f"{cls.__qualname__} instance is already set")
-        setattr(cls, cls.singleton_key, instance)
-    @classmethod
-    def reset(cls) -> None:
-        if cls.get() is not None:
-            delattr(cls, cls.singleton_key)
-    @classmethod
-    def register(cls, instance: Self) -> None:
-        cls.set(instance)
-    def register_self(self):
-        self.register(self)
-    @classmethod
-    def instance(cls) -> Self:
-        instance = cls.get()
-        if instance is None:
-            raise RuntimeError(f"{cls.__qualname__} instance is not set")
-        return instance
-    @override
-    def __init_subclass__(cls, *args, **kwargs) -> None:
-        super().__init_subclass__(*args, **kwargs)
-        cls.reset()
-T = TypeVar("T", infer_variance=True)
-class Registry:
-    _registry: dict[type, Any] = {}
-    @staticmethod
-    def register(cls_: type[T], instance: T):
-        if not isinstance(instance, cls_):
-            raise ValueError(f"{instance} is not an instance of {cls_.__qualname__}")
-        if cls_ in Registry._registry:
-            raise ValueError(f"{cls_.__qualname__} is already registered")
-        Registry._registry[cls_] = instance
-    @staticmethod
-    def try_get(cls_: type[T]) -> T | None:
-        return Registry._registry.get(cls_)
-    @staticmethod
-    def get(cls_: type[T]) -> T:
-        instance = Registry.try_get(cls_)
-        if instance is None:
-            raise ValueError(f"{cls_.__qualname__} is not registered")
-        return instance
-    @staticmethod
-    def instance(cls_: type[T]) -> T:
-        return Registry.get(cls_)
-    @staticmethod
-    def reset(cls_: type[T]):
-        if cls_ in Registry._registry:
-            del Registry._registry[cls_]
-    @staticmethod
-    def reset_all():
-        Registry._registry.clear()

{nshtrainer-0.1.0.dist-info → nshtrainer-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

nshtrainer 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

nshtrainer 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl