PyPI - libinephany - Versions diffs - 0.15.0__tar.gz → 0.15.1__tar.gz - Mend

libinephany 0.15.0.tar.gz → 0.15.1.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

libinephany-0.15.1/CODE_VERSION.cfg ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.15.1

{libinephany-0.15.0/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libinephany
-Version: 0.15.0
+Version: 0.15.1
 Summary: Inephany library containing code commonly used by multiple subpackages.
 Author-email: Inephany <info@inephany.com>
 License: Apache 2.0

{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py RENAMED Viewed

@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):
         super().__init__(**kwargs)
-        force_skip = ["samples"]
+        force_skip = ["samples", "gradient_accumulation"]
         skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
         self.skip_hparams = [] if skip_hparams is None else skip_hparams
         self.pad_with = pad_with

{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py RENAMED Viewed

@@ -213,10 +213,10 @@ class Statistic(ABC):
         if torch_distributed_utils.is_scheduler_master_rank():
             if isinstance(statistic, torch.Tensor):
-                shape = statistic.shape
+                shape = statistic.view(-1).shape
             elif isinstance(statistic, TensorStatistics):
-                shape = statistic.to_tensor().shape
+                shape = statistic.to_tensor().view(-1).shape
             elif statistic is not None:
                 shape = torch.tensor([statistic]).shape
@@ -239,23 +239,21 @@ class Statistic(ABC):
         if not torch_distributed_utils.is_distributed():
             return statistic
-        if statistic is None:
-            shape = self._determine_reduction_shape(statistic=statistic)
-            if shape is None:
-                return statistic
+        shape = self._determine_reduction_shape(statistic=statistic)
-            to_reduce = torch.zeros(shape)
+        if statistic is None:
+            to_reduce = torch.zeros(shape, dtype=torch.float64)
         elif isinstance(statistic, torch.Tensor):
-            to_reduce = statistic.clone()
+            to_reduce = statistic.clone().to(torch.float64).view(-1)
         elif isinstance(statistic, TensorStatistics):
-            to_reduce = statistic.to_tensor()
+            to_reduce = statistic.to_tensor().to(torch.float64).view(-1)
         else:
-            to_reduce = torch.tensor([statistic])
+            to_reduce = torch.tensor([statistic], dtype=torch.float64)
+        to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
         dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)
         if not torch_distributed_utils.is_scheduler_master_rank():
@@ -283,11 +281,13 @@ class Statistic(ABC):
         parameter_group = self._find_parameter_group(optimizer=optimizer)
         parameters = self._get_parameters(parameter_group=parameter_group)
+        self._sample_number += 1
         if self._sample_number % self.sample_frequency == 0:
             statistic = self._gather(
                 optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
             )
             statistic = self._distributed_reduce(statistic=statistic)
             if not torch_distributed_utils.is_scheduler_master_rank():
@@ -303,9 +303,6 @@ class Statistic(ABC):
             elif statistic is not None:
                 self._data.append(statistic)  # type: ignore
-        if torch_distributed_utils.is_scheduler_master_rank():
-            self._sample_number += 1
     @final
     def fetch(self) -> TensorStatistics | float | None:
         """

{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py RENAMED Viewed

@@ -4,8 +4,10 @@
 #
 # ======================================================================================================================
+import os
 from typing import Any
+import torch
 import torch.distributed as dist
 # ======================================================================================================================
@@ -14,7 +16,11 @@ import torch.distributed as dist
 #
 # ======================================================================================================================
+CUDA = "cuda"
+CPU = "cpu"
+CUDA_PREFIX = f"{CUDA}:"
 MASTER_SCHEDULER_RANK = 0
+LOCAL_RANK = "LOCAL_RANK"
 # ======================================================================================================================
 #
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
     :return: Distributed computing rank of this process.
     """
-    return dist.get_rank() if is_distributed() else MASTER_SCHEDULER_RANK
+    if not is_distributed():
+        return MASTER_SCHEDULER_RANK
+    return dist.get_rank()
 def is_scheduler_master_rank() -> bool:
@@ -83,3 +92,15 @@ def barrier() -> None:
     if is_distributed():
         dist.barrier()
+def get_local_device() -> torch.device:
+    """
+    :return: Local device of the current rank.
+    """
+    if not is_distributed():
+        return torch.device(CUDA if torch.cuda.is_available() else CPU)
+    local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
+    return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)

{libinephany-0.15.0 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libinephany
-Version: 0.15.0
+Version: 0.15.1
 Summary: Inephany library containing code commonly used by multiple subpackages.
 Author-email: Inephany <info@inephany.com>
 License: Apache 2.0