libinephany 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the contents of each version exactly as it appears in its public registry.
--- a/libinephany/observations/observation_utils.py
+++ b/libinephany/observations/observation_utils.py
@@ -14,7 +14,7 @@ import torch
 import torch.optim as optim
 
 from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
-from libinephany.utils import optim_utils
+from libinephany.utils import optim_utils, torch_distributed_utils
 
 # ======================================================================================================================
 #
@@ -173,7 +173,14 @@ def tensor_on_local_rank(tensor: torch.Tensor | None) -> bool:
     :return: Whether the tensor is owned by the local rank.
     """
 
-    return tensor is not None and tensor.grad is not None and tensor.numel() > 0
+    valid_tensor = tensor is not None and tensor.grad is not None and tensor.numel() > 0
+
+    if valid_tensor and tensor.is_cuda:
+        local_rank = torch_distributed_utils.get_local_rank()
+
+        return tensor.device.index == local_rank
+
+    return valid_tensor
 
 
 def form_update_tensor(
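
The change above tightens ownership semantics: a CUDA tensor now counts as local only when its device index matches this process's local rank, so a rank no longer claims tensors resident on another GPU. A minimal standalone sketch of the new predicate, with the get_local_rank helper stubbed out for illustration:

    import torch

    def _local_rank() -> int:
        # Stand-in for torch_distributed_utils.get_local_rank(): 0 in a
        # single-process run, the distributed rank otherwise.
        return 0

    def tensor_on_local_rank(tensor: torch.Tensor | None) -> bool:
        # A tensor is "owned" only if it exists, carries a gradient and is non-empty ...
        valid_tensor = tensor is not None and tensor.grad is not None and tensor.numel() > 0

        # ... and, when it sits on a GPU, only if that GPU belongs to this rank.
        if valid_tensor and tensor.is_cuda:
            return tensor.device.index == _local_rank()

        return valid_tensor
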
--- a/libinephany/observations/observers/global_observers.py
+++ b/libinephany/observations/observers/global_observers.py
@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):
 
         super().__init__(**kwargs)
 
-        force_skip = ["samples"]
+        force_skip = ["samples", "gradient_accumulation"]
         skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
         self.skip_hparams = [] if skip_hparams is None else skip_hparams
         self.pad_with = pad_with
--- a/libinephany/observations/statistic_trackers.py
+++ b/libinephany/observations/statistic_trackers.py
@@ -193,13 +193,14 @@ class Statistic(ABC):
         Processes the tensor cache to build a TensorStatistic model.
         """
 
-        concatenated = torch.cat(self._tensor_cache)
-        self._tensor_cache = []
+        if self._tensor_cache:
+            concatenated = torch.cat(self._tensor_cache)
+            self._tensor_cache = []
 
-        statistics = TensorStatistics.build(
-            tensor=concatenated, skip_statistics=self.skip_statistics, sample_percentage=self.downsample_percent
-        )
-        self._data.append(statistics)  # type: ignore
+            statistics = TensorStatistics.build(
+                tensor=concatenated, skip_statistics=self.skip_statistics, sample_percentage=self.downsample_percent
+            )
+            self._data.append(statistics)  # type: ignore
 
     @staticmethod
     @final
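
The new guard matters because torch.cat rejects an empty list, so flushing an empty cache would raise instead of being a no-op:

    import torch

    cache: list[torch.Tensor] = []

    # torch.cat([]) raises RuntimeError ("expected a non-empty list of
    # Tensors"), so the flush must be skipped when nothing was cached.
    if cache:
        concatenated = torch.cat(cache)
        cache = []
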
@@ -213,10 +214,10 @@ class Statistic(ABC):
 
         if torch_distributed_utils.is_scheduler_master_rank():
             if isinstance(statistic, torch.Tensor):
-                shape = statistic.shape
+                shape = statistic.view(-1).shape
 
             elif isinstance(statistic, TensorStatistics):
-                shape = statistic.to_tensor().shape
+                shape = statistic.to_tensor().view(-1).shape
 
             elif statistic is not None:
                 shape = torch.tensor([statistic]).shape
@@ -239,23 +240,21 @@ class Statistic(ABC):
         if not torch_distributed_utils.is_distributed():
             return statistic
 
-        if statistic is None:
-            shape = self._determine_reduction_shape(statistic=statistic)
-
-            if shape is None:
-                return statistic
+        shape = self._determine_reduction_shape(statistic=statistic)
 
-            to_reduce = torch.zeros(shape)
+        if statistic is None:
+            to_reduce = torch.zeros(shape, dtype=torch.float64)
 
         elif isinstance(statistic, torch.Tensor):
-            to_reduce = statistic.clone()
+            to_reduce = statistic.clone().to(torch.float64).view(-1)
 
         elif isinstance(statistic, TensorStatistics):
-            to_reduce = statistic.to_tensor()
+            to_reduce = statistic.to_tensor().to(torch.float64).view(-1)
 
         else:
-            to_reduce = torch.tensor([statistic])
+            to_reduce = torch.tensor([statistic], dtype=torch.float64)
 
+        to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
         dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)
 
         if not torch_distributed_utils.is_scheduler_master_rank():
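
Two things change in the reduction: every contribution is normalised to a flat float64 tensor (matching the flattened shapes from the previous hunk, so ranks holding no statistic contribute a zero tensor of identical shape), and the tensor is moved to the rank's local device before the collective call, since backends such as NCCL only reduce CUDA tensors resident on the calling process's GPU. A sketch of the same pattern; the helper name reduce_to_master and its signature are illustrative, not part of the library:

    import torch
    import torch.distributed as dist

    def reduce_to_master(value: float | None, shape: torch.Size, device: torch.device) -> torch.Tensor:
        # Every rank must contribute a tensor of identical shape and dtype;
        # ranks with nothing to report send zeros, which are neutral under SUM.
        if value is None:
            to_reduce = torch.zeros(shape, dtype=torch.float64)
        else:
            to_reduce = torch.tensor([value], dtype=torch.float64)

        # NCCL can only reduce CUDA tensors living on this process's GPU,
        # hence the move to the local device before the collective call.
        # Assumes dist.init_process_group() has already run.
        to_reduce = to_reduce.to(device)
        dist.reduce(to_reduce, dst=0, op=dist.ReduceOp.SUM)
        return to_reduce
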
@@ -288,23 +287,21 @@ class Statistic(ABC):
         statistic = self._gather(
             optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
         )
-        statistic = self._distributed_reduce(statistic=statistic)
 
-        if not torch_distributed_utils.is_scheduler_master_rank():
-            return
+        statistic = self._distributed_reduce(statistic=statistic)
 
-        if isinstance(statistic, torch.Tensor):
-            statistic = statistic.view(-1)
-            self._tensor_cache.append(statistic)
+        if torch_distributed_utils.is_scheduler_master_rank():
+            if isinstance(statistic, torch.Tensor):
+                statistic = statistic.view(-1)
+                self._tensor_cache.append(statistic)
 
-            if len(self._tensor_cache) >= self.max_cache_size:
-                self._process_tensor_cache()
+                if len(self._tensor_cache) >= self.max_cache_size:
+                    self._process_tensor_cache()
 
-        elif statistic is not None:
-            self._data.append(statistic)  # type: ignore
+            elif statistic is not None:
+                self._data.append(statistic)  # type: ignore
 
-        if torch_distributed_utils.is_scheduler_master_rank():
-            self._sample_number += 1
+            self._sample_number += 1
 
     @final
     def fetch(self) -> TensorStatistics | float | None:
--- a/libinephany/utils/torch_distributed_utils.py
+++ b/libinephany/utils/torch_distributed_utils.py
@@ -4,8 +4,10 @@
 #
 # ======================================================================================================================
 
+import os
 from typing import Any
 
+import torch
 import torch.distributed as dist
 
 # ======================================================================================================================
@@ -14,7 +16,11 @@ import torch.distributed as dist
 #
 # ======================================================================================================================
 
+CUDA = "cuda"
+CPU = "cpu"
+CUDA_PREFIX = f"{CUDA}:"
 MASTER_SCHEDULER_RANK = 0
+LOCAL_RANK = "LOCAL_RANK"
 
 # ======================================================================================================================
 #
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
     :return: Distributed computing rank of this process.
     """
 
-    return dist.get_rank() if is_distributed() else MASTER_SCHEDULER_RANK
+    if not is_distributed():
+        return MASTER_SCHEDULER_RANK
+
+    return dist.get_rank()
 
 
 def is_scheduler_master_rank() -> bool:
@@ -83,3 +92,15 @@ def barrier() -> None:
 
     if is_distributed():
         dist.barrier()
+
+
+def get_local_device() -> torch.device:
+    """
+    :return: Local device of the current rank.
+    """
+
+    if not is_distributed():
+        return torch.device(CUDA if torch.cuda.is_available() else CPU)
+
+    local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
+    return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)
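
The new get_local_device helper gives callers (such as the reduction in statistic_trackers.py) the correct target device. In a non-distributed run it falls back to plain "cuda" or "cpu"; in a distributed run it reads the LOCAL_RANK environment variable, which launchers such as torchrun set per worker to identify that worker's GPU within the node. A brief usage sketch:

    import torch

    from libinephany.utils import torch_distributed_utils

    # Single process: resolves to "cuda" (or "cpu" without a GPU).
    # Under torchrun with LOCAL_RANK=3: resolves to "cuda:3".
    device = torch_distributed_utils.get_local_device()
    to_reduce = torch.zeros(4, dtype=torch.float64).to(device)
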
--- a/libinephany-0.15.0.dist-info/METADATA
+++ b/libinephany-0.15.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libinephany
-Version: 0.15.0
+Version: 0.15.2
 Summary: Inephany library containing code commonly used by multiple subpackages.
 Author-email: Inephany <info@inephany.com>
 License: Apache 2.0
--- a/libinephany-0.15.0.dist-info/RECORD
+++ b/libinephany-0.15.2.dist-info/RECORD
@@ -2,14 +2,14 @@ libinephany/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 libinephany/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 libinephany/aws/s3_functions.py,sha256=W8u85A6tDloo4FlJvydJbVHCUq_m9i8KDGdnKzy-Xpg,1745
 libinephany/observations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-libinephany/observations/observation_utils.py,sha256=wb6EZiaEiPuOqN26zzuT1rHyehoKh-8818KXn8pHweI,8688
+libinephany/observations/observation_utils.py,sha256=pR7MM57KYYJxqRXsr9eMnhm7m_aGffH-dyFejnj2w_I,8899
 libinephany/observations/observer_pipeline.py,sha256=ZhONGXJQSgs2VJJn9d2F7ItkYqntvchl9-JTyxW9eU0,12146
 libinephany/observations/pipeline_coordinator.py,sha256=FrN3linKaC0pVE5uKjlh_0Fi8Mb1oK91NzH3Fq7PvyM,7420
 libinephany/observations/statistic_manager.py,sha256=LLg1zSxnJr2oQQepYla3qoUuRy10rsthr9jta4wEbnc,8956
-libinephany/observations/statistic_trackers.py,sha256=flkXquMHvY6YjnQAvRElsV5OUm7Ek_PhA1_fvtX-0oQ,30124
+libinephany/observations/statistic_trackers.py,sha256=PvuIqCkeaiAzCUJOYLOxM-Dl655HH1kQgiK4kjpEIyo,30236
 libinephany/observations/observers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 libinephany/observations/observers/base_observers.py,sha256=RkG5SW0b6Ooy0_oscRHxyB_YFNP7k8fxu37jBZElxIM,15418
-libinephany/observations/observers/global_observers.py,sha256=-BJJaYjQSO82qskIlY_iijd3Lk1Ei1d3Hg1fzmYUPSM,38659
+libinephany/observations/observers/global_observers.py,sha256=3TaiV2AxMOXfDq-kXMU3ZSo-rQENNCFhdWCJtpY99ok,38684
 libinephany/observations/observers/local_observers.py,sha256=EdivrylOcmxRsu4xiMwZqwmPX8Ru9-IRwoPk6En7qvw,37050
 libinephany/observations/observers/observer_containers.py,sha256=g73ScbRRVTNbGEBb-Nyk8AQwoDhKZaqTd6OYP8FIcOs,8771
 libinephany/observations/post_processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -43,15 +43,15 @@ libinephany/utils/optim_utils.py,sha256=-PLqsyuq4ZH3spBy_olNB3yuLwvhnLrCF0384elC
 libinephany/utils/random_seeds.py,sha256=eF-ErrMShu8mp9V_gXrB_iUxR-Lb-OtHypEEUQAGn2Y,1565
 libinephany/utils/samplers.py,sha256=uyVGAy5cm5bCyWMOuySJmzUc_vFuieO_3zydJciwdv4,12158
 libinephany/utils/standardizers.py,sha256=pG1K_XL4OR_NjVtT6Hjbln1dk1BtQdDuSK1PQTkA17Y,8014
-libinephany/utils/torch_distributed_utils.py,sha256=ygdVz-s7hMRoBJcZkNRBlF81MYnxoRJt8S0SAwq6SC4,2467
+libinephany/utils/torch_distributed_utils.py,sha256=UPMfhdZZwyHX_r3h55AAK4PcB-zFtjK37Z5aawAKNmE,2968
 libinephany/utils/torch_utils.py,sha256=o5TsqrXe6Id04P6SqB_avGBRZutbu6IBB61llAHQ_PY,2696
 libinephany/utils/transforms.py,sha256=Ca4pbCs_FbCpXb8M8oPxrP5QOqOAwGSdGpKzy5YUubc,3503
 libinephany/utils/typing.py,sha256=rGbaPO3MaUndsWiC_wHzReD_TOLYqb43i01pKN-j7Xs,624
 libinephany/web_apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 libinephany/web_apps/error_logger.py,sha256=gAQIaqerqP4ornXZwFF1cghjnd2mMZEt3aVrTuUCr34,16653
 libinephany/web_apps/web_app_utils.py,sha256=qiq_lasPipgN1RgRudPJc342kYci8O_4RqppxmIX8NY,4095
-libinephany-0.15.0.dist-info/licenses/LICENSE,sha256=pogfDoMBP07ehIOvWymuWIar8pg2YLUhqOHsJQU3wdc,9250
-libinephany-0.15.0.dist-info/METADATA,sha256=lU7SqV1ArMEAyuZ845Z1jAYxNUEYGfJ8Tl6Df6EwSpc,8354
-libinephany-0.15.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-libinephany-0.15.0.dist-info/top_level.txt,sha256=bYAOXQdJgIoLkO2Ui0kxe7pSYegS_e38u0dMscd7COQ,12
-libinephany-0.15.0.dist-info/RECORD,,
+libinephany-0.15.2.dist-info/licenses/LICENSE,sha256=pogfDoMBP07ehIOvWymuWIar8pg2YLUhqOHsJQU3wdc,9250
+libinephany-0.15.2.dist-info/METADATA,sha256=GU4yudoPYVfLXO5dLp8UNY2XMQMpSSdw8lP-ZZUaQy4,8354
+libinephany-0.15.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+libinephany-0.15.2.dist-info/top_level.txt,sha256=bYAOXQdJgIoLkO2Ui0kxe7pSYegS_e38u0dMscd7COQ,12
+libinephany-0.15.2.dist-info/RECORD,,