libinephany 0.15.0__tar.gz → 0.15.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- libinephany-0.15.2/CODE_VERSION.cfg +1 -0
- {libinephany-0.15.0/libinephany.egg-info → libinephany-0.15.2}/PKG-INFO +1 -1
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observation_utils.py +9 -2
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/global_observers.py +1 -1
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/statistic_trackers.py +26 -29
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/torch_distributed_utils.py +22 -1
- {libinephany-0.15.0 → libinephany-0.15.2/libinephany.egg-info}/PKG-INFO +1 -1
- libinephany-0.15.0/CODE_VERSION.cfg +0 -1
- {libinephany-0.15.0 → libinephany-0.15.2}/LICENSE +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/MANIFEST.in +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/README.md +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/aws/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/aws/s3_functions.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observer_pipeline.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/base_observers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/local_observers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/observer_containers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/pipeline_coordinator.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/post_processors/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/post_processors/postprocessors.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/statistic_manager.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/hyperparameter_configs.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/observer_config.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/inner_task_profile.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/tensor_statistics.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/states/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/states/hyperparameter_states.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/agent_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/asyncio_worker.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/backend_statuses.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/constants.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/directory_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/dropout_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/enums.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/error_severities.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/exceptions.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/import_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/optim_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/random_seeds.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/samplers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/standardizers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/torch_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/transforms.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/utils/typing.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/web_apps/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/web_apps/error_logger.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany/web_apps/web_app_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany.egg-info/SOURCES.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany.egg-info/dependency_links.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany.egg-info/requires.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/libinephany.egg-info/top_level.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/pyproject.toml +0 -0
- {libinephany-0.15.0 → libinephany-0.15.2}/setup.cfg +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.15.2
|
@@ -14,7 +14,7 @@ import torch
|
|
14
14
|
import torch.optim as optim
|
15
15
|
|
16
16
|
from libinephany.pydantic_models.schemas.tensor_statistics import TensorStatistics
|
17
|
-
from libinephany.utils import optim_utils
|
17
|
+
from libinephany.utils import optim_utils, torch_distributed_utils
|
18
18
|
|
19
19
|
# ======================================================================================================================
|
20
20
|
#
|
@@ -173,7 +173,14 @@ def tensor_on_local_rank(tensor: torch.Tensor | None) -> bool:
|
|
173
173
|
:return: Whether the tensor is owned by the local rank.
|
174
174
|
"""
|
175
175
|
|
176
|
-
|
176
|
+
valid_tensor = tensor is not None and tensor.grad is not None and tensor.numel() > 0
|
177
|
+
|
178
|
+
if valid_tensor and tensor.is_cuda:
|
179
|
+
local_rank = torch_distributed_utils.get_local_rank()
|
180
|
+
|
181
|
+
return tensor.device.index == local_rank
|
182
|
+
|
183
|
+
return valid_tensor
|
177
184
|
|
178
185
|
|
179
186
|
def form_update_tensor(
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/global_observers.py
RENAMED
@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):
|
|
36
36
|
|
37
37
|
super().__init__(**kwargs)
|
38
38
|
|
39
|
-
force_skip = ["samples"]
|
39
|
+
force_skip = ["samples", "gradient_accumulation"]
|
40
40
|
skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
|
41
41
|
self.skip_hparams = [] if skip_hparams is None else skip_hparams
|
42
42
|
self.pad_with = pad_with
|
@@ -193,13 +193,14 @@ class Statistic(ABC):
|
|
193
193
|
Processes the tensor cache to build a TensorStatistic model.
|
194
194
|
"""
|
195
195
|
|
196
|
-
|
197
|
-
|
196
|
+
if self._tensor_cache:
|
197
|
+
concatenated = torch.cat(self._tensor_cache)
|
198
|
+
self._tensor_cache = []
|
198
199
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
200
|
+
statistics = TensorStatistics.build(
|
201
|
+
tensor=concatenated, skip_statistics=self.skip_statistics, sample_percentage=self.downsample_percent
|
202
|
+
)
|
203
|
+
self._data.append(statistics) # type: ignore
|
203
204
|
|
204
205
|
@staticmethod
|
205
206
|
@final
|
@@ -213,10 +214,10 @@ class Statistic(ABC):
|
|
213
214
|
|
214
215
|
if torch_distributed_utils.is_scheduler_master_rank():
|
215
216
|
if isinstance(statistic, torch.Tensor):
|
216
|
-
shape = statistic.shape
|
217
|
+
shape = statistic.view(-1).shape
|
217
218
|
|
218
219
|
elif isinstance(statistic, TensorStatistics):
|
219
|
-
shape = statistic.to_tensor().shape
|
220
|
+
shape = statistic.to_tensor().view(-1).shape
|
220
221
|
|
221
222
|
elif statistic is not None:
|
222
223
|
shape = torch.tensor([statistic]).shape
|
@@ -239,23 +240,21 @@ class Statistic(ABC):
|
|
239
240
|
if not torch_distributed_utils.is_distributed():
|
240
241
|
return statistic
|
241
242
|
|
242
|
-
|
243
|
-
shape = self._determine_reduction_shape(statistic=statistic)
|
244
|
-
|
245
|
-
if shape is None:
|
246
|
-
return statistic
|
243
|
+
shape = self._determine_reduction_shape(statistic=statistic)
|
247
244
|
|
248
|
-
|
245
|
+
if statistic is None:
|
246
|
+
to_reduce = torch.zeros(shape, dtype=torch.float64)
|
249
247
|
|
250
248
|
elif isinstance(statistic, torch.Tensor):
|
251
|
-
to_reduce = statistic.clone()
|
249
|
+
to_reduce = statistic.clone().to(torch.float64).view(-1)
|
252
250
|
|
253
251
|
elif isinstance(statistic, TensorStatistics):
|
254
|
-
to_reduce = statistic.to_tensor()
|
252
|
+
to_reduce = statistic.to_tensor().to(torch.float64).view(-1)
|
255
253
|
|
256
254
|
else:
|
257
|
-
to_reduce = torch.tensor([statistic])
|
255
|
+
to_reduce = torch.tensor([statistic], dtype=torch.float64)
|
258
256
|
|
257
|
+
to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
|
259
258
|
dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)
|
260
259
|
|
261
260
|
if not torch_distributed_utils.is_scheduler_master_rank():
|
@@ -288,23 +287,21 @@ class Statistic(ABC):
|
|
288
287
|
statistic = self._gather(
|
289
288
|
optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
|
290
289
|
)
|
291
|
-
statistic = self._distributed_reduce(statistic=statistic)
|
292
290
|
|
293
|
-
|
294
|
-
return
|
291
|
+
statistic = self._distributed_reduce(statistic=statistic)
|
295
292
|
|
296
|
-
if
|
297
|
-
statistic
|
298
|
-
|
293
|
+
if torch_distributed_utils.is_scheduler_master_rank():
|
294
|
+
if isinstance(statistic, torch.Tensor):
|
295
|
+
statistic = statistic.view(-1)
|
296
|
+
self._tensor_cache.append(statistic)
|
299
297
|
|
300
|
-
|
301
|
-
|
298
|
+
if len(self._tensor_cache) >= self.max_cache_size:
|
299
|
+
self._process_tensor_cache()
|
302
300
|
|
303
|
-
|
304
|
-
|
301
|
+
elif statistic is not None:
|
302
|
+
self._data.append(statistic) # type: ignore
|
305
303
|
|
306
|
-
|
307
|
-
self._sample_number += 1
|
304
|
+
self._sample_number += 1
|
308
305
|
|
309
306
|
@final
|
310
307
|
def fetch(self) -> TensorStatistics | float | None:
|
@@ -4,8 +4,10 @@
|
|
4
4
|
#
|
5
5
|
# ======================================================================================================================
|
6
6
|
|
7
|
+
import os
|
7
8
|
from typing import Any
|
8
9
|
|
10
|
+
import torch
|
9
11
|
import torch.distributed as dist
|
10
12
|
|
11
13
|
# ======================================================================================================================
|
@@ -14,7 +16,11 @@ import torch.distributed as dist
|
|
14
16
|
#
|
15
17
|
# ======================================================================================================================
|
16
18
|
|
19
|
+
CUDA = "cuda"
|
20
|
+
CPU = "cpu"
|
21
|
+
CUDA_PREFIX = f"{CUDA}:"
|
17
22
|
MASTER_SCHEDULER_RANK = 0
|
23
|
+
LOCAL_RANK = "LOCAL_RANK"
|
18
24
|
|
19
25
|
# ======================================================================================================================
|
20
26
|
#
|
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
|
|
48
54
|
:return: Distributed computing rank of this process.
|
49
55
|
"""
|
50
56
|
|
51
|
-
|
57
|
+
if not is_distributed():
|
58
|
+
return MASTER_SCHEDULER_RANK
|
59
|
+
|
60
|
+
return dist.get_rank()
|
52
61
|
|
53
62
|
|
54
63
|
def is_scheduler_master_rank() -> bool:
|
@@ -83,3 +92,15 @@ def barrier() -> None:
|
|
83
92
|
|
84
93
|
if is_distributed():
|
85
94
|
dist.barrier()
|
95
|
+
|
96
|
+
|
97
|
+
def get_local_device() -> torch.device:
|
98
|
+
"""
|
99
|
+
:return: Local device of the current rank.
|
100
|
+
"""
|
101
|
+
|
102
|
+
if not is_distributed():
|
103
|
+
return torch.device(CUDA if torch.cuda.is_available() else CPU)
|
104
|
+
|
105
|
+
local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
|
106
|
+
return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)
|
@@ -1 +0,0 @@
|
|
1
|
-
0.15.0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/base_observers.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/local_observers.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/observers/observer_containers.py
RENAMED
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/post_processors/__init__.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/observations/post_processors/postprocessors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/observer_config.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/configs/outer_model_config.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/inner_task_profile.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/observation_models.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/request_schemas.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/response_schemas.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.2}/libinephany/pydantic_models/schemas/tensor_statistics.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|