libinephany 0.15.0__tar.gz → 0.15.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- libinephany-0.15.1/CODE_VERSION.cfg +1 -0
- {libinephany-0.15.0/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO +1 -1
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py +1 -1
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py +11 -14
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py +22 -1
- {libinephany-0.15.0 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO +1 -1
- libinephany-0.15.0/CODE_VERSION.cfg +0 -1
- {libinephany-0.15.0 → libinephany-0.15.1}/LICENSE +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/MANIFEST.in +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/README.md +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/aws/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/aws/s3_functions.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observation_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observer_pipeline.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/base_observers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/local_observers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/observer_containers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/pipeline_coordinator.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/post_processors/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/post_processors/postprocessors.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/statistic_manager.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/hyperparameter_configs.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/observer_config.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/tensor_statistics.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/states/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/states/hyperparameter_states.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/agent_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/asyncio_worker.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/backend_statuses.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/constants.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/directory_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/dropout_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/enums.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/error_severities.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/exceptions.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/import_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/optim_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/random_seeds.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/samplers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/standardizers.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/torch_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/transforms.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/utils/typing.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/web_apps/__init__.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/web_apps/error_logger.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany/web_apps/web_app_utils.py +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany.egg-info/SOURCES.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany.egg-info/dependency_links.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany.egg-info/requires.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/libinephany.egg-info/top_level.txt +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/pyproject.toml +0 -0
- {libinephany-0.15.0 → libinephany-0.15.1}/setup.cfg +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
0.15.1
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py
RENAMED
@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):
|
|
36
36
|
|
37
37
|
super().__init__(**kwargs)
|
38
38
|
|
39
|
-
force_skip = ["samples"]
|
39
|
+
force_skip = ["samples", "gradient_accumulation"]
|
40
40
|
skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
|
41
41
|
self.skip_hparams = [] if skip_hparams is None else skip_hparams
|
42
42
|
self.pad_with = pad_with
|
@@ -213,10 +213,10 @@ class Statistic(ABC):
|
|
213
213
|
|
214
214
|
if torch_distributed_utils.is_scheduler_master_rank():
|
215
215
|
if isinstance(statistic, torch.Tensor):
|
216
|
-
shape = statistic.shape
|
216
|
+
shape = statistic.view(-1).shape
|
217
217
|
|
218
218
|
elif isinstance(statistic, TensorStatistics):
|
219
|
-
shape = statistic.to_tensor().shape
|
219
|
+
shape = statistic.to_tensor().view(-1).shape
|
220
220
|
|
221
221
|
elif statistic is not None:
|
222
222
|
shape = torch.tensor([statistic]).shape
|
@@ -239,23 +239,21 @@ class Statistic(ABC):
|
|
239
239
|
if not torch_distributed_utils.is_distributed():
|
240
240
|
return statistic
|
241
241
|
|
242
|
-
|
243
|
-
shape = self._determine_reduction_shape(statistic=statistic)
|
244
|
-
|
245
|
-
if shape is None:
|
246
|
-
return statistic
|
242
|
+
shape = self._determine_reduction_shape(statistic=statistic)
|
247
243
|
|
248
|
-
|
244
|
+
if statistic is None:
|
245
|
+
to_reduce = torch.zeros(shape, dtype=torch.float64)
|
249
246
|
|
250
247
|
elif isinstance(statistic, torch.Tensor):
|
251
|
-
to_reduce = statistic.clone()
|
248
|
+
to_reduce = statistic.clone().to(torch.float64).view(-1)
|
252
249
|
|
253
250
|
elif isinstance(statistic, TensorStatistics):
|
254
|
-
to_reduce = statistic.to_tensor()
|
251
|
+
to_reduce = statistic.to_tensor().to(torch.float64).view(-1)
|
255
252
|
|
256
253
|
else:
|
257
|
-
to_reduce = torch.tensor([statistic])
|
254
|
+
to_reduce = torch.tensor([statistic], dtype=torch.float64)
|
258
255
|
|
256
|
+
to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
|
259
257
|
dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)
|
260
258
|
|
261
259
|
if not torch_distributed_utils.is_scheduler_master_rank():
|
@@ -283,11 +281,13 @@ class Statistic(ABC):
|
|
283
281
|
|
284
282
|
parameter_group = self._find_parameter_group(optimizer=optimizer)
|
285
283
|
parameters = self._get_parameters(parameter_group=parameter_group)
|
284
|
+
self._sample_number += 1
|
286
285
|
|
287
286
|
if self._sample_number % self.sample_frequency == 0:
|
288
287
|
statistic = self._gather(
|
289
288
|
optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
|
290
289
|
)
|
290
|
+
|
291
291
|
statistic = self._distributed_reduce(statistic=statistic)
|
292
292
|
|
293
293
|
if not torch_distributed_utils.is_scheduler_master_rank():
|
@@ -303,9 +303,6 @@ class Statistic(ABC):
|
|
303
303
|
elif statistic is not None:
|
304
304
|
self._data.append(statistic) # type: ignore
|
305
305
|
|
306
|
-
if torch_distributed_utils.is_scheduler_master_rank():
|
307
|
-
self._sample_number += 1
|
308
|
-
|
309
306
|
@final
|
310
307
|
def fetch(self) -> TensorStatistics | float | None:
|
311
308
|
"""
|
@@ -4,8 +4,10 @@
|
|
4
4
|
#
|
5
5
|
# ======================================================================================================================
|
6
6
|
|
7
|
+
import os
|
7
8
|
from typing import Any
|
8
9
|
|
10
|
+
import torch
|
9
11
|
import torch.distributed as dist
|
10
12
|
|
11
13
|
# ======================================================================================================================
|
@@ -14,7 +16,11 @@ import torch.distributed as dist
|
|
14
16
|
#
|
15
17
|
# ======================================================================================================================
|
16
18
|
|
19
|
+
CUDA = "cuda"
|
20
|
+
CPU = "cpu"
|
21
|
+
CUDA_PREFIX = f"{CUDA}:"
|
17
22
|
MASTER_SCHEDULER_RANK = 0
|
23
|
+
LOCAL_RANK = "LOCAL_RANK"
|
18
24
|
|
19
25
|
# ======================================================================================================================
|
20
26
|
#
|
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
|
|
48
54
|
:return: Distributed computing rank of this process.
|
49
55
|
"""
|
50
56
|
|
51
|
-
|
57
|
+
if not is_distributed():
|
58
|
+
return MASTER_SCHEDULER_RANK
|
59
|
+
|
60
|
+
return dist.get_rank()
|
52
61
|
|
53
62
|
|
54
63
|
def is_scheduler_master_rank() -> bool:
|
@@ -83,3 +92,15 @@ def barrier() -> None:
|
|
83
92
|
|
84
93
|
if is_distributed():
|
85
94
|
dist.barrier()
|
95
|
+
|
96
|
+
|
97
|
+
def get_local_device() -> torch.device:
|
98
|
+
"""
|
99
|
+
:return: Local device of the current rank.
|
100
|
+
"""
|
101
|
+
|
102
|
+
if not is_distributed():
|
103
|
+
return torch.device(CUDA if torch.cuda.is_available() else CPU)
|
104
|
+
|
105
|
+
local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
|
106
|
+
return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)
|
@@ -1 +0,0 @@
|
|
1
|
-
0.15.0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/base_observers.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/local_observers.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/observers/observer_containers.py
RENAMED
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/post_processors/__init__.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/observations/post_processors/postprocessors.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/observer_config.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/configs/outer_model_config.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/observation_models.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/request_schemas.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/response_schemas.py
RENAMED
File without changes
|
{libinephany-0.15.0 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/tensor_statistics.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|