libinephany 0.14.1__tar.gz → 0.15.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. libinephany-0.15.1/CODE_VERSION.cfg +1 -0
  2. {libinephany-0.14.1/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO +1 -1
  3. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py +1 -1
  4. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py +11 -14
  5. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/hyperparameter_configs.py +25 -1
  6. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py +34 -0
  7. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/hyperparameter_states.py +12 -0
  8. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/constants.py +5 -0
  9. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/enums.py +2 -0
  10. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py +22 -1
  11. {libinephany-0.14.1 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO +1 -1
  12. libinephany-0.14.1/CODE_VERSION.cfg +0 -1
  13. {libinephany-0.14.1 → libinephany-0.15.1}/LICENSE +0 -0
  14. {libinephany-0.14.1 → libinephany-0.15.1}/MANIFEST.in +0 -0
  15. {libinephany-0.14.1 → libinephany-0.15.1}/README.md +0 -0
  16. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/__init__.py +0 -0
  17. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/aws/__init__.py +0 -0
  18. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/aws/s3_functions.py +0 -0
  19. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/__init__.py +0 -0
  20. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observation_utils.py +0 -0
  21. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observer_pipeline.py +0 -0
  22. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/__init__.py +0 -0
  23. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/base_observers.py +0 -0
  24. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/local_observers.py +0 -0
  25. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/observer_containers.py +0 -0
  26. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/pipeline_coordinator.py +0 -0
  27. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/post_processors/__init__.py +0 -0
  28. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/post_processors/postprocessors.py +0 -0
  29. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_manager.py +0 -0
  30. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/__init__.py +0 -0
  31. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/__init__.py +0 -0
  32. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/observer_config.py +0 -0
  33. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/outer_model_config.py +0 -0
  34. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/__init__.py +0 -0
  35. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/agent_info.py +0 -0
  36. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/observation_models.py +0 -0
  37. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/request_schemas.py +0 -0
  38. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/response_schemas.py +0 -0
  39. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/tensor_statistics.py +0 -0
  40. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/__init__.py +0 -0
  41. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/__init__.py +0 -0
  42. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/agent_utils.py +0 -0
  43. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/asyncio_worker.py +0 -0
  44. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/backend_statuses.py +0 -0
  45. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/directory_utils.py +0 -0
  46. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/dropout_utils.py +0 -0
  47. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/error_severities.py +0 -0
  48. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/exceptions.py +0 -0
  49. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/import_utils.py +0 -0
  50. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/optim_utils.py +0 -0
  51. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/random_seeds.py +0 -0
  52. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/samplers.py +0 -0
  53. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/standardizers.py +0 -0
  54. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_utils.py +0 -0
  55. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/transforms.py +0 -0
  56. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/typing.py +0 -0
  57. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/__init__.py +0 -0
  58. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/error_logger.py +0 -0
  59. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany/web_apps/web_app_utils.py +0 -0
  60. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/SOURCES.txt +0 -0
  61. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/dependency_links.txt +0 -0
  62. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/requires.txt +0 -0
  63. {libinephany-0.14.1 → libinephany-0.15.1}/libinephany.egg-info/top_level.txt +0 -0
  64. {libinephany-0.14.1 → libinephany-0.15.1}/pyproject.toml +0 -0
  65. {libinephany-0.14.1 → libinephany-0.15.1}/setup.cfg +0 -0

libinephany-0.15.1/CODE_VERSION.cfg
@@ -0,0 +1 @@
+ 0.15.1

{libinephany-0.14.1/libinephany.egg-info → libinephany-0.15.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.14.1
+ Version: 0.15.1
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/observers/global_observers.py
@@ -36,7 +36,7 @@ class InitialHyperparameters(GlobalObserver):

          super().__init__(**kwargs)

-         force_skip = ["samples"]
+         force_skip = ["samples", "gradient_accumulation"]
          skip_hparams = force_skip if skip_hparams is None else skip_hparams + force_skip
          self.skip_hparams = [] if skip_hparams is None else skip_hparams
          self.pad_with = pad_with
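
The only behavioural change here is that gradient_accumulation joins samples in the observer's force-skip list, so it is always excluded from the initial-hyperparameter observation regardless of what the caller passes. A tiny illustration of the merge above, with hypothetical caller values:

    force_skip = ["samples", "gradient_accumulation"]

    caller_skip = None  # caller skips nothing explicitly
    merged = force_skip if caller_skip is None else caller_skip + force_skip
    print(merged)  # ['samples', 'gradient_accumulation']

    caller_skip = ["dropout"]  # caller-supplied skips are kept as well
    merged = force_skip if caller_skip is None else caller_skip + force_skip
    print(merged)  # ['dropout', 'samples', 'gradient_accumulation']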

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/observations/statistic_trackers.py
@@ -213,10 +213,10 @@ class Statistic(ABC):

          if torch_distributed_utils.is_scheduler_master_rank():
              if isinstance(statistic, torch.Tensor):
-                 shape = statistic.shape
+                 shape = statistic.view(-1).shape

              elif isinstance(statistic, TensorStatistics):
-                 shape = statistic.to_tensor().shape
+                 shape = statistic.to_tensor().view(-1).shape

              elif statistic is not None:
                  shape = torch.tensor([statistic]).shape
@@ -239,23 +239,21 @@ class Statistic(ABC):
          if not torch_distributed_utils.is_distributed():
              return statistic

-         if statistic is None:
-             shape = self._determine_reduction_shape(statistic=statistic)
-
-             if shape is None:
-                 return statistic
+         shape = self._determine_reduction_shape(statistic=statistic)

-             to_reduce = torch.zeros(shape)
+         if statistic is None:
+             to_reduce = torch.zeros(shape, dtype=torch.float64)

          elif isinstance(statistic, torch.Tensor):
-             to_reduce = statistic.clone()
+             to_reduce = statistic.clone().to(torch.float64).view(-1)

          elif isinstance(statistic, TensorStatistics):
-             to_reduce = statistic.to_tensor()
+             to_reduce = statistic.to_tensor().to(torch.float64).view(-1)

          else:
-             to_reduce = torch.tensor([statistic])
+             to_reduce = torch.tensor([statistic], dtype=torch.float64)

+         to_reduce = to_reduce.to(torch_distributed_utils.get_local_device())
          dist.reduce(to_reduce, dst=MASTER_SCHEDULER_RANK, op=ReduceOp.SUM)

          if not torch_distributed_utils.is_scheduler_master_rank():
@@ -283,11 +281,13 @@ class Statistic(ABC):

          parameter_group = self._find_parameter_group(optimizer=optimizer)
          parameters = self._get_parameters(parameter_group=parameter_group)
+         self._sample_number += 1

          if self._sample_number % self.sample_frequency == 0:
              statistic = self._gather(
                  optimizer=optimizer, model=model, parameters=parameters, parameter_group=parameter_group
              )
+
              statistic = self._distributed_reduce(statistic=statistic)

          if not torch_distributed_utils.is_scheduler_master_rank():
@@ -303,9 +303,6 @@ class Statistic(ABC):
          elif statistic is not None:
              self._data.append(statistic)  # type: ignore

-         if torch_distributed_utils.is_scheduler_master_rank():
-             self._sample_number += 1
-
      @final
      def fetch(self) -> TensorStatistics | float | None:
          """
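
Taken together, these hunks make the cross-rank reduction shape- and dtype-stable: every payload is flattened to one dimension, cast to float64 and moved onto the local device before dist.reduce, and the sample counter now advances on every rank instead of only on the scheduler master. A minimal standalone sketch of that reduction path, using plain torch.distributed rather than the library's Statistic class (names are illustrative, and the placeholder shape for the None case is an assumption):

    import torch
    import torch.distributed as dist

    MASTER_RANK = 0  # stand-in for MASTER_SCHEDULER_RANK

    def reduce_statistic(statistic, device: torch.device) -> torch.Tensor | None:
        # Flatten and cast so every rank contributes a tensor of identical shape and dtype.
        if statistic is None:
            to_reduce = torch.zeros(1, dtype=torch.float64)  # the real code derives this shape from the master rank
        elif isinstance(statistic, torch.Tensor):
            to_reduce = statistic.clone().to(torch.float64).view(-1)
        else:
            to_reduce = torch.tensor([statistic], dtype=torch.float64)

        # Collective reductions expect the tensor to live on the rank's own device.
        to_reduce = to_reduce.to(device)
        dist.reduce(to_reduce, dst=MASTER_RANK, op=dist.ReduceOp.SUM)

        # Only the destination rank holds the meaningful sum after the reduce.
        return to_reduce if dist.get_rank() == MASTER_RANK else None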

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/configs/hyperparameter_configs.py
@@ -4,7 +4,7 @@
  #
  # ======================================================================================================================

- from typing import Any
+ from typing import Any, cast

  from pydantic import BaseModel, ConfigDict, ValidationError, field_serializer, field_validator, model_validator

@@ -232,6 +232,23 @@ class BatchSizeHParamConfig(HParamConfig):
      sample_discrete_values: list[float | int] | None = None


+ class GradientAccumulationHParamConfig(HParamConfig):
+     max_hparam_value: float | int = 64
+     min_hparam_value: float | int = 1
+     hparam_dtype: type[float | int] = int
+     initial_value: int = 1
+     initial_delta: float = 0.0
+     scale: float = 1.0
+
+     sampler: str = "DiscreteRangeSampler"
+     sample_initial_values: bool = False
+     sample_lower_bound: int = 1
+     sample_upper_bound: int = 64
+     sample_step: int = 1
+     sample_discrete_values: list[float | int] | None = None
+     force_limit: float | int = 64
+
+
  class EpochsHParamConfig(HParamConfig):
      max_hparam_value: float | int = 16
      min_hparam_value: float | int = 1
@@ -289,6 +306,7 @@ class HParamConfigs(BaseModel):
      sgd_momentum_config: HParamConfig = SGDMomentumHParamConfig()

      batch_size_config: HParamConfig = BatchSizeHParamConfig()
+     gradient_accumulation_config: GradientAccumulationHParamConfig = GradientAccumulationHParamConfig()
      epochs_config: HParamConfig = EpochsHParamConfig()
      token_config: HParamConfig = TokensHParamConfig()
      samples_config: HParamConfig = SamplesHParamConfig()
@@ -351,6 +369,9 @@ class HParamConfigs(BaseModel):
              case AgentTypes.BatchSize:
                  self.batch_size_config = hparam_config

+             case AgentTypes.GradientAccumulationAgent:
+                 self.gradient_accumulation_config = cast(GradientAccumulationHParamConfig, hparam_config)
+
              case AgentTypes.Epochs:
                  self.epochs_config = hparam_config

@@ -400,6 +421,9 @@ class HParamConfigs(BaseModel):
              case AgentTypes.BatchSize:
                  return self.batch_size_config

+             case AgentTypes.GradientAccumulationAgent:
+                 return self.gradient_accumulation_config
+
              case AgentTypes.Epochs:
                  return self.epochs_config

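
The new config mirrors BatchSizeHParamConfig but pins the hyperparameter to integer accumulation steps in [1, 64], sampled with a DiscreteRangeSampler and never sampled at initialisation. A hedged sketch of reading and overriding the config follows; the field names and the gradient_accumulation_config attribute come from the diff, while the import path and the ability to construct HParamConfigs purely from defaults are assumptions:

    from libinephany.pydantic_models.configs.hyperparameter_configs import (
        GradientAccumulationHParamConfig,
        HParamConfigs,
    )

    configs = HParamConfigs()  # gradient_accumulation_config uses the defaults shown above
    print(configs.gradient_accumulation_config.initial_value)     # 1
    print(configs.gradient_accumulation_config.max_hparam_value)  # 64

    # Capping accumulation at 8 steps, for example:
    configs.gradient_accumulation_config = GradientAccumulationHParamConfig(
        max_hparam_value=8,
        sample_upper_bound=8,
        force_limit=8,
    )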

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/schemas/inner_task_profile.py
@@ -140,6 +140,40 @@ class InnerTaskProfiles(BaseModel):

          return sum(self.compiled_action_sizes.values())

+     @property
+     def max_total_observation_size(self) -> int:
+         """
+         :return: The summed observation size of all agents with the task that has the most layers.
+         """
+
+         if not self.profiles:
+             raise ValueError(
+                 "No profiles to calculate max total observation size. Ensure profiles have been "
+                 "added before executing the training loop"
+             )
+
+         largest_task_name = max(self.profiles, key=lambda k: self.profiles[k].number_of_layers)
+         largest_task = self.profiles[largest_task_name]
+
+         return sum(largest_task.observation_space_sizes.values())
+
+     @property
+     def max_total_action_size(self) -> int:
+         """
+         :return: The summed action size of all agents with the task that has the most layers.
+         """
+
+         if not self.profiles:
+             raise ValueError(
+                 "No profiles to calculate max total action size. Ensure profiles have been "
+                 "added before executing the training loop"
+             )
+
+         largest_task_name = max(self.profiles, key=lambda k: self.profiles[k].number_of_layers)
+         largest_task = self.profiles[largest_task_name]
+
+         return sum(largest_task.action_space_sizes.values())
+
      @staticmethod
      def _compile_gym_space_sizes(spaces: dict[str, dict[str, int]]) -> dict[str, int]:
          """
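
Both new properties pick the profile with the most layers and sum that task's per-agent space sizes. A toy illustration of the selection logic, outside the pydantic model and with hypothetical profile data:

    profiles = {
        "small_task": {"number_of_layers": 4,  "observation_space_sizes": {"lr": 12, "dropout": 8}},
        "large_task": {"number_of_layers": 12, "observation_space_sizes": {"lr": 36, "dropout": 24}},
    }

    largest_task_name = max(profiles, key=lambda k: profiles[k]["number_of_layers"])
    largest_task = profiles[largest_task_name]

    max_total_observation_size = sum(largest_task["observation_space_sizes"].values())
    print(largest_task_name, max_total_observation_size)  # large_task 60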

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/pydantic_models/states/hyperparameter_states.py
@@ -20,6 +20,7 @@ from libinephany.utils.constants import (
      DROPOUT,
      EPOCHS,
      GRAD_NORM_CLIP,
+     GRADIENT_ACCUMULATION,
      LEARNING_RATE,
      SAMPLES,
      SGD_MOMENTUM,
@@ -60,6 +61,7 @@ class UpdateCallbacks(BaseModel):
      sgd_momentum: Callable[..., None]

      batch_size: Callable[..., None] | None
+     gradient_accumulation: Callable[..., None] | None
      epochs: Callable[..., None] | None

      def __getitem__(self, item: str) -> Callable[..., None] | None:
@@ -457,6 +459,7 @@ class ParameterGroupHParams(HyperparameterContainer):
  class GlobalHParams(HyperparameterContainer):

      batch_size: Hyperparameter
+     gradient_accumulation: Hyperparameter
      epochs: Hyperparameter
      tokens: Hyperparameter
      samples: Hyperparameter
@@ -550,6 +553,14 @@ class HyperparameterStates(BaseModel):
          """
          return self.global_hparams.batch_size

+     @computed_field  # type: ignore[misc]
+     @property
+     def gradient_accumulation(self) -> Hyperparameter:
+         """
+         :return: The gradient accumulation steps of the inner model.
+         """
+         return self.global_hparams.gradient_accumulation
+
      @computed_field  # type: ignore[misc]
      @property
      def epochs(self) -> Hyperparameter:
@@ -676,6 +687,7 @@ class HyperparameterStates(BaseModel):

          return {
              BATCH_SIZE: hparam_configs.batch_size_config,
+             GRADIENT_ACCUMULATION: hparam_configs.gradient_accumulation_config,
              EPOCHS: hparam_configs.epochs_config,
              TOKENS: hparam_configs.token_config,
              SAMPLES: hparam_configs.samples_config,
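
These hunks thread the new hyperparameter through three layers: a GRADIENT_ACCUMULATION entry in the config mapping, a field plus update-callback slot on the global containers, and a computed_field property on HyperparameterStates that simply forwards to global_hparams. A minimal pydantic sketch of that forwarding pattern, using standalone toy classes rather than the library's models:

    from pydantic import BaseModel, computed_field

    class Globals(BaseModel):  # toy stand-in for GlobalHParams
        batch_size: int = 32
        gradient_accumulation: int = 1

    class States(BaseModel):  # toy stand-in for HyperparameterStates
        global_hparams: Globals = Globals()

        @computed_field  # type: ignore[misc]
        @property
        def gradient_accumulation(self) -> int:
            # Exposed (and serialised) at the top level while being stored on the global container.
            return self.global_hparams.gradient_accumulation

    print(States().model_dump())  # includes "gradient_accumulation": 1 at the top level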

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/constants.py
@@ -21,6 +21,7 @@ ADAM_BETA_TWO = "adam_beta_two"
  ADAM_EPS = "adam_eps"
  SGD_MOMENTUM = "sgd_momentum"
  BATCH_SIZE = "batch_size"
+ GRADIENT_ACCUMULATION = "gradient_accumulation"
  EPOCHS = "epochs"
  TOKENS = "tokens"
  SAMPLES = "samples"
@@ -41,6 +42,7 @@ AGENT_PREFIX_EPS = "adam-eps"
  AGENT_PREFIX_SGD_MOMENTUM = "sgd-momentum"

  AGENT_BATCH_SIZE = "batch-size"
+ AGENT_GRADIENT_ACCUMULATION = "gradient-accumulation"

  AGENT_BANDIT_SUFFIX = "bandit-agent"

@@ -53,6 +55,7 @@ AGENT_TYPES = [
      ADAM_BETA_TWO,
      ADAM_EPS,
      SGD_MOMENTUM,
+     GRADIENT_ACCUMULATION,
  ]
  SUFFIXES = [AGENT_BANDIT_SUFFIX]
  PREFIXES = [
@@ -64,6 +67,7 @@ PREFIXES = [
      AGENT_PREFIX_BETA_TWO,
      AGENT_PREFIX_EPS,
      AGENT_PREFIX_SGD_MOMENTUM,
+     AGENT_GRADIENT_ACCUMULATION,
  ]
  PREFIXES_TO_HPARAMS = {
      AGENT_PREFIX_LR: LEARNING_RATE,
@@ -74,4 +78,5 @@ PREFIXES_TO_HPARAMS = {
      AGENT_PREFIX_BETA_TWO: ADAM_BETA_TWO,
      AGENT_PREFIX_EPS: ADAM_EPS,
      AGENT_PREFIX_SGD_MOMENTUM: SGD_MOMENTUM,
+     AGENT_GRADIENT_ACCUMULATION: GRADIENT_ACCUMULATION,
  }

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/enums.py
@@ -14,6 +14,7 @@ from libinephany.utils.constants import (
      DROPOUT,
      EPOCHS,
      GRAD_NORM_CLIP,
+     GRADIENT_ACCUMULATION,
      LEARNING_RATE,
      SAMPLES,
      SGD_MOMENTUM,
@@ -69,6 +70,7 @@ class AgentTypes(EnumWithIndices):
      AdamBetaTwoAgent = ADAM_BETA_TWO
      AdamEpsAgent = ADAM_EPS
      SGDMomentumAgent = SGD_MOMENTUM
+     GradientAccumulationAgent = GRADIENT_ACCUMULATION

      # Deprecated or Non-Agent
      BatchSize = BATCH_SIZE

{libinephany-0.14.1 → libinephany-0.15.1}/libinephany/utils/torch_distributed_utils.py
@@ -4,8 +4,10 @@
  #
  # ======================================================================================================================

+ import os
  from typing import Any

+ import torch
  import torch.distributed as dist

  # ======================================================================================================================
@@ -14,7 +16,11 @@ import torch.distributed as dist
  #
  # ======================================================================================================================

+ CUDA = "cuda"
+ CPU = "cpu"
+ CUDA_PREFIX = f"{CUDA}:"
  MASTER_SCHEDULER_RANK = 0
+ LOCAL_RANK = "LOCAL_RANK"

  # ======================================================================================================================
  #
@@ -48,7 +54,10 @@ def get_local_rank() -> int:
      :return: Distributed computing rank of this process.
      """

-     return dist.get_rank() if is_distributed() else MASTER_SCHEDULER_RANK
+     if not is_distributed():
+         return MASTER_SCHEDULER_RANK
+
+     return dist.get_rank()


  def is_scheduler_master_rank() -> bool:
@@ -83,3 +92,15 @@ def barrier() -> None:

      if is_distributed():
          dist.barrier()
+
+
+ def get_local_device() -> torch.device:
+     """
+     :return: Local device of the current rank.
+     """
+
+     if not is_distributed():
+         return torch.device(CUDA if torch.cuda.is_available() else CPU)
+
+     local_device_rank = os.environ.get(LOCAL_RANK, MASTER_SCHEDULER_RANK)
+     return torch.device(f"{CUDA_PREFIX}{local_device_rank}" if torch.cuda.is_available() else CPU)
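
get_local_device supplies the tensor placement used by the reduction change in statistic_trackers.py: outside a process group it falls back to plain cuda or cpu, and inside one it builds the device from the LOCAL_RANK environment variable that torchrun sets for each worker. A hedged usage sketch, assuming the module path shown in the file list and an already-initialised process group when running distributed:

    import torch
    import torch.distributed as dist

    from libinephany.utils import torch_distributed_utils

    payload = torch.zeros(4, dtype=torch.float64)
    payload = payload.to(torch_distributed_utils.get_local_device())  # e.g. cuda:0, cuda:1, ... or cpu

    if torch_distributed_utils.is_distributed():
        dist.reduce(
            payload,
            dst=torch_distributed_utils.MASTER_SCHEDULER_RANK,
            op=dist.ReduceOp.SUM,
        )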

{libinephany-0.14.1 → libinephany-0.15.1/libinephany.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: libinephany
- Version: 0.14.1
+ Version: 0.15.1
  Summary: Inephany library containing code commonly used by multiple subpackages.
  Author-email: Inephany <info@inephany.com>
  License: Apache 2.0

libinephany-0.14.1/CODE_VERSION.cfg
@@ -1 +0,0 @@
- 0.14.1