service-capacity-modeling 0.3.73__py3-none-any.whl → 0.3.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of service-capacity-modeling might be problematic. Click here for more details.
- service_capacity_modeling/capacity_planner.py +46 -40
- service_capacity_modeling/hardware/__init__.py +11 -7
- service_capacity_modeling/hardware/profiles/shapes/aws/auto_i3en.json +172 -0
- service_capacity_modeling/hardware/profiles/shapes/aws/auto_i4i.json +220 -0
- service_capacity_modeling/hardware/profiles/shapes/aws/manual_instances.json +0 -184
- service_capacity_modeling/interface.py +48 -22
- service_capacity_modeling/models/__init__.py +21 -2
- service_capacity_modeling/models/common.py +268 -190
- service_capacity_modeling/models/headroom_strategy.py +2 -1
- service_capacity_modeling/models/org/netflix/__init__.py +4 -1
- service_capacity_modeling/models/org/netflix/aurora.py +12 -7
- service_capacity_modeling/models/org/netflix/cassandra.py +39 -24
- service_capacity_modeling/models/org/netflix/counter.py +44 -20
- service_capacity_modeling/models/org/netflix/crdb.py +7 -4
- service_capacity_modeling/models/org/netflix/ddb.py +9 -5
- service_capacity_modeling/models/org/netflix/elasticsearch.py +8 -6
- service_capacity_modeling/models/org/netflix/entity.py +5 -3
- service_capacity_modeling/models/org/netflix/evcache.py +21 -25
- service_capacity_modeling/models/org/netflix/graphkv.py +5 -3
- service_capacity_modeling/models/org/netflix/iso_date_math.py +12 -9
- service_capacity_modeling/models/org/netflix/kafka.py +13 -7
- service_capacity_modeling/models/org/netflix/key_value.py +4 -2
- service_capacity_modeling/models/org/netflix/postgres.py +4 -2
- service_capacity_modeling/models/org/netflix/rds.py +10 -5
- service_capacity_modeling/models/org/netflix/stateless_java.py +4 -2
- service_capacity_modeling/models/org/netflix/time_series.py +4 -2
- service_capacity_modeling/models/org/netflix/time_series_config.py +3 -3
- service_capacity_modeling/models/org/netflix/wal.py +4 -2
- service_capacity_modeling/models/org/netflix/zookeeper.py +5 -3
- service_capacity_modeling/stats.py +14 -11
- service_capacity_modeling/tools/auto_shape.py +10 -6
- service_capacity_modeling/tools/fetch_pricing.py +13 -6
- service_capacity_modeling/tools/generate_missing.py +4 -3
- service_capacity_modeling/tools/instance_families.py +18 -7
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/METADATA +9 -5
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/RECORD +40 -38
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/WHEEL +0 -0
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/entry_points.txt +0 -0
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/licenses/LICENSE +0 -0
- {service_capacity_modeling-0.3.73.dist-info → service_capacity_modeling-0.3.79.dist-info}/top_level.txt +0 -0
|
@@ -1,13 +1,19 @@
|
|
|
1
|
+
# pylint: disable=too-many-lines
|
|
1
2
|
import logging
|
|
2
3
|
import math
|
|
3
4
|
import random
|
|
4
5
|
from decimal import Decimal
|
|
6
|
+
from typing import Any
|
|
5
7
|
from typing import Callable
|
|
6
8
|
from typing import Dict
|
|
7
9
|
from typing import List
|
|
8
10
|
from typing import Optional
|
|
11
|
+
from typing import Set
|
|
9
12
|
from typing import Tuple
|
|
10
13
|
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
from pydantic import Field
|
|
16
|
+
|
|
11
17
|
from service_capacity_modeling.hardware import shapes
|
|
12
18
|
from service_capacity_modeling.interface import AVG_ITEM_SIZE_BYTES
|
|
13
19
|
from service_capacity_modeling.interface import Buffer
|
|
@@ -63,6 +69,23 @@ def _QOS(tier: int) -> float:
|
|
|
63
69
|
return 1
|
|
64
70
|
|
|
65
71
|
|
|
72
|
+
def combine_buffer_ratios(left: Optional[float], right: Optional[float]) -> float:
    """Combine two buffer ratios that apply to the same component.

    Strategy:
    - If exactly one ratio is present, that ratio is returned unchanged.
    - If both ratios are present, they compound multiplicatively.

    Args:
        left: First buffer ratio, or None when absent.
        right: Second buffer ratio, or None when absent.

    Returns:
        The combined ratio.

    Raises:
        ValueError: If both ratios are None, since there is nothing to combine.
    """
    if left is None:
        if right is None:
            raise ValueError("Cannot combine buffer ratios when both values are None")
        return right
    if right is None:
        return left
    return left * right
|
|
87
|
+
|
|
88
|
+
|
|
66
89
|
def _sqrt_staffed_cores(rps: float, latency_s: float, qos: float) -> int:
|
|
67
90
|
# Square root staffing
|
|
68
91
|
# s = a + Q*sqrt(a)
|
|
@@ -153,18 +176,31 @@ def normalize_cores(
|
|
|
153
176
|
target_shape: Instance,
|
|
154
177
|
reference_shape: Optional[Instance] = None,
|
|
155
178
|
) -> int:
|
|
156
|
-
"""Calculates equivalent
|
|
179
|
+
"""Calculates equivalent CPU on a target shape relative to a reference
|
|
157
180
|
|
|
158
181
|
Takes into account relative core frequency and IPC factor from the hardware
|
|
159
182
|
description to give a rough estimate of how many equivalent cores you need
|
|
160
183
|
in a target_shape to have the core_count number of cores on the reference_shape
|
|
161
184
|
"""
|
|
185
|
+
# Normalize the core count the same as CPUs
|
|
186
|
+
return _normalize_cpu(
|
|
187
|
+
cpu_count=core_count,
|
|
188
|
+
target_shape=target_shape,
|
|
189
|
+
reference_shape=reference_shape,
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _normalize_cpu(
    cpu_count: float,
    target_shape: Instance,
    reference_shape: Optional[Instance] = None,
) -> int:
    """Convert a CPU count on a reference shape to the equivalent on a target.

    A shape's relative speed is taken as ``cpu_ghz * cpu_ipc_scale``. The
    count is divided by the target/reference speed ratio, so a faster target
    needs fewer CPUs and a slower target needs more.

    Args:
        cpu_count: Number of CPUs expressed in terms of ``reference_shape``.
        target_shape: The shape the CPUs will actually run on.
        reference_shape: The shape ``cpu_count`` was measured against. When
            None, the module-level ``default_reference_shape`` is used.

    Returns:
        The equivalent CPU count on ``target_shape``, rounded up and never
        less than 1.
    """
    if reference_shape is None:
        reference_shape = default_reference_shape

    target_speed = target_shape.cpu_ghz * target_shape.cpu_ipc_scale
    reference_speed = reference_shape.cpu_ghz * reference_shape.cpu_ipc_scale
    return max(1, math.ceil(cpu_count / (target_speed / reference_speed)))
|
|
168
204
|
|
|
169
205
|
|
|
170
206
|
def _reserved_headroom(
|
|
@@ -218,8 +254,6 @@ def cpu_headroom_target(instance: Instance, buffers: Optional[Buffers] = None) -
|
|
|
218
254
|
# When someone asks for the key, return any buffers that
|
|
219
255
|
# influence the component in the value
|
|
220
256
|
_default_buffer_fallbacks: Dict[str, List[str]] = {
|
|
221
|
-
BufferComponent.compute: [BufferComponent.cpu],
|
|
222
|
-
BufferComponent.storage: [BufferComponent.disk],
|
|
223
257
|
BufferComponent.cpu: [BufferComponent.compute],
|
|
224
258
|
BufferComponent.network: [BufferComponent.compute],
|
|
225
259
|
BufferComponent.memory: [BufferComponent.storage],
|
|
@@ -227,6 +261,44 @@ _default_buffer_fallbacks: Dict[str, List[str]] = {
|
|
|
227
261
|
}
|
|
228
262
|
|
|
229
263
|
|
|
264
|
+
def _expand_components(
    components: List[str],
    component_fallbacks: Optional[Dict[str, List[str]]] = None,
) -> Set[str]:
    """Expand and dedupe components to include their fallbacks

    Fallbacks are looked up for the requested components only; the fallbacks
    of a fallback are not followed.

    Args:
        components: List of component names to expand
        component_fallbacks: Optional fallback mapping (uses default if None)

    Returns:
        Set of expanded component names including fallbacks

    Raises:
        ValueError: If any requested component is a generic component
            according to ``BufferComponent.is_generic``.
    """
    # Semantically it does not make sense to fetch buffers for the generic category
    generic_components = [c for c in components if BufferComponent.is_generic(c)]
    if generic_components:
        all_specific_components = [
            c for c in BufferComponent if BufferComponent.is_specific(c)
        ]
        raise ValueError(
            f"Only specific components allowed. Generic components found: "
            f"{', '.join(str(c) for c in generic_components)}. "
            f"Use specific components instead: "
            f"{', '.join(str(c) for c in all_specific_components)}"
        )

    if component_fallbacks is None:
        component_fallbacks = _default_buffer_fallbacks

    expanded_components = set(components)
    for component in components:
        # Grow the set in place rather than rebuilding it each iteration
        expanded_components.update(component_fallbacks.get(component, []))
    return expanded_components
|
|
300
|
+
|
|
301
|
+
|
|
230
302
|
def buffer_for_components(
|
|
231
303
|
buffers: Buffers,
|
|
232
304
|
components: List[str],
|
|
@@ -245,14 +317,7 @@ def buffer_for_components(
|
|
|
245
317
|
components: the components that ultimately matched after applying
|
|
246
318
|
source: All the component buffers that made up the composite ratio
|
|
247
319
|
"""
|
|
248
|
-
|
|
249
|
-
component_fallbacks = _default_buffer_fallbacks
|
|
250
|
-
|
|
251
|
-
unique_components = set(components)
|
|
252
|
-
for component in components:
|
|
253
|
-
unique_components = unique_components | set(
|
|
254
|
-
component_fallbacks.get(component, [])
|
|
255
|
-
)
|
|
320
|
+
expanded_components = _expand_components(components, component_fallbacks)
|
|
256
321
|
|
|
257
322
|
desired = {k: v.model_copy() for k, v in buffers.desired.items()}
|
|
258
323
|
if current_capacity:
|
|
@@ -266,14 +331,14 @@ def buffer_for_components(
|
|
|
266
331
|
ratio = 1.0
|
|
267
332
|
sources = {}
|
|
268
333
|
for name, buffer in desired.items():
|
|
269
|
-
if
|
|
334
|
+
if expanded_components.intersection(buffer.components):
|
|
270
335
|
sources[name] = buffer
|
|
271
|
-
ratio
|
|
336
|
+
ratio = combine_buffer_ratios(ratio, buffer.ratio)
|
|
272
337
|
if not sources:
|
|
273
338
|
ratio = buffers.default.ratio
|
|
274
339
|
|
|
275
340
|
return Buffer(
|
|
276
|
-
ratio=ratio, components=sorted(list(
|
|
341
|
+
ratio=ratio, components=sorted(list(expanded_components)), sources=sources
|
|
277
342
|
)
|
|
278
343
|
|
|
279
344
|
|
|
@@ -483,12 +548,12 @@ def compute_stateful_zone( # pylint: disable=too-many-positional-arguments
|
|
|
483
548
|
# When initially provisioning we don't want to attach more than
|
|
484
549
|
# 1/3 the maximum volume size in one node (preferring more nodes
|
|
485
550
|
# with smaller volumes)
|
|
486
|
-
max_size = drive.max_size_gib / 3
|
|
551
|
+
max_size = math.ceil(drive.max_size_gib / 3)
|
|
487
552
|
if ebs_gib > max_size > 0:
|
|
488
553
|
ratio = ebs_gib / max_size
|
|
489
554
|
count = max(cluster_size(math.ceil(count * ratio)), min_count)
|
|
490
555
|
cost = count * instance.annual_cost
|
|
491
|
-
ebs_gib = max_size
|
|
556
|
+
ebs_gib = int(max_size)
|
|
492
557
|
|
|
493
558
|
read_io, write_io = required_disk_ios(space_gib, count)
|
|
494
559
|
read_io, write_io = (
|
|
@@ -533,27 +598,27 @@ def compute_stateful_zone( # pylint: disable=too-many-positional-arguments
|
|
|
533
598
|
|
|
534
599
|
|
|
535
600
|
# AWS GP2 gives 3 IOS / gb stored.
|
|
536
|
-
def gp2_gib_for_io(read_ios: float) -> int:
    """Return the gp2 volume size in GiB implied by an IO requirement.

    Uses a rate of 3 IOs per GiB stored. The division rounds down and the
    result is never less than 1 GiB.

    Args:
        read_ios: The number of IOs the volume must provide.

    Returns:
        The volume size in GiB as an int.
    """
    return int(max(1, read_ios // 3))
|
|
538
603
|
|
|
539
604
|
|
|
540
|
-
def cloud_gib_for_io(drive: Drive, total_ios: float, space_gib: float) -> int:
    """Return the cloud drive size in GiB needed for the given IO and space.

    For a drive named "gp2" the size is derived from the IO requirement via
    ``gp2_gib_for_io``. For any other drive the requested space is returned,
    truncated to an int.

    Args:
        drive: The drive being sized; only its ``name`` is inspected.
        total_ios: The IOs the drive must provide (used for gp2 only).
        space_gib: The space required in GiB (used for non-gp2 drives).

    Returns:
        The drive size in GiB as an int.
    """
    if drive.name == "gp2":
        return gp2_gib_for_io(total_ios)
    return int(space_gib)
|
|
545
610
|
|
|
546
611
|
|
|
547
612
|
class WorkingSetEstimator:
|
|
548
|
-
def __init__(self):
|
|
549
|
-
self._cache = {}
|
|
613
|
+
def __init__(self) -> None:
|
|
614
|
+
self._cache: Dict[Any, Interval] = {}
|
|
550
615
|
|
|
551
616
|
def working_set_percent(
|
|
552
617
|
self,
|
|
553
618
|
# latency distributions of the read SLOs versus the drives
|
|
554
619
|
# expressed as scipy rv_continuous objects
|
|
555
|
-
drive_read_latency_dist,
|
|
556
|
-
read_slo_latency_dist,
|
|
620
|
+
drive_read_latency_dist: Any,
|
|
621
|
+
read_slo_latency_dist: Any,
|
|
557
622
|
# what percentile of disk latency should we target for keeping in
|
|
558
623
|
# memory. Note: as this is _increased_, more memory will be reserved
|
|
559
624
|
target_percentile: float = 0.90,
|
|
@@ -591,8 +656,8 @@ _working_set_estimator = WorkingSetEstimator()
|
|
|
591
656
|
def working_set_from_drive_and_slo(
|
|
592
657
|
# latency distributions of the read SLOs versus the drives
|
|
593
658
|
# expressed as scipy rv_continuous objects
|
|
594
|
-
drive_read_latency_dist,
|
|
595
|
-
read_slo_latency_dist,
|
|
659
|
+
drive_read_latency_dist: Any,
|
|
660
|
+
read_slo_latency_dist: Any,
|
|
596
661
|
estimated_working_set: Optional[Interval] = None,
|
|
597
662
|
# what percentile of disk latency should we target for keeping in
|
|
598
663
|
# memory. Note: as this is _increased_, more memory will be reserved
|
|
@@ -717,180 +782,188 @@ def merge_plan(
|
|
|
717
782
|
)
|
|
718
783
|
|
|
719
784
|
|
|
720
|
-
|
|
721
|
-
scale = 0
|
|
722
|
-
preserve = False
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
785
|
+
class DerivedBuffers(BaseModel):
    """Derived buffer settings folded into one set of scaling rules.

    Built from the derived buffers that apply to a set of components (see
    ``for_components``) and then used to turn observed usage into a
    requirement (see ``calculate_requirement``).
    """

    # Multiplier applied to the current usage; must be > 0
    scale: float = Field(default=1, gt=0)
    # When True the existing capacity is returned as-is
    preserve: bool = False
    # When present, the requirement is capped at this multiple of the
    # existing capacity
    ceiling: Optional[float] = Field(
        default=None,
        gt=0,
    )
    # When present, the requirement is raised to at least this multiple of
    # the existing capacity
    floor: Optional[float] = Field(default=None, gt=0)

    @staticmethod
    def for_components(
        buffer: Dict[str, Buffer],
        components: List[str],
        component_fallbacks: Optional[Dict[str, List[str]]] = None,
    ) -> "DerivedBuffers":
        """Fold the derived buffers matching ``components`` into one object.

        Args:
            buffer: Derived buffers keyed by name.
            components: Specific components to collect buffers for; they are
                expanded with their fallbacks via ``_expand_components``.
            component_fallbacks: Optional fallback mapping passed through to
                ``_expand_components``.

        Returns:
            A DerivedBuffers where the ratios of all matching scale,
            scale_up and scale_down buffers are combined with
            ``combine_buffer_ratios``, a scale_up buffer sets ``floor`` to 1,
            a scale_down buffer sets ``ceiling`` to 1, and a preserve buffer
            sets ``preserve``.
        """
        expanded_components = _expand_components(components, component_fallbacks)

        scale = 1.0
        preserve = False
        ceiling: Optional[float] = None
        floor: Optional[float] = None

        for bfr in buffer.values():
            # Skip buffers that do not touch any requested component
            if not expanded_components.intersection(bfr.components):
                continue

            if bfr.intent in [
                BufferIntent.scale,
                BufferIntent.scale_up,
                BufferIntent.scale_down,
            ]:
                scale = combine_buffer_ratios(scale, bfr.ratio)
            if bfr.intent == BufferIntent.scale_up:
                floor = 1  # Never go below 1.0x the existing capacity
            if bfr.intent == BufferIntent.scale_down:
                ceiling = 1  # Never go above 1.0x the existing capacity
            if bfr.intent == BufferIntent.preserve:
                preserve = True

        return DerivedBuffers(
            scale=scale, preserve=preserve, ceiling=ceiling, floor=floor
        )

    def calculate_requirement(
        self,
        current_usage: float,
        existing_capacity: float,
        desired_buffer_ratio: float = 1.0,
    ) -> float:
        """Compute the requirement implied by usage and these buffers.

        Args:
            current_usage: The observed usage of the resource.
            existing_capacity: The capacity currently allocated.
            desired_buffer_ratio: Extra multiplier for the desired buffer.

        Returns:
            ``existing_capacity`` when ``preserve`` is set. Otherwise
            ``scale * current_usage * desired_buffer_ratio``, capped at
            ``ceiling * existing_capacity`` and then raised to at least
            ``floor * existing_capacity`` when those bounds are present.
        """
        if self.preserve:
            return existing_capacity

        requirement = self.scale * current_usage * desired_buffer_ratio
        if self.ceiling is not None:
            requirement = min(requirement, self.ceiling * existing_capacity)
        # The floor is applied last, so it wins if both bounds are set
        if self.floor is not None:
            requirement = max(requirement, self.floor * existing_capacity)

        return requirement
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
class RequirementFromCurrentCapacity(BaseModel):
    """Derives resource requirements from a cluster's current capacity.

    Each resource combines the observed utilization in ``current_capacity``
    with the desired and derived buffers in ``buffers``.
    """

    # The observed capacity and utilization of the existing cluster
    current_capacity: CurrentClusterCapacity
    # The desired and derived buffers to apply
    buffers: Buffers

    @property
    def current_instance(self) -> Instance:
        """The instance shape the current cluster runs on.

        Uses ``current_capacity.cluster_instance`` when it is set, otherwise
        looks the shape up by ``cluster_instance_name``.
        """
        if self.current_capacity.cluster_instance is not None:
            return self.current_capacity.cluster_instance
        return shapes.instance(self.current_capacity.cluster_instance_name)

    def cpu(self, instance_candidate: Instance) -> int:
        """Return the CPU count required, rounded up.

        Args:
            instance_candidate: The instance whose CPU headroom target is
                used to compute the target utilization.
        """
        # cpu_utilization is divided by 100 to convert it to a fraction
        current_cpu_util = self.current_capacity.cpu_utilization.mid / 100
        current_total_cpu = float(
            self.current_instance.cpu * self.current_capacity.cluster_instance_count.mid
        )

        derived_buffers = DerivedBuffers.for_components(
            self.buffers.derived, [BufferComponent.cpu]
        )

        # The ideal CPU% that accommodates the headroom + desired buffer,
        # sometimes referred to as the "success buffer"
        target_cpu_util = 1 - cpu_headroom_target(instance_candidate, self.buffers)
        # current_util / target_util ratio indicates CPU scaling direction:
        # > 1: scale up, < 1: scale down, = 1: no change needed
        used_cpu = (current_cpu_util / target_cpu_util) * current_total_cpu
        return math.ceil(
            # Desired buffer is omitted because the cpu_headroom already
            # includes it
            derived_buffers.calculate_requirement(
                current_usage=used_cpu,
                existing_capacity=current_total_cpu,
            )
        )

    @property
    def mem_gib(self) -> float:
        """The memory required in GiB."""
        instance_count = self.current_capacity.cluster_instance_count.mid
        current_memory_utilization = float(
            self.current_capacity.memory_utilization_gib.mid * instance_count
        )
        zonal_ram_allocated = float(self.current_instance.ram_gib * instance_count)

        desired_buffer = buffer_for_components(
            buffers=self.buffers, components=[BufferComponent.memory]
        )
        derived_buffer = DerivedBuffers.for_components(
            self.buffers.derived, [BufferComponent.memory]
        )

        return derived_buffer.calculate_requirement(
            current_usage=current_memory_utilization,
            existing_capacity=zonal_ram_allocated,
            desired_buffer_ratio=desired_buffer.ratio,
        )

    @property
    def disk_gib(self) -> int:
        """The disk required in GiB, rounded up."""
        instance_count = self.current_capacity.cluster_instance_count.mid
        current_cluster_disk_util_gib = float(
            self.current_capacity.disk_utilization_gib.mid * instance_count
        )

        # Prefer the instance's own drive, then the attached cluster drive,
        # and fall back to 0 when neither is present
        instance_drive = self.current_instance.drive
        cluster_drive = self.current_capacity.cluster_drive
        if instance_drive is not None:
            current_node_disk_gib = float(instance_drive.max_size_gib)
        elif cluster_drive is not None:
            current_node_disk_gib = float(cluster_drive.size_gib)
        else:
            current_node_disk_gib = float(0)

        zonal_disk_allocated = float(current_node_disk_gib * instance_count)

        # These are the desired buffers
        disk_buffer = buffer_for_components(
            buffers=self.buffers, components=[BufferComponent.disk]
        )
        derived_buffer = DerivedBuffers.for_components(
            self.buffers.derived, [BufferComponent.disk]
        )
        required_disk = derived_buffer.calculate_requirement(
            current_usage=current_cluster_disk_util_gib,
            existing_capacity=zonal_disk_allocated,
            desired_buffer_ratio=disk_buffer.ratio,
        )
        return math.ceil(required_disk)

    @property
    def network_mbps(self) -> int:
        """The network bandwidth required in Mbps, rounded up."""
        instance_count = self.current_capacity.cluster_instance_count.mid
        current_network_utilization = float(
            self.current_capacity.network_utilization_mbps.mid * instance_count
        )
        zonal_network_allocated = float(
            self.current_instance.net_mbps * instance_count
        )

        # These are the desired buffers
        network_buffer = buffer_for_components(
            buffers=self.buffers, components=[BufferComponent.network]
        )
        derived_buffer = DerivedBuffers.for_components(
            self.buffers.derived, [BufferComponent.network]
        )

        return math.ceil(
            derived_buffer.calculate_requirement(
                current_usage=current_network_utilization,
                existing_capacity=zonal_network_allocated,
                desired_buffer_ratio=network_buffer.ratio,
            )
        )
|
|
894
967
|
|
|
895
968
|
|
|
896
969
|
def zonal_requirements_from_current(
|
|
@@ -901,20 +974,25 @@ def zonal_requirements_from_current(
|
|
|
901
974
|
) -> CapacityRequirement:
|
|
902
975
|
if current_cluster is not None and current_cluster.zonal[0] is not None:
|
|
903
976
|
current_capacity: CurrentClusterCapacity = current_cluster.zonal[0]
|
|
904
|
-
|
|
905
|
-
|
|
977
|
+
|
|
978
|
+
# Adjust the CPUs (vCPU + cores) based on generation / instance type
|
|
979
|
+
requirement = RequirementFromCurrentCapacity(
|
|
980
|
+
current_capacity=current_capacity,
|
|
981
|
+
buffers=buffers,
|
|
982
|
+
)
|
|
983
|
+
normalized_cpu = _normalize_cpu(
|
|
984
|
+
requirement.cpu(instance),
|
|
906
985
|
instance,
|
|
907
986
|
reference_shape,
|
|
908
987
|
)
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
needed_memory_gib =
|
|
913
|
-
needed_disk_gib = get_disk_from_current_capacity(current_capacity, buffers)
|
|
988
|
+
|
|
989
|
+
needed_network_mbps = requirement.network_mbps
|
|
990
|
+
needed_disk_gib = requirement.disk_gib
|
|
991
|
+
needed_memory_gib = requirement.mem_gib
|
|
914
992
|
|
|
915
993
|
return CapacityRequirement(
|
|
916
994
|
requirement_type="zonal-capacity",
|
|
917
|
-
cpu_cores=certain_int(
|
|
995
|
+
cpu_cores=certain_int(normalized_cpu),
|
|
918
996
|
mem_gib=certain_float(needed_memory_gib),
|
|
919
997
|
disk_gib=certain_float(needed_disk_gib),
|
|
920
998
|
network_mbps=certain_float(needed_network_mbps),
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from typing import Dict
|
|
3
|
+
|
|
1
4
|
from .aurora import nflx_aurora_capacity_model
|
|
2
5
|
from .cassandra import nflx_cassandra_capacity_model
|
|
3
6
|
from .counter import nflx_counter_capacity_model
|
|
@@ -20,7 +23,7 @@ from .wal import nflx_wal_capacity_model
|
|
|
20
23
|
from .zookeeper import nflx_zookeeper_capacity_model
|
|
21
24
|
|
|
22
25
|
|
|
23
|
-
def models():
|
|
26
|
+
def models() -> Dict[str, Any]:
|
|
24
27
|
return {
|
|
25
28
|
"org.netflix.cassandra": nflx_cassandra_capacity_model,
|
|
26
29
|
"org.netflix.stateless-java": nflx_java_app_capacity_model,
|