PyPI - service-capacity-modeling - Versions diffs - 0.3.73__py3-none-any.whl → 0.3.79__py3-none-any.whl - Mend

service-capacity-modeling 0.3.73__py3-none-any.whl → 0.3.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of service-capacity-modeling might be problematic. Click here for more details.

Files changed (40) hide show

service_capacity_modeling/models/common.py CHANGED Viewed

@@ -1,13 +1,19 @@
+# pylint: disable=too-many-lines
 import logging
 import math
 import random
 from decimal import Decimal
+from typing import Any
 from typing import Callable
 from typing import Dict
 from typing import List
 from typing import Optional
+from typing import Set
 from typing import Tuple
+from pydantic import BaseModel
+from pydantic import Field
 from service_capacity_modeling.hardware import shapes
 from service_capacity_modeling.interface import AVG_ITEM_SIZE_BYTES
 from service_capacity_modeling.interface import Buffer
@@ -63,6 +69,23 @@ def _QOS(tier: int) -> float:
         return 1
+def combine_buffer_ratios(left: Optional[float], right: Optional[float]) -> float:
+    """
+    Strategy for how two buffers for the same component are combined.
+    - Multiply two buffers by multiplying if both are not None
+    """
+    if left is None and right is None:
+        raise ValueError("Cannot combine buffer ratios when both values are None")
+    if left is None:
+        assert right is not None  # MyPy
+        return right
+    if right is None:
+        assert left is not None  # MyPy
+        return left
+    return left * right
 def _sqrt_staffed_cores(rps: float, latency_s: float, qos: float) -> int:
     # Square root staffing
     # s = a + Q*sqrt(a)
@@ -153,18 +176,31 @@ def normalize_cores(
     target_shape: Instance,
     reference_shape: Optional[Instance] = None,
 ) -> int:
-    """Calculates equivalent cores on a target shape relative to a reference
+    """Calculates equivalent CPU on a target shape relative to a reference
     Takes into account relative core frequency and IPC factor from the hardware
     description to give a rough estimate of how many equivalent cores you need
     in a target_shape to have the core_count number of cores on the reference_shape
     """
+    # Normalize the core count the same as CPUs
+    return _normalize_cpu(
+        cpu_count=core_count,
+        target_shape=target_shape,
+        reference_shape=reference_shape,
+    )
+def _normalize_cpu(
+    cpu_count: float,
+    target_shape: Instance,
+    reference_shape: Optional[Instance] = None,
+) -> int:
     if reference_shape is None:
         reference_shape = default_reference_shape
     target_speed = target_shape.cpu_ghz * target_shape.cpu_ipc_scale
     reference_speed = reference_shape.cpu_ghz * reference_shape.cpu_ipc_scale
-    return max(1, math.ceil(core_count / (target_speed / reference_speed)))
+    return max(1, math.ceil(cpu_count / (target_speed / reference_speed)))
 def _reserved_headroom(
@@ -218,8 +254,6 @@ def cpu_headroom_target(instance: Instance, buffers: Optional[Buffers] = None) -
 # When someone asks for the key, return any buffers that
 # influence the component in the value
 _default_buffer_fallbacks: Dict[str, List[str]] = {
-    BufferComponent.compute: [BufferComponent.cpu],
-    BufferComponent.storage: [BufferComponent.disk],
     BufferComponent.cpu: [BufferComponent.compute],
     BufferComponent.network: [BufferComponent.compute],
     BufferComponent.memory: [BufferComponent.storage],
@@ -227,6 +261,44 @@ _default_buffer_fallbacks: Dict[str, List[str]] = {
 }
+def _expand_components(
+    components: List[str],
+    component_fallbacks: Optional[Dict[str, List[str]]] = None,
+) -> Set[str]:
+    """Expand and dedupe components to include their fallbacks
+    Args:
+        components: List of component names to expand
+        component_fallbacks: Optional fallback mapping (uses default if None)
+    Returns:
+        Set of expanded component names including fallbacks
+    """
+    # Semantically it does not make sense to fetch buffers for the generic category
+    generic_components = [c for c in components if BufferComponent.is_generic(c)]
+    if generic_components:
+        all_specific_components = [
+            c for c in BufferComponent if BufferComponent.is_specific(c)
+        ]
+        raise ValueError(
+            f"Only specific components allowed. Generic components found: "
+            f"{', '.join(str(c) for c in generic_components)}. "
+            f"Use specific components instead: "
+            f"{', '.join(str(c) for c in all_specific_components)}"
+        )
+    if component_fallbacks is None:
+        component_fallbacks = _default_buffer_fallbacks
+    expanded_components = set(components)
+    for component in components:
+        expanded_components = expanded_components | set(
+            component_fallbacks.get(component, [])
+        )
+    return expanded_components
 def buffer_for_components(
     buffers: Buffers,
     components: List[str],
@@ -245,14 +317,7 @@ def buffer_for_components(
         components: the components that ultimately matched after applying
         source: All the component buffers that made up the composite ratio
     """
-    if component_fallbacks is None:
-        component_fallbacks = _default_buffer_fallbacks
-    unique_components = set(components)
-    for component in components:
-        unique_components = unique_components | set(
-            component_fallbacks.get(component, [])
-        )
+    expanded_components = _expand_components(components, component_fallbacks)
     desired = {k: v.model_copy() for k, v in buffers.desired.items()}
     if current_capacity:
@@ -266,14 +331,14 @@ def buffer_for_components(
     ratio = 1.0
     sources = {}
     for name, buffer in desired.items():
-        if any(i in unique_components for i in buffer.components):
+        if expanded_components.intersection(buffer.components):
             sources[name] = buffer
-            ratio *= buffer.ratio
+            ratio = combine_buffer_ratios(ratio, buffer.ratio)
     if not sources:
         ratio = buffers.default.ratio
     return Buffer(
-        ratio=ratio, components=sorted(list(unique_components)), sources=sources
+        ratio=ratio, components=sorted(list(expanded_components)), sources=sources
     )
@@ -483,12 +548,12 @@ def compute_stateful_zone(  # pylint: disable=too-many-positional-arguments
         # When initially provisioniong we don't want to attach more than
         # 1/3 the maximum volume size in one node (preferring more nodes
         # with smaller volumes)
-        max_size = drive.max_size_gib / 3
+        max_size = math.ceil(drive.max_size_gib / 3)
         if ebs_gib > max_size > 0:
             ratio = ebs_gib / max_size
             count = max(cluster_size(math.ceil(count * ratio)), min_count)
             cost = count * instance.annual_cost
-            ebs_gib = max_size
+            ebs_gib = int(max_size)
         read_io, write_io = required_disk_ios(space_gib, count)
         read_io, write_io = (
@@ -533,27 +598,27 @@ def compute_stateful_zone(  # pylint: disable=too-many-positional-arguments
 # AWS GP2 gives 3 IOS / gb stored.
-def gp2_gib_for_io(read_ios) -> int:
+def gp2_gib_for_io(read_ios: float) -> int:
     return int(max(1, read_ios // 3))
-def cloud_gib_for_io(drive, total_ios, space_gib) -> int:
+def cloud_gib_for_io(drive: Drive, total_ios: float, space_gib: float) -> int:
     if drive.name == "gp2":
         return gp2_gib_for_io(total_ios)
     else:
-        return space_gib
+        return int(space_gib)
 class WorkingSetEstimator:
-    def __init__(self):
-        self._cache = {}
+    def __init__(self) -> None:
+        self._cache: Dict[Any, Interval] = {}
     def working_set_percent(
         self,
         # latency distributions of the read SLOs versus the drives
         # expressed as scipy rv_continuous objects
-        drive_read_latency_dist,
-        read_slo_latency_dist,
+        drive_read_latency_dist: Any,
+        read_slo_latency_dist: Any,
         # what percentile of disk latency should we target for keeping in
         # memory. Not as this is _increased_ more memory will be reserved
         target_percentile: float = 0.90,
@@ -591,8 +656,8 @@ _working_set_estimator = WorkingSetEstimator()
 def working_set_from_drive_and_slo(
     # latency distributions of the read SLOs versus the drives
     # expressed as scipy rv_continuous objects
-    drive_read_latency_dist,
-    read_slo_latency_dist,
+    drive_read_latency_dist: Any,
+    read_slo_latency_dist: Any,
     estimated_working_set: Optional[Interval] = None,
     # what percentile of disk latency should we target for keeping in
     # memory. Not as this is _increased_ more memory will be reserved
@@ -717,180 +782,188 @@ def merge_plan(
     )
-def derived_buffer_for_component(buffer: Dict[str, Buffer], components: List[str]):
-    scale = 0.0
-    preserve = False
-    if not buffer:
-        return scale, preserve
-    for bfr in buffer.values():
-        if any(component in components for component in bfr.components):
-            if bfr.intent == BufferIntent.scale:
-                scale = max(scale, bfr.ratio)
+class DerivedBuffers(BaseModel):
+    scale: float = Field(default=1, gt=0)
+    preserve: bool = False
+    # When present, this is the maximum ratio of the current usage
+    ceiling: Optional[float] = Field(
+        default=None,
+        gt=0,
+    )
+    # When present, this is the minimum ratio of the current usage
+    floor: Optional[float] = Field(default=None, gt=0)
+    @staticmethod
+    def for_components(
+        buffer: Dict[str, Buffer],
+        components: List[str],
+        component_fallbacks: Optional[Dict[str, List[str]]] = None,
+    ) -> "DerivedBuffers":
+        expanded_components = _expand_components(components, component_fallbacks)
+        scale = 1.0
+        preserve = False
+        ceiling = None
+        floor = None
+        for bfr in buffer.values():
+            if not expanded_components.intersection(bfr.components):
+                continue
+            if bfr.intent in [
+                BufferIntent.scale,
+                BufferIntent.scale_up,
+                BufferIntent.scale_down,
+            ]:
+                scale = combine_buffer_ratios(scale, bfr.ratio)
+            if bfr.intent == BufferIntent.scale_up:
+                floor = 1  # Create a floor of 1.0x the current usage
+            if bfr.intent == BufferIntent.scale_down:
+                ceiling = 1  # Create a ceiling of 1.0x the current usage
             if bfr.intent == BufferIntent.preserve:
                 preserve = True
-    return scale, preserve
-def get_cores_from_current_capacity(
-    current_capacity: CurrentClusterCapacity, buffers: Buffers, instance: Instance
-):
-    # compute cores required per zone
-    cpu_success_buffer = (1 - cpu_headroom_target(instance, buffers)) * 100
-    current_cpu_utilization = current_capacity.cpu_utilization.mid
-    if current_capacity.cluster_instance is None:
-        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
-    else:
-        cluster_instance = current_capacity.cluster_instance
-    current_cores = cluster_instance.cpu * current_capacity.cluster_instance_count.mid
-    scale, preserve = derived_buffer_for_component(buffers.derived, ["compute", "cpu"])
-    # Scale and preserve for the same component should not be passed together.
-    # If user passes it, then scale will be preferred over preserve.
-    if scale > 0:
-        # if the new cpu core is less than the current,
-        # then take no action and return the current cpu cores
-        new_cpu_utilization = current_cpu_utilization * scale
-        core_scale_up_factor = max(1.0, new_cpu_utilization / cpu_success_buffer)
-        return math.ceil(current_cores * core_scale_up_factor)
-    if preserve:
-        return current_cores
-    return int(current_cores * (current_cpu_utilization / cpu_success_buffer))
-def get_memory_from_current_capacity(
-    current_capacity: CurrentClusterCapacity, buffers: Buffers
-):
-    # compute memory required per zone
-    current_memory_utilization = (
-        current_capacity.memory_utilization_gib.mid
-        * current_capacity.cluster_instance_count.mid
-    )
-    if current_capacity.cluster_instance is None:
-        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
-    else:
-        cluster_instance = current_capacity.cluster_instance
-    zonal_ram_allocated = (
-        cluster_instance.ram_gib * current_capacity.cluster_instance_count.mid
-    )
-    # These are the desired buffers
-    memory_buffer = buffer_for_components(
-        buffers=buffers, components=[BufferComponent.memory]
-    )
-    scale, preserve = derived_buffer_for_component(
-        buffers.derived, ["memory", "storage"]
-    )
-    # Scale and preserve for the same component should not be passed together.
-    # If user passes it, then scale will be preferred over preserve.
-    if scale > 0:
-        # if the new required memory is less than the current,
-        # then take no action and return the current ram
-        return max(
-            current_memory_utilization * scale * memory_buffer.ratio,
-            zonal_ram_allocated,
+        return DerivedBuffers(
+            scale=scale, preserve=preserve, ceiling=ceiling, floor=floor
         )
-    if preserve:
-        return zonal_ram_allocated
-    return current_memory_utilization * memory_buffer.ratio
-def get_network_from_current_capacity(
-    current_capacity: CurrentClusterCapacity, buffers: Buffers
-):
-    # compute network required per zone
-    current_network_utilization = (
-        current_capacity.network_utilization_mbps.mid
-        * current_capacity.cluster_instance_count.mid
-    )
-    if current_capacity.cluster_instance is None:
-        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
-    else:
-        cluster_instance = current_capacity.cluster_instance
-    zonal_network_allocated = (
-        cluster_instance.net_mbps * current_capacity.cluster_instance_count.mid
-    )
-    # These are the desired buffers
-    network_buffer = buffer_for_components(
-        buffers=buffers, components=[BufferComponent.network]
-    )
+    def calculate_requirement(
+        self,
+        current_usage: float,
+        existing_capacity: float,
+        desired_buffer_ratio: float = 1.0,
+    ) -> float:
+        if self.preserve:
+            return existing_capacity
+        requirement = self.scale * current_usage * desired_buffer_ratio
+        if self.ceiling is not None:
+            requirement = min(requirement, self.ceiling * existing_capacity)
+        if self.floor is not None:
+            requirement = max(requirement, self.floor * existing_capacity)
+        return requirement
+class RequirementFromCurrentCapacity(BaseModel):
+    current_capacity: CurrentClusterCapacity
+    buffers: Buffers
+    @property
+    def current_instance(self) -> Instance:
+        if self.current_capacity.cluster_instance is not None:
+            return self.current_capacity.cluster_instance
+        return shapes.instance(self.current_capacity.cluster_instance_name)
+    def cpu(self, instance_candidate: Instance) -> int:
+        current_cpu_util = self.current_capacity.cpu_utilization.mid / 100
+        current_total_cpu = float(
+            self.current_instance.cpu * self.current_capacity.cluster_instance_count.mid
+        )
-    scale, preserve = derived_buffer_for_component(
-        buffers.derived, ["compute", "network"]
-    )
-    # Scale and preserve for the same component should not be passed together.
-    # If user passes it, then scale will be preferred over preserve.
-    if scale > 0:
-        # if the new required network is less than the current,
-        # then take no action and return the current bandwidth
-        return max(
-            current_network_utilization * scale * network_buffer.ratio,
-            zonal_network_allocated,
+        derived_buffers = DerivedBuffers.for_components(
+            self.buffers.derived, [BufferComponent.cpu]
         )
-    if preserve:
-        return zonal_network_allocated
+        # The ideal CPU% that accomodates the headroom + desired buffer, sometimes
+        # referred to as the "success buffer"
+        target_cpu_util = 1 - cpu_headroom_target(instance_candidate, self.buffers)
+        # current_util / target_util ratio indicates CPU scaling direction:
+        # > 1: scale up, < 1: scale down, = 1: no change needed
+        used_cpu = (current_cpu_util / target_cpu_util) * current_total_cpu
+        return math.ceil(
+            # Desired buffer is omitted because the cpu_headroom already
+            # includes it
+            derived_buffers.calculate_requirement(
+                current_usage=used_cpu,
+                existing_capacity=current_total_cpu,
+            )
+        )
-    return current_network_utilization * network_buffer.ratio
+    @property
+    def mem_gib(self) -> float:
+        current_memory_utilization = float(
+            self.current_capacity.memory_utilization_gib.mid
+            * self.current_capacity.cluster_instance_count.mid
+        )
+        zonal_ram_allocated = float(
+            self.current_instance.ram_gib
+            * self.current_capacity.cluster_instance_count.mid
+        )
+        desired_buffer = buffer_for_components(
+            buffers=self.buffers, components=[BufferComponent.memory]
+        )
+        derived_buffer = DerivedBuffers.for_components(
+            self.buffers.derived, [BufferComponent.memory]
+        )
-def get_disk_from_current_capacity(
-    current_capacity: CurrentClusterCapacity, buffers: Buffers
-):
-    # compute disk required per zone
-    current_disk_utilization = (
-        current_capacity.disk_utilization_gib.mid
-        * current_capacity.cluster_instance_count.mid
-    )
+        return derived_buffer.calculate_requirement(
+            current_usage=current_memory_utilization,
+            existing_capacity=zonal_ram_allocated,
+            desired_buffer_ratio=desired_buffer.ratio,
+        )
-    if current_capacity.cluster_instance is None:
-        cluster_instance = shapes.instance(current_capacity.cluster_instance_name)
-    else:
-        cluster_instance = current_capacity.cluster_instance
+    @property
+    def disk_gib(self) -> int:
+        current_cluster_disk_util_gib = float(
+            self.current_capacity.disk_utilization_gib.mid
+            * self.current_capacity.cluster_instance_count.mid
+        )
+        current_node_disk_gib = float(
+            self.current_instance.drive.max_size_gib
+            if self.current_instance.drive is not None
+            else (
+                self.current_capacity.cluster_drive.size_gib
+                if self.current_capacity.cluster_drive is not None
+                else 0
+            )
+        )
-    if cluster_instance.drive is not None:
-        instance_disk_allocated = cluster_instance.drive.max_size_gib
-    else:
-        assert current_capacity.cluster_drive is not None, "Drive should not be None"
-        instance_disk_allocated = current_capacity.cluster_drive.size_gib
+        zonal_disk_allocated = float(
+            current_node_disk_gib * self.current_capacity.cluster_instance_count.mid
+        )
+        # These are the desired buffers
+        disk_buffer = buffer_for_components(
+            buffers=self.buffers, components=[BufferComponent.disk]
+        )
-    zonal_disk_allocated = (
-        instance_disk_allocated * current_capacity.cluster_instance_count.mid
-    )
+        derived_buffer = DerivedBuffers.for_components(
+            self.buffers.derived, [BufferComponent.disk]
+        )
+        required_disk = derived_buffer.calculate_requirement(
+            current_usage=current_cluster_disk_util_gib,
+            existing_capacity=zonal_disk_allocated,
+            desired_buffer_ratio=disk_buffer.ratio,
+        )
+        return math.ceil(required_disk)
-    # These are the desired buffers
-    disk_buffer = buffer_for_components(
-        buffers=buffers, components=[BufferComponent.disk]
-    )
+    @property
+    def network_mbps(self) -> int:
+        current_network_utilization = float(
+            self.current_capacity.network_utilization_mbps.mid
+            * self.current_capacity.cluster_instance_count.mid
+        )
+        zonal_network_allocated = float(
+            self.current_instance.net_mbps
+            * self.current_capacity.cluster_instance_count.mid
+        )
-    scale, preserve = derived_buffer_for_component(buffers.derived, ["storage", "disk"])
-    # Scale and preserve for the same component should not be passed together.
-    # If user passes it, then scale will be preferred over preserve.
-    if scale > 0:
-        # if the new required disk is less than the current,
-        # then take no action and return the current disk
-        return max(
-            current_disk_utilization * scale * disk_buffer.ratio, zonal_disk_allocated
+        # These are the desired buffers
+        network_buffer = buffer_for_components(
+            buffers=self.buffers, components=[BufferComponent.network]
+        )
+        derived_buffer = DerivedBuffers.for_components(
+            self.buffers.derived, [BufferComponent.network]
         )
-    if preserve:
-        # preserve the current disk size for the zone
-        return zonal_disk_allocated
-    return current_disk_utilization * disk_buffer.ratio
+        return math.ceil(
+            derived_buffer.calculate_requirement(
+                current_usage=current_network_utilization,
+                existing_capacity=zonal_network_allocated,
+                desired_buffer_ratio=network_buffer.ratio,
+            )
+        )
 def zonal_requirements_from_current(
@@ -901,20 +974,25 @@ def zonal_requirements_from_current(
 ) -> CapacityRequirement:
     if current_cluster is not None and current_cluster.zonal[0] is not None:
         current_capacity: CurrentClusterCapacity = current_cluster.zonal[0]
-        needed_cores = normalize_cores(
-            get_cores_from_current_capacity(current_capacity, buffers, instance),
+        # Adjust the CPUs (vCPU + cores) based on generation / instance type
+        requirement = RequirementFromCurrentCapacity(
+            current_capacity=current_capacity,
+            buffers=buffers,
+        )
+        normalized_cpu = _normalize_cpu(
+            requirement.cpu(instance),
             instance,
             reference_shape,
         )
-        needed_network_mbps = get_network_from_current_capacity(
-            current_capacity, buffers
-        )
-        needed_memory_gib = get_memory_from_current_capacity(current_capacity, buffers)
-        needed_disk_gib = get_disk_from_current_capacity(current_capacity, buffers)
+        needed_network_mbps = requirement.network_mbps
+        needed_disk_gib = requirement.disk_gib
+        needed_memory_gib = requirement.mem_gib
         return CapacityRequirement(
             requirement_type="zonal-capacity",
-            cpu_cores=certain_int(needed_cores),
+            cpu_cores=certain_int(normalized_cpu),
             mem_gib=certain_float(needed_memory_gib),
             disk_gib=certain_float(needed_disk_gib),
             network_mbps=certain_float(needed_network_mbps),

service_capacity_modeling/models/headroom_strategy.py CHANGED Viewed

@@ -19,4 +19,5 @@ class QueuingBasedHeadroomStrategy(HeadroomStrategy):
     """
     def calculate_reserved_headroom(self, effective_cpu: float) -> float:
-        return 0.712 / (effective_cpu**0.448)
+        result: float = 0.712 / (effective_cpu**0.448)
+        return result

service_capacity_modeling/models/org/netflix/__init__.py CHANGED Viewed

@@ -1,3 +1,6 @@
+from typing import Any
+from typing import Dict
 from .aurora import nflx_aurora_capacity_model
 from .cassandra import nflx_cassandra_capacity_model
 from .counter import nflx_counter_capacity_model
@@ -20,7 +23,7 @@ from .wal import nflx_wal_capacity_model
 from .zookeeper import nflx_zookeeper_capacity_model
-def models():
+def models() -> Dict[str, Any]:
     return {
         "org.netflix.cassandra": nflx_cassandra_capacity_model,
         "org.netflix.stateless-java": nflx_java_app_capacity_model,

service-capacity-modeling 0.3.73__py3-none-any.whl → 0.3.79__py3-none-any.whl

Potentially problematic release.

service-capacity-modeling 0.3.73__py3-none-any.whl → 0.3.79__py3-none-any.whl