PyPI - service-capacity-modeling - Versions diffs - 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl - Mend

service-capacity-modeling 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

service_capacity_modeling/models/org/netflix/cassandra.py CHANGED Viewed

@@ -1,9 +1,12 @@
+# pylint: disable=too-many-lines
 import logging
 import math
 from typing import Any
 from typing import Callable
 from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Sequence
 from typing import Set
 from pydantic import BaseModel
@@ -19,6 +22,7 @@ from service_capacity_modeling.interface import CapacityPlan
 from service_capacity_modeling.interface import CapacityRequirement
 from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
+from service_capacity_modeling.interface import ClusterCapacity
 from service_capacity_modeling.interface import Clusters
 from service_capacity_modeling.interface import Consistency
 from service_capacity_modeling.interface import CurrentClusterCapacity
@@ -33,7 +37,9 @@ from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.interface import Requirements
 from service_capacity_modeling.interface import ServiceCapacity
 from service_capacity_modeling.models import CapacityModel
+from service_capacity_modeling.models import CostAwareModel
 from service_capacity_modeling.models.common import buffer_for_components
+from service_capacity_modeling.models.common import cluster_infra_cost
 from service_capacity_modeling.models.common import compute_stateful_zone
 from service_capacity_modeling.models.common import DerivedBuffers
 from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
@@ -195,11 +201,7 @@ def _estimate_cassandra_requirement(
     zones_per_region: int = 3,
     copies_per_region: int = 3,
 ) -> CapacityRequirement:
-    """Estimate the capacity required for one zone given a regional desire
-    The input desires should be the **regional** desire, and this function will
-    return the zonal capacity requirement
-    """
+    # Input: regional desires → Output: zonal requirement
     disk_buffer = buffer_for_components(
         buffers=desires.buffers, components=[BufferComponent.disk]
     )
@@ -533,40 +535,27 @@ def _estimate_cassandra_cluster_zonal(  # pylint: disable=too-many-positional-ar
     if cluster.count > (max_regional_size // zones_per_region):
         return None
-    # Durable Cassandra clusters backup to S3
-    # TODO use the write rate and estimated write size to estimate churn
-    # over the retention period.
-    cap_services = []
-    if desires.data_shape.durability_slo_order.mid >= 1000:
-        blob = context.services.get("blob.standard", None)
-        if blob:
-            cap_services = [
-                ServiceCapacity(
-                    service_type=f"cassandra.backup.{blob.name}",
-                    annual_cost=blob.annual_cost_gib(requirement.disk_gib.mid),
-                    service_params={
-                        "nines_required": (
-                            1 - 1.0 / desires.data_shape.durability_slo_order.mid
-                        )
-                    },
-                )
-            ]
+    # Calculate service costs (network + backup)
+    cap_services = NflxCassandraCapacityModel.service_costs(
+        service_type=NflxCassandraCapacityModel.service_name,
+        context=context,
+        desires=desires,
+        extra_model_arguments={"copies_per_region": copies_per_region},
+    )
-    network_costs = network_services("cassandra", context, desires, copies_per_region)
-    if network_costs:
-        cap_services.extend(network_costs)
+    cluster.cluster_type = NflxCassandraCapacityModel.cluster_type
+    zonal_clusters = [cluster] * zones_per_region
     # Account for the clusters, backup, and network costs
-    cassandra_costs = {
-        "cassandra.zonal-clusters": zones_per_region * cluster.annual_cost,
-    }
-    for s in cap_services:
-        cassandra_costs[f"{s.service_type}"] = s.annual_cost
+    cassandra_costs = NflxCassandraCapacityModel.cluster_costs(
+        service_type=NflxCassandraCapacityModel.service_name,
+        zonal_clusters=zonal_clusters,
+    )
+    cassandra_costs.update({s.service_type: s.annual_cost for s in cap_services})
-    cluster.cluster_type = "cassandra"
     clusters = Clusters(
         annual_costs=cassandra_costs,
-        zonal=[cluster] * zones_per_region,
+        zonal=zonal_clusters,
         regional=[],
         services=cap_services,
     )
@@ -711,7 +700,10 @@ class NflxCassandraArguments(BaseModel):
         return cls.model_validate(args)
-class NflxCassandraCapacityModel(CapacityModel):
+class NflxCassandraCapacityModel(CapacityModel, CostAwareModel):
+    service_name = "cassandra"
+    cluster_type = "cassandra"
     def __init__(self) -> None:
         pass
@@ -744,6 +736,60 @@ class NflxCassandraCapacityModel(CapacityModel):
         return required_cluster_size
+    @staticmethod
+    def service_costs(
+        service_type: str,
+        context: RegionContext,
+        desires: CapacityDesires,
+        extra_model_arguments: Dict[str, Any],
+    ) -> List[ServiceCapacity]:
+        # C* service costs: network + backup
+        copies_per_region: int = _target_rf(
+            desires, extra_model_arguments.get("copies_per_region")
+        )
+        services: List[ServiceCapacity] = []
+        services.extend(
+            network_services(service_type, context, desires, copies_per_region)
+        )
+        if desires.data_shape.durability_slo_order.mid >= 1000:
+            blob = context.services.get("blob.standard", None)
+            if blob:
+                # Calculate backup disk from desires (same as capacity_plan)
+                # This ensures consistent backup costs regardless of how requirement was built
+                backup_disk_gib = max(
+                    1,
+                    _get_disk_from_desires(desires, copies_per_region)
+                    // context.zones_in_region,
+                )
+                services.append(
+                    ServiceCapacity(
+                        service_type=f"{service_type}.backup.{blob.name}",
+                        annual_cost=blob.annual_cost_gib(backup_disk_gib),
+                        service_params={
+                            "nines_required": (
+                                1 - 1.0 / desires.data_shape.durability_slo_order.mid
+                            )
+                        },
+                    )
+                )
+        return services
+    @staticmethod
+    def cluster_costs(
+        service_type: str,
+        zonal_clusters: Sequence[ClusterCapacity] = (),
+        regional_clusters: Sequence[ClusterCapacity] = (),
+    ) -> Dict[str, float]:
+        return cluster_infra_cost(
+            service_type,
+            zonal_clusters,
+            regional_clusters,
+            cluster_type=NflxCassandraCapacityModel.cluster_type,
+        )
     @staticmethod
     def capacity_plan(
         instance: Instance,

service_capacity_modeling/models/org/netflix/evcache.py CHANGED Viewed

@@ -2,6 +2,8 @@ import logging
 import math
 from typing import Any
 from typing import Dict
+from typing import List
+from typing import Sequence
 from typing import Optional
 from typing import Tuple
@@ -28,10 +30,14 @@ from service_capacity_modeling.interface import GlobalConsistency
 from service_capacity_modeling.interface import Instance
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
+from service_capacity_modeling.interface import ClusterCapacity
 from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.interface import Requirements
+from service_capacity_modeling.interface import ServiceCapacity
 from service_capacity_modeling.models import CapacityModel
+from service_capacity_modeling.models import CostAwareModel
 from service_capacity_modeling.models.common import buffer_for_components
+from service_capacity_modeling.models.common import cluster_infra_cost
 from service_capacity_modeling.models.common import compute_stateful_zone
 from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
 from service_capacity_modeling.models.common import network_services
@@ -330,36 +336,29 @@ def _estimate_evcache_cluster_zonal(  # noqa: C901,E501 pylint: disable=too-many
     if cluster.count > (max_regional_size // copies_per_region):
         return None
-    services = []
-    if cross_region_replication is Replication.sets:
-        services.extend(
-            network_services("evcache", context, desires, copies_per_region)
-        )
-    elif cross_region_replication is Replication.evicts:
-        modified = desires.model_copy(deep=True)
-        # Assume that DELETES replicating cross region mean 128 bytes
-        # of key per evict.
-        modified.query_pattern.estimated_mean_write_size_bytes = certain_int(128)
-        services.extend(
-            network_services("evcache", context, modified, copies_per_region)
-        )
-    ec2_cost = copies_per_region * cluster.annual_cost
-    spread_cost = calculate_spread_cost(cluster.count)
+    # Calculate service costs (network transfer) using the model's service_costs method
+    services = NflxEVCacheCapacityModel.service_costs(
+        service_type=NflxEVCacheCapacityModel.service_name,
+        context=context,
+        desires=desires,
+        extra_model_arguments={
+            "copies_per_region": copies_per_region,
+            "cross_region_replication": cross_region_replication.value,
+        },
+    )
-    # Account for the clusters and replication costs
-    evcache_costs = {
-        "evcache.zonal-clusters": ec2_cost,
-        "evcache.spread.cost": spread_cost,
-    }
+    cluster.cluster_type = NflxEVCacheCapacityModel.cluster_type
+    zonal_clusters = [cluster] * copies_per_region
-    for s in services:
-        evcache_costs[f"{s.service_type}"] = s.annual_cost
+    evcache_costs = NflxEVCacheCapacityModel.cluster_costs(
+        service_type=NflxEVCacheCapacityModel.service_name,
+        zonal_clusters=zonal_clusters,
+    )
+    evcache_costs.update({s.service_type: s.annual_cost for s in services})
-    cluster.cluster_type = "evcache"
     clusters = Clusters(
         annual_costs=evcache_costs,
-        zonal=[cluster] * copies_per_region,
+        zonal=zonal_clusters,
         regional=[],
         services=services,
     )
@@ -399,7 +398,69 @@ class NflxEVCacheArguments(BaseModel):
     )
-class NflxEVCacheCapacityModel(CapacityModel):
+class NflxEVCacheCapacityModel(CapacityModel, CostAwareModel):
+    service_name = "evcache"
+    cluster_type = "evcache"
+    @staticmethod
+    def cluster_costs(
+        service_type: str,
+        zonal_clusters: Sequence[ClusterCapacity] = (),
+        regional_clusters: Sequence[ClusterCapacity] = (),
+    ) -> Dict[str, float]:
+        # Adds "{service_type}.spread.cost" penalty for small clusters
+        filtered_zonal = [
+            c
+            for c in zonal_clusters
+            if c.cluster_type == NflxEVCacheCapacityModel.cluster_type
+        ]
+        costs = cluster_infra_cost(
+            service_type,
+            filtered_zonal,
+            regional_clusters,
+            cluster_type=NflxEVCacheCapacityModel.cluster_type,
+        )
+        # Add spread cost penalty for small clusters
+        if filtered_zonal:
+            cluster_count = filtered_zonal[0].count
+            costs[f"{service_type}.spread.cost"] = calculate_spread_cost(cluster_count)
+        return costs
+    @staticmethod
+    def service_costs(
+        service_type: str,
+        context: RegionContext,
+        desires: CapacityDesires,
+        extra_model_arguments: Dict[str, Any],
+    ) -> List[ServiceCapacity]:
+        # Network costs depend on cross_region_replication mode:
+        # - 'none': No network costs (default)
+        # - 'sets': Full write size replicated cross-region
+        # - 'evicts': Only 128-byte keys replicated (DELETE operations)
+        # Default to 'none' for composite models (like Key-Value) that compose
+        # EVCache without specifying cross_region_replication
+        cross_region_replication = Replication(
+            extra_model_arguments.get("cross_region_replication", "none")
+        )
+        match cross_region_replication:
+            case Replication.sets:
+                copies: int = extra_model_arguments["copies_per_region"]
+                return network_services(service_type, context, desires, copies)
+            case Replication.evicts:
+                copies = extra_model_arguments["copies_per_region"]
+                # For evicts mode, only replicate 128-byte keys (DELETE operations)
+                modified = desires.model_copy(deep=True)
+                modified.query_pattern.estimated_mean_write_size_bytes = certain_int(
+                    128
+                )
+                return network_services(service_type, context, modified, copies)
+            case Replication.none:
+                return []
     @staticmethod
     def capacity_plan(
         instance: Instance,

service_capacity_modeling/models/org/netflix/kafka.py CHANGED Viewed

@@ -2,7 +2,9 @@ import logging
 import math
 from typing import Any
 from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Sequence
 from typing import Tuple
 from pydantic import BaseModel
@@ -17,6 +19,7 @@ from service_capacity_modeling.interface import Buffers
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import CapacityPlan
 from service_capacity_modeling.interface import CapacityRequirement
+from service_capacity_modeling.interface import ClusterCapacity
 from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
 from service_capacity_modeling.interface import Clusters
@@ -33,8 +36,11 @@ from service_capacity_modeling.interface import MIB_IN_BYTES
 from service_capacity_modeling.interface import QueryPattern
 from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.interface import Requirements
+from service_capacity_modeling.interface import ServiceCapacity
 from service_capacity_modeling.models import CapacityModel
+from service_capacity_modeling.models import CostAwareModel
 from service_capacity_modeling.models.common import buffer_for_components
+from service_capacity_modeling.models.common import cluster_infra_cost
 from service_capacity_modeling.models.common import compute_stateful_zone
 from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
 from service_capacity_modeling.models.common import normalize_cores
@@ -388,15 +394,18 @@ def _estimate_kafka_cluster_zonal(  # noqa: C901
     if cluster.count > (max_regional_size // zones_per_region):
         return None
-    ec2_cost = zones_per_region * cluster.annual_cost
+    cluster.cluster_type = NflxKafkaCapacityModel.cluster_type
+    zonal_clusters = [cluster] * zones_per_region
     # Account for the clusters and replication costs
-    kafka_costs = {"kafka.zonal-clusters": ec2_cost}
+    kafka_costs = NflxKafkaCapacityModel.cluster_costs(
+        service_type=NflxKafkaCapacityModel.service_name,
+        zonal_clusters=zonal_clusters,
+    )
-    cluster.cluster_type = "kafka"
     clusters = Clusters(
         annual_costs=kafka_costs,
-        zonal=[cluster] * zones_per_region,
+        zonal=zonal_clusters,
         regional=[],
         services=[],
     )
@@ -464,7 +473,9 @@ class NflxKafkaArguments(BaseModel):
     )
-class NflxKafkaCapacityModel(CapacityModel):
+class NflxKafkaCapacityModel(CapacityModel, CostAwareModel):
+    service_name = "kafka"
+    cluster_type = "kafka"
     HA_DEFAULT_REPLICATION_FACTOR = 2
     SC_DEFAULT_REPLICATION_FACTOR = 3
@@ -537,6 +548,29 @@ class NflxKafkaCapacityModel(CapacityModel):
             require_same_instance_family=require_same_instance_family,
         )
+    @staticmethod
+    def cluster_costs(
+        service_type: str,
+        zonal_clusters: Sequence[ClusterCapacity] = (),
+        regional_clusters: Sequence[ClusterCapacity] = (),
+    ) -> Dict[str, float]:
+        return cluster_infra_cost(
+            service_type,
+            zonal_clusters,
+            regional_clusters,
+            cluster_type=NflxKafkaCapacityModel.cluster_type,
+        )
+    @staticmethod
+    def service_costs(
+        service_type: str,
+        context: RegionContext,
+        desires: CapacityDesires,
+        extra_model_arguments: Dict[str, Any],
+    ) -> List[ServiceCapacity]:
+        _ = (service_type, context, desires, extra_model_arguments)
+        return []
     @staticmethod
     def description() -> str:
         return "Netflix Streaming Kafka Model"

service_capacity_modeling/models/org/netflix/key_value.py CHANGED Viewed

@@ -1,14 +1,18 @@
 from typing import Any
 from typing import Callable
 from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Sequence
 from typing import Tuple
 from .stateless_java import nflx_java_app_capacity_model
+from .stateless_java import NflxJavaAppCapacityModel
 from service_capacity_modeling.interface import AccessConsistency
 from service_capacity_modeling.interface import AccessPattern
 from service_capacity_modeling.interface import CapacityDesires
 from service_capacity_modeling.interface import CapacityPlan
+from service_capacity_modeling.interface import ClusterCapacity
 from service_capacity_modeling.interface import Consistency
 from service_capacity_modeling.interface import DataShape
 from service_capacity_modeling.interface import Drive
@@ -18,10 +22,16 @@ from service_capacity_modeling.interface import Instance
 from service_capacity_modeling.interface import Interval
 from service_capacity_modeling.interface import QueryPattern
 from service_capacity_modeling.interface import RegionContext
+from service_capacity_modeling.interface import ServiceCapacity
 from service_capacity_modeling.models import CapacityModel
+from service_capacity_modeling.models import CostAwareModel
+from service_capacity_modeling.models.common import cluster_infra_cost
-class NflxKeyValueCapacityModel(CapacityModel):
+class NflxKeyValueCapacityModel(CapacityModel, CostAwareModel):
+    service_name = "key-value"
+    cluster_type = "dgwkv"
     @staticmethod
     def capacity_plan(
         instance: Instance,
@@ -44,7 +54,7 @@ class NflxKeyValueCapacityModel(CapacityModel):
             return None
         for cluster in kv_app.candidate_clusters.regional:
-            cluster.cluster_type = "dgwkv"
+            cluster.cluster_type = NflxKeyValueCapacityModel.cluster_type
         return kv_app
     @staticmethod
@@ -122,6 +132,7 @@ class NflxKeyValueCapacityModel(CapacityModel):
     def default_desires(
         user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any]
     ) -> CapacityDesires:
+        _ = extra_model_arguments
         if user_desires.query_pattern.access_pattern == AccessPattern.latency:
             return CapacityDesires(
                 query_pattern=QueryPattern(
@@ -225,5 +236,36 @@ class NflxKeyValueCapacityModel(CapacityModel):
                 ),
             )
+    @staticmethod
+    def cluster_costs(
+        service_type: str,
+        zonal_clusters: Sequence[ClusterCapacity] = (),
+        regional_clusters: Sequence[ClusterCapacity] = (),
+    ) -> Dict[str, float]:
+        # Uses NflxJavaAppCapacityModel.service_name (not service_type param)
+        # because capacity_plan delegates to nflx_java_app_capacity_model
+        _ = service_type
+        return cluster_infra_cost(
+            service_type=NflxJavaAppCapacityModel.service_name,
+            zonal_clusters=zonal_clusters,
+            regional_clusters=regional_clusters,
+            cluster_type=NflxKeyValueCapacityModel.cluster_type,
+        )
+    @staticmethod
+    def service_costs(
+        service_type: str,
+        context: RegionContext,
+        desires: CapacityDesires,
+        extra_model_arguments: Dict[str, Any],
+    ) -> List[ServiceCapacity]:
+        # Returns empty - dgwkv has no direct network costs:
+        # - DataStax driver selects local Cassandra coordinators (same AZ = free)
+        # - Coordinator→replica fan-out is counted in cassandra.net.intra.region
+        # - EVCache access uses local nodes (same AZ = free)
+        # Cassandra/EVCache service costs come from _sub_models() DAG traversal.
+        _ = (service_type, context, desires, extra_model_arguments)
+        return []
 nflx_key_value_capacity_model = NflxKeyValueCapacityModel()

service_capacity_modeling/models/org/netflix/stateless_java.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import math
 from typing import Any
 from typing import Dict
+from typing import List
 from typing import Optional
+from typing import Sequence
 from pydantic import BaseModel
 from pydantic import Field
@@ -14,6 +16,7 @@ from service_capacity_modeling.interface import CapacityRegretParameters
 from service_capacity_modeling.interface import CapacityRequirement
 from service_capacity_modeling.interface import certain_float
 from service_capacity_modeling.interface import certain_int
+from service_capacity_modeling.interface import ClusterCapacity
 from service_capacity_modeling.interface import Clusters
 from service_capacity_modeling.interface import Consistency
 from service_capacity_modeling.interface import DataShape
@@ -26,7 +29,10 @@ from service_capacity_modeling.interface import QueryPattern
 from service_capacity_modeling.interface import RegionClusterCapacity
 from service_capacity_modeling.interface import RegionContext
 from service_capacity_modeling.interface import Requirements
+from service_capacity_modeling.interface import ServiceCapacity
 from service_capacity_modeling.models import CapacityModel
+from service_capacity_modeling.models import CostAwareModel
+from service_capacity_modeling.models.common import cluster_infra_cost
 from service_capacity_modeling.models.common import compute_stateless_region
 from service_capacity_modeling.models.common import network_services
 from service_capacity_modeling.models.common import normalize_cores
@@ -110,24 +116,25 @@ def _estimate_java_app_region(  # pylint: disable=too-many-positional-arguments
         needed_network_mbps=requirement.network_mbps.mid,
         num_zones=zones_per_region,
     )
-    cluster.cluster_type = "nflx-java-app"
+    cluster.cluster_type = NflxJavaAppCapacityModel.cluster_type
     cluster.attached_drives = attached_drives
-    # Add drive cost (root volume is EBS and costs money)
-    drive_cost = sum(d.annual_cost for d in attached_drives) * cluster.count
-    cluster.annual_cost = cluster.annual_cost + drive_cost
     # Generally don't want giant clusters
     # Especially not above 1000 because some load balancers struggle
     # with such large clusters
     if cluster.count <= 256:
-        costs = {"nflx-java-app.regional-clusters": cluster.annual_cost}
-        # Assume stateless java stays in the same region but crosses a zone
-        network = network_services(
-            "nflx-java-app", RegionContext(num_regions=1), desires, copies_per_region=2
+        costs = NflxJavaAppCapacityModel.cluster_costs(
+            service_type=NflxJavaAppCapacityModel.service_name,
+            regional_clusters=[cluster],
+        )
+        services = NflxJavaAppCapacityModel.service_costs(
+            service_type=NflxJavaAppCapacityModel.service_name,
+            context=context,
+            desires=desires,
+            extra_model_arguments={},
         )
-        for s in network:
+        for s in services:
             costs[s.service_type] = s.annual_cost
         return CapacityPlan(
@@ -136,6 +143,7 @@ def _estimate_java_app_region(  # pylint: disable=too-many-positional-arguments
                 annual_costs=costs,
                 regional=[cluster],
                 zonal=[],
+                services=services,
             ),
         )
     return None
@@ -154,7 +162,41 @@ class NflxJavaAppArguments(BaseModel):
     )
-class NflxJavaAppCapacityModel(CapacityModel):
+class NflxJavaAppCapacityModel(CapacityModel, CostAwareModel):
+    service_name = "nflx-java-app"
+    cluster_type = "nflx-java-app"
+    @staticmethod
+    def cluster_costs(
+        service_type: str,
+        zonal_clusters: Sequence[ClusterCapacity] = (),
+        regional_clusters: Sequence[ClusterCapacity] = (),
+    ) -> Dict[str, float]:
+        return cluster_infra_cost(
+            service_type,
+            zonal_clusters,
+            regional_clusters,
+            cluster_type=NflxJavaAppCapacityModel.cluster_type,
+        )
+    @staticmethod
+    def service_costs(
+        service_type: str,
+        context: RegionContext,
+        desires: CapacityDesires,
+        extra_model_arguments: Dict[str, Any],
+    ) -> List[ServiceCapacity]:
+        # TODO(matthewho): Currently returns empty because RegionContext is
+        # created without services. Need to determine if stateless apps should
+        # have cross-zone costs (copies_per_region=2 implies 1 cross-AZ hop).
+        _ = (context, extra_model_arguments)
+        return network_services(
+            service_type,
+            RegionContext(num_regions=1),
+            desires,
+            copies_per_region=2,
+        )
     @staticmethod
     def capacity_plan(
         instance: Instance,

service-capacity-modeling 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl

service-capacity-modeling 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl