service-capacity-modeling 0.3.105__py3-none-any.whl → 0.3.107__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- service_capacity_modeling/capacity_planner.py +262 -10
- service_capacity_modeling/interface.py +48 -1
- service_capacity_modeling/models/__init__.py +46 -0
- service_capacity_modeling/models/common.py +40 -8
- service_capacity_modeling/models/org/netflix/aurora.py +6 -1
- service_capacity_modeling/models/org/netflix/cassandra.py +80 -34
- service_capacity_modeling/models/org/netflix/evcache.py +87 -26
- service_capacity_modeling/models/org/netflix/kafka.py +39 -5
- service_capacity_modeling/models/org/netflix/key_value.py +44 -2
- service_capacity_modeling/models/org/netflix/stateless_java.py +53 -11
- service_capacity_modeling/models/plan_comparison.py +523 -0
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/METADATA +1 -1
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/RECORD +17 -16
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/WHEEL +0 -0
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/entry_points.txt +0 -0
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/licenses/LICENSE +0 -0
- {service_capacity_modeling-0.3.105.dist-info → service_capacity_modeling-0.3.107.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
# pylint: disable=too-many-lines
|
|
1
2
|
import logging
|
|
2
3
|
import math
|
|
3
4
|
from typing import Any
|
|
4
5
|
from typing import Callable
|
|
5
6
|
from typing import Dict
|
|
7
|
+
from typing import List
|
|
6
8
|
from typing import Optional
|
|
9
|
+
from typing import Sequence
|
|
7
10
|
from typing import Set
|
|
8
11
|
|
|
9
12
|
from pydantic import BaseModel
|
|
@@ -19,6 +22,7 @@ from service_capacity_modeling.interface import CapacityPlan
|
|
|
19
22
|
from service_capacity_modeling.interface import CapacityRequirement
|
|
20
23
|
from service_capacity_modeling.interface import certain_float
|
|
21
24
|
from service_capacity_modeling.interface import certain_int
|
|
25
|
+
from service_capacity_modeling.interface import ClusterCapacity
|
|
22
26
|
from service_capacity_modeling.interface import Clusters
|
|
23
27
|
from service_capacity_modeling.interface import Consistency
|
|
24
28
|
from service_capacity_modeling.interface import CurrentClusterCapacity
|
|
@@ -33,7 +37,9 @@ from service_capacity_modeling.interface import RegionContext
|
|
|
33
37
|
from service_capacity_modeling.interface import Requirements
|
|
34
38
|
from service_capacity_modeling.interface import ServiceCapacity
|
|
35
39
|
from service_capacity_modeling.models import CapacityModel
|
|
40
|
+
from service_capacity_modeling.models import CostAwareModel
|
|
36
41
|
from service_capacity_modeling.models.common import buffer_for_components
|
|
42
|
+
from service_capacity_modeling.models.common import cluster_infra_cost
|
|
37
43
|
from service_capacity_modeling.models.common import compute_stateful_zone
|
|
38
44
|
from service_capacity_modeling.models.common import DerivedBuffers
|
|
39
45
|
from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
|
|
@@ -195,11 +201,7 @@ def _estimate_cassandra_requirement(
|
|
|
195
201
|
zones_per_region: int = 3,
|
|
196
202
|
copies_per_region: int = 3,
|
|
197
203
|
) -> CapacityRequirement:
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
The input desires should be the **regional** desire, and this function will
|
|
201
|
-
return the zonal capacity requirement
|
|
202
|
-
"""
|
|
204
|
+
# Input: regional desires → Output: zonal requirement
|
|
203
205
|
disk_buffer = buffer_for_components(
|
|
204
206
|
buffers=desires.buffers, components=[BufferComponent.disk]
|
|
205
207
|
)
|
|
@@ -533,40 +535,27 @@ def _estimate_cassandra_cluster_zonal( # pylint: disable=too-many-positional-ar
|
|
|
533
535
|
if cluster.count > (max_regional_size // zones_per_region):
|
|
534
536
|
return None
|
|
535
537
|
|
|
536
|
-
#
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
cap_services = [
|
|
544
|
-
ServiceCapacity(
|
|
545
|
-
service_type=f"cassandra.backup.{blob.name}",
|
|
546
|
-
annual_cost=blob.annual_cost_gib(requirement.disk_gib.mid),
|
|
547
|
-
service_params={
|
|
548
|
-
"nines_required": (
|
|
549
|
-
1 - 1.0 / desires.data_shape.durability_slo_order.mid
|
|
550
|
-
)
|
|
551
|
-
},
|
|
552
|
-
)
|
|
553
|
-
]
|
|
538
|
+
# Calculate service costs (network + backup)
|
|
539
|
+
cap_services = NflxCassandraCapacityModel.service_costs(
|
|
540
|
+
service_type=NflxCassandraCapacityModel.service_name,
|
|
541
|
+
context=context,
|
|
542
|
+
desires=desires,
|
|
543
|
+
extra_model_arguments={"copies_per_region": copies_per_region},
|
|
544
|
+
)
|
|
554
545
|
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
cap_services.extend(network_costs)
|
|
546
|
+
cluster.cluster_type = NflxCassandraCapacityModel.cluster_type
|
|
547
|
+
zonal_clusters = [cluster] * zones_per_region
|
|
558
548
|
|
|
559
549
|
# Account for the clusters, backup, and network costs
|
|
560
|
-
cassandra_costs =
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
550
|
+
cassandra_costs = NflxCassandraCapacityModel.cluster_costs(
|
|
551
|
+
service_type=NflxCassandraCapacityModel.service_name,
|
|
552
|
+
zonal_clusters=zonal_clusters,
|
|
553
|
+
)
|
|
554
|
+
cassandra_costs.update({s.service_type: s.annual_cost for s in cap_services})
|
|
565
555
|
|
|
566
|
-
cluster.cluster_type = "cassandra"
|
|
567
556
|
clusters = Clusters(
|
|
568
557
|
annual_costs=cassandra_costs,
|
|
569
|
-
zonal=
|
|
558
|
+
zonal=zonal_clusters,
|
|
570
559
|
regional=[],
|
|
571
560
|
services=cap_services,
|
|
572
561
|
)
|
|
@@ -711,7 +700,10 @@ class NflxCassandraArguments(BaseModel):
|
|
|
711
700
|
return cls.model_validate(args)
|
|
712
701
|
|
|
713
702
|
|
|
714
|
-
class NflxCassandraCapacityModel(CapacityModel):
|
|
703
|
+
class NflxCassandraCapacityModel(CapacityModel, CostAwareModel):
|
|
704
|
+
service_name = "cassandra"
|
|
705
|
+
cluster_type = "cassandra"
|
|
706
|
+
|
|
715
707
|
def __init__(self) -> None:
|
|
716
708
|
pass
|
|
717
709
|
|
|
@@ -744,6 +736,60 @@ class NflxCassandraCapacityModel(CapacityModel):
|
|
|
744
736
|
|
|
745
737
|
return required_cluster_size
|
|
746
738
|
|
|
739
|
+
@staticmethod
|
|
740
|
+
def service_costs(
|
|
741
|
+
service_type: str,
|
|
742
|
+
context: RegionContext,
|
|
743
|
+
desires: CapacityDesires,
|
|
744
|
+
extra_model_arguments: Dict[str, Any],
|
|
745
|
+
) -> List[ServiceCapacity]:
|
|
746
|
+
# C* service costs: network + backup
|
|
747
|
+
copies_per_region: int = _target_rf(
|
|
748
|
+
desires, extra_model_arguments.get("copies_per_region")
|
|
749
|
+
)
|
|
750
|
+
|
|
751
|
+
services: List[ServiceCapacity] = []
|
|
752
|
+
services.extend(
|
|
753
|
+
network_services(service_type, context, desires, copies_per_region)
|
|
754
|
+
)
|
|
755
|
+
|
|
756
|
+
if desires.data_shape.durability_slo_order.mid >= 1000:
|
|
757
|
+
blob = context.services.get("blob.standard", None)
|
|
758
|
+
if blob:
|
|
759
|
+
# Calculate backup disk from desires (same as capacity_plan)
|
|
760
|
+
# This ensures consistent backup costs regardless of how requirement was built
|
|
761
|
+
backup_disk_gib = max(
|
|
762
|
+
1,
|
|
763
|
+
_get_disk_from_desires(desires, copies_per_region)
|
|
764
|
+
// context.zones_in_region,
|
|
765
|
+
)
|
|
766
|
+
services.append(
|
|
767
|
+
ServiceCapacity(
|
|
768
|
+
service_type=f"{service_type}.backup.{blob.name}",
|
|
769
|
+
annual_cost=blob.annual_cost_gib(backup_disk_gib),
|
|
770
|
+
service_params={
|
|
771
|
+
"nines_required": (
|
|
772
|
+
1 - 1.0 / desires.data_shape.durability_slo_order.mid
|
|
773
|
+
)
|
|
774
|
+
},
|
|
775
|
+
)
|
|
776
|
+
)
|
|
777
|
+
|
|
778
|
+
return services
|
|
779
|
+
|
|
780
|
+
@staticmethod
|
|
781
|
+
def cluster_costs(
|
|
782
|
+
service_type: str,
|
|
783
|
+
zonal_clusters: Sequence[ClusterCapacity] = (),
|
|
784
|
+
regional_clusters: Sequence[ClusterCapacity] = (),
|
|
785
|
+
) -> Dict[str, float]:
|
|
786
|
+
return cluster_infra_cost(
|
|
787
|
+
service_type,
|
|
788
|
+
zonal_clusters,
|
|
789
|
+
regional_clusters,
|
|
790
|
+
cluster_type=NflxCassandraCapacityModel.cluster_type,
|
|
791
|
+
)
|
|
792
|
+
|
|
747
793
|
@staticmethod
|
|
748
794
|
def capacity_plan(
|
|
749
795
|
instance: Instance,
|
|
@@ -2,6 +2,8 @@ import logging
|
|
|
2
2
|
import math
|
|
3
3
|
from typing import Any
|
|
4
4
|
from typing import Dict
|
|
5
|
+
from typing import List
|
|
6
|
+
from typing import Sequence
|
|
5
7
|
from typing import Optional
|
|
6
8
|
from typing import Tuple
|
|
7
9
|
|
|
@@ -28,10 +30,14 @@ from service_capacity_modeling.interface import GlobalConsistency
|
|
|
28
30
|
from service_capacity_modeling.interface import Instance
|
|
29
31
|
from service_capacity_modeling.interface import Interval
|
|
30
32
|
from service_capacity_modeling.interface import QueryPattern
|
|
33
|
+
from service_capacity_modeling.interface import ClusterCapacity
|
|
31
34
|
from service_capacity_modeling.interface import RegionContext
|
|
32
35
|
from service_capacity_modeling.interface import Requirements
|
|
36
|
+
from service_capacity_modeling.interface import ServiceCapacity
|
|
33
37
|
from service_capacity_modeling.models import CapacityModel
|
|
38
|
+
from service_capacity_modeling.models import CostAwareModel
|
|
34
39
|
from service_capacity_modeling.models.common import buffer_for_components
|
|
40
|
+
from service_capacity_modeling.models.common import cluster_infra_cost
|
|
35
41
|
from service_capacity_modeling.models.common import compute_stateful_zone
|
|
36
42
|
from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
|
|
37
43
|
from service_capacity_modeling.models.common import network_services
|
|
@@ -330,36 +336,29 @@ def _estimate_evcache_cluster_zonal( # noqa: C901,E501 pylint: disable=too-many
|
|
|
330
336
|
if cluster.count > (max_regional_size // copies_per_region):
|
|
331
337
|
return None
|
|
332
338
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
services.extend(
|
|
344
|
-
network_services("evcache", context, modified, copies_per_region)
|
|
345
|
-
)
|
|
346
|
-
|
|
347
|
-
ec2_cost = copies_per_region * cluster.annual_cost
|
|
348
|
-
spread_cost = calculate_spread_cost(cluster.count)
|
|
339
|
+
# Calculate service costs (network transfer) using the model's service_costs method
|
|
340
|
+
services = NflxEVCacheCapacityModel.service_costs(
|
|
341
|
+
service_type=NflxEVCacheCapacityModel.service_name,
|
|
342
|
+
context=context,
|
|
343
|
+
desires=desires,
|
|
344
|
+
extra_model_arguments={
|
|
345
|
+
"copies_per_region": copies_per_region,
|
|
346
|
+
"cross_region_replication": cross_region_replication.value,
|
|
347
|
+
},
|
|
348
|
+
)
|
|
349
349
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
"evcache.zonal-clusters": ec2_cost,
|
|
353
|
-
"evcache.spread.cost": spread_cost,
|
|
354
|
-
}
|
|
350
|
+
cluster.cluster_type = NflxEVCacheCapacityModel.cluster_type
|
|
351
|
+
zonal_clusters = [cluster] * copies_per_region
|
|
355
352
|
|
|
356
|
-
|
|
357
|
-
|
|
353
|
+
evcache_costs = NflxEVCacheCapacityModel.cluster_costs(
|
|
354
|
+
service_type=NflxEVCacheCapacityModel.service_name,
|
|
355
|
+
zonal_clusters=zonal_clusters,
|
|
356
|
+
)
|
|
357
|
+
evcache_costs.update({s.service_type: s.annual_cost for s in services})
|
|
358
358
|
|
|
359
|
-
cluster.cluster_type = "evcache"
|
|
360
359
|
clusters = Clusters(
|
|
361
360
|
annual_costs=evcache_costs,
|
|
362
|
-
zonal=
|
|
361
|
+
zonal=zonal_clusters,
|
|
363
362
|
regional=[],
|
|
364
363
|
services=services,
|
|
365
364
|
)
|
|
@@ -399,7 +398,69 @@ class NflxEVCacheArguments(BaseModel):
|
|
|
399
398
|
)
|
|
400
399
|
|
|
401
400
|
|
|
402
|
-
class NflxEVCacheCapacityModel(CapacityModel):
|
|
401
|
+
class NflxEVCacheCapacityModel(CapacityModel, CostAwareModel):
|
|
402
|
+
service_name = "evcache"
|
|
403
|
+
cluster_type = "evcache"
|
|
404
|
+
|
|
405
|
+
@staticmethod
|
|
406
|
+
def cluster_costs(
|
|
407
|
+
service_type: str,
|
|
408
|
+
zonal_clusters: Sequence[ClusterCapacity] = (),
|
|
409
|
+
regional_clusters: Sequence[ClusterCapacity] = (),
|
|
410
|
+
) -> Dict[str, float]:
|
|
411
|
+
# Adds "{service_type}.spread.cost" penalty for small clusters
|
|
412
|
+
filtered_zonal = [
|
|
413
|
+
c
|
|
414
|
+
for c in zonal_clusters
|
|
415
|
+
if c.cluster_type == NflxEVCacheCapacityModel.cluster_type
|
|
416
|
+
]
|
|
417
|
+
|
|
418
|
+
costs = cluster_infra_cost(
|
|
419
|
+
service_type,
|
|
420
|
+
filtered_zonal,
|
|
421
|
+
regional_clusters,
|
|
422
|
+
cluster_type=NflxEVCacheCapacityModel.cluster_type,
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
# Add spread cost penalty for small clusters
|
|
426
|
+
if filtered_zonal:
|
|
427
|
+
cluster_count = filtered_zonal[0].count
|
|
428
|
+
costs[f"{service_type}.spread.cost"] = calculate_spread_cost(cluster_count)
|
|
429
|
+
|
|
430
|
+
return costs
|
|
431
|
+
|
|
432
|
+
@staticmethod
|
|
433
|
+
def service_costs(
|
|
434
|
+
service_type: str,
|
|
435
|
+
context: RegionContext,
|
|
436
|
+
desires: CapacityDesires,
|
|
437
|
+
extra_model_arguments: Dict[str, Any],
|
|
438
|
+
) -> List[ServiceCapacity]:
|
|
439
|
+
# Network costs depend on cross_region_replication mode:
|
|
440
|
+
# - 'none': No network costs (default)
|
|
441
|
+
# - 'sets': Full write size replicated cross-region
|
|
442
|
+
# - 'evicts': Only 128-byte keys replicated (DELETE operations)
|
|
443
|
+
# Default to 'none' for composite models (like Key-Value) that compose
|
|
444
|
+
# EVCache without specifying cross_region_replication
|
|
445
|
+
cross_region_replication = Replication(
|
|
446
|
+
extra_model_arguments.get("cross_region_replication", "none")
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
match cross_region_replication:
|
|
450
|
+
case Replication.sets:
|
|
451
|
+
copies: int = extra_model_arguments["copies_per_region"]
|
|
452
|
+
return network_services(service_type, context, desires, copies)
|
|
453
|
+
case Replication.evicts:
|
|
454
|
+
copies = extra_model_arguments["copies_per_region"]
|
|
455
|
+
# For evicts mode, only replicate 128-byte keys (DELETE operations)
|
|
456
|
+
modified = desires.model_copy(deep=True)
|
|
457
|
+
modified.query_pattern.estimated_mean_write_size_bytes = certain_int(
|
|
458
|
+
128
|
|
459
|
+
)
|
|
460
|
+
return network_services(service_type, context, modified, copies)
|
|
461
|
+
case Replication.none:
|
|
462
|
+
return []
|
|
463
|
+
|
|
403
464
|
@staticmethod
|
|
404
465
|
def capacity_plan(
|
|
405
466
|
instance: Instance,
|
|
@@ -2,7 +2,9 @@ import logging
|
|
|
2
2
|
import math
|
|
3
3
|
from typing import Any
|
|
4
4
|
from typing import Dict
|
|
5
|
+
from typing import List
|
|
5
6
|
from typing import Optional
|
|
7
|
+
from typing import Sequence
|
|
6
8
|
from typing import Tuple
|
|
7
9
|
|
|
8
10
|
from pydantic import BaseModel
|
|
@@ -17,6 +19,7 @@ from service_capacity_modeling.interface import Buffers
|
|
|
17
19
|
from service_capacity_modeling.interface import CapacityDesires
|
|
18
20
|
from service_capacity_modeling.interface import CapacityPlan
|
|
19
21
|
from service_capacity_modeling.interface import CapacityRequirement
|
|
22
|
+
from service_capacity_modeling.interface import ClusterCapacity
|
|
20
23
|
from service_capacity_modeling.interface import certain_float
|
|
21
24
|
from service_capacity_modeling.interface import certain_int
|
|
22
25
|
from service_capacity_modeling.interface import Clusters
|
|
@@ -33,8 +36,11 @@ from service_capacity_modeling.interface import MIB_IN_BYTES
|
|
|
33
36
|
from service_capacity_modeling.interface import QueryPattern
|
|
34
37
|
from service_capacity_modeling.interface import RegionContext
|
|
35
38
|
from service_capacity_modeling.interface import Requirements
|
|
39
|
+
from service_capacity_modeling.interface import ServiceCapacity
|
|
36
40
|
from service_capacity_modeling.models import CapacityModel
|
|
41
|
+
from service_capacity_modeling.models import CostAwareModel
|
|
37
42
|
from service_capacity_modeling.models.common import buffer_for_components
|
|
43
|
+
from service_capacity_modeling.models.common import cluster_infra_cost
|
|
38
44
|
from service_capacity_modeling.models.common import compute_stateful_zone
|
|
39
45
|
from service_capacity_modeling.models.common import get_effective_disk_per_node_gib
|
|
40
46
|
from service_capacity_modeling.models.common import normalize_cores
|
|
@@ -388,15 +394,18 @@ def _estimate_kafka_cluster_zonal( # noqa: C901
|
|
|
388
394
|
if cluster.count > (max_regional_size // zones_per_region):
|
|
389
395
|
return None
|
|
390
396
|
|
|
391
|
-
|
|
397
|
+
cluster.cluster_type = NflxKafkaCapacityModel.cluster_type
|
|
398
|
+
zonal_clusters = [cluster] * zones_per_region
|
|
392
399
|
|
|
393
400
|
# Account for the clusters and replication costs
|
|
394
|
-
kafka_costs =
|
|
401
|
+
kafka_costs = NflxKafkaCapacityModel.cluster_costs(
|
|
402
|
+
service_type=NflxKafkaCapacityModel.service_name,
|
|
403
|
+
zonal_clusters=zonal_clusters,
|
|
404
|
+
)
|
|
395
405
|
|
|
396
|
-
cluster.cluster_type = "kafka"
|
|
397
406
|
clusters = Clusters(
|
|
398
407
|
annual_costs=kafka_costs,
|
|
399
|
-
zonal=
|
|
408
|
+
zonal=zonal_clusters,
|
|
400
409
|
regional=[],
|
|
401
410
|
services=[],
|
|
402
411
|
)
|
|
@@ -464,7 +473,9 @@ class NflxKafkaArguments(BaseModel):
|
|
|
464
473
|
)
|
|
465
474
|
|
|
466
475
|
|
|
467
|
-
class NflxKafkaCapacityModel(CapacityModel):
|
|
476
|
+
class NflxKafkaCapacityModel(CapacityModel, CostAwareModel):
|
|
477
|
+
service_name = "kafka"
|
|
478
|
+
cluster_type = "kafka"
|
|
468
479
|
HA_DEFAULT_REPLICATION_FACTOR = 2
|
|
469
480
|
SC_DEFAULT_REPLICATION_FACTOR = 3
|
|
470
481
|
|
|
@@ -537,6 +548,29 @@ class NflxKafkaCapacityModel(CapacityModel):
|
|
|
537
548
|
require_same_instance_family=require_same_instance_family,
|
|
538
549
|
)
|
|
539
550
|
|
|
551
|
+
@staticmethod
|
|
552
|
+
def cluster_costs(
|
|
553
|
+
service_type: str,
|
|
554
|
+
zonal_clusters: Sequence[ClusterCapacity] = (),
|
|
555
|
+
regional_clusters: Sequence[ClusterCapacity] = (),
|
|
556
|
+
) -> Dict[str, float]:
|
|
557
|
+
return cluster_infra_cost(
|
|
558
|
+
service_type,
|
|
559
|
+
zonal_clusters,
|
|
560
|
+
regional_clusters,
|
|
561
|
+
cluster_type=NflxKafkaCapacityModel.cluster_type,
|
|
562
|
+
)
|
|
563
|
+
|
|
564
|
+
@staticmethod
|
|
565
|
+
def service_costs(
|
|
566
|
+
service_type: str,
|
|
567
|
+
context: RegionContext,
|
|
568
|
+
desires: CapacityDesires,
|
|
569
|
+
extra_model_arguments: Dict[str, Any],
|
|
570
|
+
) -> List[ServiceCapacity]:
|
|
571
|
+
_ = (service_type, context, desires, extra_model_arguments)
|
|
572
|
+
return []
|
|
573
|
+
|
|
540
574
|
@staticmethod
|
|
541
575
|
def description() -> str:
|
|
542
576
|
return "Netflix Streaming Kafka Model"
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from typing import Callable
|
|
3
3
|
from typing import Dict
|
|
4
|
+
from typing import List
|
|
4
5
|
from typing import Optional
|
|
6
|
+
from typing import Sequence
|
|
5
7
|
from typing import Tuple
|
|
6
8
|
|
|
7
9
|
from .stateless_java import nflx_java_app_capacity_model
|
|
10
|
+
from .stateless_java import NflxJavaAppCapacityModel
|
|
8
11
|
from service_capacity_modeling.interface import AccessConsistency
|
|
9
12
|
from service_capacity_modeling.interface import AccessPattern
|
|
10
13
|
from service_capacity_modeling.interface import CapacityDesires
|
|
11
14
|
from service_capacity_modeling.interface import CapacityPlan
|
|
15
|
+
from service_capacity_modeling.interface import ClusterCapacity
|
|
12
16
|
from service_capacity_modeling.interface import Consistency
|
|
13
17
|
from service_capacity_modeling.interface import DataShape
|
|
14
18
|
from service_capacity_modeling.interface import Drive
|
|
@@ -18,10 +22,16 @@ from service_capacity_modeling.interface import Instance
|
|
|
18
22
|
from service_capacity_modeling.interface import Interval
|
|
19
23
|
from service_capacity_modeling.interface import QueryPattern
|
|
20
24
|
from service_capacity_modeling.interface import RegionContext
|
|
25
|
+
from service_capacity_modeling.interface import ServiceCapacity
|
|
21
26
|
from service_capacity_modeling.models import CapacityModel
|
|
27
|
+
from service_capacity_modeling.models import CostAwareModel
|
|
28
|
+
from service_capacity_modeling.models.common import cluster_infra_cost
|
|
22
29
|
|
|
23
30
|
|
|
24
|
-
class NflxKeyValueCapacityModel(CapacityModel):
|
|
31
|
+
class NflxKeyValueCapacityModel(CapacityModel, CostAwareModel):
|
|
32
|
+
service_name = "key-value"
|
|
33
|
+
cluster_type = "dgwkv"
|
|
34
|
+
|
|
25
35
|
@staticmethod
|
|
26
36
|
def capacity_plan(
|
|
27
37
|
instance: Instance,
|
|
@@ -44,7 +54,7 @@ class NflxKeyValueCapacityModel(CapacityModel):
|
|
|
44
54
|
return None
|
|
45
55
|
|
|
46
56
|
for cluster in kv_app.candidate_clusters.regional:
|
|
47
|
-
cluster.cluster_type =
|
|
57
|
+
cluster.cluster_type = NflxKeyValueCapacityModel.cluster_type
|
|
48
58
|
return kv_app
|
|
49
59
|
|
|
50
60
|
@staticmethod
|
|
@@ -122,6 +132,7 @@ class NflxKeyValueCapacityModel(CapacityModel):
|
|
|
122
132
|
def default_desires(
|
|
123
133
|
user_desires: CapacityDesires, extra_model_arguments: Dict[str, Any]
|
|
124
134
|
) -> CapacityDesires:
|
|
135
|
+
_ = extra_model_arguments
|
|
125
136
|
if user_desires.query_pattern.access_pattern == AccessPattern.latency:
|
|
126
137
|
return CapacityDesires(
|
|
127
138
|
query_pattern=QueryPattern(
|
|
@@ -225,5 +236,36 @@ class NflxKeyValueCapacityModel(CapacityModel):
|
|
|
225
236
|
),
|
|
226
237
|
)
|
|
227
238
|
|
|
239
|
+
@staticmethod
|
|
240
|
+
def cluster_costs(
|
|
241
|
+
service_type: str,
|
|
242
|
+
zonal_clusters: Sequence[ClusterCapacity] = (),
|
|
243
|
+
regional_clusters: Sequence[ClusterCapacity] = (),
|
|
244
|
+
) -> Dict[str, float]:
|
|
245
|
+
# Uses NflxJavaAppCapacityModel.service_name (not service_type param)
|
|
246
|
+
# because capacity_plan delegates to nflx_java_app_capacity_model
|
|
247
|
+
_ = service_type
|
|
248
|
+
return cluster_infra_cost(
|
|
249
|
+
service_type=NflxJavaAppCapacityModel.service_name,
|
|
250
|
+
zonal_clusters=zonal_clusters,
|
|
251
|
+
regional_clusters=regional_clusters,
|
|
252
|
+
cluster_type=NflxKeyValueCapacityModel.cluster_type,
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
@staticmethod
|
|
256
|
+
def service_costs(
|
|
257
|
+
service_type: str,
|
|
258
|
+
context: RegionContext,
|
|
259
|
+
desires: CapacityDesires,
|
|
260
|
+
extra_model_arguments: Dict[str, Any],
|
|
261
|
+
) -> List[ServiceCapacity]:
|
|
262
|
+
# Returns empty - dgwkv has no direct network costs:
|
|
263
|
+
# - DataStax driver selects local Cassandra coordinators (same AZ = free)
|
|
264
|
+
# - Coordinator→replica fan-out is counted in cassandra.net.intra.region
|
|
265
|
+
# - EVCache access uses local nodes (same AZ = free)
|
|
266
|
+
# Cassandra/EVCache service costs come from _sub_models() DAG traversal.
|
|
267
|
+
_ = (service_type, context, desires, extra_model_arguments)
|
|
268
|
+
return []
|
|
269
|
+
|
|
228
270
|
|
|
229
271
|
nflx_key_value_capacity_model = NflxKeyValueCapacityModel()
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import math
|
|
2
2
|
from typing import Any
|
|
3
3
|
from typing import Dict
|
|
4
|
+
from typing import List
|
|
4
5
|
from typing import Optional
|
|
6
|
+
from typing import Sequence
|
|
5
7
|
|
|
6
8
|
from pydantic import BaseModel
|
|
7
9
|
from pydantic import Field
|
|
@@ -14,6 +16,7 @@ from service_capacity_modeling.interface import CapacityRegretParameters
|
|
|
14
16
|
from service_capacity_modeling.interface import CapacityRequirement
|
|
15
17
|
from service_capacity_modeling.interface import certain_float
|
|
16
18
|
from service_capacity_modeling.interface import certain_int
|
|
19
|
+
from service_capacity_modeling.interface import ClusterCapacity
|
|
17
20
|
from service_capacity_modeling.interface import Clusters
|
|
18
21
|
from service_capacity_modeling.interface import Consistency
|
|
19
22
|
from service_capacity_modeling.interface import DataShape
|
|
@@ -26,7 +29,10 @@ from service_capacity_modeling.interface import QueryPattern
|
|
|
26
29
|
from service_capacity_modeling.interface import RegionClusterCapacity
|
|
27
30
|
from service_capacity_modeling.interface import RegionContext
|
|
28
31
|
from service_capacity_modeling.interface import Requirements
|
|
32
|
+
from service_capacity_modeling.interface import ServiceCapacity
|
|
29
33
|
from service_capacity_modeling.models import CapacityModel
|
|
34
|
+
from service_capacity_modeling.models import CostAwareModel
|
|
35
|
+
from service_capacity_modeling.models.common import cluster_infra_cost
|
|
30
36
|
from service_capacity_modeling.models.common import compute_stateless_region
|
|
31
37
|
from service_capacity_modeling.models.common import network_services
|
|
32
38
|
from service_capacity_modeling.models.common import normalize_cores
|
|
@@ -110,24 +116,25 @@ def _estimate_java_app_region( # pylint: disable=too-many-positional-arguments
|
|
|
110
116
|
needed_network_mbps=requirement.network_mbps.mid,
|
|
111
117
|
num_zones=zones_per_region,
|
|
112
118
|
)
|
|
113
|
-
cluster.cluster_type =
|
|
119
|
+
cluster.cluster_type = NflxJavaAppCapacityModel.cluster_type
|
|
114
120
|
cluster.attached_drives = attached_drives
|
|
115
121
|
|
|
116
|
-
# Add drive cost (root volume is EBS and costs money)
|
|
117
|
-
drive_cost = sum(d.annual_cost for d in attached_drives) * cluster.count
|
|
118
|
-
cluster.annual_cost = cluster.annual_cost + drive_cost
|
|
119
|
-
|
|
120
122
|
# Generally don't want giant clusters
|
|
121
123
|
# Especially not above 1000 because some load balancers struggle
|
|
122
124
|
# with such large clusters
|
|
123
125
|
|
|
124
126
|
if cluster.count <= 256:
|
|
125
|
-
costs =
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
127
|
+
costs = NflxJavaAppCapacityModel.cluster_costs(
|
|
128
|
+
service_type=NflxJavaAppCapacityModel.service_name,
|
|
129
|
+
regional_clusters=[cluster],
|
|
130
|
+
)
|
|
131
|
+
services = NflxJavaAppCapacityModel.service_costs(
|
|
132
|
+
service_type=NflxJavaAppCapacityModel.service_name,
|
|
133
|
+
context=context,
|
|
134
|
+
desires=desires,
|
|
135
|
+
extra_model_arguments={},
|
|
129
136
|
)
|
|
130
|
-
for s in
|
|
137
|
+
for s in services:
|
|
131
138
|
costs[s.service_type] = s.annual_cost
|
|
132
139
|
|
|
133
140
|
return CapacityPlan(
|
|
@@ -136,6 +143,7 @@ def _estimate_java_app_region( # pylint: disable=too-many-positional-arguments
|
|
|
136
143
|
annual_costs=costs,
|
|
137
144
|
regional=[cluster],
|
|
138
145
|
zonal=[],
|
|
146
|
+
services=services,
|
|
139
147
|
),
|
|
140
148
|
)
|
|
141
149
|
return None
|
|
@@ -154,7 +162,41 @@ class NflxJavaAppArguments(BaseModel):
|
|
|
154
162
|
)
|
|
155
163
|
|
|
156
164
|
|
|
157
|
-
class NflxJavaAppCapacityModel(CapacityModel):
|
|
165
|
+
class NflxJavaAppCapacityModel(CapacityModel, CostAwareModel):
|
|
166
|
+
service_name = "nflx-java-app"
|
|
167
|
+
cluster_type = "nflx-java-app"
|
|
168
|
+
|
|
169
|
+
@staticmethod
|
|
170
|
+
def cluster_costs(
|
|
171
|
+
service_type: str,
|
|
172
|
+
zonal_clusters: Sequence[ClusterCapacity] = (),
|
|
173
|
+
regional_clusters: Sequence[ClusterCapacity] = (),
|
|
174
|
+
) -> Dict[str, float]:
|
|
175
|
+
return cluster_infra_cost(
|
|
176
|
+
service_type,
|
|
177
|
+
zonal_clusters,
|
|
178
|
+
regional_clusters,
|
|
179
|
+
cluster_type=NflxJavaAppCapacityModel.cluster_type,
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
@staticmethod
|
|
183
|
+
def service_costs(
|
|
184
|
+
service_type: str,
|
|
185
|
+
context: RegionContext,
|
|
186
|
+
desires: CapacityDesires,
|
|
187
|
+
extra_model_arguments: Dict[str, Any],
|
|
188
|
+
) -> List[ServiceCapacity]:
|
|
189
|
+
# TODO(matthewho): Currently returns empty because RegionContext is
|
|
190
|
+
# created without services. Need to determine if stateless apps should
|
|
191
|
+
# have cross-zone costs (copies_per_region=2 implies 1 cross-AZ hop).
|
|
192
|
+
_ = (context, extra_model_arguments)
|
|
193
|
+
return network_services(
|
|
194
|
+
service_type,
|
|
195
|
+
RegionContext(num_regions=1),
|
|
196
|
+
desires,
|
|
197
|
+
copies_per_region=2,
|
|
198
|
+
)
|
|
199
|
+
|
|
158
200
|
@staticmethod
|
|
159
201
|
def capacity_plan(
|
|
160
202
|
instance: Instance,
|