service-capacity-modeling 0.3.100-py3-none-any.whl → 0.3.101-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,350 @@
+ #!/usr/bin/env python3
+ """
+ Capture current cost outputs for regression testing.
+
+ This script runs capacity planning for various scenarios and captures
+ the cost breakdowns to use as baselines for regression tests.
+
+ Usage:
+     python -m service_capacity_modeling.tools.capture_baseline_costs
+ """
+
+ import json
+ from pathlib import Path
+ from typing import Any
+
+ from service_capacity_modeling.capacity_planner import planner
+ from service_capacity_modeling.interface import (
+     AccessConsistency,
+     CapacityDesires,
+     certain_float,
+     certain_int,
+     Consistency,
+     DataShape,
+     GlobalConsistency,
+     Interval,
+     QueryPattern,
+ )
+
+
+ def capture_costs(
+     model_name: str,
+     region: str,
+     desires: CapacityDesires,
+     extra_args: dict[str, Any] | None = None,
+     scenario_name: str = "",
+ ) -> dict[str, Any]:
+     """Capture all cost breakdown for a planning scenario."""
+     try:
+         cap_plans = planner.plan_certain(
+             model_name=model_name,
+             region=region,
+             desires=desires,
+             num_results=1,
+             extra_model_arguments=extra_args or {},
+         )
+
+         if not cap_plans:
+             return {"error": "No capacity plans generated", "scenario": scenario_name}
+
+         cap_plan = cap_plans[0]
+         clusters = cap_plan.candidate_clusters
+
+         result = {
+             "scenario": scenario_name,
+             "model": model_name,
+             "region": region,
+             "service_tier": desires.service_tier,
+             "annual_costs": dict(
+                 sorted((k, float(v)) for k, v in clusters.annual_costs.items())
+             ),
+             "total_annual_cost": float(clusters.total_annual_cost),
+             "cluster_count": len(clusters.zonal) + len(clusters.regional),
+             "service_count": len(clusters.services),
+         }
+
+         # Add instance info
+         if clusters.zonal:
+             result["instance_name"] = clusters.zonal[0].instance.name
+             result["instance_count"] = clusters.zonal[0].count
+             result["deployment"] = "zonal"
+         elif clusters.regional:
+             result["instance_name"] = clusters.regional[0].instance.name
+             result["instance_count"] = clusters.regional[0].count
+             result["deployment"] = "regional"
+
+         return result
+     except (ValueError, KeyError, AttributeError) as e:
+         return {"error": str(e), "scenario": scenario_name}
+
+
+ # Define test scenarios for each service
+ # Each scenario: (model_name, region, desires, extra_args, scenario_name)
+ scenarios: list[tuple[str, str, CapacityDesires, dict[str, Any] | None, str]] = []
+
+ # RDS scenarios
+ rds_small = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(200),
+         estimated_write_per_second=certain_int(100),
+         estimated_mean_read_latency_ms=certain_float(10),
+         estimated_mean_write_latency_ms=certain_float(10),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(50),
+     ),
+ )
+
+ rds_tier3 = CapacityDesires(
+     service_tier=3,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(200),
+         estimated_write_per_second=certain_int(100),
+         estimated_mean_read_latency_ms=certain_float(20),
+         estimated_mean_write_latency_ms=certain_float(20),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(200),
+     ),
+ )
+
+ scenarios.extend(
+     [
+         ("org.netflix.rds", "us-east-1", rds_small, None, "rds_small_tier1"),
+         ("org.netflix.rds", "us-east-1", rds_tier3, None, "rds_tier3"),
+     ]
+ )
+
+ # Aurora scenarios
+ aurora_small = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(100),
+         estimated_write_per_second=certain_int(100),
+         estimated_mean_read_latency_ms=certain_float(10),
+         estimated_mean_write_latency_ms=certain_float(10),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(50),
+     ),
+ )
+
+ aurora_tier3 = CapacityDesires(
+     service_tier=3,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(200),
+         estimated_write_per_second=certain_int(100),
+         estimated_mean_read_latency_ms=certain_float(10),
+         estimated_mean_write_latency_ms=certain_float(10),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(200),
+     ),
+ )
+
+ scenarios.extend(
+     [
+         ("org.netflix.aurora", "us-east-1", aurora_small, None, "aurora_small_tier1"),
+         ("org.netflix.aurora", "us-east-1", aurora_tier3, None, "aurora_tier3"),
+     ]
+ )
+
+ # Cassandra scenarios
+ cassandra_small_high_qps = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(100_000),
+         estimated_write_per_second=certain_int(100_000),
+         estimated_mean_read_latency_ms=certain_float(0.5),
+         estimated_mean_write_latency_ms=certain_float(0.4),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(10),
+     ),
+ )
+
+ cassandra_high_writes = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(10_000),
+         estimated_write_per_second=certain_int(500_000),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(300),
+     ),
+ )
+
+ scenarios.extend(
+     [
+         (
+             "org.netflix.cassandra",
+             "us-east-1",
+             cassandra_small_high_qps,
+             {"require_local_disks": True},
+             "cassandra_small_high_qps_local",
+         ),
+         (
+             "org.netflix.cassandra",
+             "us-east-1",
+             cassandra_high_writes,
+             {"require_local_disks": False, "copies_per_region": 2},
+             "cassandra_high_writes_ebs",
+         ),
+     ]
+ )
+
+ # Kafka scenarios - Kafka uses throughput-based sizing via write_size
+ # 100 MiB/s throughput with 2 consumers, 1 producer
+ throughput = 100 * 1024 * 1024  # 100 MiB/s
+ kafka_throughput = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=Interval(low=1, mid=2, high=2, confidence=0.98),
+         estimated_write_per_second=Interval(low=1, mid=1, high=1, confidence=0.98),
+         estimated_mean_write_size_bytes=Interval(
+             low=throughput, mid=throughput, high=throughput * 2, confidence=0.98
+         ),
+     ),
+ )
+
+ scenarios.extend(
+     [
+         (
+             "org.netflix.kafka",
+             "us-east-1",
+             kafka_throughput,
+             {"require_local_disks": False},
+             "kafka_100mib_throughput",
+         ),
+     ]
+ )
+
+ # EVCache scenarios
+ # Tiny EVCache - small cluster to show spread cost (< 10 instances = spread penalty)
+ evcache_tiny = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(1_000),
+         estimated_write_per_second=certain_int(100),
+         estimated_mean_read_latency_ms=certain_float(1.0),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(1),
+         estimated_state_item_count=Interval(
+             low=10_000, mid=100_000, high=200_000, confidence=0.98
+         ),
+     ),
+ )
+
+ evcache_small = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(100_000),
+         estimated_write_per_second=certain_int(10_000),
+         estimated_mean_read_latency_ms=certain_float(1.0),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(10),
+         estimated_state_item_count=Interval(
+             low=1_000_000, mid=10_000_000, high=20_000_000, confidence=0.98
+         ),
+     ),
+ )
+
+ evcache_large = CapacityDesires(
+     service_tier=1,
+     query_pattern=QueryPattern(
+         estimated_read_per_second=certain_int(500_000),
+         estimated_write_per_second=certain_int(50_000),
+         estimated_mean_read_latency_ms=certain_float(1.0),
+     ),
+     data_shape=DataShape(
+         estimated_state_size_gib=certain_int(500),
+         estimated_state_item_count=Interval(
+             low=10_000_000, mid=100_000_000, high=200_000_000, confidence=0.98
+         ),
+     ),
+ )
+
+ scenarios.extend(
+     [
+         (
+             "org.netflix.evcache",
+             "us-east-1",
+             evcache_tiny,
+             {"cross_region_replication": "none"},
+             "evcache_tiny_with_spread",
+         ),
+         (
+             "org.netflix.evcache",
+             "us-east-1",
+             evcache_small,
+             {"cross_region_replication": "none"},
+             "evcache_small_no_replication",
+         ),
+         (
+             "org.netflix.evcache",
+             "us-east-1",
+             evcache_large,
+             {"cross_region_replication": "sets", "copies_per_region": 2},
+             "evcache_large_with_replication",
+         ),
+     ]
+ )
+
+ # Key-Value scenarios (composite: Cassandra + EVCache)
+ # Uses evcache_large desires with eventual consistency to enable caching layer
+ kv_with_cache = evcache_large.model_copy(deep=True)
+ kv_with_cache.query_pattern.access_consistency = GlobalConsistency(
+     same_region=Consistency(target_consistency=AccessConsistency.eventual),
+     cross_region=Consistency(target_consistency=AccessConsistency.best_effort),
+ )
+
+ scenarios.extend(
+     [
+         (
+             "org.netflix.key-value",
+             "us-east-1",
+             kv_with_cache,
+             None,
+             "kv_with_cache",
+         ),
+     ]
+ )
+
+ # Export as dict for tests to import (single source of truth)
+ SCENARIOS: dict[str, dict[str, Any]] = {
+     name: {
+         "model": model,
+         "region": region,
+         "desires": desires,
+         "extra_args": extra_args,
+     }
+     for model, region, desires, extra_args, name in scenarios
+ }
+
+
+ if __name__ == "__main__":
+     # Capture all scenarios
+     results = []
+     for model, region, desires, extra_args, scenario_name in scenarios:
+         print(f"Capturing: {scenario_name}...")
+         result = capture_costs(model, region, desires, extra_args, scenario_name)
+         results.append(result)
+
+         if "error" in result:
+             print(f" ERROR: {result['error']}")
+         else:
+             print(f" Total cost: ${result['total_annual_cost']:,.2f}")
+             print(f" Cost breakdown: {list(result['annual_costs'].keys())}")
+
+     # Save results
+     output_file = Path(__file__).parent / "data" / "baseline_costs.json"
+     with open(output_file, "w", encoding="utf-8") as f:
+         json.dump(results, f, indent=2, sort_keys=True)
+         f.write("\n")  # Ensure trailing newline for pre-commit
+
+     print(f"\nResults saved to: {output_file}")
+     success_count = len([r for r in results if "error" not in r])
+     print(f"Total scenarios captured: {success_count}/{len(results)}")
File without changes
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: service-capacity-modeling
- Version: 0.3.100
+ Version: 0.3.101
  Summary: Contains utilities for modeling capacity for pluggable workloads
  Author: Joseph Lynch
  Author-email: josephl@netflix.com
@@ -79,12 +79,14 @@ service_capacity_modeling/models/org/netflix/wal.py,sha256=QtRlqP_AIVpTg-XEINAfv
  service_capacity_modeling/models/org/netflix/zookeeper.py,sha256=T_CkmRqoEVqpERCFPU8xihyaxlNfUHDJXz7dMHM8GD0,7679
  service_capacity_modeling/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  service_capacity_modeling/tools/auto_shape.py,sha256=K248-DayPrcZwLw1dYr47lpeQQwL0ylh1WAoVSdLNxw,23621
+ service_capacity_modeling/tools/capture_baseline_costs.py,sha256=VXa5AEGpK4fGFhsDgKv-jKSkgLL8arAkuogv8Vm3Gq4,10729
  service_capacity_modeling/tools/fetch_pricing.py,sha256=fO84h77cqiiIHF4hZt490RwbZ6JqjB45UsnPpV2AXD4,6122
  service_capacity_modeling/tools/generate_missing.py,sha256=F7YqvMJAV4nZc20GNrlIsnQSF8_77sLgwYZqc5k4LDg,3099
  service_capacity_modeling/tools/instance_families.py,sha256=e5RuYkCLUITvsAazDH12B6KjX_PaBsv6Ne3mj0HK_sQ,9223
- service_capacity_modeling-0.3.100.dist-info/licenses/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
- service_capacity_modeling-0.3.100.dist-info/METADATA,sha256=LF_RV_duMAiGaOc7BX7jDNBjizo2BRtOX-cZtWLPYzw,10367
- service_capacity_modeling-0.3.100.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
- service_capacity_modeling-0.3.100.dist-info/entry_points.txt,sha256=ZsjzpG5SomWpT1zCE19n1uSXKH2gTI_yc33sdl0vmJg,146
- service_capacity_modeling-0.3.100.dist-info/top_level.txt,sha256=H8XjTCLgR3enHq5t3bIbxt9SeUkUT8HT_SDv2dgIT_A,26
- service_capacity_modeling-0.3.100.dist-info/RECORD,,
+ service_capacity_modeling/tools/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ service_capacity_modeling-0.3.101.dist-info/licenses/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
+ service_capacity_modeling-0.3.101.dist-info/METADATA,sha256=6tuDUib_sRWz0Y9imRcO6t5KhF-qr4v1kXG8zbmewj0,10367
+ service_capacity_modeling-0.3.101.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+ service_capacity_modeling-0.3.101.dist-info/entry_points.txt,sha256=ZsjzpG5SomWpT1zCE19n1uSXKH2gTI_yc33sdl0vmJg,146
+ service_capacity_modeling-0.3.101.dist-info/top_level.txt,sha256=H8XjTCLgR3enHq5t3bIbxt9SeUkUT8HT_SDv2dgIT_A,26
+ service_capacity_modeling-0.3.101.dist-info/RECORD,,