nuvu-scan 2.0.2__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nuvu_scan/cli/commands/scan.py +8 -1
- nuvu_scan/cli/formatters/html.py +141 -20
- nuvu_scan/core/base.py +34 -0
- nuvu_scan/core/providers/aws/aws_scanner.py +52 -36
- nuvu_scan/core/providers/aws/collectors/athena.py +102 -67
- nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
- nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
- nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
- {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.2.dist-info}/METADATA +41 -30
- {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.2.dist-info}/RECORD +12 -12
- {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.2.dist-info}/WHEEL +0 -0
- {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.2.dist-info}/entry_points.txt +0 -0
nuvu_scan/cli/commands/scan.py
CHANGED
|
@@ -108,6 +108,11 @@ from ..formatters.json import JSONFormatter
|
|
|
108
108
|
default="https://nuvu.dev",
|
|
109
109
|
help="Nuvu Cloud API URL (default: https://nuvu.dev)",
|
|
110
110
|
)
|
|
111
|
+
@click.option(
|
|
112
|
+
"--list-collectors",
|
|
113
|
+
is_flag=True,
|
|
114
|
+
help="List available collectors for the specified provider and exit.",
|
|
115
|
+
)
|
|
111
116
|
def scan_command(
|
|
112
117
|
provider: str,
|
|
113
118
|
output_format: str,
|
|
@@ -344,6 +349,7 @@ def scan_command(
|
|
|
344
349
|
"size_bytes": asset.size_bytes,
|
|
345
350
|
"tags": asset.tags,
|
|
346
351
|
"cost_estimate_usd": asset.cost_estimate_usd,
|
|
352
|
+
"usage_metrics": asset.usage_metrics, # Include all usage metrics
|
|
347
353
|
"risk_flags": asset.risk_flags,
|
|
348
354
|
"ownership_confidence": asset.ownership_confidence or "unknown",
|
|
349
355
|
"suggested_owner": asset.suggested_owner,
|
|
@@ -353,7 +359,8 @@ def scan_command(
|
|
|
353
359
|
}
|
|
354
360
|
|
|
355
361
|
# Push to API using the /api/scans/import endpoint
|
|
356
|
-
|
|
362
|
+
# Use longer timeout for large scans (2000+ assets can take minutes)
|
|
363
|
+
with httpx.Client(timeout=300) as client:
|
|
357
364
|
response = client.post(
|
|
358
365
|
f"{api_url.rstrip('/')}/api/scans/import",
|
|
359
366
|
json=payload,
|
nuvu_scan/cli/formatters/html.py
CHANGED
|
@@ -10,9 +10,8 @@ class HTMLFormatter:
|
|
|
10
10
|
|
|
11
11
|
def format(self, result: ScanResult) -> str:
|
|
12
12
|
"""Format scan result as HTML."""
|
|
13
|
-
# Build summary cards (use actual cost if available)
|
|
13
|
+
# Build summary cards (use actual cost from Cost Explorer if available)
|
|
14
14
|
actual_total = result.summary.get("total_actual_cost_30d")
|
|
15
|
-
estimated_assets_total = result.summary.get("estimated_assets_cost_total")
|
|
16
15
|
|
|
17
16
|
# Calculate cost saving opportunities
|
|
18
17
|
savings_opportunities = self._calculate_savings(result.assets)
|
|
@@ -29,17 +28,16 @@ class HTMLFormatter:
|
|
|
29
28
|
<div class="summary-card">
|
|
30
29
|
<h3>Actual 30-Day Cost</h3>
|
|
31
30
|
<div class="value">${actual_total:,.2f}</div>
|
|
32
|
-
|
|
33
|
-
<div class="summary-card">
|
|
34
|
-
<h3>Estimated Asset Cost</h3>
|
|
35
|
-
<div class="value">${(estimated_assets_total or 0):,.2f}</div>
|
|
31
|
+
<div class="card-note">From AWS Cost Explorer</div>
|
|
36
32
|
</div>
|
|
37
33
|
"""
|
|
38
34
|
else:
|
|
35
|
+
# Fallback when Cost Explorer data not available
|
|
39
36
|
summary_cards += f"""
|
|
40
37
|
<div class="summary-card">
|
|
41
|
-
<h3>
|
|
38
|
+
<h3>Monthly Cost</h3>
|
|
42
39
|
<div class="value">${result.total_cost_estimate_usd:,.2f}</div>
|
|
40
|
+
<div class="card-note">Add Cost Explorer permissions for accurate data</div>
|
|
43
41
|
</div>
|
|
44
42
|
"""
|
|
45
43
|
|
|
@@ -99,6 +97,7 @@ class HTMLFormatter:
|
|
|
99
97
|
.summary-card.savings {{ border-left-color: #ff9800; background: #fff8e1; }}
|
|
100
98
|
.summary-card h3 {{ margin: 0 0 10px 0; color: #666; font-size: 13px; text-transform: uppercase; }}
|
|
101
99
|
.summary-card .value {{ font-size: 22px; font-weight: bold; color: #333; }}
|
|
100
|
+
.summary-card .card-note {{ font-size: 11px; color: #888; margin-top: 5px; }}
|
|
102
101
|
table {{ width: 100%; border-collapse: collapse; margin: 20px 0; }}
|
|
103
102
|
table.compact {{ font-size: 13px; }}
|
|
104
103
|
table.compact th, table.compact td {{ padding: 8px; }}
|
|
@@ -133,6 +132,12 @@ class HTMLFormatter:
|
|
|
133
132
|
<p><strong>Account ID:</strong> {result.account_id}</p>
|
|
134
133
|
<p><strong>Scan Time:</strong> {result.scan_timestamp}</p>
|
|
135
134
|
|
|
135
|
+
<h2>📋 Scan Scope</h2>
|
|
136
|
+
<div class="insight-box info">
|
|
137
|
+
<p><strong>Collectors:</strong> {", ".join(result.scanned_collectors) if result.scanned_collectors else "All (Full Scan)"}</p>
|
|
138
|
+
<p><strong>Regions:</strong> {", ".join(result.scanned_regions[:10]) if result.scanned_regions else "All enabled regions"}{" (+ " + str(len(result.scanned_regions) - 10) + " more)" if len(result.scanned_regions) > 10 else ""}</p>
|
|
139
|
+
</div>
|
|
140
|
+
|
|
136
141
|
<h2>Executive Summary</h2>
|
|
137
142
|
<div class="summary">
|
|
138
143
|
{summary_cards}
|
|
@@ -157,7 +162,27 @@ class HTMLFormatter:
|
|
|
157
162
|
html += f" <tr><td>{category.replace('_', ' ').title()}</td><td>{count}</td></tr>\n"
|
|
158
163
|
|
|
159
164
|
# All Assets - COLLAPSIBLE
|
|
160
|
-
|
|
165
|
+
# Filter out:
|
|
166
|
+
# - Expired/retired reserved nodes (historical clutter)
|
|
167
|
+
# - Cost summary (it's a summary row, not an asset)
|
|
168
|
+
# They're still counted in the governance summary for context
|
|
169
|
+
display_assets = [
|
|
170
|
+
a
|
|
171
|
+
for a in result.assets
|
|
172
|
+
if not (
|
|
173
|
+
# Exclude expired/retired reserved nodes
|
|
174
|
+
(
|
|
175
|
+
a.asset_type == "redshift_reserved_node"
|
|
176
|
+
and any(
|
|
177
|
+
flag in (a.risk_flags or [])
|
|
178
|
+
for flag in ["reservation_expired", "reservation_retired"]
|
|
179
|
+
)
|
|
180
|
+
)
|
|
181
|
+
# Exclude cost summary pseudo-asset
|
|
182
|
+
or a.asset_type == "cost_summary"
|
|
183
|
+
)
|
|
184
|
+
]
|
|
185
|
+
asset_count = len(display_assets)
|
|
161
186
|
html += f""" </table>
|
|
162
187
|
|
|
163
188
|
<button class="collapsible">All Assets <span class="asset-count">({asset_count} items)</span></button>
|
|
@@ -175,11 +200,12 @@ class HTMLFormatter:
|
|
|
175
200
|
"""
|
|
176
201
|
|
|
177
202
|
# Sort assets by cost (descending)
|
|
178
|
-
sorted_assets = sorted(
|
|
203
|
+
sorted_assets = sorted(display_assets, key=lambda x: x.cost_estimate_usd or 0, reverse=True)
|
|
179
204
|
|
|
180
205
|
for asset in sorted_assets:
|
|
181
206
|
owner_class = ""
|
|
182
|
-
if
|
|
207
|
+
# Only show no-owner class if we have no suggested owner at all
|
|
208
|
+
if not asset.suggested_owner and asset.ownership_confidence == "unknown":
|
|
183
209
|
owner_class = "no-owner"
|
|
184
210
|
|
|
185
211
|
risk_flags_html = ""
|
|
@@ -213,8 +239,11 @@ class HTMLFormatter:
|
|
|
213
239
|
</script>
|
|
214
240
|
|
|
215
241
|
<div class="footer">
|
|
216
|
-
<p>
|
|
217
|
-
<p
|
|
242
|
+
<p><strong>nuvu-scan</strong> — The Open Source Cloud Data Scanner</p>
|
|
243
|
+
<p><a href="https://github.com/nuvudev/nuvu-scan" target="_blank">github.com/nuvudev/nuvu-scan</a></p>
|
|
244
|
+
<p style="margin-top: 12px; font-size: 11px; color: #888;">
|
|
245
|
+
Add the governance layer: <a href="https://nuvu.dev" style="color: #666;">Nuvu Cloud</a> — historical tracking • team dashboards • scheduled scans • Slack/email alerts
|
|
246
|
+
</p>
|
|
218
247
|
</div>
|
|
219
248
|
</div>
|
|
220
249
|
</body>
|
|
@@ -303,24 +332,61 @@ class HTMLFormatter:
|
|
|
303
332
|
</div>
|
|
304
333
|
"""
|
|
305
334
|
|
|
306
|
-
# Reserved nodes analysis
|
|
335
|
+
# Reserved nodes analysis - compare with provisioned clusters
|
|
307
336
|
if reserved_nodes:
|
|
308
337
|
active_reservations = [
|
|
309
338
|
a for a in reserved_nodes if (a.usage_metrics or {}).get("state") == "active"
|
|
310
339
|
]
|
|
311
|
-
|
|
340
|
+
|
|
341
|
+
# Count total nodes covered by active reservations
|
|
342
|
+
active_reserved_nodes = sum(
|
|
343
|
+
(a.usage_metrics or {}).get("node_count", 0) for a in active_reservations
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
# Count total provisioned cluster nodes
|
|
347
|
+
clusters = [a for a in assets if a.asset_type == "redshift_cluster"]
|
|
348
|
+
total_provisioned_nodes = sum(
|
|
349
|
+
(a.usage_metrics or {}).get("node_count", 0) for a in clusters
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
# Calculate uncovered nodes (potential savings opportunity)
|
|
353
|
+
uncovered_nodes = max(0, total_provisioned_nodes - active_reserved_nodes)
|
|
354
|
+
|
|
355
|
+
# Determine if this is a savings opportunity
|
|
356
|
+
is_savings_opportunity = uncovered_nodes > 0
|
|
357
|
+
box_class = "warning" if is_savings_opportunity else "info"
|
|
312
358
|
|
|
313
359
|
html += f"""
|
|
314
|
-
<div class="insight-box
|
|
315
|
-
<h3>🎫 Reserved
|
|
360
|
+
<div class="insight-box {box_class}">
|
|
361
|
+
<h3>🎫 Reserved vs On-Demand Nodes</h3>
|
|
316
362
|
<ul>
|
|
317
|
-
<li><strong>
|
|
318
|
-
<li><strong>
|
|
319
|
-
<li><strong>
|
|
363
|
+
<li><strong>Provisioned Cluster Nodes:</strong> {total_provisioned_nodes}</li>
|
|
364
|
+
<li><strong>Active Reserved Nodes:</strong> {active_reserved_nodes} ({len(active_reservations)} reservations)</li>
|
|
365
|
+
<li><strong>Uncovered (On-Demand) Nodes:</strong> {uncovered_nodes}</li>
|
|
320
366
|
</ul>
|
|
321
|
-
</div>
|
|
322
367
|
"""
|
|
323
368
|
|
|
369
|
+
if is_savings_opportunity:
|
|
370
|
+
# Reserved pricing typically saves 30-40% vs on-demand
|
|
371
|
+
html += f"""
|
|
372
|
+
<p class="recommendation">💰 <strong>Potential Savings:</strong> {uncovered_nodes} nodes running on-demand pricing. Reserved nodes typically offer 30-40% discount.</p>
|
|
373
|
+
"""
|
|
374
|
+
else:
|
|
375
|
+
html += """
|
|
376
|
+
<p class="recommendation">✅ All provisioned nodes are covered by reservations.</p>
|
|
377
|
+
"""
|
|
378
|
+
|
|
379
|
+
# Show expiring reservations if any
|
|
380
|
+
if expiring_reservations:
|
|
381
|
+
expiring_nodes = sum(
|
|
382
|
+
(a.usage_metrics or {}).get("node_count", 0) for a in expiring_reservations
|
|
383
|
+
)
|
|
384
|
+
html += f"""
|
|
385
|
+
<p class="recommendation">⚠️ <strong>{len(expiring_reservations)} reservations ({expiring_nodes} nodes) expiring soon.</strong> Plan for renewal to maintain coverage.</p>
|
|
386
|
+
"""
|
|
387
|
+
|
|
388
|
+
html += "</div>"
|
|
389
|
+
|
|
324
390
|
return html
|
|
325
391
|
|
|
326
392
|
def _build_governance_section(self, assets) -> str:
|
|
@@ -418,4 +484,59 @@ class HTMLFormatter:
|
|
|
418
484
|
html += f"<li><strong>{cluster.name}</strong>: {queues} queues, Auto WLM: {auto_wlm} ({flags})</li>"
|
|
419
485
|
html += "</ul></div>"
|
|
420
486
|
|
|
487
|
+
# Add cluster performance section
|
|
488
|
+
clusters_with_metrics = [
|
|
489
|
+
a
|
|
490
|
+
for a in clusters
|
|
491
|
+
if (a.usage_metrics or {}).get("cpu_utilization_max_24h") is not None
|
|
492
|
+
]
|
|
493
|
+
if clusters_with_metrics:
|
|
494
|
+
html += """
|
|
495
|
+
<div class="insight-box info">
|
|
496
|
+
<h3>📊 Cluster Performance (Last 24h)</h3>
|
|
497
|
+
<table class="compact">
|
|
498
|
+
<tr><th>Cluster</th><th>CPU Max</th><th>CPU Avg</th><th>Queries</th><th>Disk Used</th><th>Recommendation</th></tr>
|
|
499
|
+
"""
|
|
500
|
+
for cluster in clusters_with_metrics[:10]:
|
|
501
|
+
metrics = cluster.usage_metrics or {}
|
|
502
|
+
cpu_max = metrics.get("cpu_utilization_max_24h", 0)
|
|
503
|
+
cpu_avg = metrics.get("cpu_utilization_avg_24h", 0)
|
|
504
|
+
queries = metrics.get("queries_completed_24h", 0)
|
|
505
|
+
disk = metrics.get("disk_space_used_percent", 0)
|
|
506
|
+
rec = metrics.get("performance_recommendation", "-")
|
|
507
|
+
html += (
|
|
508
|
+
f"<tr><td>{cluster.name}</td><td>{cpu_max:.1f}%</td>"
|
|
509
|
+
f"<td>{cpu_avg:.1f}%</td><td>{queries}</td>"
|
|
510
|
+
f"<td>{disk:.1f}%</td><td>{rec if rec else '-'}</td></tr>"
|
|
511
|
+
)
|
|
512
|
+
html += "</table></div>"
|
|
513
|
+
|
|
514
|
+
# Add serverless workgroup performance section
|
|
515
|
+
serverless_wgs = [a for a in assets if a.asset_type == "redshift_serverless_workgroup"]
|
|
516
|
+
serverless_with_metrics = [
|
|
517
|
+
a for a in serverless_wgs if (a.usage_metrics or {}).get("rpu_max_7d") is not None
|
|
518
|
+
]
|
|
519
|
+
if serverless_with_metrics:
|
|
520
|
+
html += """
|
|
521
|
+
<div class="insight-box info">
|
|
522
|
+
<h3>🚀 Serverless Workgroup Utilization</h3>
|
|
523
|
+
<table class="compact">
|
|
524
|
+
<tr><th>Workgroup</th><th>Base RPU</th><th>Max RPU (7d)</th><th>Avg RPU (7d)</th><th>Queries (24h)</th><th>Recommendation</th></tr>
|
|
525
|
+
"""
|
|
526
|
+
for wg in serverless_with_metrics[:10]:
|
|
527
|
+
metrics = wg.usage_metrics or {}
|
|
528
|
+
base = metrics.get("base_capacity", 0)
|
|
529
|
+
rpu_max = metrics.get("rpu_max_7d", 0)
|
|
530
|
+
rpu_avg = metrics.get("rpu_avg_7d", 0)
|
|
531
|
+
queries = metrics.get("queries_completed_24h", 0) + metrics.get(
|
|
532
|
+
"queries_failed_24h", 0
|
|
533
|
+
)
|
|
534
|
+
rec = metrics.get("utilization_recommendation", "-")
|
|
535
|
+
html += (
|
|
536
|
+
f"<tr><td>{wg.name}</td><td>{base}</td>"
|
|
537
|
+
f"<td>{rpu_max:.1f}</td><td>{rpu_avg:.1f}</td>"
|
|
538
|
+
f"<td>{queries}</td><td>{rec if rec else '-'}</td></tr>"
|
|
539
|
+
)
|
|
540
|
+
html += "</table></div>"
|
|
541
|
+
|
|
421
542
|
return html
|
nuvu_scan/core/base.py
CHANGED
|
@@ -88,10 +88,17 @@ class ScanResult:
|
|
|
88
88
|
assets: list[Asset]
|
|
89
89
|
total_cost_estimate_usd: float
|
|
90
90
|
summary: dict[str, Any] = None
|
|
91
|
+
# Scan scope metadata
|
|
92
|
+
scanned_regions: list[str] = None
|
|
93
|
+
scanned_collectors: list[str] = None
|
|
91
94
|
|
|
92
95
|
def __post_init__(self):
|
|
93
96
|
if self.summary is None:
|
|
94
97
|
self.summary = {}
|
|
98
|
+
if self.scanned_regions is None:
|
|
99
|
+
self.scanned_regions = []
|
|
100
|
+
if self.scanned_collectors is None:
|
|
101
|
+
self.scanned_collectors = []
|
|
95
102
|
|
|
96
103
|
|
|
97
104
|
class CloudProviderScan(ABC):
|
|
@@ -173,6 +180,12 @@ class CloudProviderScan(ABC):
|
|
|
173
180
|
# Build summary
|
|
174
181
|
summary = self._build_summary(assets)
|
|
175
182
|
|
|
183
|
+
# Get scanned regions from assets
|
|
184
|
+
scanned_regions = sorted(set(asset.region for asset in assets if asset.region))
|
|
185
|
+
|
|
186
|
+
# Get scanned collectors from config
|
|
187
|
+
scanned_collectors = self.config.collectors if self.config.collectors else []
|
|
188
|
+
|
|
176
189
|
return ScanResult(
|
|
177
190
|
provider=self.provider,
|
|
178
191
|
account_id=self.config.account_id or "unknown",
|
|
@@ -180,6 +193,8 @@ class CloudProviderScan(ABC):
|
|
|
180
193
|
assets=assets,
|
|
181
194
|
total_cost_estimate_usd=total_cost,
|
|
182
195
|
summary=summary,
|
|
196
|
+
scanned_regions=scanned_regions,
|
|
197
|
+
scanned_collectors=scanned_collectors,
|
|
183
198
|
)
|
|
184
199
|
|
|
185
200
|
def _build_summary(self, assets: list[Asset]) -> dict[str, Any]:
|
|
@@ -219,6 +234,21 @@ class CloudProviderScan(ABC):
|
|
|
219
234
|
if asset.risk_flags:
|
|
220
235
|
risky_count += 1
|
|
221
236
|
|
|
237
|
+
# Find cost summary asset if present
|
|
238
|
+
actual_costs_30d = {}
|
|
239
|
+
total_actual_cost_30d = None
|
|
240
|
+
for asset in assets:
|
|
241
|
+
if asset.asset_type == "cost_summary":
|
|
242
|
+
usage = asset.usage_metrics or {}
|
|
243
|
+
actual_costs_30d = usage.get("actual_costs_30d", {})
|
|
244
|
+
total_actual_cost_30d = usage.get("total_actual_cost_30d")
|
|
245
|
+
break
|
|
246
|
+
|
|
247
|
+
# Calculate estimated asset costs (excluding cost_summary)
|
|
248
|
+
estimated_assets_total = sum(
|
|
249
|
+
asset.cost_estimate_usd or 0 for asset in assets if asset.asset_type != "cost_summary"
|
|
250
|
+
)
|
|
251
|
+
|
|
222
252
|
return {
|
|
223
253
|
"total_assets": total_assets,
|
|
224
254
|
"assets_by_category": assets_by_category,
|
|
@@ -226,4 +256,8 @@ class CloudProviderScan(ABC):
|
|
|
226
256
|
"unused_count": unused_count,
|
|
227
257
|
"no_owner_count": no_owner_count,
|
|
228
258
|
"risky_count": risky_count,
|
|
259
|
+
# Cost data
|
|
260
|
+
"actual_costs_30d": actual_costs_30d,
|
|
261
|
+
"total_actual_cost_30d": total_actual_cost_30d,
|
|
262
|
+
"estimated_assets_cost_total": estimated_assets_total,
|
|
229
263
|
}
|
nuvu_scan/core/providers/aws/aws_scanner.py
CHANGED
|
@@ -236,17 +236,55 @@ class AWSScanner(CloudProviderScan):
|
|
|
236
236
|
continue
|
|
237
237
|
|
|
238
238
|
# Add a summary asset with actual costs from Cost Explorer
|
|
239
|
+
# Only include costs for services related to the scanned collectors
|
|
240
|
+
print("Fetching cost data from AWS Cost Explorer...", file=sys.stderr)
|
|
239
241
|
try:
|
|
240
242
|
from datetime import datetime, timedelta
|
|
241
243
|
|
|
242
244
|
end_date = datetime.utcnow()
|
|
243
245
|
start_date = end_date - timedelta(days=30)
|
|
244
246
|
service_costs = self.cost_explorer.get_service_costs(start_date, end_date)
|
|
247
|
+
print(" → Cost data retrieved", file=sys.stderr)
|
|
245
248
|
|
|
246
249
|
if service_costs:
|
|
247
|
-
|
|
250
|
+
# Map collectors to AWS service names in Cost Explorer
|
|
251
|
+
collector_to_services = {
|
|
252
|
+
"s3": ["Amazon Simple Storage Service"],
|
|
253
|
+
"glue": ["AWS Glue"],
|
|
254
|
+
"athena": ["Amazon Athena"],
|
|
255
|
+
"redshift": ["Amazon Redshift"],
|
|
256
|
+
"iam": [], # IAM is free
|
|
257
|
+
"mwaa": ["Amazon Managed Workflows for Apache Airflow"],
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
# Filter costs based on active collectors
|
|
261
|
+
active_collector_names = (
|
|
262
|
+
[name.lower() for name in self.config.collectors]
|
|
263
|
+
if self.config.collectors
|
|
264
|
+
else list(collector_to_services.keys())
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
# Build list of relevant AWS service names
|
|
268
|
+
relevant_services = set()
|
|
269
|
+
for collector_name in active_collector_names:
|
|
270
|
+
services = collector_to_services.get(collector_name, [])
|
|
271
|
+
relevant_services.update(services)
|
|
272
|
+
|
|
273
|
+
# Filter service_costs to only include relevant services
|
|
274
|
+
if self.config.collectors: # Only filter if specific collectors requested
|
|
275
|
+
filtered_costs = {
|
|
276
|
+
svc: cost for svc, cost in service_costs.items() if svc in relevant_services
|
|
277
|
+
}
|
|
278
|
+
total_actual_cost = sum(filtered_costs.values())
|
|
279
|
+
display_costs = filtered_costs
|
|
280
|
+
scope_note = f"Filtered to collectors: {', '.join(self.config.collectors)}"
|
|
281
|
+
else:
|
|
282
|
+
# Full scan - show all costs
|
|
283
|
+
total_actual_cost = sum(service_costs.values())
|
|
284
|
+
display_costs = service_costs
|
|
285
|
+
scope_note = "Full scan - all services"
|
|
286
|
+
|
|
248
287
|
# Use the actual 30-day cost as monthly estimate
|
|
249
|
-
# This represents the actual spend, not an extrapolation
|
|
250
288
|
monthly_estimate = total_actual_cost
|
|
251
289
|
|
|
252
290
|
# Create a summary asset
|
|
@@ -257,7 +295,7 @@ class AWSScanner(CloudProviderScan):
|
|
|
257
295
|
service="Cost Explorer",
|
|
258
296
|
region="global",
|
|
259
297
|
arn="arn:aws:ce::cost-summary",
|
|
260
|
-
name="AWS Cost Summary
|
|
298
|
+
name=f"AWS Cost Summary - {scope_note}",
|
|
261
299
|
created_at=None,
|
|
262
300
|
last_activity_at=datetime.utcnow().isoformat(),
|
|
263
301
|
tags={},
|
|
@@ -266,10 +304,11 @@ class AWSScanner(CloudProviderScan):
|
|
|
266
304
|
ownership_confidence="unknown",
|
|
267
305
|
suggested_owner=None,
|
|
268
306
|
usage_metrics={
|
|
269
|
-
"actual_costs_30d":
|
|
307
|
+
"actual_costs_30d": display_costs,
|
|
270
308
|
"total_actual_cost_30d": total_actual_cost,
|
|
271
309
|
"estimated_monthly_cost": monthly_estimate,
|
|
272
|
-
"
|
|
310
|
+
"scope": scope_note,
|
|
311
|
+
"note": "Actual costs from AWS Cost Explorer API for the last 30 days.",
|
|
273
312
|
},
|
|
274
313
|
)
|
|
275
314
|
all_assets.append(cost_summary_asset)
|
|
@@ -300,39 +339,16 @@ class AWSScanner(CloudProviderScan):
|
|
|
300
339
|
def get_cost_estimate(self, asset: Asset) -> float:
|
|
301
340
|
"""Estimate monthly cost for an AWS asset.
|
|
302
341
|
|
|
303
|
-
|
|
304
|
-
|
|
342
|
+
Uses collector-based estimates for individual assets.
|
|
343
|
+
Service-level actual costs from Cost Explorer are already included
|
|
344
|
+
in the cost_summary asset and used for reporting.
|
|
305
345
|
"""
|
|
306
|
-
#
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
"S3": "Amazon Simple Storage Service",
|
|
311
|
-
"Athena": "Amazon Athena",
|
|
312
|
-
"Glue": "AWS Glue",
|
|
313
|
-
"Redshift": "Amazon Redshift",
|
|
314
|
-
"MWAA": "Amazon Managed Workflows for Apache Airflow",
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
cost_explorer_service = service_mapping.get(asset.service)
|
|
318
|
-
if cost_explorer_service:
|
|
319
|
-
# Get service-level cost from Cost Explorer (last 30 days actual cost)
|
|
320
|
-
service_cost = self.cost_explorer.get_monthly_cost_for_service(
|
|
321
|
-
cost_explorer_service
|
|
322
|
-
)
|
|
323
|
-
if service_cost > 0:
|
|
324
|
-
# We have actual service-level cost from Cost Explorer
|
|
325
|
-
# For now, we'll still use collector estimates for individual assets
|
|
326
|
-
# because Cost Explorer doesn't provide per-resource costs without tags
|
|
327
|
-
# But we could potentially distribute service cost across assets proportionally
|
|
328
|
-
# For now, prefer collector estimates which are more accurate per-resource
|
|
329
|
-
pass # Continue to collector-based estimation
|
|
330
|
-
|
|
331
|
-
except Exception:
|
|
332
|
-
# If Cost Explorer fails, fall back to collector-based estimation
|
|
333
|
-
pass
|
|
346
|
+
# Use the cost already set by the collector during collection
|
|
347
|
+
# This avoids making Cost Explorer API calls for each asset
|
|
348
|
+
if asset.cost_estimate_usd is not None and asset.cost_estimate_usd > 0:
|
|
349
|
+
return asset.cost_estimate_usd
|
|
334
350
|
|
|
335
|
-
# Delegate to appropriate collector based on service for
|
|
351
|
+
# Delegate to appropriate collector based on service for estimation
|
|
336
352
|
for collector in self.collectors:
|
|
337
353
|
if hasattr(collector, "get_cost_estimate"):
|
|
338
354
|
try:
|