nuvu-scan 2.0.2__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
Files changed (37):
  1. nuvu_scan/cli/commands/scan.py +10 -1
  2. nuvu_scan/cli/formatters/html.py +141 -20
  3. nuvu_scan/core/base.py +44 -0
  4. nuvu_scan/core/providers/aws/aws_scanner.py +187 -42
  5. nuvu_scan/core/providers/aws/collectors/apigateway.py +197 -0
  6. nuvu_scan/core/providers/aws/collectors/athena.py +102 -67
  7. nuvu_scan/core/providers/aws/collectors/backup.py +252 -0
  8. nuvu_scan/core/providers/aws/collectors/cloudfront.py +132 -0
  9. nuvu_scan/core/providers/aws/collectors/cloudtrail.py +189 -0
  10. nuvu_scan/core/providers/aws/collectors/cloudwatch.py +163 -0
  11. nuvu_scan/core/providers/aws/collectors/cost_explorer.py +90 -0
  12. nuvu_scan/core/providers/aws/collectors/dynamodb.py +236 -0
  13. nuvu_scan/core/providers/aws/collectors/ec2.py +572 -0
  14. nuvu_scan/core/providers/aws/collectors/ecs.py +243 -0
  15. nuvu_scan/core/providers/aws/collectors/eks.py +246 -0
  16. nuvu_scan/core/providers/aws/collectors/elasticache.py +325 -0
  17. nuvu_scan/core/providers/aws/collectors/elb.py +198 -0
  18. nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
  19. nuvu_scan/core/providers/aws/collectors/iam.py +393 -77
  20. nuvu_scan/core/providers/aws/collectors/kinesis.py +174 -0
  21. nuvu_scan/core/providers/aws/collectors/kms.py +186 -0
  22. nuvu_scan/core/providers/aws/collectors/lakeformation.py +303 -0
  23. nuvu_scan/core/providers/aws/collectors/lambda_collector.py +224 -0
  24. nuvu_scan/core/providers/aws/collectors/misc_services.py +320 -0
  25. nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
  26. nuvu_scan/core/providers/aws/collectors/rds.py +405 -0
  27. nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
  28. nuvu_scan/core/providers/aws/collectors/route53.py +183 -0
  29. nuvu_scan/core/providers/aws/collectors/secrets.py +178 -0
  30. nuvu_scan/core/providers/aws/collectors/security_services.py +329 -0
  31. nuvu_scan/core/providers/aws/collectors/sns_sqs.py +284 -0
  32. nuvu_scan/core/providers/aws/collectors/vpc_costs.py +296 -0
  33. {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/METADATA +45 -30
  34. nuvu_scan-2.1.6.dist-info/RECORD +60 -0
  35. nuvu_scan-2.0.2.dist-info/RECORD +0 -38
  36. {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/WHEEL +0 -0
  37. {nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/entry_points.txt +0 -0
@@ -108,6 +108,11 @@ from ..formatters.json import JSONFormatter
108
108
  default="https://nuvu.dev",
109
109
  help="Nuvu Cloud API URL (default: https://nuvu.dev)",
110
110
  )
111
+ @click.option(
112
+ "--list-collectors",
113
+ is_flag=True,
114
+ help="List available collectors for the specified provider and exit.",
115
+ )
111
116
  def scan_command(
112
117
  provider: str,
113
118
  output_format: str,
@@ -326,6 +331,7 @@ def scan_command(
326
331
  "total_cost_estimate_usd": result.total_cost_estimate_usd,
327
332
  "scan_regions": scan_regions if scan_regions else None,
328
333
  "scan_all_regions": not bool(region),
334
+ "summary": result.summary, # Include cost data from Cost Explorer
329
335
  "assets": [
330
336
  {
331
337
  "provider": asset.provider,
@@ -344,16 +350,19 @@ def scan_command(
344
350
  "size_bytes": asset.size_bytes,
345
351
  "tags": asset.tags,
346
352
  "cost_estimate_usd": asset.cost_estimate_usd,
353
+ "usage_metrics": asset.usage_metrics, # Include all usage metrics
347
354
  "risk_flags": asset.risk_flags,
348
355
  "ownership_confidence": asset.ownership_confidence or "unknown",
349
356
  "suggested_owner": asset.suggested_owner,
357
+ "underlying_cloud_account_id": asset.underlying_cloud_account_id,
350
358
  }
351
359
  for asset in result.assets
352
360
  ],
353
361
  }
354
362
 
355
363
  # Push to API using the /api/scans/import endpoint
356
- with httpx.Client(timeout=60) as client:
364
+ # Use longer timeout for large scans (2000+ assets can take minutes)
365
+ with httpx.Client(timeout=300) as client:
357
366
  response = client.post(
358
367
  f"{api_url.rstrip('/')}/api/scans/import",
359
368
  json=payload,
@@ -10,9 +10,8 @@ class HTMLFormatter:
10
10
 
11
11
  def format(self, result: ScanResult) -> str:
12
12
  """Format scan result as HTML."""
13
- # Build summary cards (use actual cost if available)
13
+ # Build summary cards (use actual cost from Cost Explorer if available)
14
14
  actual_total = result.summary.get("total_actual_cost_30d")
15
- estimated_assets_total = result.summary.get("estimated_assets_cost_total")
16
15
 
17
16
  # Calculate cost saving opportunities
18
17
  savings_opportunities = self._calculate_savings(result.assets)
@@ -29,17 +28,16 @@ class HTMLFormatter:
29
28
  <div class="summary-card">
30
29
  <h3>Actual 30-Day Cost</h3>
31
30
  <div class="value">${actual_total:,.2f}</div>
32
- </div>
33
- <div class="summary-card">
34
- <h3>Estimated Asset Cost</h3>
35
- <div class="value">${(estimated_assets_total or 0):,.2f}</div>
31
+ <div class="card-note">From AWS Cost Explorer</div>
36
32
  </div>
37
33
  """
38
34
  else:
35
+ # Fallback when Cost Explorer data not available
39
36
  summary_cards += f"""
40
37
  <div class="summary-card">
41
- <h3>Estimated Monthly Cost</h3>
38
+ <h3>Monthly Cost</h3>
42
39
  <div class="value">${result.total_cost_estimate_usd:,.2f}</div>
40
+ <div class="card-note">Add Cost Explorer permissions for accurate data</div>
43
41
  </div>
44
42
  """
45
43
 
@@ -99,6 +97,7 @@ class HTMLFormatter:
99
97
  .summary-card.savings {{ border-left-color: #ff9800; background: #fff8e1; }}
100
98
  .summary-card h3 {{ margin: 0 0 10px 0; color: #666; font-size: 13px; text-transform: uppercase; }}
101
99
  .summary-card .value {{ font-size: 22px; font-weight: bold; color: #333; }}
100
+ .summary-card .card-note {{ font-size: 11px; color: #888; margin-top: 5px; }}
102
101
  table {{ width: 100%; border-collapse: collapse; margin: 20px 0; }}
103
102
  table.compact {{ font-size: 13px; }}
104
103
  table.compact th, table.compact td {{ padding: 8px; }}
@@ -133,6 +132,12 @@ class HTMLFormatter:
133
132
  <p><strong>Account ID:</strong> {result.account_id}</p>
134
133
  <p><strong>Scan Time:</strong> {result.scan_timestamp}</p>
135
134
 
135
+ <h2>📋 Scan Scope</h2>
136
+ <div class="insight-box info">
137
+ <p><strong>Collectors:</strong> {", ".join(result.scanned_collectors) if result.scanned_collectors else "All (Full Scan)"}</p>
138
+ <p><strong>Regions:</strong> {", ".join(result.scanned_regions[:10]) if result.scanned_regions else "All enabled regions"}{" (+ " + str(len(result.scanned_regions) - 10) + " more)" if len(result.scanned_regions) > 10 else ""}</p>
139
+ </div>
140
+
136
141
  <h2>Executive Summary</h2>
137
142
  <div class="summary">
138
143
  {summary_cards}
@@ -157,7 +162,27 @@ class HTMLFormatter:
157
162
  html += f" <tr><td>{category.replace('_', ' ').title()}</td><td>{count}</td></tr>\n"
158
163
 
159
164
  # All Assets - COLLAPSIBLE
160
- asset_count = len(result.assets)
165
+ # Filter out:
166
+ # - Expired/retired reserved nodes (historical clutter)
167
+ # - Cost summary (it's a summary row, not an asset)
168
+ # They're still counted in the governance summary for context
169
+ display_assets = [
170
+ a
171
+ for a in result.assets
172
+ if not (
173
+ # Exclude expired/retired reserved nodes
174
+ (
175
+ a.asset_type == "redshift_reserved_node"
176
+ and any(
177
+ flag in (a.risk_flags or [])
178
+ for flag in ["reservation_expired", "reservation_retired"]
179
+ )
180
+ )
181
+ # Exclude cost summary pseudo-asset
182
+ or a.asset_type == "cost_summary"
183
+ )
184
+ ]
185
+ asset_count = len(display_assets)
161
186
  html += f""" </table>
162
187
 
163
188
  <button class="collapsible">All Assets <span class="asset-count">({asset_count} items)</span></button>
@@ -175,11 +200,12 @@ class HTMLFormatter:
175
200
  """
176
201
 
177
202
  # Sort assets by cost (descending)
178
- sorted_assets = sorted(result.assets, key=lambda x: x.cost_estimate_usd or 0, reverse=True)
203
+ sorted_assets = sorted(display_assets, key=lambda x: x.cost_estimate_usd or 0, reverse=True)
179
204
 
180
205
  for asset in sorted_assets:
181
206
  owner_class = ""
182
- if asset.ownership_confidence == "unknown":
207
+ # Only show no-owner class if we have no suggested owner at all
208
+ if not asset.suggested_owner and asset.ownership_confidence == "unknown":
183
209
  owner_class = "no-owner"
184
210
 
185
211
  risk_flags_html = ""
@@ -213,8 +239,11 @@ class HTMLFormatter:
213
239
  </script>
214
240
 
215
241
  <div class="footer">
216
- <p>Generated by Nuvu - AWS Data Asset Control</p>
217
- <p>Visit <a href="https://nuvu.dev">https://nuvu.dev</a> for continuous monitoring</p>
242
+ <p><strong>nuvu-scan</strong> The Open Source Cloud Data Scanner</p>
243
+ <p><a href="https://github.com/nuvudev/nuvu-scan" target="_blank">github.com/nuvudev/nuvu-scan</a></p>
244
+ <p style="margin-top: 12px; font-size: 11px; color: #888;">
245
+ Add the governance layer: <a href="https://nuvu.dev" style="color: #666;">Nuvu Cloud</a> — historical tracking • team dashboards • scheduled scans • Slack/email alerts
246
+ </p>
218
247
  </div>
219
248
  </div>
220
249
  </body>
@@ -303,24 +332,61 @@ class HTMLFormatter:
303
332
  </div>
304
333
  """
305
334
 
306
- # Reserved nodes analysis
335
+ # Reserved nodes analysis - compare with provisioned clusters
307
336
  if reserved_nodes:
308
337
  active_reservations = [
309
338
  a for a in reserved_nodes if (a.usage_metrics or {}).get("state") == "active"
310
339
  ]
311
- expired = [a for a in reserved_nodes if "reservation_expired" in (a.risk_flags or [])]
340
+
341
+ # Count total nodes covered by active reservations
342
+ active_reserved_nodes = sum(
343
+ (a.usage_metrics or {}).get("node_count", 0) for a in active_reservations
344
+ )
345
+
346
+ # Count total provisioned cluster nodes
347
+ clusters = [a for a in assets if a.asset_type == "redshift_cluster"]
348
+ total_provisioned_nodes = sum(
349
+ (a.usage_metrics or {}).get("node_count", 0) for a in clusters
350
+ )
351
+
352
+ # Calculate uncovered nodes (potential savings opportunity)
353
+ uncovered_nodes = max(0, total_provisioned_nodes - active_reserved_nodes)
354
+
355
+ # Determine if this is a savings opportunity
356
+ is_savings_opportunity = uncovered_nodes > 0
357
+ box_class = "warning" if is_savings_opportunity else "info"
312
358
 
313
359
  html += f"""
314
- <div class="insight-box info">
315
- <h3>🎫 Reserved Nodes ({len(reserved_nodes)} total)</h3>
360
+ <div class="insight-box {box_class}">
361
+ <h3>🎫 Reserved vs On-Demand Nodes</h3>
316
362
  <ul>
317
- <li><strong>Active Reservations:</strong> {len(active_reservations)}</li>
318
- <li><strong>Expired/Retired:</strong> {len(expired)}</li>
319
- <li><strong>Expiring Soon:</strong> {len(expiring_reservations)}</li>
363
+ <li><strong>Provisioned Cluster Nodes:</strong> {total_provisioned_nodes}</li>
364
+ <li><strong>Active Reserved Nodes:</strong> {active_reserved_nodes} ({len(active_reservations)} reservations)</li>
365
+ <li><strong>Uncovered (On-Demand) Nodes:</strong> {uncovered_nodes}</li>
320
366
  </ul>
321
- </div>
322
367
  """
323
368
 
369
+ if is_savings_opportunity:
370
+ # Reserved pricing typically saves 30-40% vs on-demand
371
+ html += f"""
372
+ <p class="recommendation">💰 <strong>Potential Savings:</strong> {uncovered_nodes} nodes running on-demand pricing. Reserved nodes typically offer 30-40% discount.</p>
373
+ """
374
+ else:
375
+ html += """
376
+ <p class="recommendation">✅ All provisioned nodes are covered by reservations.</p>
377
+ """
378
+
379
+ # Show expiring reservations if any
380
+ if expiring_reservations:
381
+ expiring_nodes = sum(
382
+ (a.usage_metrics or {}).get("node_count", 0) for a in expiring_reservations
383
+ )
384
+ html += f"""
385
+ <p class="recommendation">⚠️ <strong>{len(expiring_reservations)} reservations ({expiring_nodes} nodes) expiring soon.</strong> Plan for renewal to maintain coverage.</p>
386
+ """
387
+
388
+ html += "</div>"
389
+
324
390
  return html
325
391
 
326
392
  def _build_governance_section(self, assets) -> str:
@@ -418,4 +484,59 @@ class HTMLFormatter:
418
484
  html += f"<li><strong>{cluster.name}</strong>: {queues} queues, Auto WLM: {auto_wlm} ({flags})</li>"
419
485
  html += "</ul></div>"
420
486
 
487
+ # Add cluster performance section
488
+ clusters_with_metrics = [
489
+ a
490
+ for a in clusters
491
+ if (a.usage_metrics or {}).get("cpu_utilization_max_24h") is not None
492
+ ]
493
+ if clusters_with_metrics:
494
+ html += """
495
+ <div class="insight-box info">
496
+ <h3>📊 Cluster Performance (Last 24h)</h3>
497
+ <table class="compact">
498
+ <tr><th>Cluster</th><th>CPU Max</th><th>CPU Avg</th><th>Queries</th><th>Disk Used</th><th>Recommendation</th></tr>
499
+ """
500
+ for cluster in clusters_with_metrics[:10]:
501
+ metrics = cluster.usage_metrics or {}
502
+ cpu_max = metrics.get("cpu_utilization_max_24h", 0)
503
+ cpu_avg = metrics.get("cpu_utilization_avg_24h", 0)
504
+ queries = metrics.get("queries_completed_24h", 0)
505
+ disk = metrics.get("disk_space_used_percent", 0)
506
+ rec = metrics.get("performance_recommendation", "-")
507
+ html += (
508
+ f"<tr><td>{cluster.name}</td><td>{cpu_max:.1f}%</td>"
509
+ f"<td>{cpu_avg:.1f}%</td><td>{queries}</td>"
510
+ f"<td>{disk:.1f}%</td><td>{rec if rec else '-'}</td></tr>"
511
+ )
512
+ html += "</table></div>"
513
+
514
+ # Add serverless workgroup performance section
515
+ serverless_wgs = [a for a in assets if a.asset_type == "redshift_serverless_workgroup"]
516
+ serverless_with_metrics = [
517
+ a for a in serverless_wgs if (a.usage_metrics or {}).get("rpu_max_7d") is not None
518
+ ]
519
+ if serverless_with_metrics:
520
+ html += """
521
+ <div class="insight-box info">
522
+ <h3>🚀 Serverless Workgroup Utilization</h3>
523
+ <table class="compact">
524
+ <tr><th>Workgroup</th><th>Base RPU</th><th>Max RPU (7d)</th><th>Avg RPU (7d)</th><th>Queries (24h)</th><th>Recommendation</th></tr>
525
+ """
526
+ for wg in serverless_with_metrics[:10]:
527
+ metrics = wg.usage_metrics or {}
528
+ base = metrics.get("base_capacity", 0)
529
+ rpu_max = metrics.get("rpu_max_7d", 0)
530
+ rpu_avg = metrics.get("rpu_avg_7d", 0)
531
+ queries = metrics.get("queries_completed_24h", 0) + metrics.get(
532
+ "queries_failed_24h", 0
533
+ )
534
+ rec = metrics.get("utilization_recommendation", "-")
535
+ html += (
536
+ f"<tr><td>{wg.name}</td><td>{base}</td>"
537
+ f"<td>{rpu_max:.1f}</td><td>{rpu_avg:.1f}</td>"
538
+ f"<td>{queries}</td><td>{rec if rec else '-'}</td></tr>"
539
+ )
540
+ html += "</table></div>"
541
+
421
542
  return html
nuvu_scan/core/base.py CHANGED
@@ -28,6 +28,16 @@ class NormalizedCategory(str, Enum):
28
28
  DATABASE = "database"
29
29
  SECURITY = "security"
30
30
  BILLING = "billing"
31
+ # Additional categories for comprehensive coverage
32
+ NETWORKING = "networking" # VPC, Load Balancers, Route 53, CloudFront
33
+ CACHING = "caching" # ElastiCache, DAX
34
+ CONTAINER = "container" # ECS, ECR, Fargate
35
+ SERVERLESS = "serverless" # Lambda, Step Functions, API Gateway
36
+ STORAGE = "storage" # EBS, EFS, FSx
37
+ MESSAGING = "messaging" # SNS, SQS, EventBridge
38
+ OBSERVABILITY = "observability" # CloudWatch, X-Ray
39
+ RESILIENCE = "resilience" # Backup, DR
40
+ GOVERNANCE = "governance" # Config, CloudTrail, Organizations
31
41
 
32
42
 
33
43
  @dataclass
@@ -88,10 +98,17 @@ class ScanResult:
88
98
  assets: list[Asset]
89
99
  total_cost_estimate_usd: float
90
100
  summary: dict[str, Any] = None
101
+ # Scan scope metadata
102
+ scanned_regions: list[str] = None
103
+ scanned_collectors: list[str] = None
91
104
 
92
105
  def __post_init__(self):
93
106
  if self.summary is None:
94
107
  self.summary = {}
108
+ if self.scanned_regions is None:
109
+ self.scanned_regions = []
110
+ if self.scanned_collectors is None:
111
+ self.scanned_collectors = []
95
112
 
96
113
 
97
114
  class CloudProviderScan(ABC):
@@ -173,6 +190,12 @@ class CloudProviderScan(ABC):
173
190
  # Build summary
174
191
  summary = self._build_summary(assets)
175
192
 
193
+ # Get scanned regions from assets
194
+ scanned_regions = sorted(set(asset.region for asset in assets if asset.region))
195
+
196
+ # Get scanned collectors from config
197
+ scanned_collectors = self.config.collectors if self.config.collectors else []
198
+
176
199
  return ScanResult(
177
200
  provider=self.provider,
178
201
  account_id=self.config.account_id or "unknown",
@@ -180,6 +203,8 @@ class CloudProviderScan(ABC):
180
203
  assets=assets,
181
204
  total_cost_estimate_usd=total_cost,
182
205
  summary=summary,
206
+ scanned_regions=scanned_regions,
207
+ scanned_collectors=scanned_collectors,
183
208
  )
184
209
 
185
210
  def _build_summary(self, assets: list[Asset]) -> dict[str, Any]:
@@ -219,6 +244,21 @@ class CloudProviderScan(ABC):
219
244
  if asset.risk_flags:
220
245
  risky_count += 1
221
246
 
247
+ # Find cost summary asset if present
248
+ actual_costs_30d = {}
249
+ total_actual_cost_30d = None
250
+ for asset in assets:
251
+ if asset.asset_type == "cost_summary":
252
+ usage = asset.usage_metrics or {}
253
+ actual_costs_30d = usage.get("actual_costs_30d", {})
254
+ total_actual_cost_30d = usage.get("total_actual_cost_30d")
255
+ break
256
+
257
+ # Calculate estimated asset costs (excluding cost_summary)
258
+ estimated_assets_total = sum(
259
+ asset.cost_estimate_usd or 0 for asset in assets if asset.asset_type != "cost_summary"
260
+ )
261
+
222
262
  return {
223
263
  "total_assets": total_assets,
224
264
  "assets_by_category": assets_by_category,
@@ -226,4 +266,8 @@ class CloudProviderScan(ABC):
226
266
  "unused_count": unused_count,
227
267
  "no_owner_count": no_owner_count,
228
268
  "risky_count": risky_count,
269
+ # Cost data
270
+ "actual_costs_30d": actual_costs_30d,
271
+ "total_actual_cost_30d": total_actual_cost_30d,
272
+ "estimated_assets_cost_total": estimated_assets_total,
229
273
  }