PyPI - nuvu-scan - Versions diffs - 2.0.2__py3-none-any.whl → 2.1.6__py3-none-any.whl - Mend

nuvu-scan 2.0.2__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

nuvu_scan/cli/commands/scan.py +10 -1
nuvu_scan/cli/formatters/html.py +141 -20
nuvu_scan/core/base.py +44 -0
nuvu_scan/core/providers/aws/aws_scanner.py +187 -42
nuvu_scan/core/providers/aws/collectors/apigateway.py +197 -0
nuvu_scan/core/providers/aws/collectors/athena.py +102 -67
nuvu_scan/core/providers/aws/collectors/backup.py +252 -0
nuvu_scan/core/providers/aws/collectors/cloudfront.py +132 -0
nuvu_scan/core/providers/aws/collectors/cloudtrail.py +189 -0
nuvu_scan/core/providers/aws/collectors/cloudwatch.py +163 -0
nuvu_scan/core/providers/aws/collectors/cost_explorer.py +90 -0
nuvu_scan/core/providers/aws/collectors/dynamodb.py +236 -0
nuvu_scan/core/providers/aws/collectors/ec2.py +572 -0
nuvu_scan/core/providers/aws/collectors/ecs.py +243 -0
nuvu_scan/core/providers/aws/collectors/eks.py +246 -0
nuvu_scan/core/providers/aws/collectors/elasticache.py +325 -0
nuvu_scan/core/providers/aws/collectors/elb.py +198 -0
nuvu_scan/core/providers/aws/collectors/glue.py +104 -34
nuvu_scan/core/providers/aws/collectors/iam.py +393 -77
nuvu_scan/core/providers/aws/collectors/kinesis.py +174 -0
nuvu_scan/core/providers/aws/collectors/kms.py +186 -0
nuvu_scan/core/providers/aws/collectors/lakeformation.py +303 -0
nuvu_scan/core/providers/aws/collectors/lambda_collector.py +224 -0
nuvu_scan/core/providers/aws/collectors/misc_services.py +320 -0
nuvu_scan/core/providers/aws/collectors/mwaa.py +10 -5
nuvu_scan/core/providers/aws/collectors/rds.py +405 -0
nuvu_scan/core/providers/aws/collectors/redshift.py +381 -18
nuvu_scan/core/providers/aws/collectors/route53.py +183 -0
nuvu_scan/core/providers/aws/collectors/secrets.py +178 -0
nuvu_scan/core/providers/aws/collectors/security_services.py +329 -0
nuvu_scan/core/providers/aws/collectors/sns_sqs.py +284 -0
nuvu_scan/core/providers/aws/collectors/vpc_costs.py +296 -0
{nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/METADATA +45 -30
nuvu_scan-2.1.6.dist-info/RECORD +60 -0
nuvu_scan-2.0.2.dist-info/RECORD +0 -38
{nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/WHEEL +0 -0
{nuvu_scan-2.0.2.dist-info → nuvu_scan-2.1.6.dist-info}/entry_points.txt +0 -0

nuvu_scan/cli/commands/scan.py CHANGED Viewed

@@ -108,6 +108,11 @@ from ..formatters.json import JSONFormatter
     default="https://nuvu.dev",
     help="Nuvu Cloud API URL (default: https://nuvu.dev)",
 )
+@click.option(
+    "--list-collectors",
+    is_flag=True,
+    help="List available collectors for the specified provider and exit.",
+)
 def scan_command(
     provider: str,
     output_format: str,
@@ -326,6 +331,7 @@ def scan_command(
                 "total_cost_estimate_usd": result.total_cost_estimate_usd,
                 "scan_regions": scan_regions if scan_regions else None,
                 "scan_all_regions": not bool(region),
+                "summary": result.summary,  # Include cost data from Cost Explorer
                 "assets": [
                     {
                         "provider": asset.provider,
@@ -344,16 +350,19 @@ def scan_command(
                         "size_bytes": asset.size_bytes,
                         "tags": asset.tags,
                         "cost_estimate_usd": asset.cost_estimate_usd,
+                        "usage_metrics": asset.usage_metrics,  # Include all usage metrics
                         "risk_flags": asset.risk_flags,
                         "ownership_confidence": asset.ownership_confidence or "unknown",
                         "suggested_owner": asset.suggested_owner,
+                        "underlying_cloud_account_id": asset.underlying_cloud_account_id,
                     }
                     for asset in result.assets
                 ],
             }
             # Push to API using the /api/scans/import endpoint
-            with httpx.Client(timeout=60) as client:
+            # Use longer timeout for large scans (2000+ assets can take minutes)
+            with httpx.Client(timeout=300) as client:
                 response = client.post(
                     f"{api_url.rstrip('/')}/api/scans/import",
                     json=payload,

nuvu_scan/cli/formatters/html.py CHANGED Viewed

@@ -10,9 +10,8 @@ class HTMLFormatter:
     def format(self, result: ScanResult) -> str:
         """Format scan result as HTML."""
-        # Build summary cards (use actual cost if available)
+        # Build summary cards (use actual cost from Cost Explorer if available)
         actual_total = result.summary.get("total_actual_cost_30d")
-        estimated_assets_total = result.summary.get("estimated_assets_cost_total")
         # Calculate cost saving opportunities
         savings_opportunities = self._calculate_savings(result.assets)
@@ -29,17 +28,16 @@ class HTMLFormatter:
             <div class="summary-card">
                 <h3>Actual 30-Day Cost</h3>
                 <div class="value">${actual_total:,.2f}</div>
-            </div>
-            <div class="summary-card">
-                <h3>Estimated Asset Cost</h3>
-                <div class="value">${(estimated_assets_total or 0):,.2f}</div>
+                <div class="card-note">From AWS Cost Explorer</div>
             </div>
             """
         else:
+            # Fallback when Cost Explorer data not available
             summary_cards += f"""
             <div class="summary-card">
-                <h3>Estimated Monthly Cost</h3>
+                <h3>Monthly Cost</h3>
                 <div class="value">${result.total_cost_estimate_usd:,.2f}</div>
+                <div class="card-note">Add Cost Explorer permissions for accurate data</div>
             </div>
             """
@@ -99,6 +97,7 @@ class HTMLFormatter:
         .summary-card.savings {{ border-left-color: #ff9800; background: #fff8e1; }}
         .summary-card h3 {{ margin: 0 0 10px 0; color: #666; font-size: 13px; text-transform: uppercase; }}
         .summary-card .value {{ font-size: 22px; font-weight: bold; color: #333; }}
+        .summary-card .card-note {{ font-size: 11px; color: #888; margin-top: 5px; }}
         table {{ width: 100%; border-collapse: collapse; margin: 20px 0; }}
         table.compact {{ font-size: 13px; }}
         table.compact th, table.compact td {{ padding: 8px; }}
@@ -133,6 +132,12 @@ class HTMLFormatter:
         <p><strong>Account ID:</strong> {result.account_id}</p>
         <p><strong>Scan Time:</strong> {result.scan_timestamp}</p>
+        <h2>📋 Scan Scope</h2>
+        <div class="insight-box info">
+            <p><strong>Collectors:</strong> {", ".join(result.scanned_collectors) if result.scanned_collectors else "All (Full Scan)"}</p>
+            <p><strong>Regions:</strong> {", ".join(result.scanned_regions[:10]) if result.scanned_regions else "All enabled regions"}{" (+ " + str(len(result.scanned_regions) - 10) + " more)" if len(result.scanned_regions) > 10 else ""}</p>
+        </div>
         <h2>Executive Summary</h2>
         <div class="summary">
 {summary_cards}
@@ -157,7 +162,27 @@ class HTMLFormatter:
             html += f"            <tr><td>{category.replace('_', ' ').title()}</td><td>{count}</td></tr>\n"
         # All Assets - COLLAPSIBLE
-        asset_count = len(result.assets)
+        # Filter out:
+        # - Expired/retired reserved nodes (historical clutter)
+        # - Cost summary (it's a summary row, not an asset)
+        # They're still counted in the governance summary for context
+        display_assets = [
+            a
+            for a in result.assets
+            if not (
+                # Exclude expired/retired reserved nodes
+                (
+                    a.asset_type == "redshift_reserved_node"
+                    and any(
+                        flag in (a.risk_flags or [])
+                        for flag in ["reservation_expired", "reservation_retired"]
+                    )
+                )
+                # Exclude cost summary pseudo-asset
+                or a.asset_type == "cost_summary"
+            )
+        ]
+        asset_count = len(display_assets)
         html += f"""        </table>
         <button class="collapsible">All Assets <span class="asset-count">({asset_count} items)</span></button>
@@ -175,11 +200,12 @@ class HTMLFormatter:
 """
         # Sort assets by cost (descending)
-        sorted_assets = sorted(result.assets, key=lambda x: x.cost_estimate_usd or 0, reverse=True)
+        sorted_assets = sorted(display_assets, key=lambda x: x.cost_estimate_usd or 0, reverse=True)
         for asset in sorted_assets:
             owner_class = ""
-            if asset.ownership_confidence == "unknown":
+            # Only show no-owner class if we have no suggested owner at all
+            if not asset.suggested_owner and asset.ownership_confidence == "unknown":
                 owner_class = "no-owner"
             risk_flags_html = ""
@@ -213,8 +239,11 @@ class HTMLFormatter:
         </script>
         <div class="footer">
-            <p>Generated by Nuvu - AWS Data Asset Control</p>
-            <p>Visit <a href="https://nuvu.dev">https://nuvu.dev</a> for continuous monitoring</p>
+            <p><strong>nuvu-scan</strong> — The Open Source Cloud Data Scanner</p>
+            <p><a href="https://github.com/nuvudev/nuvu-scan" target="_blank">github.com/nuvudev/nuvu-scan</a></p>
+            <p style="margin-top: 12px; font-size: 11px; color: #888;">
+                Add the governance layer: <a href="https://nuvu.dev" style="color: #666;">Nuvu Cloud</a> — historical tracking • team dashboards • scheduled scans • Slack/email alerts
+            </p>
         </div>
     </div>
 </body>
@@ -303,24 +332,61 @@ class HTMLFormatter:
         </div>
             """
-        # Reserved nodes analysis
+        # Reserved nodes analysis - compare with provisioned clusters
         if reserved_nodes:
             active_reservations = [
                 a for a in reserved_nodes if (a.usage_metrics or {}).get("state") == "active"
             ]
-            expired = [a for a in reserved_nodes if "reservation_expired" in (a.risk_flags or [])]
+            # Count total nodes covered by active reservations
+            active_reserved_nodes = sum(
+                (a.usage_metrics or {}).get("node_count", 0) for a in active_reservations
+            )
+            # Count total provisioned cluster nodes
+            clusters = [a for a in assets if a.asset_type == "redshift_cluster"]
+            total_provisioned_nodes = sum(
+                (a.usage_metrics or {}).get("node_count", 0) for a in clusters
+            )
+            # Calculate uncovered nodes (potential savings opportunity)
+            uncovered_nodes = max(0, total_provisioned_nodes - active_reserved_nodes)
+            # Determine if this is a savings opportunity
+            is_savings_opportunity = uncovered_nodes > 0
+            box_class = "warning" if is_savings_opportunity else "info"
             html += f"""
-        <div class="insight-box info">
-            <h3>🎫 Reserved Nodes ({len(reserved_nodes)} total)</h3>
+        <div class="insight-box {box_class}">
+            <h3>🎫 Reserved vs On-Demand Nodes</h3>
             <ul>
-                <li><strong>Active Reservations:</strong> {len(active_reservations)}</li>
-                <li><strong>Expired/Retired:</strong> {len(expired)}</li>
-                <li><strong>Expiring Soon:</strong> {len(expiring_reservations)}</li>
+                <li><strong>Provisioned Cluster Nodes:</strong> {total_provisioned_nodes}</li>
+                <li><strong>Active Reserved Nodes:</strong> {active_reserved_nodes} ({len(active_reservations)} reservations)</li>
+                <li><strong>Uncovered (On-Demand) Nodes:</strong> {uncovered_nodes}</li>
             </ul>
-        </div>
             """
+            if is_savings_opportunity:
+                # Reserved pricing typically saves 30-40% vs on-demand
+                html += f"""
+            <p class="recommendation">💰 <strong>Potential Savings:</strong> {uncovered_nodes} nodes running on-demand pricing. Reserved nodes typically offer 30-40% discount.</p>
+            """
+            else:
+                html += """
+            <p class="recommendation">✅ All provisioned nodes are covered by reservations.</p>
+            """
+            # Show expiring reservations if any
+            if expiring_reservations:
+                expiring_nodes = sum(
+                    (a.usage_metrics or {}).get("node_count", 0) for a in expiring_reservations
+                )
+                html += f"""
+            <p class="recommendation">⚠️ <strong>{len(expiring_reservations)} reservations ({expiring_nodes} nodes) expiring soon.</strong> Plan for renewal to maintain coverage.</p>
+            """
+            html += "</div>"
         return html
     def _build_governance_section(self, assets) -> str:
@@ -418,4 +484,59 @@ class HTMLFormatter:
                 html += f"<li><strong>{cluster.name}</strong>: {queues} queues, Auto WLM: {auto_wlm} ({flags})</li>"
             html += "</ul></div>"
+        # Add cluster performance section
+        clusters_with_metrics = [
+            a
+            for a in clusters
+            if (a.usage_metrics or {}).get("cpu_utilization_max_24h") is not None
+        ]
+        if clusters_with_metrics:
+            html += """
+        <div class="insight-box info">
+            <h3>📊 Cluster Performance (Last 24h)</h3>
+            <table class="compact">
+                <tr><th>Cluster</th><th>CPU Max</th><th>CPU Avg</th><th>Queries</th><th>Disk Used</th><th>Recommendation</th></tr>
+            """
+            for cluster in clusters_with_metrics[:10]:
+                metrics = cluster.usage_metrics or {}
+                cpu_max = metrics.get("cpu_utilization_max_24h", 0)
+                cpu_avg = metrics.get("cpu_utilization_avg_24h", 0)
+                queries = metrics.get("queries_completed_24h", 0)
+                disk = metrics.get("disk_space_used_percent", 0)
+                rec = metrics.get("performance_recommendation", "-")
+                html += (
+                    f"<tr><td>{cluster.name}</td><td>{cpu_max:.1f}%</td>"
+                    f"<td>{cpu_avg:.1f}%</td><td>{queries}</td>"
+                    f"<td>{disk:.1f}%</td><td>{rec if rec else '-'}</td></tr>"
+                )
+            html += "</table></div>"
+        # Add serverless workgroup performance section
+        serverless_wgs = [a for a in assets if a.asset_type == "redshift_serverless_workgroup"]
+        serverless_with_metrics = [
+            a for a in serverless_wgs if (a.usage_metrics or {}).get("rpu_max_7d") is not None
+        ]
+        if serverless_with_metrics:
+            html += """
+        <div class="insight-box info">
+            <h3>🚀 Serverless Workgroup Utilization</h3>
+            <table class="compact">
+                <tr><th>Workgroup</th><th>Base RPU</th><th>Max RPU (7d)</th><th>Avg RPU (7d)</th><th>Queries (24h)</th><th>Recommendation</th></tr>
+            """
+            for wg in serverless_with_metrics[:10]:
+                metrics = wg.usage_metrics or {}
+                base = metrics.get("base_capacity", 0)
+                rpu_max = metrics.get("rpu_max_7d", 0)
+                rpu_avg = metrics.get("rpu_avg_7d", 0)
+                queries = metrics.get("queries_completed_24h", 0) + metrics.get(
+                    "queries_failed_24h", 0
+                )
+                rec = metrics.get("utilization_recommendation", "-")
+                html += (
+                    f"<tr><td>{wg.name}</td><td>{base}</td>"
+                    f"<td>{rpu_max:.1f}</td><td>{rpu_avg:.1f}</td>"
+                    f"<td>{queries}</td><td>{rec if rec else '-'}</td></tr>"
+                )
+            html += "</table></div>"
         return html

nuvu_scan/core/base.py CHANGED Viewed

@@ -28,6 +28,16 @@ class NormalizedCategory(str, Enum):
     DATABASE = "database"
     SECURITY = "security"
     BILLING = "billing"
+    # Additional categories for comprehensive coverage
+    NETWORKING = "networking"  # VPC, Load Balancers, Route 53, CloudFront
+    CACHING = "caching"  # ElastiCache, DAX
+    CONTAINER = "container"  # ECS, ECR, Fargate
+    SERVERLESS = "serverless"  # Lambda, Step Functions, API Gateway
+    STORAGE = "storage"  # EBS, EFS, FSx
+    MESSAGING = "messaging"  # SNS, SQS, EventBridge
+    OBSERVABILITY = "observability"  # CloudWatch, X-Ray
+    RESILIENCE = "resilience"  # Backup, DR
+    GOVERNANCE = "governance"  # Config, CloudTrail, Organizations
 @dataclass
@@ -88,10 +98,17 @@ class ScanResult:
     assets: list[Asset]
     total_cost_estimate_usd: float
     summary: dict[str, Any] = None
+    # Scan scope metadata
+    scanned_regions: list[str] = None
+    scanned_collectors: list[str] = None
     def __post_init__(self):
         if self.summary is None:
             self.summary = {}
+        if self.scanned_regions is None:
+            self.scanned_regions = []
+        if self.scanned_collectors is None:
+            self.scanned_collectors = []
 class CloudProviderScan(ABC):
@@ -173,6 +190,12 @@ class CloudProviderScan(ABC):
         # Build summary
         summary = self._build_summary(assets)
+        # Get scanned regions from assets
+        scanned_regions = sorted(set(asset.region for asset in assets if asset.region))
+        # Get scanned collectors from config
+        scanned_collectors = self.config.collectors if self.config.collectors else []
         return ScanResult(
             provider=self.provider,
             account_id=self.config.account_id or "unknown",
@@ -180,6 +203,8 @@ class CloudProviderScan(ABC):
             assets=assets,
             total_cost_estimate_usd=total_cost,
             summary=summary,
+            scanned_regions=scanned_regions,
+            scanned_collectors=scanned_collectors,
         )
     def _build_summary(self, assets: list[Asset]) -> dict[str, Any]:
@@ -219,6 +244,21 @@ class CloudProviderScan(ABC):
             if asset.risk_flags:
                 risky_count += 1
+        # Find cost summary asset if present
+        actual_costs_30d = {}
+        total_actual_cost_30d = None
+        for asset in assets:
+            if asset.asset_type == "cost_summary":
+                usage = asset.usage_metrics or {}
+                actual_costs_30d = usage.get("actual_costs_30d", {})
+                total_actual_cost_30d = usage.get("total_actual_cost_30d")
+                break
+        # Calculate estimated asset costs (excluding cost_summary)
+        estimated_assets_total = sum(
+            asset.cost_estimate_usd or 0 for asset in assets if asset.asset_type != "cost_summary"
+        )
         return {
             "total_assets": total_assets,
             "assets_by_category": assets_by_category,
@@ -226,4 +266,8 @@ class CloudProviderScan(ABC):
             "unused_count": unused_count,
             "no_owner_count": no_owner_count,
             "risky_count": risky_count,
+            # Cost data
+            "actual_costs_30d": actual_costs_30d,
+            "total_actual_cost_30d": total_actual_cost_30d,
+            "estimated_assets_cost_total": estimated_assets_total,
         }

nuvu-scan 2.0.2__py3-none-any.whl → 2.1.6__py3-none-any.whl

nuvu-scan 2.0.2__py3-none-any.whl → 2.1.6__py3-none-any.whl