nuvu-scan 1.3.8__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nuvu_scan/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Nuvu - Multi-Cloud Data Asset Control."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "2.0.0"
@@ -42,6 +42,15 @@ from ..formatters.json import JSONFormatter
42
42
  multiple=True,
43
43
  help="Cloud provider region(s) to scan (can be specified multiple times, default: all regions)",
44
44
  )
45
+ @click.option(
46
+ "--collectors",
47
+ "-c",
48
+ multiple=True,
49
+ help="Specific collector(s) to run (can be specified multiple times). "
50
+ "AWS: s3, glue, athena, redshift, iam, mwaa. "
51
+ "GCP: gcs, bigquery, dataproc, pubsub, iam, gemini. "
52
+ "Default: all collectors.",
53
+ )
45
54
  @click.option(
46
55
  "--access-key-id",
47
56
  envvar="AWS_ACCESS_KEY_ID",
@@ -103,11 +112,17 @@ from ..formatters.json import JSONFormatter
103
112
  envvar="NUVU_API_KEY",
104
113
  help="Nuvu Cloud API key (from dashboard account settings)",
105
114
  )
115
+ @click.option(
116
+ "--list-collectors",
117
+ is_flag=True,
118
+ help="List available collectors for the specified provider and exit",
119
+ )
106
120
  def scan_command(
107
121
  provider: str,
108
122
  output_format: str,
109
123
  output_file: str | None,
110
124
  region: tuple,
125
+ collectors: tuple,
111
126
  access_key_id: str | None,
112
127
  secret_access_key: str | None,
113
128
  session_token: str | None,
@@ -121,9 +136,25 @@ def scan_command(
121
136
  push: bool,
122
137
  nuvu_cloud_url: str | None,
123
138
  api_key: str | None,
139
+ list_collectors: bool,
124
140
  ):
125
141
  """Scan cloud provider for data assets."""
126
142
 
143
+ # Handle --list-collectors flag
144
+ if list_collectors:
145
+ if provider == "aws":
146
+ available = AWSScanner.get_available_collectors()
147
+ elif provider == "gcp":
148
+ available = GCPScanner.get_available_collectors()
149
+ else:
150
+ click.echo(f"Unknown provider: {provider}", err=True)
151
+ sys.exit(1)
152
+
153
+ click.echo(f"Available collectors for {provider.upper()}:")
154
+ for name in sorted(available):
155
+ click.echo(f" - {name}")
156
+ return
157
+
127
158
  # Build credentials based on provider
128
159
  credentials = {}
129
160
  account_id = None
@@ -206,6 +237,7 @@ def scan_command(
206
237
  credentials=credentials,
207
238
  regions=list(region) if region else None,
208
239
  account_id=account_id,
240
+ collectors=list(collectors) if collectors else None,
209
241
  )
210
242
 
211
243
  # Get scanner instance
@@ -14,6 +14,9 @@ class HTMLFormatter:
14
14
  actual_total = result.summary.get("total_actual_cost_30d")
15
15
  estimated_assets_total = result.summary.get("estimated_assets_cost_total")
16
16
 
17
+ # Calculate cost saving opportunities
18
+ savings_opportunities = self._calculate_savings(result.assets)
19
+
17
20
  summary_cards = f"""
18
21
  <div class="summary-card">
19
22
  <h3>Total Assets</h3>
@@ -43,18 +46,27 @@ class HTMLFormatter:
43
46
  summary_cards += f"""
44
47
  <div class="summary-card">
45
48
  <h3>Unused Assets</h3>
46
- <div class="value">{result.summary.get('unused_count', 0)}</div>
49
+ <div class="value">{result.summary.get("unused_count", 0)}</div>
47
50
  </div>
48
51
  <div class="summary-card">
49
52
  <h3>No Owner</h3>
50
- <div class="value">{result.summary.get('no_owner_count', 0)}</div>
53
+ <div class="value">{result.summary.get("no_owner_count", 0)}</div>
51
54
  </div>
52
55
  <div class="summary-card">
53
56
  <h3>Risky Assets</h3>
54
- <div class="value">{result.summary.get('risky_count', 0)}</div>
57
+ <div class="value">{result.summary.get("risky_count", 0)}</div>
55
58
  </div>
56
59
  """
57
60
 
61
+ # Add savings opportunity card if significant
62
+ if savings_opportunities["total_potential_savings"] > 100:
63
+ summary_cards += f"""
64
+ <div class="summary-card savings">
65
+ <h3>💰 Potential Savings</h3>
66
+ <div class="value">${savings_opportunities["total_potential_savings"]:,.2f}/mo</div>
67
+ </div>
68
+ """
69
+
58
70
  # Build service costs table if available
59
71
  service_costs_html = ""
60
72
  service_costs = result.summary.get("actual_costs_30d", {})
@@ -79,14 +91,17 @@ class HTMLFormatter:
79
91
  <title>Nuvu Scan Report - {result.provider.upper()}</title>
80
92
  <style>
81
93
  body {{ font-family: Arial, sans-serif; margin: 20px; background: #f5f5f5; }}
82
- .container {{ max-width: 1200px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
94
+ .container {{ max-width: 1400px; margin: 0 auto; background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
83
95
  h1 {{ color: #333; border-bottom: 3px solid #4CAF50; padding-bottom: 10px; }}
84
96
  h2 {{ color: #555; margin-top: 30px; }}
85
- .summary {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px; margin: 20px 0; }}
97
+ .summary {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 15px; margin: 20px 0; }}
86
98
  .summary-card {{ background: #f9f9f9; padding: 15px; border-radius: 5px; border-left: 4px solid #4CAF50; }}
87
- .summary-card h3 {{ margin: 0 0 10px 0; color: #666; font-size: 14px; text-transform: uppercase; }}
88
- .summary-card .value {{ font-size: 24px; font-weight: bold; color: #333; }}
99
+ .summary-card.savings {{ border-left-color: #ff9800; background: #fff8e1; }}
100
+ .summary-card h3 {{ margin: 0 0 10px 0; color: #666; font-size: 13px; text-transform: uppercase; }}
101
+ .summary-card .value {{ font-size: 22px; font-weight: bold; color: #333; }}
89
102
  table {{ width: 100%; border-collapse: collapse; margin: 20px 0; }}
103
+ table.compact {{ font-size: 13px; }}
104
+ table.compact th, table.compact td {{ padding: 8px; }}
90
105
  th, td {{ padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }}
91
106
  th {{ background: #4CAF50; color: white; font-weight: bold; }}
92
107
  tr:hover {{ background: #f5f5f5; }}
@@ -94,6 +109,21 @@ class HTMLFormatter:
94
109
  .unused {{ color: #ff8800; font-weight: bold; }}
95
110
  .no-owner {{ color: #ff4444; font-weight: bold; }}
96
111
  .footer {{ margin-top: 40px; padding-top: 20px; border-top: 1px solid #ddd; color: #666; font-size: 12px; text-align: center; }}
112
+ .insight-box {{ padding: 15px; border-radius: 8px; margin: 15px 0; }}
113
+ .insight-box h3 {{ margin-top: 0; }}
114
+ .insight-box.warning {{ background: #fff8e1; border-left: 4px solid #ff9800; }}
115
+ .insight-box.alert {{ background: #ffebee; border-left: 4px solid #f44336; }}
116
+ .insight-box.info {{ background: #e3f2fd; border-left: 4px solid #2196f3; }}
117
+ .insight-box.success {{ background: #e8f5e9; border-left: 4px solid #4caf50; }}
118
+ .recommendation {{ font-style: italic; color: #666; margin-top: 10px; }}
119
+ /* Collapsible sections */
120
+ .collapsible {{ cursor: pointer; padding: 15px; width: 100%; border: none; text-align: left; outline: none; font-size: 18px; font-weight: bold; background: #f5f5f5; border-radius: 5px; margin-top: 20px; color: #555; display: flex; justify-content: space-between; align-items: center; }}
121
+ .collapsible:hover {{ background: #eee; }}
122
+ .collapsible:after {{ content: '▼'; font-size: 12px; color: #888; }}
123
+ .collapsible.active:after {{ content: '▲'; }}
124
+ .collapsible-content {{ display: none; overflow: hidden; padding: 0; }}
125
+ .collapsible-content.show {{ display: block; }}
126
+ .asset-count {{ font-size: 14px; font-weight: normal; color: #888; }}
97
127
  </style>
98
128
  </head>
99
129
  <body>
@@ -108,7 +138,16 @@ class HTMLFormatter:
108
138
  {summary_cards}
109
139
  </div>
110
140
  {service_costs_html}
141
+ """
142
+
143
+ # Add Cost Optimization Section FIRST (before Assets by Category)
144
+ html += self._build_cost_optimization_section(result.assets)
111
145
 
146
+ # Add Governance Insights Section SECOND
147
+ html += self._build_governance_section(result.assets)
148
+
149
+ # Assets by Category
150
+ html += """
112
151
  <h2>Assets by Category</h2>
113
152
  <table>
114
153
  <tr><th>Category</th><th>Count</th></tr>
@@ -117,9 +156,12 @@ class HTMLFormatter:
117
156
  for category, count in result.summary.get("assets_by_category", {}).items():
118
157
  html += f" <tr><td>{category.replace('_', ' ').title()}</td><td>{count}</td></tr>\n"
119
158
 
120
- html += """ </table>
159
+ # All Assets - COLLAPSIBLE
160
+ asset_count = len(result.assets)
161
+ html += f""" </table>
121
162
 
122
- <h2>All Assets</h2>
163
+ <button class="collapsible">All Assets <span class="asset-count">({asset_count} items)</span></button>
164
+ <div class="collapsible-content">
123
165
  <table>
124
166
  <tr>
125
167
  <th>Name</th>
@@ -151,12 +193,24 @@ class HTMLFormatter:
151
193
  <td>{asset.asset_type}</td>
152
194
  <td>{asset.region}</td>
153
195
  <td>${asset.cost_estimate_usd or 0:.2f}</td>
154
- <td class="{owner_class}">{asset.suggested_owner or 'Unknown'}</td>
196
+ <td class="{owner_class}">{asset.suggested_owner or "Unknown"}</td>
155
197
  <td>{risk_flags_html}</td>
156
198
  </tr>
157
199
  """
158
200
 
159
201
  html += """ </table>
202
+ </div>
203
+
204
+ <script>
205
+ var coll = document.getElementsByClassName("collapsible");
206
+ for (var i = 0; i < coll.length; i++) {
207
+ coll[i].addEventListener("click", function() {
208
+ this.classList.toggle("active");
209
+ var content = this.nextElementSibling;
210
+ content.classList.toggle("show");
211
+ });
212
+ }
213
+ </script>
160
214
 
161
215
  <div class="footer">
162
216
  <p>Generated by Nuvu - AWS Data Asset Control</p>
@@ -167,3 +221,201 @@ class HTMLFormatter:
167
221
  </html>"""
168
222
 
169
223
  return html
224
+
225
def _calculate_savings(self, assets) -> dict:
    """Estimate the monthly savings the scanned assets could yield.

    Args:
        assets: Iterable of asset objects exposing ``asset_type``,
            ``usage_metrics``, ``risk_flags`` and ``cost_estimate_usd``.
            ``None`` is treated as "no assets".

    Returns:
        Dict with one running total per savings bucket plus
        ``total_potential_savings``, which is the sum of the
        ``old_manual_snapshots``, ``reservation_opportunities`` and
        ``stale_crawlers`` buckets. ``unused_etl_jobs`` is part of the
        result shape but nothing here adds to it, so it is always 0.
    """
    savings = {
        "old_manual_snapshots": 0,
        "stale_crawlers": 0,
        "unused_etl_jobs": 0,
        "reservation_opportunities": 0,
        "total_potential_savings": 0,
    }

    for asset in assets or []:
        metrics = asset.usage_metrics or {}
        flags = asset.risk_flags or []
        kind = asset.asset_type

        if kind == "redshift_snapshot":
            # Only old MANUAL snapshots count (automated snapshots are free
            # within retention).
            if metrics.get("snapshot_type") == "manual" and "old_snapshot" in flags:
                savings["old_manual_snapshots"] += asset.cost_estimate_usd or 0
        elif kind == "redshift_cluster":
            # ``or 0`` also covers the key being present with a None value,
            # which a plain ``.get(key, 0)`` would pass through to ``+=``.
            savings["reservation_opportunities"] += (
                metrics.get("potential_reservation_savings_usd") or 0
            )
        elif kind == "glue_crawler":
            if "stale_crawler" in flags:
                savings["stale_crawlers"] += asset.cost_estimate_usd or 0

    savings["total_potential_savings"] = (
        savings["old_manual_snapshots"]
        + savings["reservation_opportunities"]
        + savings["stale_crawlers"]
    )

    return savings
261
+
262
def _build_cost_optimization_section(self, assets) -> str:
    """Render the "Cost Optimization Opportunities" part of the HTML report.

    Args:
        assets: Sequence of asset objects; only those whose ``asset_type``
            is ``redshift_snapshot`` or ``redshift_reserved_node`` are used.

    Returns:
        An HTML fragment with one insight box for snapshots and one for
        reserved nodes (each only when such assets exist), or an empty
        string when neither kind of asset is present.
    """

    def metric_of(asset, key):
        return (asset.usage_metrics or {}).get(key)

    def carries(asset, flag):
        return flag in (asset.risk_flags or [])

    def cost_of(group):
        return sum(item.cost_estimate_usd or 0 for item in group)

    snapshots = [a for a in assets if a.asset_type == "redshift_snapshot"]
    reserved_nodes = [a for a in assets if a.asset_type == "redshift_reserved_node"]
    if not (snapshots or reserved_nodes):
        return ""

    parts = [
        """
        <h2>💰 Cost Optimization Opportunities</h2>
"""
    ]

    if snapshots:
        # Only manual snapshots carry a cost figure in this summary.
        manual = [s for s in snapshots if metric_of(s, "snapshot_type") == "manual"]
        automated = [s for s in snapshots if metric_of(s, "snapshot_type") == "automated"]
        old_manual = [s for s in manual if carries(s, "old_snapshot")]
        manual_tb = sum((s.size_bytes or 0) / (1024**4) for s in manual)

        parts.append(
            f"""
        <div class="insight-box warning">
            <h3>📦 Redshift Snapshots</h3>
            <ul>
                <li><strong>Automated Snapshots:</strong> {len(automated)} (included in cluster cost)</li>
                <li><strong>Manual Snapshots:</strong> {len(manual)} ({manual_tb:.2f} TB)</li>
                <li><strong>Manual Snapshot Cost:</strong> ${cost_of(manual):,.2f}/mo</li>
                <li><strong>Old Manual Snapshots (>90 days):</strong> {len(old_manual)} (${cost_of(old_manual):,.2f}/mo potential savings)</li>
            </ul>
            <p class="recommendation">💡 Review old manual snapshots - automated snapshots are retained per retention policy at no extra charge.</p>
        </div>
"""
        )

    if reserved_nodes:
        active_total = sum(1 for n in reserved_nodes if metric_of(n, "state") == "active")
        expired_total = sum(1 for n in reserved_nodes if carries(n, "reservation_expired"))
        expiring_total = sum(
            1 for n in reserved_nodes if carries(n, "reservation_expiring_soon")
        )

        parts.append(
            f"""
        <div class="insight-box info">
            <h3>🎫 Reserved Nodes ({len(reserved_nodes)} total)</h3>
            <ul>
                <li><strong>Active Reservations:</strong> {active_total}</li>
                <li><strong>Expired/Retired:</strong> {expired_total}</li>
                <li><strong>Expiring Soon:</strong> {expiring_total}</li>
            </ul>
        </div>
"""
        )

    return "".join(parts)
325
+
326
def _build_governance_section(self, assets) -> str:
    """Render the "Governance Insights" part of the HTML report.

    Four findings are reported, each only when at least one asset matches:
    stale or never-run Glue crawlers, stale or never-run Glue jobs,
    Redshift datashares flagged ``cross_account_sharing``, and Redshift
    clusters flagged ``default_wlm_only`` or ``unlimited_wlm_queue``.
    The crawler, job and datashare tables list at most 10 rows; the WLM
    list shows at most 5 clusters.

    Args:
        assets: Sequence of asset objects exposing ``asset_type``, ``name``,
            ``risk_flags`` and ``usage_metrics``.

    Returns:
        An HTML fragment, or an empty string when no finding applies.
    """
    # Imported here by name: the bare module name ``html`` is commonly used
    # for output buffers, and asset-derived text must be escaped before it is
    # placed into markup.
    from html import escape

    def text(value) -> str:
        return escape(str(value))

    def flagged(asset, *names) -> bool:
        present = asset.risk_flags or []
        return any(name in present for name in names)

    def flag_text(asset, containing=None) -> str:
        flags = [str(flag) for flag in (asset.risk_flags or [])]
        if containing is not None:
            flags = [flag for flag in flags if containing in flag]
        return escape(", ".join(flags))

    def last_run(asset) -> str:
        # A missing or null value means there is no recorded run; show
        # "Never" on its own instead of the nonsensical "Never days ago".
        days = (asset.usage_metrics or {}).get("days_since_last_run")
        return "Never" if days is None else f"{text(days)} days ago"

    def stale_table(title, items) -> str:
        rows = "".join(
            f"<tr><td>{text(item.name)}</td><td>{last_run(item)}</td>"
            f"<td>{flag_text(item)}</td></tr>"
            for item in items[:10]
        )
        return f"""
        <div class="insight-box warning">
            <h3>{title} ({len(items)})</h3>
            <table class="compact">
                <tr><th>Name</th><th>Last Run</th><th>Issue</th></tr>
                {rows}</table></div>"""

    stale_crawlers = [
        a
        for a in assets
        if a.asset_type == "glue_crawler" and flagged(a, "stale_crawler", "never_run")
    ]
    stale_jobs = [
        a
        for a in assets
        if a.asset_type == "glue_job" and flagged(a, "stale_job", "never_run")
    ]
    cross_account_shares = [
        a
        for a in assets
        if a.asset_type == "redshift_datashare" and flagged(a, "cross_account_sharing")
    ]
    wlm_issues = [
        a
        for a in assets
        if a.asset_type == "redshift_cluster"
        and flagged(a, "default_wlm_only", "unlimited_wlm_queue")
    ]

    if not any([stale_crawlers, stale_jobs, cross_account_shares, wlm_issues]):
        return ""

    parts = [
        """
        <h2>🔍 Governance Insights</h2>
"""
    ]

    if stale_crawlers:
        parts.append(stale_table("🕷️ Stale/Unused Glue Crawlers", stale_crawlers))

    if stale_jobs:
        parts.append(stale_table("⚙️ Stale/Unused Glue ETL Jobs", stale_jobs))

    if cross_account_shares:
        rows = []
        for share in cross_account_shares[:10]:
            # ``or []`` covers a "consumers" key that is present but null.
            consumers = (share.usage_metrics or {}).get("consumers") or []
            # Account IDs may not be strings; ``text`` stringifies them so
            # the join cannot raise.
            consumer_ids = ", ".join(
                text(c.get("account_id") or "?") for c in consumers[:3]
            )
            rows.append(
                f"<tr><td>{text(share.name)}</td><td>{consumer_ids}</td>"
                f"<td>{flag_text(share)}</td></tr>"
            )
        parts.append(
            f"""
        <div class="insight-box alert">
            <h3>🔗 Cross-Account Data Shares ({len(cross_account_shares)})</h3>
            <p>Data is being shared outside this AWS account. Review for security compliance.</p>
            <table class="compact">
                <tr><th>Share Name</th><th>Consumer Account</th><th>Flags</th></tr>
                {"".join(rows)}</table></div>"""
        )

    if wlm_issues:
        items = []
        for cluster in wlm_issues[:5]:
            metrics = cluster.usage_metrics or {}
            queues = metrics.get("wlm_queue_count", 0)
            auto_wlm = "Yes" if metrics.get("wlm_auto_wlm") else "No"
            items.append(
                f"<li><strong>{text(cluster.name)}</strong>: {text(queues)} queues, "
                f"Auto WLM: {auto_wlm} ({flag_text(cluster, containing='wlm')})</li>"
            )
        parts.append(
            f"""
        <div class="insight-box info">
            <h3>⚡ WLM Configuration Review ({len(wlm_issues)} clusters)</h3>
            <p>Some clusters may benefit from WLM tuning:</p>
            <ul>
                {"".join(items)}</ul></div>"""
        )

    return "".join(parts)
nuvu_scan/cli/main.py CHANGED
@@ -7,11 +7,12 @@ Usage:
7
7
 
8
8
  import click
9
9
 
10
+ from .. import __version__
10
11
  from .commands.scan import scan_command
11
12
 
12
13
 
13
14
  @click.group()
14
- @click.version_option(version="0.1.0")
15
+ @click.version_option(version=__version__, prog_name="nuvu-scan")
15
16
  def cli():
16
17
  """Nuvu - Multi-Cloud Data Asset Control CLI."""
17
18
  pass
nuvu_scan/core/base.py CHANGED
@@ -21,10 +21,13 @@ class NormalizedCategory(str, Enum):
21
21
  ML_TRAINING = "ml_training"
22
22
  DATA_CATALOG = "data_catalog"
23
23
  DATA_INTEGRATION = "data_integration"
24
+ DATA_PIPELINE = "data_pipeline" # ETL jobs, crawlers, workflows
25
+ DATA_SHARING = "data_sharing" # Datashares, cross-account sharing
24
26
  QUERY_ENGINE = "query_engine"
25
27
  SEARCH = "search"
26
28
  DATABASE = "database"
27
29
  SECURITY = "security"
30
+ BILLING = "billing"
28
31
 
29
32
 
30
33
  @dataclass
@@ -66,10 +69,13 @@ class ScanConfig:
66
69
  credentials: dict[str, Any] # Provider-specific credentials
67
70
  regions: list[str] = None # None means all regions
68
71
  account_id: str | None = None
72
+ collectors: list[str] = None # None means all collectors, otherwise filter by name
69
73
 
70
74
  def __post_init__(self):
71
75
  if self.regions is None:
72
76
  self.regions = []
77
+ if self.collectors is None:
78
+ self.collectors = []
73
79
 
74
80
 
75
81
  @dataclass
@@ -177,7 +177,7 @@ class AWSScanner(CloudProviderScan):
177
177
  region_name=credentials.get("region", "us-east-1"),
178
178
  )
179
179
  except ClientError as e:
180
- raise ValueError(f"Failed to assume role {role_arn}: {str(e)}")
180
+ raise ValueError(f"Failed to assume role {role_arn}: {str(e)}") from e
181
181
 
182
182
  def _resolve_regions(self) -> list[str]:
183
183
  """Resolve regions to scan. If none provided, scan all enabled regions."""
@@ -201,23 +201,49 @@ class AWSScanner(CloudProviderScan):
201
201
  # If we can't get account ID, return "unknown"
202
202
  return "unknown"
203
203
 
204
+ # Map of collector names to their classes for filtering
205
+ COLLECTOR_MAP = {
206
+ "s3": S3Collector,
207
+ "glue": GlueCollector,
208
+ "athena": AthenaCollector,
209
+ "redshift": RedshiftCollector,
210
+ "iam": IAMCollector,
211
+ "mwaa": MWAACollector,
212
+ }
213
+
214
@classmethod
def get_available_collectors(cls) -> list[str]:
    """List the collector names registered in ``COLLECTOR_MAP``, in map order."""
    return [*cls.COLLECTOR_MAP]
218
+
204
219
  def _initialize_collectors(self) -> list:
205
- """Initialize all AWS service collectors."""
220
+ """Initialize AWS service collectors based on config."""
206
221
  collectors = []
207
222
 
208
- # Initialize collectors for each service
209
- collectors.append(S3Collector(self.session, self.config.regions))
210
- collectors.append(GlueCollector(self.session, self.config.regions))
211
- collectors.append(AthenaCollector(self.session, self.config.regions))
212
- collectors.append(RedshiftCollector(self.session, self.config.regions))
213
- collectors.append(IAMCollector(self.session, self.config.regions))
214
- collectors.append(MWAACollector(self.session, self.config.regions))
223
+ # Get requested collectors from config
224
+ requested = self.config.collectors if self.config.collectors else []
225
+
226
+ # Normalize to lowercase
227
+ requested_lower = [c.lower() for c in requested]
228
+
229
+ # If no specific collectors requested, use all
230
+ if not requested_lower:
231
+ for collector_cls in self.COLLECTOR_MAP.values():
232
+ collectors.append(collector_cls(self.session, self.config.regions))
233
+ else:
234
+ # Filter to only requested collectors
235
+ for name, collector_cls in self.COLLECTOR_MAP.items():
236
+ if name in requested_lower:
237
+ collectors.append(collector_cls(self.session, self.config.regions))
238
+
239
+ # Warn about unknown collectors
240
+ known = set(self.COLLECTOR_MAP.keys())
241
+ unknown = set(requested_lower) - known
242
+ if unknown:
243
+ import sys
215
244
 
216
- # TODO: Add more collectors as needed
217
- # collectors.append(OpenSearchCollector(self.session, self.config.regions))
218
- # collectors.append(EMRCollector(self.session, self.config.regions))
219
- # collectors.append(SageMakerCollector(self.session, self.config.regions))
220
- # etc.
245
+ print(f"Warning: Unknown collectors ignored: {', '.join(unknown)}", file=sys.stderr)
246
+ print(f"Available collectors: {', '.join(sorted(known))}", file=sys.stderr)
221
247
 
222
248
  return collectors
223
249
 
@@ -264,7 +290,7 @@ class AWSScanner(CloudProviderScan):
264
290
  cost_summary_asset = Asset(
265
291
  provider="aws",
266
292
  asset_type="cost_summary",
267
- normalized_category=NormalizedCategory.SECURITY, # Using security as placeholder
293
+ normalized_category=NormalizedCategory.BILLING,
268
294
  service="Cost Explorer",
269
295
  region="global",
270
296
  arn="arn:aws:ce::cost-summary",
@@ -23,10 +23,13 @@ class AthenaCollector:
23
23
 
24
24
  def collect(self) -> list[Asset]:
25
25
  """Collect Athena workgroups."""
26
+ import sys
27
+
26
28
  assets = []
27
29
 
28
30
  try:
29
31
  # List workgroups
32
+ print(" → Listing Athena workgroups...", file=sys.stderr)
30
33
  response = self.athena_client.list_work_groups()
31
34
 
32
35
  for wg_info in response.get("WorkGroups", []):