cloudwire 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {cloudwire-0.2.0 → cloudwire-0.2.2}/PKG-INFO +38 -30
  2. {cloudwire-0.2.0 → cloudwire-0.2.2}/README.md +29 -22
  3. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/__init__.py +1 -1
  4. cloudwire-0.2.2/cloudwire/app/graph_store.py +172 -0
  5. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/app/main.py +195 -7
  6. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/app/models.py +33 -3
  7. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/app/scan_jobs.py +20 -11
  8. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/app/scanner.py +615 -42
  9. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/cli.py +35 -3
  10. cloudwire-0.2.2/cloudwire/static/assets/index-IhO1P1Kx.js +40 -0
  11. cloudwire-0.2.2/cloudwire/static/assets/index-ojHsU5ur.css +1 -0
  12. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/static/index.html +2 -2
  13. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire.egg-info/PKG-INFO +38 -30
  14. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire.egg-info/SOURCES.txt +2 -2
  15. cloudwire-0.2.2/cloudwire.egg-info/requires.txt +13 -0
  16. {cloudwire-0.2.0 → cloudwire-0.2.2}/pyproject.toml +9 -8
  17. cloudwire-0.2.0/cloudwire/app/graph_store.py +0 -88
  18. cloudwire-0.2.0/cloudwire/static/assets/index-Be3fLAWR.js +0 -40
  19. cloudwire-0.2.0/cloudwire/static/assets/index-CCAQkFKj.css +0 -1
  20. cloudwire-0.2.0/cloudwire.egg-info/requires.txt +0 -13
  21. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/app/__init__.py +0 -0
  22. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire/static/favicon.svg +0 -0
  23. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire.egg-info/dependency_links.txt +0 -0
  24. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire.egg-info/entry_points.txt +0 -0
  25. {cloudwire-0.2.0 → cloudwire-0.2.2}/cloudwire.egg-info/top_level.txt +0 -0
  26. {cloudwire-0.2.0 → cloudwire-0.2.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloudwire
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Scan and visualize your AWS infrastructure as an interactive graph
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/hisingh_gwre/cloudwire
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Classifier: Programming Language :: Python :: 3.11
18
18
  Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
19
20
  Classifier: Topic :: Internet :: WWW/HTTP
20
21
  Classifier: Topic :: System :: Systems Administration
21
22
  Requires-Python: >=3.9
@@ -24,15 +25,15 @@ Provides-Extra: dev
24
25
  Requires-Dist: twine; extra == "dev"
25
26
  Requires-Dist: build; extra == "dev"
26
27
  Provides-Extra: dependencies
27
- Requires-Dist: fastapi>=0.115; extra == "dependencies"
28
- Requires-Dist: uvicorn[standard]>=0.34; extra == "dependencies"
29
- Requires-Dist: boto3>=1.37; extra == "dependencies"
30
- Requires-Dist: botocore>=1.37; extra == "dependencies"
31
- Requires-Dist: networkx>=3.4; extra == "dependencies"
32
- Requires-Dist: pydantic>=2.11; extra == "dependencies"
33
- Requires-Dist: click>=8.1; extra == "dependencies"
28
+ Requires-Dist: fastapi>=0.100; extra == "dependencies"
29
+ Requires-Dist: uvicorn>=0.20; extra == "dependencies"
30
+ Requires-Dist: boto3>=1.26; extra == "dependencies"
31
+ Requires-Dist: botocore>=1.29; extra == "dependencies"
32
+ Requires-Dist: networkx>=2.6; extra == "dependencies"
33
+ Requires-Dist: pydantic>=2.0; extra == "dependencies"
34
+ Requires-Dist: click>=8.0; extra == "dependencies"
34
35
 
35
- # cloudwire
36
+ # Cloudwire
36
37
 
37
38
  Scan your AWS account and visualize resource dependencies as an interactive graph — directly in your browser, running entirely on your local machine.
38
39
 
@@ -55,11 +56,13 @@ That's it. The browser opens automatically at `http://localhost:8080`.
55
56
 
56
57
  ## What it looks like
57
58
 
58
- - Dark hacker-aesthetic graph canvas
59
- - Nodes represent AWS resources — Lambda functions, SQS queues, API Gateways, RDS instances, S3 buckets, and more
60
- - Edges represent relationships and data flow between resources
61
- - Click any node to inspect its attributes and connected resources
62
- - Search, filter by service, highlight upstream/downstream blast radius
59
+ - Dark hacker-aesthetic graph canvas with animated data flow
60
+ - 24 AWS services with dedicated icons, colors, and role badges
61
+ - Edges represent real relationships — API integrations, event triggers, IAM policy inference, env var references
62
+ - Sequential left-to-right flow layout with START/END badges showing where data enters and exits
63
+ - Click any node to inspect its attributes, incoming/outgoing edges, and resource-specific tooltip
64
+ - Search, filter by service, highlight upstream/downstream blast radius, find shortest path
65
+ - Permission errors surfaced clearly — see exactly which IAM policies are missing
63
66
 
64
67
  ---
65
68
 
@@ -67,24 +70,29 @@ That's it. The browser opens automatically at `http://localhost:8080`.
67
70
 
68
71
  | Service | Scanner |
69
72
  |---------|---------|
70
- | API Gateway | Dedicated |
71
- | Lambda | Dedicated (with state) |
72
- | SQS | Dedicated |
73
- | SNS | Dedicated |
74
- | EventBridge | Dedicated |
75
- | DynamoDB | Dedicated (with state) |
76
- | EC2 | Dedicated (with state) |
77
- | ECS | Dedicated |
78
- | S3 | Dedicated |
79
- | RDS | Dedicated (with state) |
73
+ | API Gateway | Dedicated — REST + HTTP APIs, multi-service integrations, Cognito authorizers |
74
+ | Lambda | Dedicated — functions, event source mappings, env var references, IAM policy inference |
75
+ | SQS | Dedicated — queues, attributes, dead letter queue edges |
76
+ | SNS | Dedicated — topics and subscriptions |
77
+ | EventBridge | Dedicated — rules and targets |
78
+ | DynamoDB | Dedicated — tables, streams, global table replicas |
79
+ | EC2 | Dedicated — instances, VPC, subnet, security group, instance profile edges |
80
+ | ECS | Dedicated — clusters, services, task definitions, load balancer edges |
81
+ | S3 | Dedicated — buckets and Lambda notification edges |
82
+ | RDS | Dedicated — DB instances and clusters |
80
83
  | Step Functions | Dedicated |
81
84
  | Kinesis | Dedicated |
82
- | IAM | Dedicated |
83
- | Cognito | Dedicated |
84
- | CloudFront | Dedicated (with state) |
85
- | ElastiCache | Dedicated (with state) |
86
- | Glue | Dedicated |
87
- | AppSync | Dedicated |
85
+ | IAM | Dedicated — roles with full policy resolution |
86
+ | Cognito | Dedicated — user pools |
87
+ | CloudFront | Dedicated — distributions, S3/API GW/ELB origins, Lambda@Edge |
88
+ | Route 53 | Dedicated — hosted zones, record sets, alias target edges |
89
+ | ElastiCache | Dedicated — cache clusters |
90
+ | Redshift | Dedicated — clusters |
91
+ | Glue | Dedicated — jobs, crawlers, triggers |
92
+ | AppSync | Dedicated — GraphQL APIs |
93
+ | Secrets Manager | Dedicated |
94
+ | KMS | Dedicated |
95
+ | ELB | Discovered via CloudFront, Route 53, ECS edges |
88
96
  | Everything else | Generic (tagged resources only) |
89
97
 
90
98
  ---
@@ -1,4 +1,4 @@
1
- # cloudwire
1
+ # Cloudwire
2
2
 
3
3
  Scan your AWS account and visualize resource dependencies as an interactive graph — directly in your browser, running entirely on your local machine.
4
4
 
@@ -21,11 +21,13 @@ That's it. The browser opens automatically at `http://localhost:8080`.
21
21
 
22
22
  ## What it looks like
23
23
 
24
- - Dark hacker-aesthetic graph canvas
25
- - Nodes represent AWS resources — Lambda functions, SQS queues, API Gateways, RDS instances, S3 buckets, and more
26
- - Edges represent relationships and data flow between resources
27
- - Click any node to inspect its attributes and connected resources
28
- - Search, filter by service, highlight upstream/downstream blast radius
24
+ - Dark hacker-aesthetic graph canvas with animated data flow
25
+ - 24 AWS services with dedicated icons, colors, and role badges
26
+ - Edges represent real relationships — API integrations, event triggers, IAM policy inference, env var references
27
+ - Sequential left-to-right flow layout with START/END badges showing where data enters and exits
28
+ - Click any node to inspect its attributes, incoming/outgoing edges, and resource-specific tooltip
29
+ - Search, filter by service, highlight upstream/downstream blast radius, find shortest path
30
+ - Permission errors surfaced clearly — see exactly which IAM policies are missing
29
31
 
30
32
  ---
31
33
 
@@ -33,24 +35,29 @@ That's it. The browser opens automatically at `http://localhost:8080`.
33
35
 
34
36
  | Service | Scanner |
35
37
  |---------|---------|
36
- | API Gateway | Dedicated |
37
- | Lambda | Dedicated (with state) |
38
- | SQS | Dedicated |
39
- | SNS | Dedicated |
40
- | EventBridge | Dedicated |
41
- | DynamoDB | Dedicated (with state) |
42
- | EC2 | Dedicated (with state) |
43
- | ECS | Dedicated |
44
- | S3 | Dedicated |
45
- | RDS | Dedicated (with state) |
38
+ | API Gateway | Dedicated — REST + HTTP APIs, multi-service integrations, Cognito authorizers |
39
+ | Lambda | Dedicated — functions, event source mappings, env var references, IAM policy inference |
40
+ | SQS | Dedicated — queues, attributes, dead letter queue edges |
41
+ | SNS | Dedicated — topics and subscriptions |
42
+ | EventBridge | Dedicated — rules and targets |
43
+ | DynamoDB | Dedicated — tables, streams, global table replicas |
44
+ | EC2 | Dedicated — instances, VPC, subnet, security group, instance profile edges |
45
+ | ECS | Dedicated — clusters, services, task definitions, load balancer edges |
46
+ | S3 | Dedicated — buckets and Lambda notification edges |
47
+ | RDS | Dedicated — DB instances and clusters |
46
48
  | Step Functions | Dedicated |
47
49
  | Kinesis | Dedicated |
48
- | IAM | Dedicated |
49
- | Cognito | Dedicated |
50
- | CloudFront | Dedicated (with state) |
51
- | ElastiCache | Dedicated (with state) |
52
- | Glue | Dedicated |
53
- | AppSync | Dedicated |
50
+ | IAM | Dedicated — roles with full policy resolution |
51
+ | Cognito | Dedicated — user pools |
52
+ | CloudFront | Dedicated — distributions, S3/API GW/ELB origins, Lambda@Edge |
53
+ | Route 53 | Dedicated — hosted zones, record sets, alias target edges |
54
+ | ElastiCache | Dedicated — cache clusters |
55
+ | Redshift | Dedicated — clusters |
56
+ | Glue | Dedicated — jobs, crawlers, triggers |
57
+ | AppSync | Dedicated — GraphQL APIs |
58
+ | Secrets Manager | Dedicated |
59
+ | KMS | Dedicated |
60
+ | ELB | Discovered via CloudFront, Route 53, ECS edges |
54
61
  | Everything else | Generic (tagged resources only) |
55
62
 
56
63
  ---
@@ -1,3 +1,3 @@
1
1
  """CloudWire — scan and visualize your AWS infrastructure."""
2
2
 
3
- __version__ = "0.2.0"
3
+ __version__ = "0.2.2"
@@ -0,0 +1,172 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+ from threading import Lock
5
+ from typing import Any, Dict, List, Set
6
+
7
+ import networkx as nx
8
+
9
+
10
+ class GraphStore:
11
+ def __init__(self) -> None:
12
+ self.graph = nx.DiGraph()
13
+ self.metadata: Dict[str, Any] = {
14
+ "last_scan_at": None,
15
+ "region": None,
16
+ "scanned_services": [],
17
+ "warnings": [],
18
+ }
19
+ self._lock = Lock()
20
+
21
+ def reset(self, *, region: str, services: List[str]) -> None:
22
+ with self._lock:
23
+ self.graph = nx.DiGraph()
24
+ self.metadata = {
25
+ "last_scan_at": datetime.now(timezone.utc).isoformat(),
26
+ "region": region,
27
+ "scanned_services": services,
28
+ "warnings": [],
29
+ }
30
+
31
+ def add_warning(self, warning: str) -> None:
32
+ with self._lock:
33
+ self.metadata.setdefault("warnings", []).append(warning)
34
+
35
+ def update_metadata(self, **kwargs: Any) -> None:
36
+ with self._lock:
37
+ self.metadata.update(kwargs)
38
+
39
+ def add_node(self, node_id: str, **attrs: Any) -> None:
40
+ with self._lock:
41
+ current = self.graph.nodes[node_id] if self.graph.has_node(node_id) else {}
42
+ merged = {**current, **attrs}
43
+ merged["id"] = node_id
44
+ self.graph.add_node(node_id, **merged)
45
+
46
+ def add_edge(self, source: str, target: str, **attrs: Any) -> None:
47
+ with self._lock:
48
+ current = self.graph.get_edge_data(source, target, default={})
49
+ merged = {**current, **attrs}
50
+ self.graph.add_edge(source, target, **merged)
51
+
52
+ def _serialize_node(self, node_id: str, attrs: Dict[str, Any]) -> Dict[str, Any]:
53
+ payload = {"id": node_id}
54
+ payload.update(attrs)
55
+ return payload
56
+
57
+ def _serialize_edge(self, source: str, target: str, attrs: Dict[str, Any]) -> Dict[str, Any]:
58
+ payload = {"id": f"{source}\u2192{target}", "source": source, "target": target}
59
+ payload.update(attrs)
60
+ return payload
61
+
62
+ def get_graph_payload(self) -> Dict[str, Any]:
63
+ with self._lock:
64
+ nodes = [self._serialize_node(node_id, attrs) for node_id, attrs in self.graph.nodes(data=True)]
65
+ edges = [
66
+ self._serialize_edge(source, target, attrs)
67
+ for source, target, attrs in self.graph.edges(data=True)
68
+ ]
69
+ metadata = dict(self.metadata)
70
+ metadata["node_count"] = len(nodes)
71
+ metadata["edge_count"] = len(edges)
72
+ return {"nodes": nodes, "edges": edges, "metadata": metadata}
73
+
74
+ def _node_matches_arns(self, node_id: str, attrs: Dict[str, Any], allowed_arns: Set[str]) -> bool:
75
+ """Check if a node matches any of the allowed ARNs.
76
+
77
+ Tries multiple fields since scanners are inconsistent about ARN storage:
78
+ 1. 'real_arn' attribute (set by _fetch_and_apply_tags — always a proper ARN)
79
+ 2. 'arn' attribute directly
80
+ 3. The embedded ARN in node_id (format 'service:arn')
81
+ Returns False for nodes without any ARN-like attribute (synthetic/connector nodes).
82
+ """
83
+ real_arn = attrs.get("real_arn")
84
+ if real_arn and real_arn in allowed_arns:
85
+ return True
86
+ node_arn = attrs.get("arn")
87
+ if node_arn and node_arn in allowed_arns:
88
+ return True
89
+ arn_in_id = node_id.split(":", 1)[1] if ":" in node_id else ""
90
+ if arn_in_id in allowed_arns:
91
+ return True
92
+ return False
93
+
94
+ def filter_by_arns(self, allowed_arns: Set[str]) -> int:
95
+ """Remove nodes that don't match the allowed ARNs, preserving neighbors.
96
+
97
+ Keeps:
98
+ - Nodes whose ARN matches the allowed set (the "seed" nodes)
99
+ - Direct neighbors of seed nodes (1-hop) so connected context is visible
100
+ - VPC infrastructure ancestors of kept nodes (so VPC containers, IGWs,
101
+ route tables, and Internet anchor nodes remain for topology context)
102
+ - Nodes without any ARN-like attribute (synthetic/connector nodes)
103
+ Returns the number of nodes removed.
104
+ """
105
+ with self._lock:
106
+ # Phase 1: identify seed nodes (directly matched by ARN)
107
+ seed_ids: Set[str] = set()
108
+ no_arn_ids: Set[str] = set()
109
+ for node_id, attrs in self.graph.nodes(data=True):
110
+ if self._node_matches_arns(node_id, attrs, allowed_arns):
111
+ seed_ids.add(node_id)
112
+ elif not attrs.get("real_arn") and not attrs.get("arn"):
113
+ no_arn_ids.add(node_id)
114
+
115
+ # Phase 2: expand to direct neighbors of seeds (1-hop)
116
+ keep_ids = set(seed_ids) | no_arn_ids
117
+ for seed_id in seed_ids:
118
+ for neighbor in self.graph.predecessors(seed_id):
119
+ keep_ids.add(neighbor)
120
+ for neighbor in self.graph.successors(seed_id):
121
+ keep_ids.add(neighbor)
122
+
123
+ # Phase 3: walk VPC infrastructure ancestors so topology context
124
+ # (VPC → subnet → resource, IGW → VPC, RTB → subnet) stays intact.
125
+ # For any kept VPC infra node, also keep its predecessors/successors
126
+ # that are VPC infra, up the containment chain.
127
+ vpc_frontier = [
128
+ nid for nid in keep_ids
129
+ if self.graph.nodes[nid].get("service") == "vpc"
130
+ ]
131
+ visited = set(vpc_frontier)
132
+ while vpc_frontier:
133
+ nid = vpc_frontier.pop()
134
+ for neighbor in self.graph.predecessors(nid):
135
+ if neighbor not in keep_ids and neighbor not in visited:
136
+ attrs = self.graph.nodes[neighbor]
137
+ if attrs.get("service") == "vpc":
138
+ keep_ids.add(neighbor)
139
+ visited.add(neighbor)
140
+ vpc_frontier.append(neighbor)
141
+ for neighbor in self.graph.successors(nid):
142
+ if neighbor not in keep_ids and neighbor not in visited:
143
+ attrs = self.graph.nodes[neighbor]
144
+ if attrs.get("service") == "vpc":
145
+ keep_ids.add(neighbor)
146
+ visited.add(neighbor)
147
+ vpc_frontier.append(neighbor)
148
+
149
+ # Phase 4: remove everything else
150
+ nodes_to_remove = [
151
+ node_id for node_id in self.graph.nodes()
152
+ if node_id not in keep_ids
153
+ ]
154
+ for node_id in nodes_to_remove:
155
+ self.graph.remove_node(node_id)
156
+ return len(nodes_to_remove)
157
+
158
+ def get_resource_payload(self, resource_id: str) -> Dict[str, Any]:
159
+ with self._lock:
160
+ if not self.graph.has_node(resource_id):
161
+ raise KeyError(resource_id)
162
+
163
+ node = self._serialize_node(resource_id, dict(self.graph.nodes[resource_id]))
164
+ incoming = [
165
+ self._serialize_edge(source, resource_id, dict(attrs))
166
+ for source, _, attrs in self.graph.in_edges(resource_id, data=True)
167
+ ]
168
+ outgoing = [
169
+ self._serialize_edge(resource_id, target, dict(attrs))
170
+ for _, target, attrs in self.graph.out_edges(resource_id, data=True)
171
+ ]
172
+ return {"node": node, "incoming": incoming, "outgoing": outgoing}
@@ -23,12 +23,17 @@ from fastapi.responses import FileResponse, JSONResponse
23
23
  from fastapi.staticfiles import StaticFiles
24
24
 
25
25
  from .models import (
26
+ _REGION_RE,
26
27
  APIErrorResponse,
27
28
  GraphResponse,
28
29
  ResourceResponse,
29
30
  ScanJobCreateResponse,
30
31
  ScanJobStatusResponse,
31
32
  ScanRequest,
33
+ TagKeysResponse,
34
+ TagResourcesResponse,
35
+ TagValuesResponse,
36
+ normalize_service_name,
32
37
  )
33
38
  from .scan_jobs import ScanJobStore
34
39
  from .scanner import AWSGraphScanner, ScanCancelledError, ScanExecutionOptions
@@ -68,15 +73,9 @@ def _error_payload(code: str, message: str, details: Optional[Any] = None) -> Di
68
73
 
69
74
 
70
75
  def _normalize_services(services: List[str]) -> List[str]:
71
- aliases = {
72
- "api-gateway": "apigateway",
73
- "apigw": "apigateway",
74
- "event-bridge": "eventbridge",
75
- "events": "eventbridge",
76
- }
77
76
  normalized = []
78
77
  for service in services:
79
- key = aliases.get(service.lower().strip(), service.lower().strip())
78
+ key = normalize_service_name(service)
80
79
  if key and key not in normalized:
81
80
  normalized.append(key)
82
81
  return normalized
@@ -225,6 +224,40 @@ async def unexpected_exception_handler(_: Request, exc: Exception) -> JSONRespon
225
224
  )
226
225
 
227
226
 
227
+ # ---------------------------------------------------------------------------
228
+ # Tag discovery helper
229
+ # ---------------------------------------------------------------------------
230
+
231
def _tagging_client(region: str):
    """Create a ``resourcegroupstaggingapi`` boto3 client for *region*.

    The client uses adaptive retries (up to 10 attempts), a pool of 8
    connections, a 3 second connect timeout and a 10 second read timeout.
    """
    client_config = Config(
        retries={"mode": "adaptive", "max_attempts": 10},
        max_pool_connections=8,
        connect_timeout=3,
        read_timeout=10,
    )
    regional_session = boto3.session.Session(region_name=region)
    return regional_session.client("resourcegroupstaggingapi", config=client_config)
242
+
243
+
244
def _validate_region(region: str) -> str:
    """Return *region* stripped of surrounding whitespace, or reject it.

    Raises:
        APIError: 422 ``validation_error`` when the stripped value is empty
            or is not matched by ``_REGION_RE``.
    """
    cleaned = region.strip()
    if cleaned and _REGION_RE.match(cleaned):
        return cleaned
    raise APIError(
        status_code=422,
        code="validation_error",
        message=f"'{cleaned}' is not a valid AWS region identifier (e.g. us-east-1)",
    )
253
+
254
+
255
def _service_from_arn(arn: str) -> str:
    """Return the third colon-separated field of *arn* (the service slot).

    An empty string is returned when *arn* has fewer than three fields or
    when the third field is itself empty.
    """
    # Only the first three fields matter, so stop splitting after them.
    segments = arn.split(":", 3)
    if len(segments) < 3:
        return ""
    return segments[2]
259
+
260
+
228
261
  # ---------------------------------------------------------------------------
229
262
  # Scan runner (background thread)
230
263
  # ---------------------------------------------------------------------------
@@ -236,6 +269,7 @@ def _run_scan_job(
236
269
  services: List[str],
237
270
  account_id: str,
238
271
  options: ScanExecutionOptions,
272
+ tag_arns: Optional[List[str]] = None,
239
273
  ) -> None:
240
274
  job_store.mark_running(job_id)
241
275
  if job_store.is_cancel_requested(job_id):
@@ -264,6 +298,12 @@ def _run_scan_job(
264
298
  if job_store.is_cancel_requested(job_id):
265
299
  job_store.mark_cancelled(job_id)
266
300
  return
301
+ # Post-scan ARN filtering for tag-based scans
302
+ if tag_arns:
303
+ allowed = set(tag_arns)
304
+ removed = job.graph_store.filter_by_arns(allowed)
305
+ if removed:
306
+ job.graph_store.add_warning(f"Tag filter removed {removed} resource(s) not matching selected tags or their neighbors.")
267
307
  job_store.mark_completed(job_id, ttl_seconds=_cache_ttl_seconds(options.mode))
268
308
  except ScanCancelledError:
269
309
  job.graph_store.add_warning("Scan cancelled by user request.")
@@ -326,6 +366,8 @@ def create_scan_job(payload: ScanRequest) -> Dict[str, Any]:
326
366
  options = _resolve_scan_options(payload)
327
367
  account_id = _resolve_account_id(payload.region)
328
368
 
369
+ tag_arns = payload.tag_arns
370
+
329
371
  cache_key = ScanJobStore.build_cache_key(
330
372
  account_id=account_id,
331
373
  region=payload.region,
@@ -333,6 +375,7 @@ def create_scan_job(payload: ScanRequest) -> Dict[str, Any]:
333
375
  mode=options.mode,
334
376
  include_iam_inference=options.include_iam_inference,
335
377
  include_resource_describes=options.include_resource_describes,
378
+ tag_arns=tag_arns,
336
379
  )
337
380
  reusable_job_id, cached = job_store.find_reusable_job(
338
381
  cache_key=cache_key,
@@ -357,6 +400,8 @@ def create_scan_job(payload: ScanRequest) -> Dict[str, Any]:
357
400
  include_iam_inference=options.include_iam_inference,
358
401
  include_resource_describes=options.include_resource_describes,
359
402
  )
403
+ # Capture tag_arns in local scope for the lambda closure
404
+ _tag_arns = tag_arns
360
405
  job_store.submit_job(
361
406
  job.id,
362
407
  lambda: _run_scan_job(
@@ -365,6 +410,7 @@ def create_scan_job(payload: ScanRequest) -> Dict[str, Any]:
365
410
  services=services,
366
411
  account_id=account_id,
367
412
  options=options,
413
+ tag_arns=_tag_arns,
368
414
  ),
369
415
  )
370
416
  return {
@@ -429,6 +475,148 @@ def stop_scan_job(job_id: str) -> Dict[str, Any]:
429
475
  ) from exc
430
476
 
431
477
 
478
+ # ---------------------------------------------------------------------------
479
+ # Tag discovery endpoints
480
+ # ---------------------------------------------------------------------------
481
+
482
def _handle_tagging_error(exc: Exception, region: str, operation: str):
    """Log a tagging-API failure and re-raise it as an ``APIError``.

    This function never returns normally. Mapping, checked in this order:
    - missing/partial/unretrievable credentials -> 401 ``aws_credentials_missing``
    - ``ClientError`` with an access-denied style code -> 403 ``tags_access_denied``
    - any other ``ClientError`` -> 502 ``tags_api_error``
    - endpoint unreachable or connect/read timeout -> 502 ``aws_endpoint_unreachable``
    - any other ``BotoCoreError`` -> 502 ``tags_api_error``
    - anything else -> 500 ``unexpected_error``
    """
    logger.warning(
        "Tag API error in %s (region=%s): %s: %s",
        operation,
        region,
        type(exc).__name__,
        exc,
    )

    credential_failures = (NoCredentialsError, PartialCredentialsError, CredentialRetrievalError)
    if isinstance(exc, credential_failures):
        raise APIError(
            status_code=status.HTTP_401_UNAUTHORIZED,
            code="aws_credentials_missing",
            message=_friendly_exception_message(exc),
        ) from exc

    if isinstance(exc, ClientError):
        error_block = exc.response.get("Error", {})
        aws_code = error_block.get("Code", "")
        aws_message = error_block.get("Message", "")
        error_details = {"aws_error_code": aws_code, "region": region}

        denied_codes = ("AccessDenied", "AccessDeniedException", "UnauthorizedAccess", "UnauthorizedOperation")
        if aws_code in denied_codes:
            raise APIError(
                status_code=status.HTTP_403_FORBIDDEN,
                code="tags_access_denied",
                message=(
                    f"Access denied for {operation}. "
                    "Ensure the IAM role has tag:GetTagKeys, tag:GetTagValues, and tag:GetResources permissions. "
                    f"({aws_code}: {aws_message})"
                ),
                details=error_details,
            ) from exc

        # Prefer the message AWS supplied; fall back to the generic formatter.
        if aws_message:
            upstream_message = f"AWS tagging API error: {aws_code}: {aws_message}"
        else:
            upstream_message = _friendly_exception_message(exc)
        raise APIError(
            status_code=status.HTTP_502_BAD_GATEWAY,
            code="tags_api_error",
            message=upstream_message,
            details=error_details,
        ) from exc

    # Transport-level failures are tested before the generic BotoCoreError branch.
    transport_failures = (EndpointConnectionError, ConnectTimeoutError, ReadTimeoutError)
    if isinstance(exc, transport_failures):
        raise APIError(
            status_code=status.HTTP_502_BAD_GATEWAY,
            code="aws_endpoint_unreachable",
            message=_friendly_exception_message(exc),
            details={"region": region},
        ) from exc

    if isinstance(exc, BotoCoreError):
        raise APIError(
            status_code=status.HTTP_502_BAD_GATEWAY,
            code="tags_api_error",
            message=_friendly_exception_message(exc),
            details={"region": region},
        ) from exc

    # Anything that is not a recognised AWS SDK error.
    raise APIError(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        code="unexpected_error",
        message=_friendly_exception_message(exc),
    ) from exc
527
+
528
+
529
@api.get(
    "/tags/keys",
    response_model=TagKeysResponse,
    responses={error_status: {"model": APIErrorResponse} for error_status in (401, 403, 502)},
)
def get_tag_keys(region: str = Query(default="us-east-1")) -> Dict[str, Any]:
    """Return the sorted, de-duplicated tag keys reported for *region*.

    All pages of the ``get_tag_keys`` paginator are consumed. Any exception
    raised while talking to AWS is handed to ``_handle_tagging_error``, which
    re-raises it as an ``APIError``.
    """
    region = _validate_region(region)
    try:
        pages = _tagging_client(region).get_paginator("get_tag_keys").paginate()
        unique_keys = set()
        for page in pages:
            unique_keys.update(page.get("TagKeys", []))
        return {"keys": sorted(unique_keys)}
    except Exception as exc:
        _handle_tagging_error(exc, region, "get_tag_keys")
545
+
546
+
547
@api.get(
    "/tags/values",
    response_model=TagValuesResponse,
    responses={error_status: {"model": APIErrorResponse} for error_status in (401, 403, 502)},
)
def get_tag_values(
    region: str = Query(default="us-east-1"),
    key: str = Query(..., min_length=1),
) -> Dict[str, Any]:
    """Return the sorted, de-duplicated values recorded for tag *key* in *region*.

    All pages of the ``get_tag_values`` paginator are consumed. Any exception
    raised while talking to AWS is handed to ``_handle_tagging_error``, which
    re-raises it as an ``APIError``.
    """
    region = _validate_region(region)
    try:
        pages = _tagging_client(region).get_paginator("get_tag_values").paginate(Key=key)
        unique_values = set()
        for page in pages:
            unique_values.update(page.get("TagValues", []))
        return {"key": key, "values": sorted(unique_values)}
    except Exception as exc:
        _handle_tagging_error(exc, region, "get_tag_values")
566
+
567
+
568
@api.get(
    "/tags/resources",
    response_model=TagResourcesResponse,
    responses={401: {"model": APIErrorResponse}, 403: {"model": APIErrorResponse}, 502: {"model": APIErrorResponse}},
)
def get_tag_resources(
    region: str = Query(default="us-east-1"),
    tag_filters: str = Query(..., description="JSON array of {Key, Values} filter objects"),
) -> Dict[str, Any]:
    """List the ARNs (and their services) of resources matching *tag_filters*.

    Args:
        region: AWS region to query; validated by ``_validate_region``.
        tag_filters: JSON text encoding an array of objects, each with a
            string ``Key`` and an optional ``Values`` array of strings.

    Returns:
        ``{"arns": [...], "services": [...]}`` where ``services`` is the
        sorted set of non-empty service names extracted from the ARNs.

    Raises:
        APIError: 422 ``validation_error`` when *tag_filters* is not valid
            JSON or does not have the shape described above; other statuses
            as produced by ``_handle_tagging_error`` for AWS failures.
    """
    import json as _json

    region = _validate_region(region)

    allowed_fields = {"Key", "Values"}
    try:
        parsed_filters = _json.loads(tag_filters)
        if not isinstance(parsed_filters, list):
            raise ValueError("tag_filters must be a JSON array")
        for i, entry in enumerate(parsed_filters):
            if not isinstance(entry, dict):
                raise ValueError(f"tag_filters[{i}] must be an object")
            if "Key" not in entry:
                raise ValueError(f"tag_filters[{i}] is missing required field 'Key'")
            if not isinstance(entry.get("Key"), str):
                raise ValueError(f"tag_filters[{i}].Key must be a string")
            # Unknown fields and non-string values would otherwise only be
            # rejected by the SDK's own parameter validation and reported as
            # a 502 upstream error, even though the request itself is at fault.
            unknown_fields = sorted(set(entry) - allowed_fields)
            if unknown_fields:
                raise ValueError(
                    f"tag_filters[{i}] has unsupported field(s): {', '.join(unknown_fields)}"
                )
            if "Values" in entry:
                values = entry["Values"]
                if not isinstance(values, list):
                    raise ValueError(f"tag_filters[{i}].Values must be an array")
                if not all(isinstance(value, str) for value in values):
                    raise ValueError(f"tag_filters[{i}].Values must contain only strings")
    except ValueError as exc:
        # json.JSONDecodeError is a ValueError subclass, so this covers both
        # malformed JSON and the shape checks above.
        raise APIError(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            code="validation_error",
            message=f"Invalid tag_filters JSON: {exc}",
        ) from exc

    try:
        client = _tagging_client(region)
        paginator = client.get_paginator("get_resources")
        arns = [
            entry["ResourceARN"]
            for page in paginator.paginate(TagFilters=parsed_filters, ResourcesPerPage=100)
            for entry in page.get("ResourceTagMappingList", [])
            if entry.get("ResourceARN")
        ]

        services = sorted(s for s in set(_service_from_arn(arn) for arn in arns) if s)
        return {"arns": arns, "services": services}
    except Exception as exc:
        _handle_tagging_error(exc, region, "get_resources")
618
+
619
+
432
620
  app.include_router(api)
433
621
 
434
622
  # ---------------------------------------------------------------------------