aws-inventory-manager 0.2.0 (aws_inventory_manager-0.2.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aws-inventory-manager might be problematic.

Files changed (65)
  1. aws_inventory_manager-0.2.0.dist-info/METADATA +508 -0
  2. aws_inventory_manager-0.2.0.dist-info/RECORD +65 -0
  3. aws_inventory_manager-0.2.0.dist-info/WHEEL +5 -0
  4. aws_inventory_manager-0.2.0.dist-info/entry_points.txt +2 -0
  5. aws_inventory_manager-0.2.0.dist-info/licenses/LICENSE +21 -0
  6. aws_inventory_manager-0.2.0.dist-info/top_level.txt +1 -0
  7. src/__init__.py +3 -0
  8. src/aws/__init__.py +11 -0
  9. src/aws/client.py +128 -0
  10. src/aws/credentials.py +191 -0
  11. src/aws/rate_limiter.py +177 -0
  12. src/cli/__init__.py +5 -0
  13. src/cli/config.py +130 -0
  14. src/cli/main.py +1450 -0
  15. src/cost/__init__.py +5 -0
  16. src/cost/analyzer.py +226 -0
  17. src/cost/explorer.py +209 -0
  18. src/cost/reporter.py +237 -0
  19. src/delta/__init__.py +5 -0
  20. src/delta/calculator.py +180 -0
  21. src/delta/reporter.py +225 -0
  22. src/models/__init__.py +17 -0
  23. src/models/cost_report.py +87 -0
  24. src/models/delta_report.py +111 -0
  25. src/models/inventory.py +124 -0
  26. src/models/resource.py +99 -0
  27. src/models/snapshot.py +108 -0
  28. src/snapshot/__init__.py +6 -0
  29. src/snapshot/capturer.py +347 -0
  30. src/snapshot/filter.py +245 -0
  31. src/snapshot/inventory_storage.py +264 -0
  32. src/snapshot/resource_collectors/__init__.py +5 -0
  33. src/snapshot/resource_collectors/apigateway.py +140 -0
  34. src/snapshot/resource_collectors/backup.py +136 -0
  35. src/snapshot/resource_collectors/base.py +81 -0
  36. src/snapshot/resource_collectors/cloudformation.py +55 -0
  37. src/snapshot/resource_collectors/cloudwatch.py +109 -0
  38. src/snapshot/resource_collectors/codebuild.py +69 -0
  39. src/snapshot/resource_collectors/codepipeline.py +82 -0
  40. src/snapshot/resource_collectors/dynamodb.py +65 -0
  41. src/snapshot/resource_collectors/ec2.py +240 -0
  42. src/snapshot/resource_collectors/ecs.py +215 -0
  43. src/snapshot/resource_collectors/eks.py +200 -0
  44. src/snapshot/resource_collectors/elb.py +126 -0
  45. src/snapshot/resource_collectors/eventbridge.py +156 -0
  46. src/snapshot/resource_collectors/iam.py +188 -0
  47. src/snapshot/resource_collectors/kms.py +111 -0
  48. src/snapshot/resource_collectors/lambda_func.py +112 -0
  49. src/snapshot/resource_collectors/rds.py +109 -0
  50. src/snapshot/resource_collectors/route53.py +86 -0
  51. src/snapshot/resource_collectors/s3.py +105 -0
  52. src/snapshot/resource_collectors/secretsmanager.py +70 -0
  53. src/snapshot/resource_collectors/sns.py +68 -0
  54. src/snapshot/resource_collectors/sqs.py +72 -0
  55. src/snapshot/resource_collectors/ssm.py +160 -0
  56. src/snapshot/resource_collectors/stepfunctions.py +74 -0
  57. src/snapshot/resource_collectors/vpcendpoints.py +79 -0
  58. src/snapshot/resource_collectors/waf.py +159 -0
  59. src/snapshot/storage.py +259 -0
  60. src/utils/__init__.py +12 -0
  61. src/utils/export.py +87 -0
  62. src/utils/hash.py +60 -0
  63. src/utils/logging.py +63 -0
  64. src/utils/paths.py +51 -0
  65. src/utils/progress.py +41 -0
src/models/inventory.py ADDED
@@ -0,0 +1,124 @@
+ """Inventory model for organizing snapshots by account and purpose."""
+
+ import re
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from typing import Any, Dict, List, Optional
+
+
+ @dataclass
+ class Inventory:
+     """Named container for organizing snapshots by account and purpose.
+
+     Attributes:
+         name: Unique identifier within account (alphanumeric + hyphens + underscores, 1-50 chars)
+         account_id: AWS account ID (12 digits)
+         include_tags: Tag filters (resource MUST have ALL)
+         exclude_tags: Tag filters (resource MUST NOT have ANY)
+         snapshots: List of snapshot filenames in this inventory
+         active_snapshot: Filename of active baseline snapshot
+         description: Human-readable description
+         created_at: Inventory creation timestamp (timezone-aware UTC)
+         last_updated: Last modification timestamp (timezone-aware UTC, auto-updated)
+     """
+
+     name: str
+     account_id: str
+     include_tags: Dict[str, str] = field(default_factory=dict)
+     exclude_tags: Dict[str, str] = field(default_factory=dict)
+     snapshots: List[str] = field(default_factory=list)
+     active_snapshot: Optional[str] = None
+     description: str = ""
+     created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+     last_updated: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Serialize to dictionary for YAML storage.
+
+         Returns:
+             Dictionary representation suitable for YAML serialization
+         """
+         return {
+             "name": self.name,
+             "account_id": self.account_id,
+             "description": self.description,
+             "include_tags": self.include_tags,
+             "exclude_tags": self.exclude_tags,
+             "snapshots": self.snapshots,
+             "active_snapshot": self.active_snapshot,
+             "created_at": self.created_at.isoformat(),
+             "last_updated": self.last_updated.isoformat(),
+         }
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "Inventory":
+         """Deserialize from dictionary (YAML load).
+
+         Args:
+             data: Dictionary loaded from YAML
+
+         Returns:
+             Inventory instance
+         """
+         return cls(
+             name=data["name"],
+             account_id=data["account_id"],
+             description=data.get("description", ""),
+             include_tags=data.get("include_tags", {}),
+             exclude_tags=data.get("exclude_tags", {}),
+             snapshots=data.get("snapshots", []),
+             active_snapshot=data.get("active_snapshot"),
+             created_at=datetime.fromisoformat(data["created_at"]),
+             last_updated=datetime.fromisoformat(data["last_updated"]),
+         )
+
+     def add_snapshot(self, snapshot_filename: str, set_active: bool = False) -> None:
+         """Add snapshot to inventory, optionally marking as active.
+
+         Args:
+             snapshot_filename: Name of snapshot file to add
+             set_active: Whether to mark this snapshot as active baseline
+         """
+         if snapshot_filename not in self.snapshots:
+             self.snapshots.append(snapshot_filename)
+         if set_active:
+             self.active_snapshot = snapshot_filename
+         self.last_updated = datetime.now(timezone.utc)
+
+     def remove_snapshot(self, snapshot_filename: str) -> None:
+         """Remove snapshot from inventory, clearing active if it was active.
+
+         Args:
+             snapshot_filename: Name of snapshot file to remove
+         """
+         if snapshot_filename in self.snapshots:
+             self.snapshots.remove(snapshot_filename)
+         if self.active_snapshot == snapshot_filename:
+             self.active_snapshot = None
+         self.last_updated = datetime.now(timezone.utc)
+
+     def validate(self) -> List[str]:
+         """Validate inventory data, return list of errors.
+
+         Returns:
+             List of validation error messages (empty if valid)
+         """
+         errors = []
+
+         # Validate name format (alphanumeric + hyphens + underscores only)
+         if not self.name or not re.match(r"^[a-zA-Z0-9_-]+$", self.name):
+             errors.append("Name must contain only alphanumeric characters, hyphens, and underscores")
+
+         # Validate name length
+         if len(self.name) > 50:
+             errors.append("Name must be 50 characters or less")
+
+         # Validate account ID format (12 digits)
+         if not self.account_id or not re.match(r"^\d{12}$", self.account_id):
+             errors.append("Account ID must be 12 digits")
+
+         # Validate active snapshot exists in snapshots list
+         if self.active_snapshot and self.active_snapshot not in self.snapshots:
+             errors.append("Active snapshot must exist in snapshots list")
+
+         return errors
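For orientation, here is a minimal sketch of how the Inventory model above could be exercised. The src.models.inventory import path is assumed from the wheel's top-level "src" package, and the file names and tag values are made up for illustration.

from src.models.inventory import Inventory  # import path assumed from the wheel's top-level "src" package

# Hypothetical inventory for one account, limited to resources tagged env=prod
inv = Inventory(
    name="prod-baseline",
    account_id="123456789012",
    include_tags={"env": "prod"},
    description="Production baseline inventory",
)

# Register a snapshot file and mark it as the active baseline
inv.add_snapshot("prod-baseline-2024-01-01.json", set_active=True)

# validate() returns a list of error strings; an empty list means the inventory is well formed
assert inv.validate() == []

# Round-trip through the dict form used for YAML storage
restored = Inventory.from_dict(inv.to_dict())
assert restored.active_snapshot == "prod-baseline-2024-01-01.json"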
src/models/resource.py ADDED
@@ -0,0 +1,99 @@
+ """Resource data model representing a single AWS resource."""
+
+ import re
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any, Dict, Optional
+
+
+ @dataclass
+ class Resource:
+     """Represents a single AWS resource captured in a snapshot."""
+
+     arn: str
+     resource_type: str
+     name: str
+     region: str
+     config_hash: str
+     raw_config: Dict[str, Any]
+     tags: Dict[str, str] = field(default_factory=dict)
+     created_at: Optional[datetime] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert resource to dictionary for serialization."""
+         return {
+             "arn": self.arn,
+             "type": self.resource_type,
+             "name": self.name,
+             "region": self.region,
+             "tags": self.tags,
+             "config_hash": self.config_hash,
+             "created_at": self.created_at.isoformat() if self.created_at else None,
+             "raw_config": self.raw_config,
+         }
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "Resource":
+         """Create resource from dictionary."""
+         created_at = None
+         if data.get("created_at"):
+             created_at = datetime.fromisoformat(data["created_at"])
+
+         return cls(
+             arn=data["arn"],
+             resource_type=data["type"],
+             name=data["name"],
+             region=data["region"],
+             config_hash=data["config_hash"],
+             raw_config=data["raw_config"],
+             tags=data.get("tags", {}),
+             created_at=created_at,
+         )
+
+     def validate(self) -> bool:
+         """Validate resource data integrity.
+
+         Returns:
+             True if valid, raises ValueError if invalid
+         """
+         # Validate ARN format
+         arn_pattern = r"^arn:aws:[a-z0-9-]+:[a-z0-9-]*:[0-9]*:.*$"
+         if not re.match(arn_pattern, self.arn):
+             raise ValueError(f"Invalid ARN format: {self.arn}")
+
+         # Validate config_hash is 64-character hex string (SHA256)
+         if not re.match(r"^[a-fA-F0-9]{64}$", self.config_hash):
+             raise ValueError(f"Invalid config_hash: {self.config_hash}. Must be 64-character SHA256 hex string.")
+
+         # Validate region format
+         valid_regions = ["global"] + [
+             "us-east-1",
+             "us-east-2",
+             "us-west-1",
+             "us-west-2",
+             "eu-west-1",
+             "eu-west-2",
+             "eu-west-3",
+             "eu-central-1",
+             "ap-southeast-1",
+             "ap-southeast-2",
+             "ap-northeast-1",
+             "ap-northeast-2",
+             "ca-central-1",
+             "sa-east-1",
+             "ap-south-1",
+         ]
+         # Basic validation - starts with region pattern or is 'global'
+         if self.region != "global" and not any(self.region.startswith(r[:6]) for r in valid_regions if r != "global"):
+             # Allow it anyway - AWS adds new regions regularly
+             pass
+
+         return True
+
+     @property
+     def service(self) -> str:
+         """Extract service name from resource type.
+
+         Example: 'iam:role' -> 'iam'
+         """
+         return self.resource_type.split(":")[0] if ":" in self.resource_type else self.resource_type
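As a quick check of the Resource contract above, the sketch below fabricates a resource whose config_hash is a SHA-256 hex digest of its raw configuration (the hashing convention here is an assumption; the package ships its own src/utils/hash.py) and runs it through validate() and the dict round-trip.

import hashlib
import json

from src.models.resource import Resource  # import path assumed from the wheel's top-level "src" package

raw_config = {"RoleName": "demo-role", "Path": "/"}  # made-up configuration
# validate() expects a 64-character hex digest for config_hash
config_hash = hashlib.sha256(json.dumps(raw_config, sort_keys=True).encode()).hexdigest()

resource = Resource(
    arn="arn:aws:iam::123456789012:role/demo-role",
    resource_type="iam:role",
    name="demo-role",
    region="global",  # IAM is treated as a global service
    config_hash=config_hash,
    raw_config=raw_config,
    tags={"env": "prod"},
)

assert resource.validate() is True
assert resource.service == "iam"
assert Resource.from_dict(resource.to_dict()).arn == resource.arn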
src/models/snapshot.py ADDED
@@ -0,0 +1,108 @@
+ """Snapshot data model representing a point-in-time inventory of AWS resources."""
+
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any, Dict, List, Optional
+
+
+ @dataclass
+ class Snapshot:
+     """Represents a point-in-time inventory of AWS resources.
+
+     This serves as the baseline reference for delta tracking and cost analysis.
+     """
+
+     name: str
+     created_at: datetime
+     account_id: str
+     regions: List[str]
+     resources: List[Any]  # List[Resource] - avoiding circular import
+     is_active: bool = True
+     resource_count: int = 0
+     service_counts: Dict[str, int] = field(default_factory=dict)
+     metadata: Dict[str, Any] = field(default_factory=dict)
+     filters_applied: Optional[Dict[str, Any]] = None
+     total_resources_before_filter: Optional[int] = None
+     inventory_name: str = "default"  # Name of inventory this snapshot belongs to
+
+     def __post_init__(self) -> None:
+         """Calculate derived fields after initialization."""
+         if self.resource_count == 0:
+             self.resource_count = len(self.resources)
+
+         if not self.service_counts:
+             self._calculate_service_counts()
+
+     def _calculate_service_counts(self) -> None:
+         """Calculate resource counts by service type."""
+         counts: Dict[str, int] = {}
+         for resource in self.resources:
+             service = resource.resource_type.split(":")[0] if ":" in resource.resource_type else resource.resource_type
+             counts[service] = counts.get(service, 0) + 1
+         self.service_counts = counts
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert snapshot to dictionary for serialization."""
+         return {
+             "name": self.name,
+             "created_at": self.created_at.isoformat(),
+             "account_id": self.account_id,
+             "regions": self.regions,
+             "is_active": self.is_active,
+             "resource_count": self.resource_count,
+             "service_counts": self.service_counts,
+             "metadata": self.metadata,
+             "filters_applied": self.filters_applied,
+             "total_resources_before_filter": self.total_resources_before_filter,
+             "inventory_name": self.inventory_name,
+             "resources": [r.to_dict() for r in self.resources],
+         }
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "Snapshot":
+         """Create snapshot from dictionary.
+
+         Note: This requires Resource class to be imported at call time
+         to avoid circular imports.
+         """
+         from .resource import Resource
+
+         return cls(
+             name=data["name"],
+             created_at=datetime.fromisoformat(data["created_at"]),
+             account_id=data["account_id"],
+             regions=data["regions"],
+             resources=[Resource.from_dict(r) for r in data["resources"]],
+             is_active=data.get("is_active", True),
+             resource_count=data.get("resource_count", 0),
+             service_counts=data.get("service_counts", {}),
+             metadata=data.get("metadata", {}),
+             filters_applied=data.get("filters_applied"),
+             total_resources_before_filter=data.get("total_resources_before_filter"),
+             inventory_name=data.get("inventory_name", "default"),  # Default for backward compatibility
+         )
+
+     def validate(self) -> bool:
+         """Validate snapshot data integrity.
+
+         Returns:
+             True if valid, raises ValueError if invalid
+         """
+         import re
+
+         # Validate name format (alphanumeric, hyphens, underscores)
+         if not re.match(r"^[a-zA-Z0-9_-]+$", self.name):
+             raise ValueError(
+                 f"Invalid snapshot name: {self.name}. "
+                 f"Must contain only alphanumeric characters, hyphens, and underscores."
+             )
+
+         # Validate account ID (12-digit string)
+         if not re.match(r"^\d{12}$", self.account_id):
+             raise ValueError(f"Invalid AWS account ID: {self.account_id}. Must be a 12-digit string.")
+
+         # Validate regions list is not empty
+         if not self.regions:
+             raise ValueError("Snapshot must include at least one AWS region.")
+
+         return True
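Tying the two models together, a minimal sketch (same assumed src import path, placeholder values throughout) shows how __post_init__ derives resource_count and service_counts when they are left at their defaults, and how serialization nests each resource via Resource.to_dict().

from datetime import datetime, timezone

from src.models.resource import Resource  # import paths assumed from the wheel's top-level "src" package
from src.models.snapshot import Snapshot

resources = [
    Resource(
        arn="arn:aws:s3:::demo-bucket",
        resource_type="s3:bucket",
        name="demo-bucket",
        region="global",
        config_hash="0" * 64,  # placeholder 64-character hex digest
        raw_config={},
    ),
]

snap = Snapshot(
    name="baseline-2024-01-01",
    created_at=datetime.now(timezone.utc),
    account_id="123456789012",
    regions=["us-east-1"],
    resources=resources,
)

# Derived fields are filled in by __post_init__
assert snap.resource_count == 1
assert snap.service_counts == {"s3": 1}

# Round-trip through the dict form; inventory_name defaults to "default"
restored = Snapshot.from_dict(snap.to_dict())
assert restored.inventory_name == "default"
assert restored.validate() is True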
src/snapshot/__init__.py ADDED
@@ -0,0 +1,6 @@
+ """Snapshot capture and storage functionality."""
+
+ from .inventory_storage import InventoryNotFoundError, InventoryStorage
+ from .storage import SnapshotStorage
+
+ __all__ = ["SnapshotStorage", "InventoryStorage", "InventoryNotFoundError"]
src/snapshot/capturer.py ADDED
@@ -0,0 +1,347 @@
+ """Snapshot capture coordinator for AWS resources."""
+
+ import logging
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from datetime import datetime, timezone
+ from threading import Lock
+ from typing import TYPE_CHECKING, Dict, List, Optional, Type
+
+ import boto3
+ from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
+
+ from ..models.snapshot import Snapshot
+
+ if TYPE_CHECKING:
+     from .filter import ResourceFilter
+ from .resource_collectors.apigateway import APIGatewayCollector
+ from .resource_collectors.backup import BackupCollector
+ from .resource_collectors.base import BaseResourceCollector
+ from .resource_collectors.cloudformation import CloudFormationCollector
+ from .resource_collectors.cloudwatch import CloudWatchCollector
+ from .resource_collectors.codebuild import CodeBuildCollector
+ from .resource_collectors.codepipeline import CodePipelineCollector
+ from .resource_collectors.dynamodb import DynamoDBCollector
+ from .resource_collectors.ec2 import EC2Collector
+ from .resource_collectors.ecs import ECSCollector
+ from .resource_collectors.eks import EKSCollector
+ from .resource_collectors.elb import ELBCollector
+ from .resource_collectors.eventbridge import EventBridgeCollector
+ from .resource_collectors.iam import IAMCollector
+ from .resource_collectors.kms import KMSCollector
+ from .resource_collectors.lambda_func import LambdaCollector
+ from .resource_collectors.rds import RDSCollector
+ from .resource_collectors.route53 import Route53Collector
+ from .resource_collectors.s3 import S3Collector
+ from .resource_collectors.secretsmanager import SecretsManagerCollector
+ from .resource_collectors.sns import SNSCollector
+ from .resource_collectors.sqs import SQSCollector
+ from .resource_collectors.ssm import SSMCollector
+ from .resource_collectors.stepfunctions import StepFunctionsCollector
+ from .resource_collectors.vpcendpoints import VPCEndpointsCollector
+ from .resource_collectors.waf import WAFCollector
+
+ logger = logging.getLogger(__name__)
+
+
+ # Registry of all available collectors
+ COLLECTOR_REGISTRY: List[Type[BaseResourceCollector]] = [
+     IAMCollector,
+     LambdaCollector,
+     S3Collector,
+     EC2Collector,
+     RDSCollector,
+     CloudWatchCollector,
+     SNSCollector,
+     SQSCollector,
+     DynamoDBCollector,
+     ELBCollector,
+     CloudFormationCollector,
+     APIGatewayCollector,
+     EventBridgeCollector,
+     SecretsManagerCollector,
+     KMSCollector,
+     SSMCollector,
+     Route53Collector,
+     ECSCollector,
+     StepFunctionsCollector,
+     VPCEndpointsCollector,
+     WAFCollector,
+     EKSCollector,
+     CodePipelineCollector,
+     CodeBuildCollector,
+     BackupCollector,
+ ]
+
+
+ def create_snapshot(
+     name: str,
+     regions: List[str],
+     account_id: str,
+     profile_name: Optional[str] = None,
+     set_active: bool = True,
+     resource_types: Optional[List[str]] = None,
+     parallel_workers: int = 10,
+     resource_filter: Optional["ResourceFilter"] = None,
+     inventory_name: str = "default",
+ ) -> Snapshot:
+     """Create a comprehensive snapshot of AWS resources.
+
+     Args:
+         name: Snapshot name
+         regions: List of AWS regions to scan
+         account_id: AWS account ID
+         profile_name: AWS profile name (optional)
+         set_active: Whether to set as active baseline
+         resource_types: Optional list of resource types to collect (e.g., ['iam', 'lambda'])
+         parallel_workers: Number of parallel collection tasks
+         resource_filter: Optional ResourceFilter for date/tag-based filtering
+         inventory_name: Name of inventory this snapshot belongs to (default: "default")
+
+     Returns:
+         Snapshot instance with captured resources
+     """
+     logger.debug(f"Creating snapshot '{name}' for regions: {regions}")
+
+     # Create session with optional profile
+     session_kwargs = {}
+     if profile_name:
+         session_kwargs["profile_name"] = profile_name
+
+     session = boto3.Session(**session_kwargs)
+
+     # Collect resources
+     all_resources = []
+     resource_counts = {}  # Track counts per service for progress
+     collection_errors = []  # Track errors for summary
+
+     # Expected errors that we'll suppress (service not enabled, pagination issues, etc.)
+     expected_error_patterns = [
+         "Operation cannot be paginated",
+         "is not subscribed",
+         "AccessDenied",
+         "not authorized",
+         "InvalidAction",
+         "OptInRequired",
+     ]
+
+     def is_expected_error(error_msg: str) -> bool:
+         """Check if error is expected and can be safely ignored."""
+         return any(pattern in error_msg for pattern in expected_error_patterns)
+
+     with Progress(
+         SpinnerColumn(),
+         TextColumn("[bold blue]{task.description}"),
+         BarColumn(),
+         TaskProgressColumn(),
+     ) as progress:
+         # Determine which collectors to use
+         collectors_to_use = _get_collectors(resource_types)
+
+         # Separate global and regional collectors
+         # Create temporary instances to check is_global_service property
+         global_collectors = []
+         regional_collectors = []
+         for c in collectors_to_use:
+             temp_instance = c(session, "us-east-1")
+             if temp_instance.is_global_service:
+                 global_collectors.append(c)
+             else:
+                 regional_collectors.append(c)
+
+         total_tasks = len(global_collectors) + (len(regional_collectors) * len(regions))
+         main_task = progress.add_task(
+             f"[bold]Collecting AWS resources from {len(regions)} region(s)...", total=total_tasks
+         )
+
+         # Thread-safe lock for updating shared state
+         lock = Lock()
+
+         def collect_service(collector_class: Type[BaseResourceCollector], region: str, is_global: bool = False) -> Dict:
+             """Collect resources for a single service in a region (thread-safe)."""
+             try:
+                 collector = collector_class(session, region)
+                 service_name = collector.service_name.upper()
+                 region_label = "global" if is_global else region
+
+                 # Update progress (thread-safe)
+                 with lock:
+                     progress.update(main_task, description=f"📦 {service_name} • {region_label}")
+
+                 resources = collector.collect()
+
+                 logger.debug(f"Collected {len(resources)} {service_name} resources from {region_label}")
+
+                 return {"success": True, "resources": resources, "service": service_name, "region": region_label}
+
+             except Exception as e:
+                 error_msg = str(e)
+                 service_name = collector_class.__name__.replace("Collector", "").upper()
+                 region_label = "global" if is_global else region
+
+                 if not is_expected_error(error_msg):
+                     logger.debug(f"Collection error - {service_name} ({region_label}): {error_msg[:80]}")
+                     return {
+                         "success": False,
+                         "error": {"service": service_name, "region": region_label, "error": error_msg[:100]},
+                     }
+                 else:
+                     logger.debug(f"Skipping {service_name} in {region_label} (not available): {error_msg[:80]}")
+                     return {"success": False, "expected": True}
+
+         # Create list of collection tasks
+         collection_tasks = []
+
+         # Add global service tasks
+         for collector_class in global_collectors:
+             collection_tasks.append((collector_class, "us-east-1", True))
+
+         # Add regional service tasks
+         for region in regions:
+             for collector_class in regional_collectors:
+                 collection_tasks.append((collector_class, region, False))
+
+         # Execute collections in parallel
+         with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
+             # Submit all tasks
+             future_to_task = {
+                 executor.submit(collect_service, collector_class, region, is_global): (
+                     collector_class,
+                     region,
+                     is_global,
+                 )
+                 for collector_class, region, is_global in collection_tasks
+             }
+
+             # Process completed tasks
+             for future in as_completed(future_to_task):
+                 result = future.result()
+
+                 if result["success"]:
+                     with lock:
+                         all_resources.extend(result["resources"])
+                         if result["region"] == "global":
+                             resource_counts[result["service"]] = len(result["resources"])
+                         else:
+                             key = f"{result['service']}_{result['region']}"
+                             resource_counts[key] = len(result["resources"])
+                 elif not result.get("expected", False):
+                     with lock:
+                         collection_errors.append(result["error"])
+
+                 # Advance progress (thread-safe)
+                 with lock:
+                     progress.advance(main_task)
+
+         progress.update(main_task, description=f"[bold green]✓ Successfully collected {len(all_resources)} resources")
+
+     # Log summary of collection errors if any (but not expected ones)
+     if collection_errors:
+         logger.debug(f"\nCollection completed with {len(collection_errors)} service(s) unavailable")
+         logger.debug("Services that failed:")
+         for error in collection_errors:
+             logger.debug(f" - {error['service']} ({error['region']}): {error['error']}")
+
+     # Apply filters if specified
+     total_before_filter = len(all_resources)
+     filters_applied = None
+
+     if resource_filter:
+         logger.debug(f"Applying filters: {resource_filter.get_filter_summary()}")
+         all_resources = resource_filter.apply(all_resources)
+         filter_stats = resource_filter.get_statistics_summary()
+
+         filters_applied = {
+             "date_filters": {
+                 "before_date": resource_filter.before_date.isoformat() if resource_filter.before_date else None,
+                 "after_date": resource_filter.after_date.isoformat() if resource_filter.after_date else None,
+             },
+             "tag_filters": resource_filter.required_tags,
+             "statistics": filter_stats,
+         }
+
+         logger.debug(
+             f"Filtering complete: {filter_stats['total_collected']} collected, "
+             f"{filter_stats['final_count']} matched filters"
+         )
+
+     # Calculate service counts
+     service_counts: Dict[str, int] = {}
+     for resource in all_resources:
+         service_counts[resource.resource_type] = service_counts.get(resource.resource_type, 0) + 1
+
+     # Create snapshot
+     snapshot = Snapshot(
+         name=name,
+         created_at=datetime.now(timezone.utc),
+         account_id=account_id,
+         regions=regions,
+         resources=all_resources,
+         metadata={
+             "tool": "aws-inventory-manager",
+             "version": "1.0.0",
+             "collectors_used": [c(session, "us-east-1").service_name for c in collectors_to_use],
+             "collection_errors": collection_errors if collection_errors else None,
+         },
+         is_active=set_active,
+         service_counts=service_counts,
+         filters_applied=filters_applied,
+         total_resources_before_filter=total_before_filter if resource_filter else None,
+         inventory_name=inventory_name,
+     )
+
+     logger.debug(f"Snapshot '{name}' created with {len(all_resources)} resources")
+
+     return snapshot
+
+
+ def create_snapshot_mvp(
+     name: str,
+     regions: List[str],
+     account_id: str,
+     profile_name: Optional[str] = None,
+     set_active: bool = True,
+ ) -> Snapshot:
+     """Create snapshot using the full implementation.
+
+     This is a wrapper for backward compatibility with the MVP CLI code.
+
+     Args:
+         name: Snapshot name
+         regions: List of AWS regions to scan
+         account_id: AWS account ID
+         profile_name: AWS profile name (optional)
+         set_active: Whether to set as active baseline
+
+     Returns:
+         Snapshot instance with captured resources
+     """
+     return create_snapshot(
+         name=name,
+         regions=regions,
+         account_id=account_id,
+         profile_name=profile_name,
+         set_active=set_active,
+     )
+
+
+ def _get_collectors(resource_types: Optional[List[str]] = None) -> List[Type[BaseResourceCollector]]:
+     """Get list of collectors to use based on resource type filter.
+
+     Args:
+         resource_types: Optional list of service names to filter (e.g., ['iam', 'lambda'])
+
+     Returns:
+         List of collector classes to use
+     """
+     if not resource_types:
+         return COLLECTOR_REGISTRY
+
+     # Filter collectors based on service name
+     filtered = []
+     for collector_class in COLLECTOR_REGISTRY:
+         # Create temporary instance to check service name
+         temp_collector = collector_class(boto3.Session(), "us-east-1")
+         if temp_collector.service_name in resource_types:
+             filtered.append(collector_class)
+
+     return filtered if filtered else COLLECTOR_REGISTRY
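Finally, a usage sketch for the coordinator above. This is illustrative only: it would make live AWS API calls, it assumes working credentials for the named profile, and the service names passed as resource_types follow the example given in the docstring ('iam', 'lambda'); all other values are placeholders.

from src.snapshot.capturer import create_snapshot  # import path assumed from the wheel's top-level "src" package

# Capture only IAM and Lambda resources from two regions using 5 worker threads
snapshot = create_snapshot(
    name="weekly-baseline",
    regions=["us-east-1", "eu-west-1"],
    account_id="123456789012",          # placeholder account ID
    profile_name="prod",                # optional named AWS profile
    resource_types=["iam", "lambda"],   # restricts COLLECTOR_REGISTRY via _get_collectors()
    parallel_workers=5,
)

print(f"Captured {snapshot.resource_count} resources across {len(snapshot.regions)} region(s)")
print(snapshot.service_counts)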