aws-inventory-manager 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aws-inventory-manager might be problematic. Click here for more details.
- aws_inventory_manager-0.2.0.dist-info/METADATA +508 -0
- aws_inventory_manager-0.2.0.dist-info/RECORD +65 -0
- aws_inventory_manager-0.2.0.dist-info/WHEEL +5 -0
- aws_inventory_manager-0.2.0.dist-info/entry_points.txt +2 -0
- aws_inventory_manager-0.2.0.dist-info/licenses/LICENSE +21 -0
- aws_inventory_manager-0.2.0.dist-info/top_level.txt +1 -0
- src/__init__.py +3 -0
- src/aws/__init__.py +11 -0
- src/aws/client.py +128 -0
- src/aws/credentials.py +191 -0
- src/aws/rate_limiter.py +177 -0
- src/cli/__init__.py +5 -0
- src/cli/config.py +130 -0
- src/cli/main.py +1450 -0
- src/cost/__init__.py +5 -0
- src/cost/analyzer.py +226 -0
- src/cost/explorer.py +209 -0
- src/cost/reporter.py +237 -0
- src/delta/__init__.py +5 -0
- src/delta/calculator.py +180 -0
- src/delta/reporter.py +225 -0
- src/models/__init__.py +17 -0
- src/models/cost_report.py +87 -0
- src/models/delta_report.py +111 -0
- src/models/inventory.py +124 -0
- src/models/resource.py +99 -0
- src/models/snapshot.py +108 -0
- src/snapshot/__init__.py +6 -0
- src/snapshot/capturer.py +347 -0
- src/snapshot/filter.py +245 -0
- src/snapshot/inventory_storage.py +264 -0
- src/snapshot/resource_collectors/__init__.py +5 -0
- src/snapshot/resource_collectors/apigateway.py +140 -0
- src/snapshot/resource_collectors/backup.py +136 -0
- src/snapshot/resource_collectors/base.py +81 -0
- src/snapshot/resource_collectors/cloudformation.py +55 -0
- src/snapshot/resource_collectors/cloudwatch.py +109 -0
- src/snapshot/resource_collectors/codebuild.py +69 -0
- src/snapshot/resource_collectors/codepipeline.py +82 -0
- src/snapshot/resource_collectors/dynamodb.py +65 -0
- src/snapshot/resource_collectors/ec2.py +240 -0
- src/snapshot/resource_collectors/ecs.py +215 -0
- src/snapshot/resource_collectors/eks.py +200 -0
- src/snapshot/resource_collectors/elb.py +126 -0
- src/snapshot/resource_collectors/eventbridge.py +156 -0
- src/snapshot/resource_collectors/iam.py +188 -0
- src/snapshot/resource_collectors/kms.py +111 -0
- src/snapshot/resource_collectors/lambda_func.py +112 -0
- src/snapshot/resource_collectors/rds.py +109 -0
- src/snapshot/resource_collectors/route53.py +86 -0
- src/snapshot/resource_collectors/s3.py +105 -0
- src/snapshot/resource_collectors/secretsmanager.py +70 -0
- src/snapshot/resource_collectors/sns.py +68 -0
- src/snapshot/resource_collectors/sqs.py +72 -0
- src/snapshot/resource_collectors/ssm.py +160 -0
- src/snapshot/resource_collectors/stepfunctions.py +74 -0
- src/snapshot/resource_collectors/vpcendpoints.py +79 -0
- src/snapshot/resource_collectors/waf.py +159 -0
- src/snapshot/storage.py +259 -0
- src/utils/__init__.py +12 -0
- src/utils/export.py +87 -0
- src/utils/hash.py +60 -0
- src/utils/logging.py +63 -0
- src/utils/paths.py +51 -0
- src/utils/progress.py +41 -0
src/models/inventory.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Inventory model for organizing snapshots by account and purpose."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Inventory:
    """Named container grouping snapshots for one AWS account and purpose.

    Attributes:
        name: Unique identifier within account (alphanumeric + hyphens + underscores, 1-50 chars)
        account_id: AWS account ID (12 digits)
        include_tags: Tag filters (resource MUST have ALL)
        exclude_tags: Tag filters (resource MUST NOT have ANY)
        snapshots: List of snapshot filenames in this inventory
        active_snapshot: Filename of active baseline snapshot
        description: Human-readable description
        created_at: Inventory creation timestamp (timezone-aware UTC)
        last_updated: Last modification timestamp (timezone-aware UTC, auto-updated)
    """

    name: str
    account_id: str
    include_tags: Dict[str, str] = field(default_factory=dict)
    exclude_tags: Dict[str, str] = field(default_factory=dict)
    snapshots: List[str] = field(default_factory=list)
    active_snapshot: Optional[str] = None
    description: str = ""
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    last_updated: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dictionary suitable for YAML storage.

        Returns:
            Dictionary with all fields; timestamps are ISO-8601 strings.
        """
        serialized: Dict[str, Any] = {
            "name": self.name,
            "account_id": self.account_id,
            "description": self.description,
            "include_tags": self.include_tags,
            "exclude_tags": self.exclude_tags,
            "snapshots": self.snapshots,
            "active_snapshot": self.active_snapshot,
        }
        # Timestamps are stored as ISO strings so YAML round-trips cleanly.
        serialized["created_at"] = self.created_at.isoformat()
        serialized["last_updated"] = self.last_updated.isoformat()
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Inventory":
        """Rebuild an Inventory from a dictionary produced by :meth:`to_dict`.

        Args:
            data: Dictionary loaded from YAML

        Returns:
            Inventory instance
        """
        kwargs = dict(
            name=data["name"],
            account_id=data["account_id"],
            description=data.get("description", ""),
            include_tags=data.get("include_tags", {}),
            exclude_tags=data.get("exclude_tags", {}),
            snapshots=data.get("snapshots", []),
            active_snapshot=data.get("active_snapshot"),
            created_at=datetime.fromisoformat(data["created_at"]),
            last_updated=datetime.fromisoformat(data["last_updated"]),
        )
        return cls(**kwargs)

    def add_snapshot(self, snapshot_filename: str, set_active: bool = False) -> None:
        """Register a snapshot file, optionally promoting it to the active baseline.

        Args:
            snapshot_filename: Name of snapshot file to add
            set_active: Whether to mark this snapshot as active baseline
        """
        already_known = snapshot_filename in self.snapshots
        if not already_known:
            self.snapshots.append(snapshot_filename)
        if set_active:
            self.active_snapshot = snapshot_filename
        self.last_updated = datetime.now(timezone.utc)

    def remove_snapshot(self, snapshot_filename: str) -> None:
        """Drop a snapshot from the inventory, clearing the active pointer if needed.

        Args:
            snapshot_filename: Name of snapshot file to remove
        """
        try:
            self.snapshots.remove(snapshot_filename)
        except ValueError:
            pass  # not tracked; nothing to drop
        if self.active_snapshot == snapshot_filename:
            self.active_snapshot = None
        self.last_updated = datetime.now(timezone.utc)

    def validate(self) -> List[str]:
        """Validate inventory data, returning a list of problems.

        Returns:
            List of validation error messages (empty if valid)
        """
        problems: List[str] = []

        # Name: non-empty, restricted to alphanumerics, hyphens, underscores.
        name_ok = bool(self.name) and re.match(r"^[a-zA-Z0-9_-]+$", self.name) is not None
        if not name_ok:
            problems.append("Name must contain only alphanumeric characters, hyphens, and underscores")

        # Name: bounded length.
        if len(self.name) > 50:
            problems.append("Name must be 50 characters or less")

        # Account ID: exactly 12 digits.
        account_ok = bool(self.account_id) and re.match(r"^\d{12}$", self.account_id) is not None
        if not account_ok:
            problems.append("Account ID must be 12 digits")

        # Active snapshot must be one of the tracked snapshots.
        if self.active_snapshot and self.active_snapshot not in self.snapshots:
            problems.append("Active snapshot must exist in snapshots list")

        return problems
|
src/models/resource.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Resource data model representing a single AWS resource."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Resource:
    """Represents a single AWS resource captured in a snapshot.

    Attributes:
        arn: Full Amazon Resource Name identifying the resource
        resource_type: Service-scoped type string, e.g. 'iam:role'
        name: Resource name
        region: AWS region the resource lives in, or 'global'
        config_hash: SHA256 hex digest of the resource configuration
        raw_config: Raw configuration payload for the resource
        tags: Resource tags as a key/value mapping
        created_at: Resource creation time when the service reports one
    """

    arn: str
    resource_type: str
    name: str
    region: str
    config_hash: str
    raw_config: Dict[str, Any]
    tags: Dict[str, str] = field(default_factory=dict)
    created_at: Optional[datetime] = None

    # Compiled once at class creation (not dataclass fields: no annotation).
    # The ARN pattern accepts every AWS partition (aws, aws-cn, aws-us-gov);
    # the previous pattern hard-coded 'arn:aws:' and rejected GovCloud/China ARNs.
    _ARN_PATTERN = re.compile(r"^arn:aws(-[a-z]+)*:[a-z0-9-]+:[a-z0-9-]*:[0-9]*:.*$")
    _SHA256_PATTERN = re.compile(r"^[a-fA-F0-9]{64}$")

    def to_dict(self) -> Dict[str, Any]:
        """Convert resource to dictionary for serialization."""
        return {
            "arn": self.arn,
            "type": self.resource_type,
            "name": self.name,
            "region": self.region,
            "tags": self.tags,
            "config_hash": self.config_hash,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "raw_config": self.raw_config,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Resource":
        """Create resource from a dictionary produced by :meth:`to_dict`."""
        created_at = None
        if data.get("created_at"):
            created_at = datetime.fromisoformat(data["created_at"])

        return cls(
            arn=data["arn"],
            resource_type=data["type"],
            name=data["name"],
            region=data["region"],
            config_hash=data["config_hash"],
            raw_config=data["raw_config"],
            tags=data.get("tags", {}),
            created_at=created_at,
        )

    def validate(self) -> bool:
        """Validate resource data integrity.

        Returns:
            True if valid.

        Raises:
            ValueError: If the ARN or config_hash is malformed.
        """
        # Validate ARN format (any AWS partition).
        if not self._ARN_PATTERN.match(self.arn):
            raise ValueError(f"Invalid ARN format: {self.arn}")

        # Validate config_hash is a 64-character hex string (SHA256).
        if not self._SHA256_PATTERN.match(self.config_hash):
            raise ValueError(f"Invalid config_hash: {self.config_hash}. Must be 64-character SHA256 hex string.")

        # Region is deliberately not validated against a fixed list: AWS adds
        # new regions regularly, so any region string (or 'global') is accepted.
        # (The previous implementation carried a hard-coded region list feeding
        # a branch that only executed `pass` — dead code, now removed.)
        return True

    @property
    def service(self) -> str:
        """Extract service name from resource type.

        Example: 'iam:role' -> 'iam'
        """
        return self.resource_type.split(":")[0] if ":" in self.resource_type else self.resource_type
|
src/models/snapshot.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Snapshot data model representing a point-in-time inventory of AWS resources."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class Snapshot:
    """Point-in-time inventory of AWS resources.

    Acts as the baseline reference for delta tracking and cost analysis.
    """

    name: str
    created_at: datetime
    account_id: str
    regions: List[str]
    resources: List[Any]  # List[Resource] - avoiding circular import
    is_active: bool = True
    resource_count: int = 0
    service_counts: Dict[str, int] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)
    filters_applied: Optional[Dict[str, Any]] = None
    total_resources_before_filter: Optional[int] = None
    inventory_name: str = "default"  # Name of inventory this snapshot belongs to

    def __post_init__(self) -> None:
        """Fill in derived fields that were not supplied explicitly."""
        if not self.resource_count:
            self.resource_count = len(self.resources)
        if not self.service_counts:
            self._calculate_service_counts()

    def _calculate_service_counts(self) -> None:
        """Tally resources per service (the prefix of 'service:kind' type strings)."""
        tally: Dict[str, int] = {}
        for res in self.resources:
            rtype = res.resource_type
            prefix = rtype.split(":", 1)[0] if ":" in rtype else rtype
            tally[prefix] = tally.get(prefix, 0) + 1
        self.service_counts = tally

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the snapshot (and its resources) to a plain dictionary."""
        payload: Dict[str, Any] = {
            "name": self.name,
            "created_at": self.created_at.isoformat(),
            "account_id": self.account_id,
            "regions": self.regions,
            "is_active": self.is_active,
            "resource_count": self.resource_count,
            "service_counts": self.service_counts,
            "metadata": self.metadata,
            "filters_applied": self.filters_applied,
            "total_resources_before_filter": self.total_resources_before_filter,
            "inventory_name": self.inventory_name,
        }
        # Resources are serialized last; each item delegates to its own to_dict().
        payload["resources"] = [res.to_dict() for res in self.resources]
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Snapshot":
        """Rebuild a snapshot from a dictionary produced by :meth:`to_dict`.

        Note: Resource is imported lazily, at call time, to avoid a
        circular import between the model modules.
        """
        from .resource import Resource

        restored = [Resource.from_dict(raw) for raw in data["resources"]]
        return cls(
            name=data["name"],
            created_at=datetime.fromisoformat(data["created_at"]),
            account_id=data["account_id"],
            regions=data["regions"],
            resources=restored,
            is_active=data.get("is_active", True),
            resource_count=data.get("resource_count", 0),
            service_counts=data.get("service_counts", {}),
            metadata=data.get("metadata", {}),
            filters_applied=data.get("filters_applied"),
            total_resources_before_filter=data.get("total_resources_before_filter"),
            inventory_name=data.get("inventory_name", "default"),  # Default for backward compatibility
        )

    def validate(self) -> bool:
        """Check snapshot integrity.

        Returns:
            True if valid, raises ValueError if invalid
        """
        import re

        # Name: alphanumerics, hyphens, underscores only.
        if re.match(r"^[a-zA-Z0-9_-]+$", self.name) is None:
            raise ValueError(
                f"Invalid snapshot name: {self.name}. "
                f"Must contain only alphanumeric characters, hyphens, and underscores."
            )

        # Account ID: exactly 12 digits.
        if re.match(r"^\d{12}$", self.account_id) is None:
            raise ValueError(f"Invalid AWS account ID: {self.account_id}. Must be a 12-digit string.")

        # At least one region must have been scanned.
        if not self.regions:
            raise ValueError("Snapshot must include at least one AWS region.")

        return True
|
src/snapshot/__init__.py
ADDED
src/snapshot/capturer.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""Snapshot capture coordinator for AWS resources."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from threading import Lock
|
|
7
|
+
from typing import TYPE_CHECKING, Dict, List, Optional, Type
|
|
8
|
+
|
|
9
|
+
import boto3
|
|
10
|
+
from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
|
|
11
|
+
|
|
12
|
+
from ..models.snapshot import Snapshot
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from .filter import ResourceFilter
|
|
16
|
+
from .resource_collectors.apigateway import APIGatewayCollector
|
|
17
|
+
from .resource_collectors.backup import BackupCollector
|
|
18
|
+
from .resource_collectors.base import BaseResourceCollector
|
|
19
|
+
from .resource_collectors.cloudformation import CloudFormationCollector
|
|
20
|
+
from .resource_collectors.cloudwatch import CloudWatchCollector
|
|
21
|
+
from .resource_collectors.codebuild import CodeBuildCollector
|
|
22
|
+
from .resource_collectors.codepipeline import CodePipelineCollector
|
|
23
|
+
from .resource_collectors.dynamodb import DynamoDBCollector
|
|
24
|
+
from .resource_collectors.ec2 import EC2Collector
|
|
25
|
+
from .resource_collectors.ecs import ECSCollector
|
|
26
|
+
from .resource_collectors.eks import EKSCollector
|
|
27
|
+
from .resource_collectors.elb import ELBCollector
|
|
28
|
+
from .resource_collectors.eventbridge import EventBridgeCollector
|
|
29
|
+
from .resource_collectors.iam import IAMCollector
|
|
30
|
+
from .resource_collectors.kms import KMSCollector
|
|
31
|
+
from .resource_collectors.lambda_func import LambdaCollector
|
|
32
|
+
from .resource_collectors.rds import RDSCollector
|
|
33
|
+
from .resource_collectors.route53 import Route53Collector
|
|
34
|
+
from .resource_collectors.s3 import S3Collector
|
|
35
|
+
from .resource_collectors.secretsmanager import SecretsManagerCollector
|
|
36
|
+
from .resource_collectors.sns import SNSCollector
|
|
37
|
+
from .resource_collectors.sqs import SQSCollector
|
|
38
|
+
from .resource_collectors.ssm import SSMCollector
|
|
39
|
+
from .resource_collectors.stepfunctions import StepFunctionsCollector
|
|
40
|
+
from .resource_collectors.vpcendpoints import VPCEndpointsCollector
|
|
41
|
+
from .resource_collectors.waf import WAFCollector
|
|
42
|
+
|
|
43
|
+
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Registry of all available collectors.
#
# BUGFIX: the collector classes are imported above only under
# `if TYPE_CHECKING:`, which makes them invisible at runtime, so building
# this module-level list raised NameError on import. Import them for real
# here, immediately before first use.
from .resource_collectors.apigateway import APIGatewayCollector
from .resource_collectors.backup import BackupCollector
from .resource_collectors.base import BaseResourceCollector
from .resource_collectors.cloudformation import CloudFormationCollector
from .resource_collectors.cloudwatch import CloudWatchCollector
from .resource_collectors.codebuild import CodeBuildCollector
from .resource_collectors.codepipeline import CodePipelineCollector
from .resource_collectors.dynamodb import DynamoDBCollector
from .resource_collectors.ec2 import EC2Collector
from .resource_collectors.ecs import ECSCollector
from .resource_collectors.eks import EKSCollector
from .resource_collectors.elb import ELBCollector
from .resource_collectors.eventbridge import EventBridgeCollector
from .resource_collectors.iam import IAMCollector
from .resource_collectors.kms import KMSCollector
from .resource_collectors.lambda_func import LambdaCollector
from .resource_collectors.rds import RDSCollector
from .resource_collectors.route53 import Route53Collector
from .resource_collectors.s3 import S3Collector
from .resource_collectors.secretsmanager import SecretsManagerCollector
from .resource_collectors.sns import SNSCollector
from .resource_collectors.sqs import SQSCollector
from .resource_collectors.ssm import SSMCollector
from .resource_collectors.stepfunctions import StepFunctionsCollector
from .resource_collectors.vpcendpoints import VPCEndpointsCollector
from .resource_collectors.waf import WAFCollector

COLLECTOR_REGISTRY: List[Type[BaseResourceCollector]] = [
    IAMCollector,
    LambdaCollector,
    S3Collector,
    EC2Collector,
    RDSCollector,
    CloudWatchCollector,
    SNSCollector,
    SQSCollector,
    DynamoDBCollector,
    ELBCollector,
    CloudFormationCollector,
    APIGatewayCollector,
    EventBridgeCollector,
    SecretsManagerCollector,
    KMSCollector,
    SSMCollector,
    Route53Collector,
    ECSCollector,
    StepFunctionsCollector,
    VPCEndpointsCollector,
    WAFCollector,
    EKSCollector,
    CodePipelineCollector,
    CodeBuildCollector,
    BackupCollector,
]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def create_snapshot(
    name: str,
    regions: List[str],
    account_id: str,
    profile_name: Optional[str] = None,
    set_active: bool = True,
    resource_types: Optional[List[str]] = None,
    parallel_workers: int = 10,
    resource_filter: Optional["ResourceFilter"] = None,
    inventory_name: str = "default",
) -> Snapshot:
    """Create a comprehensive snapshot of AWS resources.

    Collection is fanned out across a thread pool: global services are
    collected once (against us-east-1), regional services once per region.
    Failures matching a known "service unavailable" pattern are silently
    skipped; other errors are recorded in the snapshot metadata.

    Args:
        name: Snapshot name
        regions: List of AWS regions to scan
        account_id: AWS account ID
        profile_name: AWS profile name (optional)
        set_active: Whether to set as active baseline
        resource_types: Optional list of resource types to collect (e.g., ['iam', 'lambda'])
        parallel_workers: Number of parallel collection tasks
        resource_filter: Optional ResourceFilter for date/tag-based filtering
        inventory_name: Name of inventory this snapshot belongs to (default: "default")

    Returns:
        Snapshot instance with captured resources
    """
    logger.debug(f"Creating snapshot '{name}' for regions: {regions}")

    # Create session with optional profile. The single session is shared by
    # every collector thread below (boto3 Sessions are used read-only here).
    session_kwargs = {}
    if profile_name:
        session_kwargs["profile_name"] = profile_name

    session = boto3.Session(**session_kwargs)

    # Shared mutable state, guarded by `lock` inside the worker threads.
    all_resources = []
    resource_counts = {}  # Track counts per service for progress
    collection_errors = []  # Track errors for summary
    # NOTE(review): resource_counts is populated below but never read
    # afterwards — looks like leftover progress bookkeeping; confirm before
    # removing.

    # Expected errors that we'll suppress (service not enabled, pagination issues, etc.)
    expected_error_patterns = [
        "Operation cannot be paginated",
        "is not subscribed",
        "AccessDenied",
        "not authorized",
        "InvalidAction",
        "OptInRequired",
    ]

    def is_expected_error(error_msg: str) -> bool:
        """Check if error is expected and can be safely ignored."""
        return any(pattern in error_msg for pattern in expected_error_patterns)

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
    ) as progress:
        # Determine which collectors to use
        collectors_to_use = _get_collectors(resource_types)

        # Separate global and regional collectors.
        # Create temporary instances (against us-east-1) only to read the
        # is_global_service property; the instances are then discarded.
        global_collectors = []
        regional_collectors = []
        for c in collectors_to_use:
            temp_instance = c(session, "us-east-1")
            if temp_instance.is_global_service:
                global_collectors.append(c)
            else:
                regional_collectors.append(c)

        # One progress tick per (collector, region) pair; globals count once.
        total_tasks = len(global_collectors) + (len(regional_collectors) * len(regions))
        main_task = progress.add_task(
            f"[bold]Collecting AWS resources from {len(regions)} region(s)...", total=total_tasks
        )

        # Thread-safe lock for updating shared state
        lock = Lock()

        # Forward reference in quotes: BaseResourceCollector is imported under
        # TYPE_CHECKING only, so the bare name is not defined at runtime.
        def collect_service(collector_class: Type["BaseResourceCollector"], region: str, is_global: bool = False) -> Dict:
            """Collect resources for a single service in a region (thread-safe)."""
            try:
                collector = collector_class(session, region)
                service_name = collector.service_name.upper()
                region_label = "global" if is_global else region

                # Update progress (thread-safe)
                with lock:
                    progress.update(main_task, description=f"📦 {service_name} • {region_label}")

                resources = collector.collect()

                logger.debug(f"Collected {len(resources)} {service_name} resources from {region_label}")

                return {"success": True, "resources": resources, "service": service_name, "region": region_label}

            except Exception as e:
                error_msg = str(e)
                # collector may not exist if the constructor raised; derive the
                # service name from the class name instead.
                service_name = collector_class.__name__.replace("Collector", "").upper()
                region_label = "global" if is_global else region

                if not is_expected_error(error_msg):
                    logger.debug(f"Collection error - {service_name} ({region_label}): {error_msg[:80]}")
                    return {
                        "success": False,
                        "error": {"service": service_name, "region": region_label, "error": error_msg[:100]},
                    }
                else:
                    logger.debug(f"Skipping {service_name} in {region_label} (not available): {error_msg[:80]}")
                    return {"success": False, "expected": True}

        # Create list of collection tasks
        collection_tasks = []

        # Add global service tasks (always collected against us-east-1)
        for collector_class in global_collectors:
            collection_tasks.append((collector_class, "us-east-1", True))

        # Add regional service tasks
        for region in regions:
            for collector_class in regional_collectors:
                collection_tasks.append((collector_class, region, False))

        # Execute collections in parallel
        with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
            # Submit all tasks
            future_to_task = {
                executor.submit(collect_service, collector_class, region, is_global): (
                    collector_class,
                    region,
                    is_global,
                )
                for collector_class, region, is_global in collection_tasks
            }

            # Process completed tasks as they finish (arbitrary order)
            for future in as_completed(future_to_task):
                result = future.result()

                if result["success"]:
                    with lock:
                        all_resources.extend(result["resources"])
                        if result["region"] == "global":
                            resource_counts[result["service"]] = len(result["resources"])
                        else:
                            key = f"{result['service']}_{result['region']}"
                            resource_counts[key] = len(result["resources"])
                elif not result.get("expected", False):
                    with lock:
                        collection_errors.append(result["error"])

                # Advance progress (thread-safe)
                with lock:
                    progress.advance(main_task)

        progress.update(main_task, description=f"[bold green]✓ Successfully collected {len(all_resources)} resources")

    # Log summary of collection errors if any (but not expected ones)
    if collection_errors:
        logger.debug(f"\nCollection completed with {len(collection_errors)} service(s) unavailable")
        logger.debug("Services that failed:")
        for error in collection_errors:
            logger.debug(f" - {error['service']} ({error['region']}): {error['error']}")

    # Apply filters if specified; record stats so the snapshot is auditable.
    total_before_filter = len(all_resources)
    filters_applied = None

    if resource_filter:
        logger.debug(f"Applying filters: {resource_filter.get_filter_summary()}")
        all_resources = resource_filter.apply(all_resources)
        filter_stats = resource_filter.get_statistics_summary()

        filters_applied = {
            "date_filters": {
                "before_date": resource_filter.before_date.isoformat() if resource_filter.before_date else None,
                "after_date": resource_filter.after_date.isoformat() if resource_filter.after_date else None,
            },
            "tag_filters": resource_filter.required_tags,
            "statistics": filter_stats,
        }

        logger.debug(
            f"Filtering complete: {filter_stats['total_collected']} collected, "
            f"{filter_stats['final_count']} matched filters"
        )

    # Calculate service counts (keyed by full resource_type, post-filter)
    service_counts: Dict[str, int] = {}
    for resource in all_resources:
        service_counts[resource.resource_type] = service_counts.get(resource.resource_type, 0) + 1

    # Create snapshot
    snapshot = Snapshot(
        name=name,
        created_at=datetime.now(timezone.utc),
        account_id=account_id,
        regions=regions,
        resources=all_resources,
        metadata={
            "tool": "aws-inventory-manager",
            "version": "1.0.0",
            "collectors_used": [c(session, "us-east-1").service_name for c in collectors_to_use],
            "collection_errors": collection_errors if collection_errors else None,
        },
        is_active=set_active,
        service_counts=service_counts,
        filters_applied=filters_applied,
        total_resources_before_filter=total_before_filter if resource_filter else None,
        inventory_name=inventory_name,
    )

    logger.debug(f"Snapshot '{name}' created with {len(all_resources)} resources")

    return snapshot
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def create_snapshot_mvp(
    name: str,
    regions: List[str],
    account_id: str,
    profile_name: Optional[str] = None,
    set_active: bool = True,
) -> Snapshot:
    """Create snapshot using the full implementation.

    Backward-compatibility shim kept for the MVP CLI code path; it simply
    forwards its arguments to create_snapshot().

    Args:
        name: Snapshot name
        regions: List of AWS regions to scan
        account_id: AWS account ID
        profile_name: AWS profile name (optional)
        set_active: Whether to set as active baseline

    Returns:
        Snapshot instance with captured resources
    """
    forwarded = {
        "name": name,
        "regions": regions,
        "account_id": account_id,
        "profile_name": profile_name,
        "set_active": set_active,
    }
    return create_snapshot(**forwarded)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _get_collectors(resource_types: Optional[List[str]] = None) -> List[Type["BaseResourceCollector"]]:
    """Get list of collectors to use based on resource type filter.

    Args:
        resource_types: Optional list of service names to filter (e.g., ['iam', 'lambda'])

    Returns:
        List of collector classes to use; the full registry when the filter
        is empty or matches nothing.
    """
    if not resource_types:
        return COLLECTOR_REGISTRY

    # One throwaway session for introspection. The previous version built a
    # fresh boto3.Session for every collector in the loop just to read
    # service_name — wasteful and needlessly slow.
    session = boto3.Session()
    wanted = set(resource_types)  # O(1) membership per collector
    filtered = [
        collector_class
        for collector_class in COLLECTOR_REGISTRY
        if collector_class(session, "us-east-1").service_name in wanted
    ]

    # An unmatched filter falls back to everything rather than collecting nothing.
    return filtered if filtered else COLLECTOR_REGISTRY
|