runbooks 0.2.5__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- conftest.py +26 -0
- jupyter-agent/.env.template +2 -0
- jupyter-agent/.gitattributes +35 -0
- jupyter-agent/README.md +16 -0
- jupyter-agent/app.py +256 -0
- jupyter-agent/cloudops-agent.png +0 -0
- jupyter-agent/ds-system-prompt.txt +154 -0
- jupyter-agent/jupyter-agent.png +0 -0
- jupyter-agent/llama3_template.jinja +123 -0
- jupyter-agent/requirements.txt +9 -0
- jupyter-agent/utils.py +409 -0
- runbooks/__init__.py +71 -3
- runbooks/__main__.py +13 -0
- runbooks/aws/ec2_describe_instances.py +1 -1
- runbooks/aws/ec2_run_instances.py +8 -2
- runbooks/aws/ec2_start_stop_instances.py +17 -4
- runbooks/aws/ec2_unused_volumes.py +5 -1
- runbooks/aws/s3_create_bucket.py +4 -2
- runbooks/aws/s3_list_objects.py +6 -1
- runbooks/aws/tagging_lambda_handler.py +13 -2
- runbooks/aws/tags.json +12 -0
- runbooks/base.py +353 -0
- runbooks/cfat/README.md +49 -0
- runbooks/cfat/__init__.py +74 -0
- runbooks/cfat/app.ts +644 -0
- runbooks/cfat/assessment/__init__.py +40 -0
- runbooks/cfat/assessment/asana-import.csv +39 -0
- runbooks/cfat/assessment/cfat-checks.csv +31 -0
- runbooks/cfat/assessment/cfat.txt +520 -0
- runbooks/cfat/assessment/collectors.py +200 -0
- runbooks/cfat/assessment/jira-import.csv +39 -0
- runbooks/cfat/assessment/runner.py +387 -0
- runbooks/cfat/assessment/validators.py +290 -0
- runbooks/cfat/cli.py +103 -0
- runbooks/cfat/docs/asana-import.csv +24 -0
- runbooks/cfat/docs/cfat-checks.csv +31 -0
- runbooks/cfat/docs/cfat.txt +335 -0
- runbooks/cfat/docs/checks-output.png +0 -0
- runbooks/cfat/docs/cloudshell-console-run.png +0 -0
- runbooks/cfat/docs/cloudshell-download.png +0 -0
- runbooks/cfat/docs/cloudshell-output.png +0 -0
- runbooks/cfat/docs/downloadfile.png +0 -0
- runbooks/cfat/docs/jira-import.csv +24 -0
- runbooks/cfat/docs/open-cloudshell.png +0 -0
- runbooks/cfat/docs/report-header.png +0 -0
- runbooks/cfat/models.py +1026 -0
- runbooks/cfat/package-lock.json +5116 -0
- runbooks/cfat/package.json +38 -0
- runbooks/cfat/report.py +496 -0
- runbooks/cfat/reporting/__init__.py +46 -0
- runbooks/cfat/reporting/exporters.py +337 -0
- runbooks/cfat/reporting/formatters.py +496 -0
- runbooks/cfat/reporting/templates.py +135 -0
- runbooks/cfat/run-assessment.sh +23 -0
- runbooks/cfat/runner.py +69 -0
- runbooks/cfat/src/actions/check-cloudtrail-existence.ts +43 -0
- runbooks/cfat/src/actions/check-config-existence.ts +37 -0
- runbooks/cfat/src/actions/check-control-tower.ts +37 -0
- runbooks/cfat/src/actions/check-ec2-existence.ts +46 -0
- runbooks/cfat/src/actions/check-iam-users.ts +50 -0
- runbooks/cfat/src/actions/check-legacy-cur.ts +30 -0
- runbooks/cfat/src/actions/check-org-cloudformation.ts +30 -0
- runbooks/cfat/src/actions/check-vpc-existence.ts +43 -0
- runbooks/cfat/src/actions/create-asanaimport.ts +14 -0
- runbooks/cfat/src/actions/create-backlog.ts +372 -0
- runbooks/cfat/src/actions/create-jiraimport.ts +15 -0
- runbooks/cfat/src/actions/create-report.ts +616 -0
- runbooks/cfat/src/actions/define-account-type.ts +51 -0
- runbooks/cfat/src/actions/get-enabled-org-policy-types.ts +40 -0
- runbooks/cfat/src/actions/get-enabled-org-services.ts +26 -0
- runbooks/cfat/src/actions/get-idc-info.ts +34 -0
- runbooks/cfat/src/actions/get-org-da-accounts.ts +34 -0
- runbooks/cfat/src/actions/get-org-details.ts +35 -0
- runbooks/cfat/src/actions/get-org-member-accounts.ts +44 -0
- runbooks/cfat/src/actions/get-org-ous.ts +35 -0
- runbooks/cfat/src/actions/get-regions.ts +22 -0
- runbooks/cfat/src/actions/zip-assessment.ts +27 -0
- runbooks/cfat/src/types/index.d.ts +147 -0
- runbooks/cfat/tests/__init__.py +141 -0
- runbooks/cfat/tests/test_cli.py +340 -0
- runbooks/cfat/tests/test_integration.py +290 -0
- runbooks/cfat/tests/test_models.py +505 -0
- runbooks/cfat/tests/test_reporting.py +354 -0
- runbooks/cfat/tsconfig.json +16 -0
- runbooks/cfat/webpack.config.cjs +27 -0
- runbooks/config.py +260 -0
- runbooks/finops/__init__.py +88 -0
- runbooks/finops/aws_client.py +245 -0
- runbooks/finops/cli.py +151 -0
- runbooks/finops/cost_processor.py +410 -0
- runbooks/finops/dashboard_runner.py +448 -0
- runbooks/finops/helpers.py +355 -0
- runbooks/finops/main.py +14 -0
- runbooks/finops/profile_processor.py +174 -0
- runbooks/finops/types.py +66 -0
- runbooks/finops/visualisations.py +80 -0
- runbooks/inventory/.gitignore +354 -0
- runbooks/inventory/ArgumentsClass.py +261 -0
- runbooks/inventory/Inventory_Modules.py +6130 -0
- runbooks/inventory/LandingZone/delete_lz.py +1075 -0
- runbooks/inventory/README.md +1320 -0
- runbooks/inventory/__init__.py +62 -0
- runbooks/inventory/account_class.py +532 -0
- runbooks/inventory/all_my_instances_wrapper.py +123 -0
- runbooks/inventory/aws_decorators.py +201 -0
- runbooks/inventory/cfn_move_stack_instances.py +1526 -0
- runbooks/inventory/check_cloudtrail_compliance.py +614 -0
- runbooks/inventory/check_controltower_readiness.py +1107 -0
- runbooks/inventory/check_landingzone_readiness.py +711 -0
- runbooks/inventory/cloudtrail.md +727 -0
- runbooks/inventory/collectors/__init__.py +20 -0
- runbooks/inventory/collectors/aws_compute.py +518 -0
- runbooks/inventory/collectors/aws_networking.py +275 -0
- runbooks/inventory/collectors/base.py +222 -0
- runbooks/inventory/core/__init__.py +19 -0
- runbooks/inventory/core/collector.py +303 -0
- runbooks/inventory/core/formatter.py +296 -0
- runbooks/inventory/delete_s3_buckets_objects.py +169 -0
- runbooks/inventory/discovery.md +81 -0
- runbooks/inventory/draw_org_structure.py +748 -0
- runbooks/inventory/ec2_vpc_utils.py +341 -0
- runbooks/inventory/find_cfn_drift_detection.py +272 -0
- runbooks/inventory/find_cfn_orphaned_stacks.py +719 -0
- runbooks/inventory/find_cfn_stackset_drift.py +733 -0
- runbooks/inventory/find_ec2_security_groups.py +669 -0
- runbooks/inventory/find_landingzone_versions.py +201 -0
- runbooks/inventory/find_vpc_flow_logs.py +1221 -0
- runbooks/inventory/inventory.sh +659 -0
- runbooks/inventory/list_cfn_stacks.py +558 -0
- runbooks/inventory/list_cfn_stackset_operation_results.py +252 -0
- runbooks/inventory/list_cfn_stackset_operations.py +734 -0
- runbooks/inventory/list_cfn_stacksets.py +453 -0
- runbooks/inventory/list_config_recorders_delivery_channels.py +681 -0
- runbooks/inventory/list_ds_directories.py +354 -0
- runbooks/inventory/list_ec2_availability_zones.py +286 -0
- runbooks/inventory/list_ec2_ebs_volumes.py +244 -0
- runbooks/inventory/list_ec2_instances.py +425 -0
- runbooks/inventory/list_ecs_clusters_and_tasks.py +562 -0
- runbooks/inventory/list_elbs_load_balancers.py +411 -0
- runbooks/inventory/list_enis_network_interfaces.py +526 -0
- runbooks/inventory/list_guardduty_detectors.py +568 -0
- runbooks/inventory/list_iam_policies.py +404 -0
- runbooks/inventory/list_iam_roles.py +518 -0
- runbooks/inventory/list_iam_saml_providers.py +359 -0
- runbooks/inventory/list_lambda_functions.py +882 -0
- runbooks/inventory/list_org_accounts.py +446 -0
- runbooks/inventory/list_org_accounts_users.py +354 -0
- runbooks/inventory/list_rds_db_instances.py +406 -0
- runbooks/inventory/list_route53_hosted_zones.py +318 -0
- runbooks/inventory/list_servicecatalog_provisioned_products.py +575 -0
- runbooks/inventory/list_sns_topics.py +360 -0
- runbooks/inventory/list_ssm_parameters.py +402 -0
- runbooks/inventory/list_vpc_subnets.py +433 -0
- runbooks/inventory/list_vpcs.py +422 -0
- runbooks/inventory/lockdown_cfn_stackset_role.py +224 -0
- runbooks/inventory/models/__init__.py +24 -0
- runbooks/inventory/models/account.py +192 -0
- runbooks/inventory/models/inventory.py +309 -0
- runbooks/inventory/models/resource.py +247 -0
- runbooks/inventory/recover_cfn_stack_ids.py +205 -0
- runbooks/inventory/requirements.txt +12 -0
- runbooks/inventory/run_on_multi_accounts.py +211 -0
- runbooks/inventory/tests/common_test_data.py +3661 -0
- runbooks/inventory/tests/common_test_functions.py +204 -0
- runbooks/inventory/tests/script_test_data.py +0 -0
- runbooks/inventory/tests/setup.py +24 -0
- runbooks/inventory/tests/src.py +18 -0
- runbooks/inventory/tests/test_cfn_describe_stacks.py +208 -0
- runbooks/inventory/tests/test_ec2_describe_instances.py +162 -0
- runbooks/inventory/tests/test_inventory_modules.py +55 -0
- runbooks/inventory/tests/test_lambda_list_functions.py +86 -0
- runbooks/inventory/tests/test_moto_integration_example.py +273 -0
- runbooks/inventory/tests/test_org_list_accounts.py +49 -0
- runbooks/inventory/update_aws_actions.py +173 -0
- runbooks/inventory/update_cfn_stacksets.py +1215 -0
- runbooks/inventory/update_cloudwatch_logs_retention_policy.py +294 -0
- runbooks/inventory/update_iam_roles_cross_accounts.py +478 -0
- runbooks/inventory/update_s3_public_access_block.py +539 -0
- runbooks/inventory/utils/__init__.py +23 -0
- runbooks/inventory/utils/aws_helpers.py +510 -0
- runbooks/inventory/utils/threading_utils.py +493 -0
- runbooks/inventory/utils/validation.py +682 -0
- runbooks/inventory/verify_ec2_security_groups.py +1430 -0
- runbooks/main.py +785 -0
- runbooks/organizations/__init__.py +12 -0
- runbooks/organizations/manager.py +374 -0
- runbooks/security_baseline/README.md +324 -0
- runbooks/security_baseline/checklist/alternate_contacts.py +8 -1
- runbooks/security_baseline/checklist/bucket_public_access.py +4 -1
- runbooks/security_baseline/checklist/cloudwatch_alarm_configuration.py +9 -2
- runbooks/security_baseline/checklist/guardduty_enabled.py +9 -2
- runbooks/security_baseline/checklist/multi_region_instance_usage.py +5 -1
- runbooks/security_baseline/checklist/root_access_key.py +6 -1
- runbooks/security_baseline/config-origin.json +1 -1
- runbooks/security_baseline/config.json +1 -1
- runbooks/security_baseline/permission.json +1 -1
- runbooks/security_baseline/report_generator.py +10 -2
- runbooks/security_baseline/report_template_en.html +7 -7
- runbooks/security_baseline/report_template_jp.html +7 -7
- runbooks/security_baseline/report_template_kr.html +12 -12
- runbooks/security_baseline/report_template_vn.html +7 -7
- runbooks/security_baseline/requirements.txt +7 -0
- runbooks/security_baseline/run_script.py +8 -2
- runbooks/security_baseline/security_baseline_tester.py +10 -2
- runbooks/security_baseline/utils/common.py +5 -1
- runbooks/utils/__init__.py +204 -0
- runbooks-0.6.1.dist-info/METADATA +373 -0
- runbooks-0.6.1.dist-info/RECORD +237 -0
- {runbooks-0.2.5.dist-info → runbooks-0.6.1.dist-info}/WHEEL +1 -1
- runbooks-0.6.1.dist-info/entry_points.txt +7 -0
- runbooks-0.6.1.dist-info/licenses/LICENSE +201 -0
- runbooks-0.6.1.dist-info/top_level.txt +3 -0
- runbooks/python101/calculator.py +0 -34
- runbooks/python101/config.py +0 -1
- runbooks/python101/exceptions.py +0 -16
- runbooks/python101/file_manager.py +0 -218
- runbooks/python101/toolkit.py +0 -153
- runbooks-0.2.5.dist-info/METADATA +0 -439
- runbooks-0.2.5.dist-info/RECORD +0 -61
- runbooks-0.2.5.dist-info/entry_points.txt +0 -3
- runbooks-0.2.5.dist-info/top_level.txt +0 -1
@@ -0,0 +1,1221 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
AWS VPC Flow Log Analysis and Data Transfer Calculation Script
|
4
|
+
|
5
|
+
Comprehensive enterprise-grade tool for analyzing VPC Flow Logs across multi-account AWS Organizations
|
6
|
+
environments to calculate outbound data transfer volumes and costs. Designed for network traffic analysis,
|
7
|
+
cost optimization, and bandwidth monitoring with advanced CloudWatch Logs query capabilities and automated
|
8
|
+
data aggregation across organizational boundaries.
|
9
|
+
|
10
|
+
Key Features:
|
11
|
+
- Multi-account, multi-region VPC Flow Log discovery and analysis
|
12
|
+
- Automated CloudWatch Logs query generation for outbound data transfer calculation
|
13
|
+
- CIDR block-aware traffic filtering for accurate internal vs external data classification
|
14
|
+
- Comprehensive date range support with configurable analysis periods
|
15
|
+
- Enterprise authentication with cross-account role assumption
|
16
|
+
- Real-time query progress monitoring with performance optimization
|
17
|
+
- Structured data export for integration with cost analysis and reporting systems
|
18
|
+
|
19
|
+
Enterprise Capabilities:
|
20
|
+
- Organizational network traffic visibility and cost analysis
|
21
|
+
- Multi-environment bandwidth monitoring and optimization
|
22
|
+
- Automated data transfer cost calculation and forecasting
|
23
|
+
- Cross-account VPC Flow Log aggregation and analysis
|
24
|
+
- Enterprise credential management with comprehensive error handling
|
25
|
+
- Scalable processing for large-scale multi-account environments
|
26
|
+
|
27
|
+
Operational Use Cases:
|
28
|
+
- Network traffic cost analysis and optimization across organizational VPCs
|
29
|
+
- Bandwidth monitoring and capacity planning for enterprise workloads
|
30
|
+
- Data transfer cost attribution and chargeback for business units
|
31
|
+
- Security analysis through network traffic pattern identification
|
32
|
+
- Compliance monitoring for data transfer and network access patterns
|
33
|
+
|
34
|
+
Output Format:
|
35
|
+
- Tabular display with Account, Region, VPC Name, CIDR Block, and data transfer metrics
|
36
|
+
- Comprehensive operational metrics including query duration and data volumes
|
37
|
+
- Color-coded terminal output for enhanced operational visibility
|
38
|
+
- Optional CSV/JSON export for integration with enterprise cost management systems
|
39
|
+
|
40
|
+
Authentication & Security:
|
41
|
+
- Multi-profile AWS credential management with cross-account role assumption
|
42
|
+
- Regional validation and access control for secure VPC Flow Log operations
|
43
|
+
- Comprehensive error handling for credential retrieval and configuration issues
|
44
|
+
- CloudWatch Logs access validation ensuring authorized data querying
|
45
|
+
|
46
|
+
Performance & Scale:
|
47
|
+
- Efficient multi-threaded CloudWatch Logs query processing
|
48
|
+
- Memory-efficient processing for extensive VPC Flow Log data analysis
|
49
|
+
- Configurable query timeouts and retry mechanisms for large datasets
|
50
|
+
- Optimized API usage patterns for improved performance and reduced costs
|
51
|
+
|
52
|
+
Flow Log Analysis Logic:
|
53
|
+
- VPC Flow Log discovery through EC2 describe_flow_logs API
|
54
|
+
- CIDR block enumeration and network address calculation for traffic classification
|
55
|
+
- Dynamic CloudWatch Logs query generation based on VPC network topology
|
56
|
+
- Advanced IP address filtering for accurate outbound traffic identification
|
57
|
+
- Statistical aggregation of byte transfer volumes across specified time periods
|
58
|
+
|
59
|
+
CloudWatch Integration:
|
60
|
+
- Automated log group retention policy validation and adjustment
|
61
|
+
- CloudWatch Logs Insights query optimization for large-scale data analysis
|
62
|
+
- Real-time query status monitoring with timeout management
|
63
|
+
- Comprehensive error handling for CloudWatch API limitations
|
64
|
+
|
65
|
+
Error Handling & Resilience:
|
66
|
+
- AWS API authorization failure detection with detailed troubleshooting guidance
|
67
|
+
- CloudWatch Logs query timeout management with graceful degradation
|
68
|
+
- Network connectivity error handling with operation retry capabilities
|
69
|
+
- Comprehensive credential validation and rotation support
|
70
|
+
|
71
|
+
Dependencies:
|
72
|
+
- boto3: AWS SDK for EC2, CloudWatch Logs, and STS operations
|
73
|
+
- Custom modules: Inventory_Modules, ArgumentsClass, account_class
|
74
|
+
- colorama: Enhanced terminal output and progress indicators
|
75
|
+
- ipaddress: Advanced IP network calculation and CIDR block processing
|
76
|
+
|
77
|
+
Authors: AWS CloudOps Team
|
78
|
+
Version: 2024.03.10
|
79
|
+
License: MIT
|
80
|
+
"""
|
81
|
+
|
82
|
+
import logging
|
83
|
+
import platform
|
84
|
+
import sys
|
85
|
+
from datetime import datetime, timedelta
|
86
|
+
from os.path import split
|
87
|
+
from time import sleep, time
|
88
|
+
from typing import Any, List
|
89
|
+
|
90
|
+
import boto3
|
91
|
+
import Inventory_Modules
|
92
|
+
from account_class import aws_acct_access
|
93
|
+
from ArgumentsClass import CommonArguments
|
94
|
+
from botocore.config import Config
|
95
|
+
from botocore.exceptions import ClientError
|
96
|
+
from colorama import Fore, init
|
97
|
+
from Inventory_Modules import RemoveCoreAccounts, display_results, get_all_credentials, get_regions3
|
98
|
+
|
99
|
+
# Initialize colorama for cross-platform colored terminal output
|
100
|
+
init()
|
101
|
+
__version__ = "2024.03.10"
|
102
|
+
ERASE_LINE = "\x1b[2K" # ANSI escape sequence for clearing terminal line during progress display
|
103
|
+
begin_time = time() # Script execution timing for performance monitoring
|
104
|
+
sleep_interval = 5 # Default wait interval for CloudWatch Logs query processing
|
105
|
+
|
106
|
+
|
107
|
+
#####################
|
108
|
+
# Functions
|
109
|
+
#####################
|
110
|
+
|
111
|
+
|
112
|
+
def parse_args(args):
|
113
|
+
"""
|
114
|
+
Parse and validate command line arguments for VPC Flow Log analysis operations.
|
115
|
+
|
116
|
+
Configures comprehensive CLI argument parsing with enterprise-grade options for VPC Flow Log
|
117
|
+
analysis, including profile authentication, regional targeting, date range specification,
|
118
|
+
cross-account role assumption, and data export configuration for organizational network
|
119
|
+
traffic analysis and cost optimization operations.
|
120
|
+
|
121
|
+
Args:
|
122
|
+
args (list): Command line arguments from sys.argv[1:] for argument parsing
|
123
|
+
|
124
|
+
Returns:
|
125
|
+
argparse.Namespace: Parsed arguments object containing:
|
126
|
+
- Profile: AWS profile name for authentication and cross-account access
|
127
|
+
- Regions: List of target AWS regions for VPC Flow Log analysis
|
128
|
+
- AccessRole: Cross-account IAM role name for multi-account operations
|
129
|
+
- RootOnly: Boolean flag for management account only vs organizational analysis
|
130
|
+
- Accounts: List of specific account IDs for targeted analysis
|
131
|
+
- SkipAccounts: List of account IDs to exclude from analysis
|
132
|
+
- pStartDate: Analysis start date in YYYY-MM-DD format for historical analysis
|
133
|
+
- pEndDate: Analysis end date in YYYY-MM-DD format for period boundaries
|
134
|
+
- Filename: Optional output file path for results export and persistence
|
135
|
+
- Time: Boolean flag for execution timing and performance monitoring
|
136
|
+
- loglevel: Logging verbosity level for operational visibility
|
137
|
+
|
138
|
+
CLI Arguments:
|
139
|
+
Authentication & Targeting:
|
140
|
+
--profile (-p): AWS profile for authentication and account access
|
141
|
+
--regions (-r): Target AWS regions for VPC Flow Log analysis
|
142
|
+
--role: Cross-account IAM role for multi-account VPC access
|
143
|
+
|
144
|
+
Account Filtering & Scope:
|
145
|
+
--rootonly: Limit analysis to management account only
|
146
|
+
--accounts: Specific account IDs for targeted VPC Flow Log analysis
|
147
|
+
--skipaccounts: Account IDs to exclude from organizational analysis
|
148
|
+
|
149
|
+
Date Range Configuration:
|
150
|
+
--start: Analysis start date (YYYY-MM-DD format) for historical data analysis
|
151
|
+
--end: Analysis end date (YYYY-MM-DD format) for period boundary definition
|
152
|
+
|
153
|
+
Output & Reporting:
|
154
|
+
--filename: Output file path for results export (CSV/JSON format)
|
155
|
+
--timing (-t): Enable execution timing and performance metrics
|
156
|
+
--loglevel (-v): Logging verbosity (DEBUG, INFO, WARNING, ERROR)
|
157
|
+
|
158
|
+
Enterprise Features:
|
159
|
+
- Multi-profile authentication for organizational account management
|
160
|
+
- Regional targeting with validation for VPC Flow Log analysis operations
|
161
|
+
- Cross-account role assumption for secure multi-account data access
|
162
|
+
- Flexible date range configuration for historical and real-time analysis
|
163
|
+
- Account filtering capabilities for targeted cost analysis and reporting
|
164
|
+
|
165
|
+
Validation & Error Handling:
|
166
|
+
- AWS profile validation with comprehensive error messaging
|
167
|
+
- Regional access validation preventing unauthorized operations
|
168
|
+
- Date format validation with detailed error reporting
|
169
|
+
- Cross-account role validation ensuring proper IAM permissions
|
170
|
+
|
171
|
+
Date Range Logic:
|
172
|
+
- Default start date: Yesterday at 00:00:00 for recent activity analysis
|
173
|
+
- Default end date: Yesterday at 23:59:59 for complete daily analysis
|
174
|
+
- Custom date ranges: User-specified periods for historical cost analysis
|
175
|
+
- Automatic padding validation for single-digit months and days
|
176
|
+
"""
|
177
|
+
# Extract script name for argument group organization and help display
|
178
|
+
script_path, script_name = split(sys.argv[0])
|
179
|
+
|
180
|
+
# Initialize common argument parser with enterprise authentication and targeting
|
181
|
+
parser = CommonArguments()
|
182
|
+
parser.singleprofile() # AWS profile for authentication and account access
|
183
|
+
parser.multiregion() # Multi-region targeting for comprehensive VPC Flow Log analysis
|
184
|
+
parser.roletouse() # Cross-account IAM role for multi-account operations
|
185
|
+
parser.rootOnly() # Management account only vs organizational scope
|
186
|
+
parser.save_to_file() # Output file options for results export and persistence
|
187
|
+
parser.extendedargs() # Extended arguments for account filtering and targeting
|
188
|
+
parser.timing() # Execution timing for performance monitoring
|
189
|
+
parser.verbosity() # Configurable logging levels for operational visibility
|
190
|
+
parser.version(__version__) # Script version tracking for compatibility management
|
191
|
+
|
192
|
+
# Configure script-specific arguments for VPC Flow Log analysis
|
193
|
+
local = parser.my_parser.add_argument_group(script_name, "Parameters specific to this script")
|
194
|
+
|
195
|
+
# Analysis start date configuration for historical data analysis
|
196
|
+
local.add_argument(
|
197
|
+
"--start",
|
198
|
+
dest="pStartDate",
|
199
|
+
metavar="Start Date",
|
200
|
+
type=str,
|
201
|
+
default=None,
|
202
|
+
help="Start date for VPC Flow Log analysis. Format: YYYY-MM-DD with zero-padding for single digits. Default: yesterday at 00:00:00 for recent activity analysis.",
|
203
|
+
)
|
204
|
+
|
205
|
+
# Analysis end date configuration for period boundary definition
|
206
|
+
local.add_argument(
|
207
|
+
"--end",
|
208
|
+
dest="pEndDate",
|
209
|
+
metavar="End Date",
|
210
|
+
type=str,
|
211
|
+
default=None,
|
212
|
+
help="End date for VPC Flow Log analysis. Format: YYYY-MM-DD with zero-padding for single digits. Default: yesterday at 23:59:59 for complete daily analysis periods.",
|
213
|
+
)
|
214
|
+
|
215
|
+
return parser.my_parser.parse_args(args)
|
216
|
+
|
217
|
+
|
218
|
+
def setup_auth_accounts_and_regions(fProfile: str) -> (aws_acct_access, list, list):
|
219
|
+
"""
|
220
|
+
Configure AWS authentication and establish account/region scope for VPC Flow Log analysis operations.
|
221
|
+
|
222
|
+
Establishes secure AWS authentication and defines the operational scope for multi-account,
|
223
|
+
multi-region VPC Flow Log analysis including account filtering, region validation, and
|
224
|
+
comprehensive operational context display. Designed for enterprise organizational environments
|
225
|
+
with complex account hierarchies and regional distribution requirements.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
fProfile (str): AWS profile name for authentication and organizational account access
|
229
|
+
None or empty string defaults to default profile or environment credentials
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
tuple: Comprehensive authentication and scope configuration containing:
|
233
|
+
- aws_acct_access: Authenticated AWS account access object with organizational context
|
234
|
+
- AccountList: List of account IDs within scope for VPC Flow Log analysis
|
235
|
+
- RegionList: List of validated AWS regions for multi-region analysis operations
|
236
|
+
|
237
|
+
Authentication Process:
|
238
|
+
1. Initialize AWS account access object with organizational profile
|
239
|
+
2. Retrieve child accounts from AWS Organizations for multi-account operations
|
240
|
+
3. Validate and filter regional scope based on service availability
|
241
|
+
4. Apply account filtering based on user specifications and organizational policies
|
242
|
+
5. Display comprehensive operational context for user confirmation
|
243
|
+
|
244
|
+
Account Filtering Logic:
|
245
|
+
- No account list specified: Include all organizational child accounts
|
246
|
+
- Access role specified: Use provided account list for targeted analysis
|
247
|
+
- Account list provided: Filter child accounts to match specified accounts
|
248
|
+
- Skip accounts: Remove specified accounts from analysis scope
|
249
|
+
- Core account filtering: Exclude management/security accounts based on policy
|
250
|
+
|
251
|
+
Regional Scope Configuration:
|
252
|
+
- Multi-region validation ensuring VPC Flow Log service availability
|
253
|
+
- Regional access control preventing unauthorized cross-region operations
|
254
|
+
- Service endpoint validation for CloudWatch Logs and EC2 APIs
|
255
|
+
- Regional cost optimization through targeted geographic analysis
|
256
|
+
|
257
|
+
Operational Context Display:
|
258
|
+
- Account scope visualization with color-coded output for clarity
|
259
|
+
- Regional targeting confirmation for multi-region analysis operations
|
260
|
+
- Date range specification for historical and real-time analysis periods
|
261
|
+
- Account exclusion confirmation for organizational policy compliance
|
262
|
+
|
263
|
+
Error Handling:
|
264
|
+
- Connection error management with graceful exit and detailed logging
|
265
|
+
- Authentication failure detection with comprehensive troubleshooting guidance
|
266
|
+
- Organizational access validation ensuring proper cross-account permissions
|
267
|
+
- Regional service availability validation preventing operation failures
|
268
|
+
|
269
|
+
Enterprise Security:
|
270
|
+
- Multi-profile authentication for organizational account management
|
271
|
+
- Cross-account access validation through AWS Organizations integration
|
272
|
+
- Regional access control ensuring authorized VPC Flow Log operations
|
273
|
+
- Comprehensive audit logging for security and compliance tracking
|
274
|
+
|
275
|
+
Raises:
|
276
|
+
SystemExit: On authentication failures or connection errors with exit code 8
|
277
|
+
"""
|
278
|
+
try:
|
279
|
+
# Initialize AWS account access with organizational profile authentication
|
280
|
+
aws_acct = aws_acct_access(fProfile)
|
281
|
+
except ConnectionError as my_Error:
|
282
|
+
# Handle connection errors with detailed logging and graceful exit
|
283
|
+
logging.error(f"Exiting due to error: {my_Error}")
|
284
|
+
sys.exit(8)
|
285
|
+
|
286
|
+
# Extract organizational child accounts for multi-account VPC Flow Log analysis
|
287
|
+
ChildAccounts = aws_acct.ChildAccounts
|
288
|
+
|
289
|
+
# Validate and configure regional scope for VPC Flow Log service availability
|
290
|
+
RegionList = get_regions3(aws_acct, pRegionList)
|
291
|
+
|
292
|
+
# Apply account filtering based on organizational policies and user specifications
|
293
|
+
ChildAccounts = RemoveCoreAccounts(ChildAccounts, pSkipAccounts)
|
294
|
+
|
295
|
+
# Configure account scope based on user specifications and operational requirements
|
296
|
+
if pAccountList is None:
|
297
|
+
# Include all organizational child accounts when no specific targeting is specified
|
298
|
+
AccountList = [account["AccountId"] for account in ChildAccounts]
|
299
|
+
elif pAccessRole is not None:
|
300
|
+
# Use provided account list for targeted analysis with cross-account role assumption
|
301
|
+
AccountList = pAccountList
|
302
|
+
else:
|
303
|
+
# Filter child accounts to match user-specified account list
|
304
|
+
AccountList = [account["AccountId"] for account in ChildAccounts if account["AccountId"] in pAccountList]
|
305
|
+
|
306
|
+
# Display comprehensive operational context for user confirmation and audit logging
|
307
|
+
print(f"You asked to sum flow log data")
|
308
|
+
print(f"\tin these accounts: {Fore.RED}{AccountList}{Fore.RESET}")
|
309
|
+
print(f"\tin these regions: {Fore.RED}{RegionList}{Fore.RESET}")
|
310
|
+
print(f"\tFrom: {pStartDate} until {pEndDate}")
|
311
|
+
|
312
|
+
# Display account exclusion information for operational transparency
|
313
|
+
if pSkipAccounts is not None:
|
314
|
+
print(f"\tWhile skipping these accounts: {Fore.RED}{pSkipAccounts}{Fore.RESET}")
|
315
|
+
|
316
|
+
return aws_acct, AccountList, RegionList
|
317
|
+
|
318
|
+
|
319
|
+
def check_account_access(faws_acct, faccount_num, fAccessRole=None):
|
320
|
+
"""
|
321
|
+
Validate cross-account access through IAM role assumption for VPC Flow Log operations.
|
322
|
+
|
323
|
+
Performs comprehensive cross-account access validation using AWS STS assume role operations
|
324
|
+
to ensure proper IAM permissions for VPC Flow Log analysis across organizational boundaries.
|
325
|
+
Designed for enterprise multi-account environments with stringent security controls and
|
326
|
+
comprehensive error handling for various IAM policy and credential scenarios.
|
327
|
+
|
328
|
+
Args:
|
329
|
+
faws_acct: Authenticated AWS account access object containing management account session
|
330
|
+
faccount_num (str): Target AWS account number for cross-account role assumption
|
331
|
+
fAccessRole (str): IAM role name in target account for VPC Flow Log access
|
332
|
+
None results in validation failure with detailed error messaging
|
333
|
+
|
334
|
+
Returns:
|
335
|
+
dict: Comprehensive access validation result containing:
|
336
|
+
- Success: Boolean indicating successful cross-account access validation
|
337
|
+
- AccountNumber: Target account number for operational context
|
338
|
+
- Credentials: AWS STS temporary credentials for cross-account operations
|
339
|
+
- ErrorMessage: Detailed error information for troubleshooting and audit
|
340
|
+
|
341
|
+
Cross-Account Access Validation:
|
342
|
+
- IAM role ARN construction using account number and role name
|
343
|
+
- STS assume role operation with temporary session credentials
|
344
|
+
- Credential validation ensuring proper cross-account access permissions
|
345
|
+
- Session name assignment for audit tracking and security monitoring
|
346
|
+
|
347
|
+
Error Handling & Security:
|
348
|
+
- Role requirement validation preventing unauthorized access attempts
|
349
|
+
- AWS API client error detection with specific error categorization
|
350
|
+
- IAM policy validation with detailed error messaging for troubleshooting
|
351
|
+
- Regional access control validation ensuring authorized operations
|
352
|
+
- Token expiration detection with graceful degradation and retry guidance
|
353
|
+
|
354
|
+
IAM Policy Validation:
|
355
|
+
- Malformed policy document detection with specific error identification
|
356
|
+
- Policy size validation preventing oversized policy-related failures
|
357
|
+
- Cross-account trust relationship validation ensuring proper configuration
|
358
|
+
- Resource-based policy validation for VPC Flow Log access requirements
|
359
|
+
|
360
|
+
Security & Compliance:
|
361
|
+
- Temporary credential management with automatic expiration handling
|
362
|
+
- Cross-account access logging for security audit and compliance tracking
|
363
|
+
- Role assumption tracking with session naming for operational visibility
|
364
|
+
- Comprehensive error categorization for security incident response
|
365
|
+
|
366
|
+
Enterprise Features:
|
367
|
+
- Structured error response format for integration with enterprise monitoring
|
368
|
+
- Detailed error messaging for operational troubleshooting and resolution
|
369
|
+
- Security-focused validation preventing unauthorized cross-account access
|
370
|
+
- Audit-ready logging for compliance and security reporting requirements
|
371
|
+
|
372
|
+
Regional Considerations:
|
373
|
+
- Regional service availability validation for cross-account operations
|
374
|
+
- Region-specific IAM policy validation ensuring proper geographic access
|
375
|
+
- Cross-region access control validation preventing unauthorized operations
|
376
|
+
- Regional compliance enforcement through access validation controls
|
377
|
+
"""
|
378
|
+
# Validate role requirement for cross-account access security
|
379
|
+
if fAccessRole is None:
|
380
|
+
logging.error(f"Role must be provided")
|
381
|
+
return_response = {"Success": False, "ErrorMessage": "Role wasn't provided"}
|
382
|
+
return return_response
|
383
|
+
|
384
|
+
# Initialize STS client for cross-account role assumption operations
|
385
|
+
sts_client = faws_acct.session.client("sts")
|
386
|
+
|
387
|
+
try:
|
388
|
+
# Construct IAM role ARN for cross-account access validation
|
389
|
+
role_arn = f"arn:aws:iam::{faccount_num}:role/{fAccessRole}"
|
390
|
+
|
391
|
+
# Execute cross-account role assumption with temporary credential generation
|
392
|
+
credentials = sts_client.assume_role(RoleArn=role_arn, RoleSessionName="TheOtherGuy")["Credentials"]
|
393
|
+
|
394
|
+
# Return successful access validation with temporary credentials
|
395
|
+
return_response = {
|
396
|
+
"AccountNumber": faccount_num,
|
397
|
+
"Credentials": credentials,
|
398
|
+
"Success": True,
|
399
|
+
"ErrorMessage": "",
|
400
|
+
}
|
401
|
+
return return_response
|
402
|
+
|
403
|
+
except ClientError as my_Error:
|
404
|
+
# Handle AWS API client errors with detailed error logging
|
405
|
+
print(f"Client Error: {my_Error}")
|
406
|
+
return_response = {"Success": False, "ErrorMessage": "Client Error"}
|
407
|
+
return return_response
|
408
|
+
|
409
|
+
except sts_client.exceptions.MalformedPolicyDocumentException as my_Error:
|
410
|
+
# Handle IAM policy document format errors with specific error identification
|
411
|
+
print(f"MalformedPolicy: {my_Error}")
|
412
|
+
return_response = {"Success": False, "ErrorMessage": "Malformed Policy"}
|
413
|
+
return return_response
|
414
|
+
|
415
|
+
except sts_client.exceptions.PackedPolicyTooLargeException as my_Error:
|
416
|
+
# Handle IAM policy size limit errors with detailed error messaging
|
417
|
+
print(f"Policy is too large: {my_Error}")
|
418
|
+
return_response = {"Success": False, "ErrorMessage": "Policy is too large"}
|
419
|
+
return return_response
|
420
|
+
|
421
|
+
except sts_client.exceptions.RegionDisabledException as my_Error:
|
422
|
+
# Handle regional access control errors with geographic compliance messaging
|
423
|
+
print(f"Region is disabled: {my_Error}")
|
424
|
+
return_response = {"Success": False, "ErrorMessage": "Region Disabled"}
|
425
|
+
return return_response
|
426
|
+
|
427
|
+
except sts_client.exceptions.ExpiredTokenException as my_Error:
|
428
|
+
# Handle credential expiration errors with renewal guidance
|
429
|
+
print(f"Expired Token: {my_Error}")
|
430
|
+
return_response = {"Success": False, "ErrorMessage": "Expired Token"}
|
431
|
+
return return_response
|
432
|
+
|
433
|
+
|
434
|
+
def get_flow_log_cloudwatch_groups(ocredentials) -> list[dict]:
|
435
|
+
"""
|
436
|
+
Discover and enumerate VPC Flow Logs with CloudWatch Logs integration for data transfer analysis.
|
437
|
+
|
438
|
+
Performs comprehensive VPC Flow Log discovery within a specific AWS account and region,
|
439
|
+
identifying Flow Logs configured with CloudWatch Logs destinations for subsequent data
|
440
|
+
transfer analysis and cost calculation. Designed for enterprise multi-account environments
|
441
|
+
with extensive VPC topologies and varied Flow Log configurations.
|
442
|
+
|
443
|
+
Args:
|
444
|
+
ocredentials (dict): Cross-account credentials dictionary containing:
|
445
|
+
- AccessKeyId: AWS access key for temporary cross-account session
|
446
|
+
- SecretAccessKey: AWS secret key for authentication
|
447
|
+
- SessionToken: AWS session token for temporary credential validation
|
448
|
+
- AccountId: Target account identifier for Flow Log discovery
|
449
|
+
- Region: Target AWS region for Flow Log enumeration
|
450
|
+
|
451
|
+
Returns:
|
452
|
+
list[dict]: Comprehensive VPC Flow Log inventory containing:
|
453
|
+
- Credentials: Original credential object for subsequent CloudWatch operations
|
454
|
+
- AccountId: AWS account containing the VPC Flow Log configuration
|
455
|
+
- Region: AWS region containing the VPC Flow Log resources
|
456
|
+
- VPCId: VPC identifier for Flow Log association and analysis
|
457
|
+
- LogGroupName: CloudWatch Logs group name for data query operations
|
458
|
+
|
459
|
+
Flow Log Discovery Process:
|
460
|
+
1. Establish authenticated EC2 session using cross-account credentials
|
461
|
+
2. Execute describe_flow_logs API call for comprehensive Flow Log enumeration
|
462
|
+
3. Filter Flow Logs for VPC-associated configurations (vpc- prefix validation)
|
463
|
+
4. Extract CloudWatch Logs integration metadata for query preparation
|
464
|
+
5. Structure Flow Log inventory for downstream analysis operations
|
465
|
+
|
466
|
+
VPC Flow Log Filtering:
|
467
|
+
- CloudWatch Logs destination validation ensuring query capability
|
468
|
+
- VPC resource type filtering excluding non-VPC Flow Log configurations
|
469
|
+
- Active Flow Log status validation for operational analysis accuracy
|
470
|
+
- LogGroupName presence validation ensuring CloudWatch Logs integration
|
471
|
+
|
472
|
+
Enterprise Features:
|
473
|
+
- Cross-account credential management with temporary session handling
|
474
|
+
- Regional Flow Log discovery with comprehensive inventory aggregation
|
475
|
+
- Structured metadata extraction for integration with cost analysis systems
|
476
|
+
- Performance-optimized API usage patterns for large-scale VPC environments
|
477
|
+
|
478
|
+
CloudWatch Integration:
|
479
|
+
- LogGroupName extraction for subsequent CloudWatch Logs query operations
|
480
|
+
- Flow Log destination validation ensuring CloudWatch Logs accessibility
|
481
|
+
- Regional CloudWatch Logs service validation for cross-region analysis
|
482
|
+
- Credential preservation for downstream CloudWatch API operations
|
483
|
+
|
484
|
+
Error Handling:
|
485
|
+
- AWS API exception propagation for upstream error management
|
486
|
+
- Credential validation through successful EC2 API operations
|
487
|
+
- Regional service availability validation preventing operation failures
|
488
|
+
- Comprehensive error context preservation for troubleshooting
|
489
|
+
|
490
|
+
Performance Considerations:
|
491
|
+
- Memory-efficient list comprehension for large-scale Flow Log processing
|
492
|
+
- Regional API optimization reducing cross-region latency impacts
|
493
|
+
- Credential reuse optimization for multiple CloudWatch operations
|
494
|
+
- Structured data format optimization for downstream processing efficiency
|
495
|
+
|
496
|
+
Security & Compliance:
|
497
|
+
- Cross-account credential validation ensuring authorized VPC access
|
498
|
+
- Flow Log metadata extraction without exposing sensitive network data
|
499
|
+
- Regional access control validation for compliance with geographic policies
|
500
|
+
- Audit-ready logging for security and compliance reporting requirements
|
501
|
+
"""
|
502
|
+
# Establish authenticated EC2 session using cross-account credentials
|
503
|
+
session_ec2 = boto3.Session(
|
504
|
+
aws_access_key_id=ocredentials["AccessKeyId"],
|
505
|
+
aws_secret_access_key=ocredentials["SecretAccessKey"],
|
506
|
+
aws_session_token=ocredentials["SessionToken"],
|
507
|
+
region_name=ocredentials["Region"],
|
508
|
+
)
|
509
|
+
|
510
|
+
# Initialize EC2 client with retry configuration for resilient API operations
|
511
|
+
client_ec2 = session_ec2.client("ec2", config=my_config)
|
512
|
+
|
513
|
+
try:
|
514
|
+
# Execute comprehensive VPC Flow Log discovery across account and region
|
515
|
+
response = client_ec2.describe_flow_logs()
|
516
|
+
|
517
|
+
# Filter and structure VPC Flow Logs with CloudWatch Logs integration
|
518
|
+
CW_LogGroups = [
|
519
|
+
{
|
520
|
+
"Credentials": ocredentials, # Preserve credentials for CloudWatch operations
|
521
|
+
"AccountId": ocredentials["AccountId"], # Account context for analysis
|
522
|
+
"Region": ocredentials["Region"], # Regional context for data transfer costs
|
523
|
+
"VPCId": x["ResourceId"], # VPC identifier for network topology analysis
|
524
|
+
"LogGroupName": x["LogGroupName"], # CloudWatch Logs group for query operations
|
525
|
+
}
|
526
|
+
for x in response["FlowLogs"]
|
527
|
+
# Filter for VPC-associated Flow Logs with CloudWatch Logs destinations
|
528
|
+
if "LogGroupName" in x.keys() and x["ResourceId"].find("vpc-") == 0
|
529
|
+
]
|
530
|
+
|
531
|
+
except Exception as my_Error:
|
532
|
+
# Propagate exceptions for upstream error handling and logging
|
533
|
+
raise my_Error
|
534
|
+
|
535
|
+
return CW_LogGroups
|
536
|
+
|
537
|
+
|
538
|
+
def prep_cloudwatch_log_query(f_flow_logs: list) -> list[dict]:
|
539
|
+
"""
|
540
|
+
Generate comprehensive CloudWatch Logs queries for VPC outbound data transfer analysis.
|
541
|
+
|
542
|
+
Constructs advanced CloudWatch Logs Insights queries for precise outbound data transfer
|
543
|
+
calculation based on VPC CIDR blocks and network topology. Performs sophisticated IP
|
544
|
+
address analysis to differentiate internal vs external traffic for accurate cost attribution
|
545
|
+
and bandwidth monitoring across complex enterprise network architectures.
|
546
|
+
|
547
|
+
Args:
|
548
|
+
f_flow_logs (list): VPC Flow Log inventory containing:
|
549
|
+
- Credentials: Cross-account credentials for VPC and CloudWatch access
|
550
|
+
- AccountId: AWS account containing the VPC and Flow Log configuration
|
551
|
+
- Region: AWS region for VPC Flow Log and CloudWatch operations
|
552
|
+
- VPCId: VPC identifier for network topology analysis
|
553
|
+
- LogGroupName: CloudWatch Logs group containing Flow Log data
|
554
|
+
|
555
|
+
Returns:
|
556
|
+
list[dict]: Comprehensive query configuration for each VPC CIDR block containing:
|
557
|
+
- VPC: VPC identifier for network association
|
558
|
+
- VPCName: VPC name from tags or default value for operational context
|
559
|
+
- cidr_block: CIDR block for network boundary definition
|
560
|
+
- Query: CloudWatch Logs Insights query string for data transfer analysis
|
561
|
+
- Credentials: Preserved credentials for CloudWatch query execution
|
562
|
+
- Additional Flow Log metadata for operational context
|
563
|
+
|
564
|
+
CIDR Block Analysis Logic:
|
565
|
+
- VPC CIDR block enumeration through EC2 describe_vpcs API
|
566
|
+
- Network prefix length analysis for query optimization (8, 16, 24, 28-bit networks)
|
567
|
+
- Dynamic IP address range calculation for internal vs external traffic classification
|
568
|
+
- Advanced subnet boundary detection for accurate traffic categorization
|
569
|
+
|
570
|
+
Query Generation Strategy:
|
571
|
+
- /8 networks: Single octet filtering for Class A network efficiency
|
572
|
+
- /9-/15 networks: Second octet range filtering with bitwise calculation
|
573
|
+
- /16 networks: Two-octet filtering for Class B network optimization
|
574
|
+
- /17-/23 networks: Third octet range filtering with prefix-based calculation
|
575
|
+
- /24 networks: Three-octet filtering for Class C network precision
|
576
|
+
- /25-/28 networks: Fourth octet range filtering for subnet-level analysis
|
577
|
+
|
578
|
+
CloudWatch Logs Query Construction:
|
579
|
+
- Field selection: timestamp, message, logStream, account, action, addresses, bytes
|
580
|
+
- Action filtering: ACCEPT actions only for successful data transfer measurement
|
581
|
+
- Source address filtering: Internal network identification using CIDR patterns
|
582
|
+
- Destination address filtering: External network identification through exclusion
|
583
|
+
- Statistical aggregation: sum(bytes) for total outbound data transfer calculation
|
584
|
+
|
585
|
+
Network Topology Integration:
|
586
|
+
- VPC metadata extraction including tags and naming conventions
|
587
|
+
- CIDR block association analysis for multi-block VPC configurations
|
588
|
+
- Network boundary calculation using ipaddress library for precision
|
589
|
+
- Advanced bit manipulation for network range determination
|
590
|
+
|
591
|
+
Enterprise Features:
|
592
|
+
- Multi-CIDR VPC support with individual query generation per CIDR block
|
593
|
+
- VPC naming convention support through tag-based name extraction
|
594
|
+
- Scalable query generation for complex enterprise network topologies
|
595
|
+
- Performance-optimized query patterns for large-scale data analysis
|
596
|
+
|
597
|
+
Error Handling & Validation:
|
598
|
+
- CIDR block format validation with comprehensive error messaging
|
599
|
+
- Network prefix length validation ensuring supported query patterns
|
600
|
+
- VPC metadata validation with graceful degradation for missing information
|
601
|
+
- IP address calculation validation preventing malformed query generation
|
602
|
+
|
603
|
+
Performance Optimization:
|
604
|
+
- Network prefix-based query optimization reducing CloudWatch processing time
|
605
|
+
- Memory-efficient CIDR block processing for large VPC inventories
|
606
|
+
- Structured query format optimization for CloudWatch Logs Insights efficiency
|
607
|
+
- Batch processing optimization for multi-VPC organizational environments
|
608
|
+
"""
|
609
|
+
import ipaddress
|
610
|
+
|
611
|
+
# Initialize VPC CIDR block query configuration collection
|
612
|
+
vpc_cidr_blocks = list()
|
613
|
+
|
614
|
+
# Process each Flow Log to generate CIDR-specific CloudWatch queries
|
615
|
+
for flow_log in f_flow_logs:
|
616
|
+
# Establish authenticated EC2 session for VPC metadata retrieval
|
617
|
+
session_ec2 = boto3.Session(
|
618
|
+
aws_access_key_id=flow_log["Credentials"]["AccessKeyId"],
|
619
|
+
aws_secret_access_key=flow_log["Credentials"]["SecretAccessKey"],
|
620
|
+
aws_session_token=flow_log["Credentials"]["SessionToken"],
|
621
|
+
region_name=flow_log["Credentials"]["Region"],
|
622
|
+
)
|
623
|
+
|
624
|
+
# Initialize EC2 client with retry configuration for VPC analysis
|
625
|
+
client_ec2 = session_ec2.client("ec2", config=my_config)
|
626
|
+
|
627
|
+
# Retrieve comprehensive VPC inventory for CIDR block analysis
|
628
|
+
VPCs = client_ec2.describe_vpcs()["Vpcs"]
|
629
|
+
|
630
|
+
# Process each VPC to extract CIDR blocks and generate queries
|
631
|
+
for vpc in VPCs:
|
632
|
+
if vpc["VpcId"] == flow_log["VPCId"]:
|
633
|
+
# Extract VPC name from tags for operational context
|
634
|
+
tag_dict = {x["Key"]: x["Value"] for x in vpc["Tags"]} if "Tags" in vpc.keys() else {}
|
635
|
+
if "Name" in tag_dict.keys():
|
636
|
+
vpc_name = tag_dict["Name"]
|
637
|
+
else:
|
638
|
+
vpc_name = None
|
639
|
+
|
640
|
+
# Process each CIDR block association for query generation
|
641
|
+
for cidr_block in vpc["CidrBlockAssociationSet"]:
|
642
|
+
new_record = flow_log
|
643
|
+
|
644
|
+
# Parse CIDR block for network topology analysis
|
645
|
+
# Note: Debugging statement preserved for development reference
|
646
|
+
# cidr_block.update({'CidrBlock': '172.16.64.0/22'})
|
647
|
+
cidr_net_name = ipaddress.ip_network(cidr_block["CidrBlock"])
|
648
|
+
|
649
|
+
# Extract IP address octets for dynamic query construction
|
650
|
+
first_dot = cidr_block["CidrBlock"].find(".")
|
651
|
+
first_octet = cidr_block["CidrBlock"][:first_dot]
|
652
|
+
second_dot = cidr_block["CidrBlock"].find(".", first_dot + 1)
|
653
|
+
second_octet = cidr_block["CidrBlock"][first_dot + 1 : second_dot]
|
654
|
+
third_dot = cidr_block["CidrBlock"].find(".", second_dot + 1)
|
655
|
+
third_octet = cidr_block["CidrBlock"][second_dot + 1 : third_dot]
|
656
|
+
fourth_octet = cidr_block["CidrBlock"].find(".", third_dot + 1)
|
657
|
+
|
658
|
+
# Generate network-prefix-optimized CloudWatch Logs queries based on CIDR block size
|
659
|
+
if cidr_net_name.prefixlen == 8:
|
660
|
+
# Class A network optimization: Single octet filtering for /8 networks
|
661
|
+
network_name = f"{first_octet}"
|
662
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and srcAddr like '{network_name}' and dstAddr not like '{network_name}' | sort @timestamp desc | stats sum(bytes)"
|
663
|
+
elif cidr_net_name.prefixlen > 8 and cidr_net_name.prefixlen < 16:
|
664
|
+
# Handle /9-/15 networks with second octet range calculation for efficient filtering
|
665
|
+
# Calculates variable second octet range using bitwise operations for network boundary detection
|
666
|
+
# Example: /12 network (172.16.0.0/12) includes 172.16-172.31 range (16 subnets)
|
667
|
+
# Formula: variable_octet = base_octet + range(0, 2^(16-prefixlen))
|
668
|
+
and_string = " and "
|
669
|
+
or_string = " or "
|
670
|
+
|
671
|
+
# Generate destination address exclusion patterns for internal network filtering
|
672
|
+
dst_query_seq = [
|
673
|
+
f"dstAddr not like '{first_octet}.{int(second_octet) + x}'"
|
674
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
675
|
+
]
|
676
|
+
src_query_seq = [
|
677
|
+
f"srcAddr like '{first_octet}.{int(second_octet) + x}'"
|
678
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
679
|
+
]
|
680
|
+
dst_string = and_string.join(dst_query_seq)
|
681
|
+
src_string = or_string.join(src_query_seq)
|
682
|
+
filter_query = f"{src_string} and {dst_string}"
|
683
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and {filter_query} | sort @timestamp desc | stats sum(bytes)"
|
684
|
+
|
685
|
+
elif cidr_net_name.prefixlen == 16:
|
686
|
+
# Class B network optimization: Two-octet filtering for /16 networks
|
687
|
+
network_name = f"{first_octet}.{second_octet}"
|
688
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and srcAddr like '{network_name}' and dstAddr not like '{network_name}' | sort @timestamp desc | stats sum(bytes)"
|
689
|
+
|
690
|
+
elif cidr_net_name.prefixlen > 16 and cidr_net_name.prefixlen < 24:
|
691
|
+
# Handle /17-/23 networks with third octet range calculation for subnet-level filtering
|
692
|
+
# Calculates variable third octet range using prefix-based bitwise operations
|
693
|
+
# Example: /20 network (10.1.0.0/20) includes 10.1.0-10.1.15 range (16 subnets)
|
694
|
+
and_string = " and "
|
695
|
+
or_string = " or "
|
696
|
+
|
697
|
+
# Generate destination address exclusion patterns for subnet filtering
|
698
|
+
dst_query_seq = [
|
699
|
+
f"dstAddr not like '{first_octet}.{second_octet}.{int(third_octet) + x}'"
|
700
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
701
|
+
]
|
702
|
+
src_query_seq = [
|
703
|
+
f"srcAddr like '{first_octet}.{second_octet}.{int(third_octet) + x}'"
|
704
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
705
|
+
]
|
706
|
+
dst_string = and_string.join(dst_query_seq)
|
707
|
+
src_string = or_string.join(src_query_seq)
|
708
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and ({src_string}) and ({dst_string}) | sort @timestamp desc | stats sum(bytes)"
|
709
|
+
|
710
|
+
elif cidr_net_name.prefixlen == 24:
|
711
|
+
# Class C network optimization: Three-octet filtering for /24 networks
|
712
|
+
network_name = f"{first_octet}.{second_octet}.{third_octet}"
|
713
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and srcAddr like '{network_name}' and dstAddr not like '{network_name}' | sort @timestamp desc | stats sum(bytes)"
|
714
|
+
|
715
|
+
elif cidr_net_name.prefixlen > 24 and cidr_net_name.prefixlen <= 28:
|
716
|
+
# Handle /25-/28 networks with fourth octet range calculation for host-level filtering
|
717
|
+
# Calculates variable fourth octet range using subnet mask-based bitwise operations
|
718
|
+
# Example: /26 network (192.168.1.0/26) includes 192.168.1.0-192.168.1.63 range (64 hosts)
|
719
|
+
and_string = " and "
|
720
|
+
or_string = " or "
|
721
|
+
|
722
|
+
# Extract fourth octet value for host-level range calculation
|
723
|
+
slash_location = cidr_block["CidrBlock"].find("/")
|
724
|
+
fourth_octet = cidr_block["CidrBlock"][third_dot + 1 : slash_location]
|
725
|
+
|
726
|
+
# Generate destination address exclusion patterns for host-level filtering
|
727
|
+
dst_query_seq = [
|
728
|
+
f"dstAddr not like '{first_octet}.{second_octet}.{third_octet}.{int(fourth_octet) + x}'"
|
729
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
730
|
+
]
|
731
|
+
src_query_seq = [
|
732
|
+
f"srcAddr like '{first_octet}.{second_octet}.{third_octet}.{int(fourth_octet) + x}'"
|
733
|
+
for x in range(0, 2 ** (cidr_net_name.prefixlen % 8))
|
734
|
+
]
|
735
|
+
dst_string = and_string.join(dst_query_seq)
|
736
|
+
src_string = or_string.join(src_query_seq)
|
737
|
+
query_string = f"fields @timestamp, @message, @logStream, @log, accountId, action, srcAddr, dstAddr, bytes | filter action = 'ACCEPT' and ({src_string}) and ({dst_string}) | sort @timestamp desc | stats sum(bytes)"
|
738
|
+
elif cidr_net_name.prefixlen < 8 or cidr_net_name.prefixlen > 28:
|
739
|
+
# Handle unsupported network prefix lengths with validation error
|
740
|
+
raise ValueError(f"Netmask of {cidr_net_name.prefixlen} is not supported")
|
741
|
+
else:
|
742
|
+
# Default case for unhandled prefix lengths
|
743
|
+
query_string = None
|
744
|
+
|
745
|
+
# Update query record with VPC metadata and generated CloudWatch query
|
746
|
+
new_record.update(
|
747
|
+
{
|
748
|
+
"VPC": vpc["VpcId"], # VPC identifier for network association
|
749
|
+
"VPCName": vpc_name
|
750
|
+
if vpc_name is not None
|
751
|
+
else "No Name Available", # VPC name for operational context
|
752
|
+
"cidr_block": cidr_block["CidrBlock"], # CIDR block for network boundary definition
|
753
|
+
"Query": query_string, # Generated CloudWatch Logs Insights query
|
754
|
+
}
|
755
|
+
)
|
756
|
+
|
757
|
+
# Append complete query configuration to collection for execution
|
758
|
+
vpc_cidr_blocks.append(new_record.copy())
|
759
|
+
else:
|
760
|
+
# Skip non-matching VPCs in Flow Log processing
|
761
|
+
continue
|
762
|
+
|
763
|
+
return vpc_cidr_blocks
|
764
|
+
|
765
|
+
|
766
|
+
# def query_cloudwatch_logs(ocredentials, queries: list, f_all_cw_log_groups: list, fRegion: str = 'us-east-1') -> list:
|
767
|
+
def query_cloudwatch_logs(f_queries: list, f_start: datetime, f_end: datetime) -> list[dict]:
|
768
|
+
"""
|
769
|
+
Execute comprehensive CloudWatch Logs Insights queries for VPC outbound data transfer analysis.
|
770
|
+
|
771
|
+
Orchestrates the execution of sophisticated CloudWatch Logs Insights queries across multiple
|
772
|
+
VPC Flow Logs to calculate accurate outbound data transfer volumes for cost analysis and
|
773
|
+
bandwidth monitoring. Handles log group retention validation, query execution coordination,
|
774
|
+
and comprehensive error management for enterprise multi-account environments.
|
775
|
+
|
776
|
+
Args:
|
777
|
+
f_queries (list): Complete query configuration collection containing:
|
778
|
+
- Credentials: Cross-account credentials for CloudWatch Logs access
|
779
|
+
- LogGroupName: CloudWatch Logs group containing VPC Flow Log data
|
780
|
+
- Query: CloudWatch Logs Insights query string for data transfer calculation
|
781
|
+
- VPC metadata including VPCId, VPCName, and CIDR block information
|
782
|
+
|
783
|
+
f_start (datetime): Query start timestamp for data transfer analysis period
|
784
|
+
f_end (datetime): Query end timestamp for data transfer analysis period
|
785
|
+
|
786
|
+
Returns:
|
787
|
+
list[dict]: Comprehensive query execution results containing:
|
788
|
+
- QueryId: CloudWatch Logs query identifier for result retrieval
|
789
|
+
- StartDate: Actual query start date accounting for retention constraints
|
790
|
+
- EndDate: Actual query end date for analysis period
|
791
|
+
- Days: Total analysis period duration for cost calculation context
|
792
|
+
- Original query metadata for result association and operational context
|
793
|
+
|
794
|
+
Query Execution Process:
|
795
|
+
1. Establish authenticated CloudWatch Logs session using cross-account credentials
|
796
|
+
2. Validate log group retention policies against requested analysis period
|
797
|
+
3. Adjust query timeframe to respect log retention constraints
|
798
|
+
4. Execute CloudWatch Logs Insights query with optimized time range conversion
|
799
|
+
5. Capture query identifier for subsequent result retrieval operations
|
800
|
+
|
801
|
+
Log Retention Management:
|
802
|
+
- Automatic log group retention policy discovery and validation
|
803
|
+
- Dynamic query period adjustment for retention constraint compliance
|
804
|
+
- Retention period conflict detection with comprehensive warning messaging
|
805
|
+
- Graceful handling of insufficient retention period scenarios
|
806
|
+
|
807
|
+
Time Range Optimization:
|
808
|
+
- Epoch time conversion for CloudWatch Logs API compatibility
|
809
|
+
- Timezone-aware timestamp handling for accurate analysis periods
|
810
|
+
- Retention-constrained time range calculation with precision
|
811
|
+
- Day-level duration calculation for cost attribution and reporting
|
812
|
+
|
813
|
+
Error Handling & Resilience:
|
814
|
+
- AWS API client error detection with detailed error categorization
|
815
|
+
- Query execution failure handling with comprehensive error logging
|
816
|
+
- Cross-account access validation through successful CloudWatch operations
|
817
|
+
- Regional service availability validation preventing operation failures
|
818
|
+
|
819
|
+
Enterprise Features:
|
820
|
+
- Multi-account query coordination with individual credential management
|
821
|
+
- Concurrent query execution capability for large-scale VPC environments
|
822
|
+
- Comprehensive audit logging for security and compliance requirements
|
823
|
+
- Structured error reporting for integration with enterprise monitoring systems
|
824
|
+
|
825
|
+
Performance Considerations:
|
826
|
+
- Efficient epoch time conversion reducing API call overhead
|
827
|
+
- Memory-optimized query result aggregation for large-scale analysis
|
828
|
+
- Regional API optimization reducing cross-region latency impacts
|
829
|
+
- Batch query execution patterns for enterprise-scale VPC inventories
|
830
|
+
|
831
|
+
Security & Compliance:
|
832
|
+
- Cross-account credential validation ensuring authorized CloudWatch access
|
833
|
+
- Query execution logging for security audit and compliance tracking
|
834
|
+
- Regional access control validation for compliance with geographic policies
|
835
|
+
- Comprehensive error context preservation for security incident response
|
836
|
+
"""
|
837
|
+
from botocore.exceptions import ClientError
|
838
|
+
|
839
|
+
# Initialize query execution results collection for CloudWatch operations
|
840
|
+
all_query_ids = list()
|
841
|
+
|
842
|
+
# Execute CloudWatch Logs Insights queries for each VPC Flow Log configuration
|
843
|
+
for query in f_queries:
|
844
|
+
new_record = query
|
845
|
+
|
846
|
+
# Establish authenticated CloudWatch Logs session using cross-account credentials
|
847
|
+
session_logs = boto3.Session(
|
848
|
+
aws_access_key_id=query["Credentials"]["AccessKeyId"],
|
849
|
+
aws_secret_access_key=query["Credentials"]["SecretAccessKey"],
|
850
|
+
aws_session_token=query["Credentials"]["SessionToken"],
|
851
|
+
region_name=query["Credentials"]["Region"],
|
852
|
+
)
|
853
|
+
|
854
|
+
# Initialize CloudWatch Logs client with retry configuration for query operations
|
855
|
+
client_logs = session_logs.client("logs", config=my_config)
|
856
|
+
|
857
|
+
# Debug logging for cross-account CloudWatch Logs access validation
|
858
|
+
logging.debug(
|
859
|
+
f"About to try to connect to describe the log groups within account {query['Credentials']['AccountId']}"
|
860
|
+
)
|
861
|
+
|
862
|
+
# Retrieve log group retention policy for query period validation
|
863
|
+
log_group_retention = client_logs.describe_log_groups(logGroupNamePrefix=query["LogGroupName"])
|
864
|
+
|
865
|
+
# Debug logging for successful cross-account access confirmation
|
866
|
+
logging.debug(
|
867
|
+
f"Just tried to connect to describe the log groups within account {query['Credentials']['AccountId']}"
|
868
|
+
)
|
869
|
+
|
870
|
+
# Validate log group retention against requested analysis period
|
871
|
+
if log_group_retention["logGroups"][0]["retentionInDays"] < (yesterday - start_date_time).days:
|
872
|
+
# Adjust query timeframe to respect log retention constraints
|
873
|
+
logging.warning(
|
874
|
+
f"Log group {query['LogGroupName']} has a {log_group_retention['logGroups'][0]['retentionInDays']} day retention policy, so data will be constrained to that period."
|
875
|
+
)
|
876
|
+
|
877
|
+
# Calculate retention-constrained analysis period with precision
|
878
|
+
f_start = (yesterday - timedelta(days=log_group_retention["logGroups"][0]["retentionInDays"])).replace(
|
879
|
+
hour=0, minute=0, second=0, microsecond=0
|
880
|
+
)
|
881
|
+
f_end = yesterday.replace(hour=23, minute=59, second=59, microsecond=999999)
|
882
|
+
|
883
|
+
# Debug logging for query execution parameters and retention information
|
884
|
+
logging.debug(
|
885
|
+
f"About to start the query for {query['LogGroupName']} with retention of {log_group_retention['logGroups'][0]['retentionInDays']} days, with start of {f_start} and end of {f_end}."
|
886
|
+
)
|
887
|
+
logging.debug(f"Query: {query['Query']}")
|
888
|
+
|
889
|
+
try:
|
890
|
+
# Execute CloudWatch Logs Insights query with optimized time range conversion
|
891
|
+
query_id = client_logs.start_query(
|
892
|
+
logGroupName=query["LogGroupName"],
|
893
|
+
startTime=int((f_start - epoch_time).total_seconds()),
|
894
|
+
endTime=int((f_end - epoch_time).total_seconds()),
|
895
|
+
queryString=query["Query"],
|
896
|
+
)
|
897
|
+
|
898
|
+
# Confirm successful query execution with debug logging
|
899
|
+
logging.debug("Was able to run query...")
|
900
|
+
|
901
|
+
# Update query record with execution metadata for result retrieval
|
902
|
+
new_record.update(
|
903
|
+
{
|
904
|
+
"QueryId": query_id["queryId"], # CloudWatch query identifier for result retrieval
|
905
|
+
"StartDate": f_start, # Actual query start date with retention adjustment
|
906
|
+
"EndDate": f_end, # Actual query end date for analysis period
|
907
|
+
"Days": (f_end - f_start).days, # Analysis period duration for cost context
|
908
|
+
}
|
909
|
+
)
|
910
|
+
|
911
|
+
# Append successful query configuration to execution results
|
912
|
+
all_query_ids.append(query.copy())
|
913
|
+
|
914
|
+
except ClientError as my_Error:
|
915
|
+
# Handle AWS API client errors with comprehensive error logging
|
916
|
+
logging.error(
|
917
|
+
f"Received ClientError ({my_Error.operation_name} - {my_Error.response['Error']['Code']} - {my_Error.response['Error']['Message']} - {my_Error.response['Error']['Type']}) - {my_Error.response}"
|
918
|
+
)
|
919
|
+
logging.error(
|
920
|
+
f"Unable to run query for {query['LogGroupName']} in account {query['Credentials']['AccountId']} in region {query['Credentials']['Region']}"
|
921
|
+
)
|
922
|
+
# Continue processing remaining queries despite individual failures
|
923
|
+
continue
|
924
|
+
|
925
|
+
except Exception as my_Error:
|
926
|
+
# Handle general exceptions with detailed error logging
|
927
|
+
logging.error(
|
928
|
+
f"Unable to run query for {query['LogGroupName']} in account {query['Credentials']['AccountId']} in region {query['Credentials']['Region']} - {my_Error}"
|
929
|
+
)
|
930
|
+
# Continue processing remaining queries despite individual failures
|
931
|
+
continue
|
932
|
+
|
933
|
+
return all_query_ids
|
934
|
+
|
935
|
+
|
936
|
+
def get_cw_query_results(fquery_requests: list) -> list[dict]:
|
937
|
+
"""
|
938
|
+
Retrieve and aggregate CloudWatch Logs Insights query results for VPC data transfer analysis.
|
939
|
+
|
940
|
+
Orchestrates the retrieval of CloudWatch Logs Insights query results across multiple VPCs
|
941
|
+
and accounts, processing outbound data transfer calculations with comprehensive error handling
|
942
|
+
and progress monitoring. Designed for enterprise environments with extensive VPC topologies
|
943
|
+
and high-volume Flow Log data requiring efficient result aggregation and processing.
|
944
|
+
|
945
|
+
Args:
|
946
|
+
fquery_requests (list): Query execution collection containing:
|
947
|
+
- QueryId: CloudWatch Logs query identifier for result retrieval
|
948
|
+
- Credentials: Cross-account credentials for CloudWatch Logs access
|
949
|
+
- StartDate: Query start date for analysis period context
|
950
|
+
- EndDate: Query end date for analysis period context
|
951
|
+
- Days: Analysis period duration for cost calculation context
|
952
|
+
- VPC metadata including VPCId, VPCName, CIDR block, and LogGroupName
|
953
|
+
|
954
|
+
Returns:
|
955
|
+
list[dict]: Comprehensive query results collection containing:
|
956
|
+
- Outbound_Data_GB: Calculated outbound data transfer in gigabytes
|
957
|
+
- Query execution metadata for operational context and analysis
|
958
|
+
- VPC network topology information for cost attribution
|
959
|
+
- Analysis period information for reporting and compliance
|
960
|
+
|
961
|
+
Query Result Processing:
|
962
|
+
1. Establish authenticated CloudWatch Logs session for each query
|
963
|
+
2. Poll CloudWatch Logs query execution status until completion
|
964
|
+
3. Retrieve query results with comprehensive data validation
|
965
|
+
4. Extract outbound data transfer statistics from query results
|
966
|
+
5. Convert byte values to gigabytes for cost analysis and reporting
|
967
|
+
|
968
|
+
Progress Monitoring & User Experience:
|
969
|
+
- Real-time progress indication for multi-VPC query processing
|
970
|
+
- Estimated completion time calculation based on analysis period
|
971
|
+
- Processing status updates for operational transparency
|
972
|
+
- Comprehensive error messaging for troubleshooting and resolution
|
973
|
+
|
974
|
+
Data Processing & Calculation:
|
975
|
+
- Byte-to-gigabyte conversion for standard cost reporting units
|
976
|
+
- Statistical result extraction from CloudWatch Logs aggregation
|
977
|
+
- Data validation ensuring accurate numerical calculations
|
978
|
+
- Missing data handling with graceful degradation and error reporting
|
979
|
+
|
980
|
+
Error Handling & Resilience:
|
981
|
+
- Query completion polling with timeout and retry logic
|
982
|
+
- AWS API error handling with specific error categorization
|
983
|
+
- Missing result handling with comprehensive error logging
|
984
|
+
- Individual query failure isolation preventing batch processing failures
|
985
|
+
|
986
|
+
Enterprise Features:
|
987
|
+
- Multi-account result aggregation with individual credential management
|
988
|
+
- Scalable processing patterns for large-scale VPC environments
|
989
|
+
- Comprehensive audit logging for security and compliance requirements
|
990
|
+
- Structured result format for integration with enterprise reporting systems
|
991
|
+
|
992
|
+
Performance Optimization:
|
993
|
+
- Efficient result polling patterns reducing API call overhead
|
994
|
+
- Memory-optimized result aggregation for large-scale analysis
|
995
|
+
- Regional API optimization reducing cross-region latency impacts
|
996
|
+
- Concurrent processing capability for enterprise-scale VPC inventories
|
997
|
+
|
998
|
+
Security & Compliance:
|
999
|
+
- Cross-account credential validation ensuring authorized result access
|
1000
|
+
- Query result logging for security audit and compliance tracking
|
1001
|
+
- Regional access control validation for compliance with geographic policies
|
1002
|
+
- Comprehensive error context preservation for security incident response
|
1003
|
+
"""
|
1004
|
+
# Initialize comprehensive query results collection
|
1005
|
+
all_query_results = list()
|
1006
|
+
|
1007
|
+
# Display progress information and estimated completion time for user awareness
|
1008
|
+
print()
|
1009
|
+
print(
|
1010
|
+
f"Checking {len(fquery_requests)} flow logs that launched scanning across {SpannedDaysChecked} days. \n"
|
1011
|
+
f"Based on how much data is in the flow logs, this could take {SpannedDaysChecked * 5} seconds for the busiest VPCs"
|
1012
|
+
)
|
1013
|
+
print()
|
1014
|
+
|
1015
|
+
# Process each CloudWatch Logs query to retrieve outbound data transfer results
|
1016
|
+
for query in fquery_requests:
|
1017
|
+
new_record = query
|
1018
|
+
|
1019
|
+
# Establish authenticated CloudWatch Logs session for result retrieval
|
1020
|
+
session_logs = boto3.Session(
|
1021
|
+
aws_access_key_id=query["Credentials"]["AccessKeyId"],
|
1022
|
+
aws_secret_access_key=query["Credentials"]["SecretAccessKey"],
|
1023
|
+
aws_session_token=query["Credentials"]["SessionToken"],
|
1024
|
+
region_name=query["Credentials"]["Region"],
|
1025
|
+
)
|
1026
|
+
|
1027
|
+
# Initialize CloudWatch Logs client for query result operations
|
1028
|
+
client_logs = session_logs.client("logs", config=my_config)
|
1029
|
+
|
1030
|
+
# Retrieve initial query results and status for processing
|
1031
|
+
response = client_logs.get_query_results(queryId=query["QueryId"])
|
1032
|
+
|
1033
|
+
# Initialize polling timer for query completion monitoring
|
1034
|
+
waited_seconds_total = 0
|
1035
|
+
|
1036
|
+
# Poll CloudWatch Logs query until completion with timeout protection
|
1037
|
+
while response["status"] == "Running":
|
1038
|
+
waited_seconds_total += sleep_interval
|
1039
|
+
|
1040
|
+
# Implement timeout protection for long-running queries
|
1041
|
+
if waited_seconds_total > (SpannedDaysChecked * 5):
|
1042
|
+
print(
|
1043
|
+
f"{ERASE_LINE}Query is still running... Waited {waited_seconds_total} seconds already, we'll have to check manually later. "
|
1044
|
+
)
|
1045
|
+
break
|
1046
|
+
|
1047
|
+
# Display real-time progress for query execution monitoring
|
1048
|
+
print(
|
1049
|
+
f"{ERASE_LINE}Query for vpc {query['VPCId']} in account {query['AccountId']} in region {query['Region']} is still running... It's been {waited_seconds_total} seconds so far",
|
1050
|
+
end="\r",
|
1051
|
+
)
|
1052
|
+
|
1053
|
+
# Sleep before next polling iteration to prevent API throttling
|
1054
|
+
sleep(sleep_interval)
|
1055
|
+
|
1056
|
+
# Retrieve updated query results and status
|
1057
|
+
response = client_logs.get_query_results(queryId=query["QueryId"])
|
1058
|
+
|
1059
|
+
# Process successful query results with data transfer calculations
|
1060
|
+
if response["statistics"]["recordsMatched"] > 0:
|
1061
|
+
# Extract outbound data transfer results from CloudWatch Logs aggregation
|
1062
|
+
new_record.update(
|
1063
|
+
{
|
1064
|
+
"Results": response["results"][0][0]["value"], # Outbound bytes total from query results
|
1065
|
+
"Status": response["status"], # Final query execution status
|
1066
|
+
"Stats": response["statistics"], # Query execution statistics for analysis
|
1067
|
+
}
|
1068
|
+
)
|
1069
|
+
# Append successful query results to collection
|
1070
|
+
all_query_results.append(query.copy())
|
1071
|
+
else:
|
1072
|
+
# Handle queries with no matching data transfer records
|
1073
|
+
logging.info(
|
1074
|
+
f"The CloudWatch query for vpc {query['VPCId']} in account {query['AccountId']} in region {query['Region']} returned no results:"
|
1075
|
+
)
|
1076
|
+
|
1077
|
+
# Record zero results with execution metadata for completeness
|
1078
|
+
new_record.update(
|
1079
|
+
{
|
1080
|
+
"Results": 0, # Zero outbound data transfer for VPC
|
1081
|
+
"Status": response["status"], # Query completion status
|
1082
|
+
"Stats": response["statistics"], # Query execution statistics
|
1083
|
+
}
|
1084
|
+
)
|
1085
|
+
|
1086
|
+
# Include zero-result queries in final collection for comprehensive reporting
|
1087
|
+
all_query_results.append(query.copy())
|
1088
|
+
|
1089
|
+
return all_query_results
|
1090
|
+
|
1091
|
+
|
1092
|
+
#####################
|
1093
|
+
# Main
|
1094
|
+
#####################
|
1095
|
+
|
1096
|
+
if __name__ == "__main__":
|
1097
|
+
args = parse_args(sys.argv[1:])
|
1098
|
+
pProfile = args.Profile
|
1099
|
+
pRegionList = args.Regions
|
1100
|
+
pAccessRole = args.AccessRole
|
1101
|
+
# pAccountFile = args.pAccountFile
|
1102
|
+
pSkipProfiles = args.SkipProfiles
|
1103
|
+
pSkipAccounts = args.SkipAccounts
|
1104
|
+
pRootOnly = args.RootOnly
|
1105
|
+
pAccountList = args.Accounts
|
1106
|
+
pTiming = args.Time
|
1107
|
+
verbose = args.loglevel
|
1108
|
+
pFilename = args.Filename
|
1109
|
+
pStartDate = args.pStartDate
|
1110
|
+
pEndDate = args.pEndDate
|
1111
|
+
# Setup logging levels
|
1112
|
+
logging.basicConfig(level=verbose, format="[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s")
|
1113
|
+
logging.getLogger("boto3").setLevel(logging.CRITICAL)
|
1114
|
+
logging.getLogger("botocore").setLevel(logging.CRITICAL)
|
1115
|
+
logging.getLogger("s3transfer").setLevel(logging.CRITICAL)
|
1116
|
+
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
|
1117
|
+
|
1118
|
+
my_config = Config(signature_version="v4", retries={"max_attempts": 6, "mode": "standard"})
|
1119
|
+
|
1120
|
+
if platform.system() == "Linux":
|
1121
|
+
platform = "Linux"
|
1122
|
+
elif platform.system() == "Windows":
|
1123
|
+
platform = "Windows"
|
1124
|
+
else:
|
1125
|
+
platform = "Mac"
|
1126
|
+
|
1127
|
+
display_dict = {
|
1128
|
+
"AccountId": {"DisplayOrder": 1, "Heading": "Acct Number"},
|
1129
|
+
"Region": {"DisplayOrder": 2, "Heading": "Region"},
|
1130
|
+
"VPCName": {"DisplayOrder": 3, "Heading": "VPC Name"},
|
1131
|
+
"cidr_block": {"DisplayOrder": 4, "Heading": "CIDR Block"},
|
1132
|
+
"Days": {"DisplayOrder": 5, "Heading": "# of Days"},
|
1133
|
+
"Results": {"DisplayOrder": 6, "Heading": "Raw Bytes"},
|
1134
|
+
"OutboundGBData": {"DisplayOrder": 7, "Heading": "GBytes"},
|
1135
|
+
}
|
1136
|
+
|
1137
|
+
# Validate the parameters passed in
|
1138
|
+
try:
|
1139
|
+
yesterday = datetime.today() - timedelta(days=1)
|
1140
|
+
if pStartDate is None:
|
1141
|
+
start_date_time = yesterday.replace(hour=0, minute=0, second=0, microsecond=0)
|
1142
|
+
else:
|
1143
|
+
start_date_time = datetime.strptime(pStartDate, "%Y-%m-%d")
|
1144
|
+
start_date_time.replace(hour=0, minute=0, second=0, microsecond=0)
|
1145
|
+
except Exception as my_Error:
|
1146
|
+
logging.error(f"Start Date must be entered as 'YYYY-MM-DD'")
|
1147
|
+
print(f"Start Date input Error: {my_Error}")
|
1148
|
+
sys.exit(1)
|
1149
|
+
try:
|
1150
|
+
if pEndDate is None:
|
1151
|
+
end_date_time = yesterday.replace(hour=23, minute=59, second=59, microsecond=999999)
|
1152
|
+
else:
|
1153
|
+
end_date_time = datetime.strptime(pEndDate, "%Y-%m-%d")
|
1154
|
+
except Exception as my_Error:
|
1155
|
+
logging.error(f"End Date must be entered as 'YYYY-MM-DD'")
|
1156
|
+
print(f"End Date input Error: {my_Error}")
|
1157
|
+
sys.exit(1)
|
1158
|
+
|
1159
|
+
epoch_time = datetime(1970, 1, 1)
|
1160
|
+
|
1161
|
+
SpannedDaysChecked = (end_date_time - start_date_time).days
|
1162
|
+
# Setup the aws_acct object
|
1163
|
+
aws_acct, AccountList, RegionList = setup_auth_accounts_and_regions(pProfile)
|
1164
|
+
# Get credentials for all Child Accounts
|
1165
|
+
if pAccessRole is None:
|
1166
|
+
pAccessRoles = pAccessRole
|
1167
|
+
else:
|
1168
|
+
pAccessRoles = [pAccessRole]
|
1169
|
+
CredentialList = get_all_credentials(
|
1170
|
+
pProfile, pTiming, pSkipProfiles, pSkipAccounts, pRootOnly, AccountList, RegionList, pAccessRoles
|
1171
|
+
)
|
1172
|
+
|
1173
|
+
all_query_requests = list()
|
1174
|
+
for credential in CredentialList:
|
1175
|
+
logging.info(
|
1176
|
+
f"Accessing account #{credential['AccountId']} as {pAccessRole} using account {aws_acct.acct_number}'s credentials"
|
1177
|
+
)
|
1178
|
+
# response = check_account_access(aws_acct, account_num, pAccessRole)
|
1179
|
+
if credential["Success"]:
|
1180
|
+
logging.info(
|
1181
|
+
f"Account {credential['AccountId']} was successfully connected via role {credential.get('Role', pAccessRole)} from {aws_acct.acct_number}"
|
1182
|
+
)
|
1183
|
+
print(
|
1184
|
+
f"{ERASE_LINE}Checking account {Fore.BLUE}{credential['AccountId']}{Fore.RESET} in region {Fore.BLUE}{credential['Region']}{Fore.RESET}...",
|
1185
|
+
end="\r",
|
1186
|
+
)
|
1187
|
+
"""
|
1188
|
+
Put more commands here... Or you can write functions that represent your commands and call them from here.
|
1189
|
+
"""
|
1190
|
+
try:
|
1191
|
+
# Get flow log names from each account and region
|
1192
|
+
logging.debug("Getting flow_log cloudwatch groups")
|
1193
|
+
acct_flow_logs = get_flow_log_cloudwatch_groups(credential)
|
1194
|
+
# Create the queries necessary for CloudWatch to get the necessary data
|
1195
|
+
logging.debug("Preparing the queries - getting VPC info")
|
1196
|
+
queries = prep_cloudwatch_log_query(acct_flow_logs)
|
1197
|
+
# Run the queries against the CloudWatch in each account / region
|
1198
|
+
logging.debug("Running the queries with the start/end dates")
|
1199
|
+
query_ids = query_cloudwatch_logs(queries, start_date_time, end_date_time)
|
1200
|
+
logging.debug("Successfully ran queries - now adding all efforts to the final dictionary")
|
1201
|
+
all_query_requests.extend(query_ids)
|
1202
|
+
except Exception as my_Error:
|
1203
|
+
logging.debug(f"Credential: {credential}")
|
1204
|
+
print(f"Exception Error: {my_Error}")
|
1205
|
+
else:
|
1206
|
+
print(
|
1207
|
+
f"Failed to connect to {credential['AccountId']} from {aws_acct.acct_number} {'with Access Role ' + pAccessRole if pAccessRole is not None else ''} with error: {credential['ErrorMessage']}"
|
1208
|
+
)
|
1209
|
+
|
1210
|
+
# Using the list of queries created above, go back into each account and region and get the query results
|
1211
|
+
all_query_results = get_cw_query_results(all_query_requests)
|
1212
|
+
|
1213
|
+
# Display the information we've found this far
|
1214
|
+
sorted_all_query_results = sorted(all_query_results, key=lambda k: (k["AccountId"], k["Region"], k["VPCName"]))
|
1215
|
+
for query_result in all_query_results:
|
1216
|
+
query_result["OutboundGBData"] = int(query_result["Results"]) / 1000000000
|
1217
|
+
display_results(sorted_all_query_results, display_dict, None, pFilename)
|
1218
|
+
|
1219
|
+
print()
|
1220
|
+
print("Thanks for using this script...")
|
1221
|
+
print()
|