aws-cost-calculator-cli 1.6.3__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of aws-cost-calculator-cli has been flagged as potentially problematic.
- {aws_cost_calculator_cli-1.6.3.dist-info → aws_cost_calculator_cli-1.9.1.dist-info}/METADATA +13 -1
- aws_cost_calculator_cli-1.9.1.dist-info/RECORD +15 -0
- {aws_cost_calculator_cli-1.6.3.dist-info → aws_cost_calculator_cli-1.9.1.dist-info}/WHEEL +1 -1
- {aws_cost_calculator_cli-1.6.3.dist-info → aws_cost_calculator_cli-1.9.1.dist-info}/top_level.txt +0 -1
- cost_calculator/api_client.py +2 -1
- cost_calculator/cli.py +301 -5
- cost_calculator/cur.py +244 -0
- cost_calculator/executor.py +59 -92
- cost_calculator/forensics.py +323 -0
- aws_cost_calculator_cli-1.6.3.dist-info/RECORD +0 -25
- backend/__init__.py +0 -1
- backend/algorithms/__init__.py +0 -1
- backend/algorithms/analyze.py +0 -272
- backend/algorithms/drill.py +0 -323
- backend/algorithms/monthly.py +0 -242
- backend/algorithms/trends.py +0 -353
- backend/handlers/__init__.py +0 -1
- backend/handlers/analyze.py +0 -112
- backend/handlers/drill.py +0 -117
- backend/handlers/monthly.py +0 -106
- backend/handlers/profiles.py +0 -148
- backend/handlers/trends.py +0 -106
- {aws_cost_calculator_cli-1.6.3.dist-info → aws_cost_calculator_cli-1.9.1.dist-info}/entry_points.txt +0 -0
- {aws_cost_calculator_cli-1.6.3.dist-info → aws_cost_calculator_cli-1.9.1.dist-info}/licenses/LICENSE +0 -0

cost_calculator/forensics.py ADDED

@@ -0,0 +1,323 @@
+"""
+Cost forensics module - Resource inventory and CloudTrail analysis
+"""
+import boto3
+from datetime import datetime, timedelta
+from collections import defaultdict
+import json
+
+
+def inventory_resources(account_id, profile, region='us-west-2'):
+    """
+    Inventory AWS resources in an account
+
+    Args:
+        account_id: AWS account ID
+        profile: AWS profile name (SSO)
+        region: AWS region
+
+    Returns:
+        dict with resource inventory
+    """
+    session = boto3.Session(profile_name=profile)
+    inventory = {
+        'account_id': account_id,
+        'profile': profile,
+        'region': region,
+        'timestamp': datetime.utcnow().isoformat(),
+        'ec2_instances': [],
+        'efs_file_systems': [],
+        'load_balancers': [],
+        'dynamodb_tables': []
+    }
+
+    try:
+        # EC2 Instances
+        ec2_client = session.client('ec2', region_name=region)
+        instances_response = ec2_client.describe_instances()
+
+        for reservation in instances_response['Reservations']:
+            for instance in reservation['Instances']:
+                if instance['State']['Name'] == 'running':
+                    name = 'N/A'
+                    for tag in instance.get('Tags', []):
+                        if tag['Key'] == 'Name':
+                            name = tag['Value']
+                            break
+
+                    inventory['ec2_instances'].append({
+                        'instance_id': instance['InstanceId'],
+                        'instance_type': instance['InstanceType'],
+                        'name': name,
+                        'state': instance['State']['Name'],
+                        'launch_time': instance['LaunchTime'].isoformat(),
+                        'availability_zone': instance['Placement']['AvailabilityZone']
+                    })
+
+        # EFS File Systems
+        efs_client = session.client('efs', region_name=region)
+        efs_response = efs_client.describe_file_systems()
+
+        total_efs_size = 0
+        for fs in efs_response['FileSystems']:
+            size_bytes = fs['SizeInBytes']['Value']
+            size_gb = size_bytes / (1024**3)
+            total_efs_size += size_gb
+
+            inventory['efs_file_systems'].append({
+                'file_system_id': fs['FileSystemId'],
+                'name': fs.get('Name', 'N/A'),
+                'size_gb': round(size_gb, 2),
+                'creation_time': fs['CreationTime'].isoformat(),
+                'number_of_mount_targets': fs['NumberOfMountTargets']
+            })
+
+        inventory['total_efs_size_gb'] = round(total_efs_size, 2)
+
+        # Load Balancers
+        elbv2_client = session.client('elbv2', region_name=region)
+        elb_response = elbv2_client.describe_load_balancers()
+
+        for lb in elb_response['LoadBalancers']:
+            inventory['load_balancers'].append({
+                'name': lb['LoadBalancerName'],
+                'type': lb['Type'],
+                'dns_name': lb['DNSName'],
+                'scheme': lb['Scheme'],
+                'created_time': lb['CreatedTime'].isoformat(),
+                'availability_zones': [az['ZoneName'] for az in lb['AvailabilityZones']]
+            })
+
+        # DynamoDB Tables (only if region supports it)
+        try:
+            ddb_client = session.client('dynamodb', region_name=region)
+            tables_response = ddb_client.list_tables()
+
+            for table_name in tables_response['TableNames'][:20]:  # Limit to 20 tables
+                table_desc = ddb_client.describe_table(TableName=table_name)
+                table_info = table_desc['Table']
+
+                # Get backup settings
+                try:
+                    backup_desc = ddb_client.describe_continuous_backups(TableName=table_name)
+                    pitr_status = backup_desc['ContinuousBackupsDescription']['PointInTimeRecoveryDescription']['PointInTimeRecoveryStatus']
+                except:
+                    pitr_status = 'UNKNOWN'
+
+                size_gb = table_info.get('TableSizeBytes', 0) / (1024**3)
+
+                inventory['dynamodb_tables'].append({
+                    'table_name': table_name,
+                    'size_gb': round(size_gb, 2),
+                    'item_count': table_info.get('ItemCount', 0),
+                    'pitr_status': pitr_status,
+                    'created_time': table_info['CreationDateTime'].isoformat()
+                })
+        except Exception as e:
+            # DynamoDB might not be available in all regions
+            pass
+
+    except Exception as e:
+        inventory['error'] = str(e)
+
+    return inventory
+
+
+def analyze_cloudtrail(account_id, profile, start_date, end_date, region='us-west-2'):
+    """
+    Analyze CloudTrail events for an account
+
+    Args:
+        account_id: AWS account ID
+        profile: AWS profile name (SSO)
+        start_date: Start datetime
+        end_date: End datetime
+        region: AWS region
+
+    Returns:
+        dict with CloudTrail event summary
+    """
+    session = boto3.Session(profile_name=profile)
+    ct_client = session.client('cloudtrail', region_name=region)
+
+    analysis = {
+        'account_id': account_id,
+        'profile': profile,
+        'region': region,
+        'start_date': start_date.isoformat(),
+        'end_date': end_date.isoformat(),
+        'event_summary': {},
+        'write_events': [],
+        'error': None
+    }
+
+    # Events that indicate resource creation/modification
+    write_event_names = [
+        'RunInstances', 'CreateVolume', 'AttachVolume',
+        'CreateFileSystem', 'ModifyFileSystem',
+        'CreateLoadBalancer', 'ModifyLoadBalancerAttributes',
+        'CreateTable', 'UpdateTable', 'UpdateContinuousBackups',
+        'CreateBackupVault', 'StartBackupJob'
+    ]
+
+    try:
+        event_counts = defaultdict(int)
+
+        # Query CloudTrail
+        paginator = ct_client.get_paginator('lookup_events')
+
+        for page in paginator.paginate(
+            StartTime=start_date,
+            EndTime=end_date,
+            MaxResults=50,
+            PaginationConfig={'MaxItems': 200}
+        ):
+            for event in page.get('Events', []):
+                event_name = event.get('EventName', '')
+                event_counts[event_name] += 1
+
+                # Capture write events
+                if event_name in write_event_names:
+                    event_detail = json.loads(event['CloudTrailEvent'])
+
+                    analysis['write_events'].append({
+                        'time': event.get('EventTime').isoformat(),
+                        'event_name': event_name,
+                        'username': event.get('Username', 'N/A'),
+                        'resources': [
+                            {
+                                'type': r.get('ResourceType', 'N/A'),
+                                'name': r.get('ResourceName', 'N/A')
+                            }
+                            for r in event.get('Resources', [])[:3]
+                        ]
+                    })
+
+        # Convert to regular dict and sort
+        analysis['event_summary'] = dict(sorted(
+            event_counts.items(),
+            key=lambda x: x[1],
+            reverse=True
+        ))
+
+    except Exception as e:
+        analysis['error'] = str(e)
+
+    return analysis
+
+
+def format_investigation_report(cost_data, inventories, cloudtrail_data=None):
+    """
+    Format investigation data into markdown report
+
+    Args:
+        cost_data: Cost analysis results from trends/drill
+        inventories: List of resource inventories
+        cloudtrail_data: List of CloudTrail analyses (optional)
+
+    Returns:
+        str: Markdown formatted report
+    """
+    report = []
+    report.append("# Cost Investigation Report")
+    report.append(f"**Generated:** {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')}")
+    report.append("")
+
+    # Cost Analysis Section
+    if cost_data:
+        report.append("## Cost Analysis")
+        report.append("")
+        # Add cost data formatting here
+        # This will be populated from trends/drill results
+
+    # Resource Inventory Section
+    if inventories:
+        report.append("## Resource Inventory")
+        report.append("")
+
+        for inv in inventories:
+            profile_name = inv.get('profile', inv['account_id'])
+            report.append(f"### Account {inv['account_id']} ({profile_name})")
+            report.append(f"**Region:** {inv['region']}")
+            report.append("")
+
+            # EC2 Instances
+            if inv['ec2_instances']:
+                report.append(f"**EC2 Instances:** {len(inv['ec2_instances'])} running")
+                for instance in inv['ec2_instances'][:10]:  # Show first 10
+                    report.append(f"- `{instance['instance_id']}`: {instance['instance_type']} ({instance['name']})")
+                    report.append(f"  - Launched: {instance['launch_time'][:10]}, AZ: {instance['availability_zone']}")
+                if len(inv['ec2_instances']) > 10:
+                    report.append(f"  ... and {len(inv['ec2_instances']) - 10} more")
+                report.append("")
+
+            # EFS File Systems
+            if inv['efs_file_systems']:
+                total_size = inv.get('total_efs_size_gb', 0)
+                report.append(f"**EFS File Systems:** {len(inv['efs_file_systems'])} total, {total_size:,.0f} GB")
+                for fs in inv['efs_file_systems']:
+                    report.append(f"- `{fs['file_system_id']}` ({fs['name']}): {fs['size_gb']:,.2f} GB")
+                    report.append(f"  - Created: {fs['creation_time'][:10]}")
+                report.append("")
+
+            # Load Balancers
+            if inv['load_balancers']:
+                report.append(f"**Load Balancers:** {len(inv['load_balancers'])}")
+                for lb in inv['load_balancers'][:10]:  # Show first 10
+                    report.append(f"- `{lb['name']}`: {lb['type']}")
+                    report.append(f"  - Created: {lb['created_time'][:10]}, Scheme: {lb['scheme']}")
+                if len(inv['load_balancers']) > 10:
+                    report.append(f"  ... and {len(inv['load_balancers']) - 10} more")
+                report.append("")
+
+            # DynamoDB Tables
+            if inv['dynamodb_tables']:
+                report.append(f"**DynamoDB Tables:** {len(inv['dynamodb_tables'])}")
+                for table in inv['dynamodb_tables'][:10]:
+                    report.append(f"- `{table['table_name']}`: {table['size_gb']:.2f} GB, {table['item_count']:,} items")
+                    report.append(f"  - PITR: {table['pitr_status']}, Created: {table['created_time'][:10]}")
+                if len(inv['dynamodb_tables']) > 10:
+                    report.append(f"  ... and {len(inv['dynamodb_tables']) - 10} more")
+                report.append("")
+
+            report.append("---")
+            report.append("")
+
+    # CloudTrail Section
+    if cloudtrail_data:
+        report.append("## CloudTrail Events")
+        report.append("")
+
+        for ct in cloudtrail_data:
+            profile_name = ct.get('profile', ct['account_id'])
+            report.append(f"### Account {ct['account_id']} ({profile_name})")
+            report.append(f"**Period:** {ct['start_date'][:10]} to {ct['end_date'][:10]}")
+            report.append("")
+
+            if ct.get('error'):
+                report.append(f"⚠️ Error: {ct['error']}")
+                report.append("")
+                continue
+
+            # Write events (resource changes)
+            if ct['write_events']:
+                report.append(f"**Resource Changes:** {len(ct['write_events'])} events")
+                for evt in ct['write_events'][:10]:
+                    report.append(f"- `{evt['time'][:19]}` - **{evt['event_name']}**")
+                    report.append(f"  - User: {evt['username']}")
+                    if evt['resources']:
+                        for res in evt['resources']:
+                            report.append(f"    - Resource: {res['type']} - {res['name']}")
+                report.append("")
+
+            # Event summary
+            if ct['event_summary']:
+                report.append("**Top Events:**")
+                for event_name, count in list(ct['event_summary'].items())[:15]:
+                    report.append(f"- {event_name}: {count}")
+                report.append("")
+
+            report.append("---")
+            report.append("")
+
+    return "\n".join(report)
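
For orientation, here is a minimal usage sketch (not part of the package diff) of how the three new forensics helpers could be chained together outside the CLI. The profile name and account ID are placeholders, and cost_data is left empty because in the CLI it would come from the trends/drill results:

    from datetime import datetime, timedelta
    from cost_calculator.forensics import (
        inventory_resources,
        analyze_cloudtrail,
        format_investigation_report,
    )

    ACCOUNT_ID = "111111111111"   # hypothetical account ID
    PROFILE = "my-sso-profile"    # hypothetical AWS SSO profile name

    # Snapshot what is currently running in the account
    inventory = inventory_resources(ACCOUNT_ID, PROFILE, region="us-west-2")

    # Look back two weeks for resource-changing CloudTrail events
    end = datetime.utcnow()
    trail = analyze_cloudtrail(ACCOUNT_ID, PROFILE,
                               start_date=end - timedelta(days=14), end_date=end)

    # Render both into the markdown investigation report
    report = format_investigation_report(cost_data=None,
                                         inventories=[inventory],
                                         cloudtrail_data=[trail])
    print(report)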

aws_cost_calculator_cli-1.6.3.dist-info/RECORD DELETED

@@ -1,25 +0,0 @@
-aws_cost_calculator_cli-1.6.3.dist-info/licenses/LICENSE,sha256=cYtmQZHNGGTXOtg3T7LHDRneleaH0dHXHfxFV3WR50Y,1079
-backend/__init__.py,sha256=PnH3-V1bIUu7nKDGdfPykzx0sz3x4lsLP0OheoAqY4U,18
-backend/algorithms/__init__.py,sha256=QWrMPtDO_nVOFzKm8yI6_RXdSE0n25RQAFnpS1GsGZs,21
-backend/algorithms/analyze.py,sha256=LvYuY83vIW162km3MvxrL1xsdFdpBqSUOWm7YZ-Tdyc,8922
-backend/algorithms/drill.py,sha256=hGi-prLgZDvNMMICQc4fl3LenM7YaZ3To_Ei4LKwrdc,10543
-backend/algorithms/monthly.py,sha256=6k9F8S7djhX1wGV3-T1MZP7CvWbbfhSTEaddwCfVu5M,7932
-backend/algorithms/trends.py,sha256=k_s4ylBX50sqoiM_fwepi58HW01zz767FMJhQUPDznk,12246
-backend/handlers/__init__.py,sha256=YGc9-XVhFcT5NOvnu0Vg5qaGy0Md0J_PNj8dJIM5PhE,19
-backend/handlers/analyze.py,sha256=ULeYYMpD5VS4qBd-WvPP_OjgbSLm9VzT6BZYHrt47eE,3546
-backend/handlers/drill.py,sha256=WQbqM5RvOcJHsUpBOR6PS-BtKHqszeCZ7xZu3499jPo,3847
-backend/handlers/monthly.py,sha256=A4B-BLrWHsR9OnhsTEvbYHIATbM5fBRIf_h-liczfE0,3415
-backend/handlers/profiles.py,sha256=tSnHxvGvwH4ynR0R-WrsPgz_VMgxaWHu-SnuhmGPxcs,5107
-backend/handlers/trends.py,sha256=loNvfoc1B-nAb-dTJVLf4TnRZ-UZd_AilyA2l4YahK8,3404
-cost_calculator/__init__.py,sha256=PJeIqvWh5AYJVrJxPPkI4pJnAt37rIjasrNS0I87kaM,52
-cost_calculator/api_client.py,sha256=LUzQmveDF0X9MqAyThp9mbSzJzkOO73Pk4F7IEJjASU,2353
-cost_calculator/cli.py,sha256=zGxFsErjmZAy9v7fqGQDS8qZf4lXoir8Fel0Ka7lw3Y,42235
-cost_calculator/drill.py,sha256=hGi-prLgZDvNMMICQc4fl3LenM7YaZ3To_Ei4LKwrdc,10543
-cost_calculator/executor.py,sha256=tVyyBtXIj9OPyG-xQj8CUmyFjDhb9IVK639360dUZDc,8076
-cost_calculator/monthly.py,sha256=6k9F8S7djhX1wGV3-T1MZP7CvWbbfhSTEaddwCfVu5M,7932
-cost_calculator/trends.py,sha256=k_s4ylBX50sqoiM_fwepi58HW01zz767FMJhQUPDznk,12246
-aws_cost_calculator_cli-1.6.3.dist-info/METADATA,sha256=VNV5xwWJraHcgf44FpX1TZEGpfkSkXB98BLnkGIo8n0,11506
-aws_cost_calculator_cli-1.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aws_cost_calculator_cli-1.6.3.dist-info/entry_points.txt,sha256=_5Qy4EcHbYVYrdgOu1E48faMHb9fLUl5VJ3djDHuJBo,47
-aws_cost_calculator_cli-1.6.3.dist-info/top_level.txt,sha256=YV8sPp9unLPDmK3ixw8-yoyVEKU3O4kskxvZAxFgIK0,24
-aws_cost_calculator_cli-1.6.3.dist-info/RECORD,,

backend/__init__.py DELETED

@@ -1 +0,0 @@
-# Backend package

backend/algorithms/__init__.py DELETED

@@ -1 +0,0 @@
-# Algorithms package

backend/algorithms/analyze.py DELETED

@@ -1,272 +0,0 @@
-"""
-Analysis algorithm using pandas for aggregations.
-Reuses existing algorithms and adds pandas-based analytics.
-"""
-import pandas as pd
-import numpy as np
-from datetime import datetime, timedelta
-from algorithms.trends import analyze_trends
-from algorithms.drill import analyze_drill_down
-
-
-def analyze_aggregated(ce_client, accounts, weeks=12, analysis_type='summary'):
-    """
-    Perform pandas-based analysis on cost data.
-
-    Args:
-        ce_client: boto3 Cost Explorer client
-        accounts: List of account IDs
-        weeks: Number of weeks to analyze
-        analysis_type: 'summary', 'volatility', 'trends', 'multi_group'
-
-    Returns:
-        dict with analysis results
-    """
-    # Get raw data from trends
-    trends_data = analyze_trends(ce_client, accounts, weeks)
-
-    # Convert to pandas DataFrame
-    rows = []
-    for comp in trends_data['wow_comparisons']:
-        week_label = comp['curr_week']['label']
-        for item in comp['increases'] + comp['decreases']:
-            rows.append({
-                'week': week_label,
-                'service': item['service'],
-                'prev_cost': item['prev_cost'],
-                'curr_cost': item['curr_cost'],
-                'change': item['change'],
-                'pct_change': item['pct_change']
-            })
-
-    df = pd.DataFrame(rows)
-
-    if df.empty:
-        return {'error': 'No data available'}
-
-    # Perform requested analysis
-    if analysis_type == 'summary':
-        return _analyze_summary(df, weeks)
-    elif analysis_type == 'volatility':
-        return _analyze_volatility(df)
-    elif analysis_type == 'trends':
-        return _detect_trends(df)
-    elif analysis_type == 'multi_group':
-        return _multi_group_analysis(ce_client, accounts, weeks)
-    else:
-        return {'error': f'Unknown analysis type: {analysis_type}'}
-
-
-def _analyze_summary(df, weeks):
-    """Aggregate summary statistics across all weeks."""
-    # Group by service and aggregate
-    summary = df.groupby('service').agg({
-        'change': ['sum', 'mean', 'std', 'min', 'max', 'count'],
-        'curr_cost': ['sum', 'mean']
-    }).round(2)
-
-    # Flatten column names
-    summary.columns = ['_'.join(col).strip() for col in summary.columns.values]
-    summary = summary.reset_index()
-
-    # Calculate coefficient of variation
-    summary['volatility'] = (summary['change_std'] / summary['change_mean'].abs()).fillna(0).round(3)
-
-    # Sort by total change
-    summary = summary.sort_values('change_sum', ascending=False)
-
-    # Convert to dict
-    results = summary.to_dict('records')
-
-    # Add percentiles
-    percentiles = df.groupby('service')['change'].sum().quantile([0.5, 0.9, 0.99]).to_dict()
-
-    return {
-        'analysis_type': 'summary',
-        'weeks_analyzed': weeks,
-        'total_services': len(results),
-        'services': results[:50],  # Top 50
-        'percentiles': {
-            'p50': round(percentiles.get(0.5, 0), 2),
-            'p90': round(percentiles.get(0.9, 0), 2),
-            'p99': round(percentiles.get(0.99, 0), 2)
-        }
-    }
-
-
-def _analyze_volatility(df):
-    """Identify services with high cost volatility."""
-    # Calculate volatility metrics
-    volatility = df.groupby('service').agg({
-        'change': ['mean', 'std', 'count']
-    })
-
-    volatility.columns = ['mean_change', 'std_change', 'weeks']
-    volatility['coefficient_of_variation'] = (volatility['std_change'] / volatility['mean_change'].abs()).fillna(0)
-
-    # Only services that appear in at least 3 weeks
-    volatility = volatility[volatility['weeks'] >= 3]
-
-    # Sort by CV
-    volatility = volatility.sort_values('coefficient_of_variation', ascending=False)
-    volatility = volatility.reset_index()
-
-    # Identify outliers (z-score > 2)
-    df['z_score'] = df.groupby('service')['change'].transform(
-        lambda x: (x - x.mean()) / x.std() if x.std() > 0 else 0
-    )
-    outliers = df[df['z_score'].abs() > 2][['week', 'service', 'change', 'z_score']].to_dict('records')
-
-    return {
-        'analysis_type': 'volatility',
-        'high_volatility_services': volatility.head(20).to_dict('records'),
-        'outliers': outliers[:20]
-    }
-
-
-def _detect_trends(df):
-    """Detect services with consistent increasing/decreasing trends."""
-    # Calculate trend for each service
-    trends = []
-
-    for service in df['service'].unique():
-        service_df = df[df['service'] == service].sort_values('week')
-
-        if len(service_df) < 3:
-            continue
-
-        # Calculate linear regression slope
-        x = np.arange(len(service_df))
-        y = service_df['change'].values
-
-        if len(x) > 1:
-            slope = np.polyfit(x, y, 1)[0]
-            avg_change = service_df['change'].mean()
-
-            # Classify trend
-            if slope > avg_change * 0.1:  # Increasing by >10% on average
-                trend_type = 'increasing'
-            elif slope < -avg_change * 0.1:  # Decreasing by >10%
-                trend_type = 'decreasing'
-            else:
-                trend_type = 'stable'
-
-            trends.append({
-                'service': service,
-                'trend': trend_type,
-                'slope': round(slope, 2),
-                'avg_change': round(avg_change, 2),
-                'weeks_analyzed': len(service_df)
-            })
-
-    # Separate by trend type
-    increasing = [t for t in trends if t['trend'] == 'increasing']
-    decreasing = [t for t in trends if t['trend'] == 'decreasing']
-    stable = [t for t in trends if t['trend'] == 'stable']
-
-    # Sort by slope magnitude
-    increasing.sort(key=lambda x: x['slope'], reverse=True)
-    decreasing.sort(key=lambda x: x['slope'])
-
-    return {
-        'analysis_type': 'trend_detection',
-        'increasing_trends': increasing[:20],
-        'decreasing_trends': decreasing[:20],
-        'stable_services': len(stable)
-    }
-
-
-def _multi_group_analysis(ce_client, accounts, weeks):
-    """Multi-dimensional grouping (service + account)."""
-    # Get drill-down data for all services
-    drill_data = analyze_drill_down(ce_client, accounts, weeks)
-
-    # Convert to DataFrame
-    rows = []
-    for comp in drill_data['comparisons']:
-        week = comp['curr_week']['label']
-        for item in comp['increases'] + comp['decreases']:
-            rows.append({
-                'week': week,
-                'dimension': item['dimension'],  # This is account when drilling by service
-                'change': item['change'],
-                'curr_cost': item['curr_cost']
-            })
-
-    df = pd.DataFrame(rows)
-
-    if df.empty:
-        return {'error': 'No drill-down data available'}
-
-    # Group by dimension (account) and aggregate
-    grouped = df.groupby('dimension').agg({
-        'change': ['sum', 'mean', 'count'],
-        'curr_cost': 'sum'
-    }).round(2)
-
-    grouped.columns = ['total_change', 'avg_change', 'weeks_appeared', 'total_cost']
-    grouped = grouped.sort_values('total_change', ascending=False).reset_index()
-
-    return {
-        'analysis_type': 'multi_group',
-        'group_by': drill_data.get('group_by', 'account'),
-        'groups': grouped.head(50).to_dict('records')
-    }
-
-
-def search_services(ce_client, accounts, weeks, pattern=None, min_cost=None):
-    """
-    Search and filter services.
-
-    Args:
-        ce_client: boto3 Cost Explorer client
-        accounts: List of account IDs
-        weeks: Number of weeks
-        pattern: Service name pattern (e.g., "EC2*", "*Compute*")
-        min_cost: Minimum total cost threshold
-
-    Returns:
-        dict with matching services
-    """
-    # Get trends data
-    trends_data = analyze_trends(ce_client, accounts, weeks)
-
-    # Convert to DataFrame
-    rows = []
-    for comp in trends_data['wow_comparisons']:
-        for item in comp['increases'] + comp['decreases']:
-            rows.append({
-                'service': item['service'],
-                'change': item['change'],
-                'curr_cost': item['curr_cost']
-            })
-
-    df = pd.DataFrame(rows)
-
-    if df.empty:
-        return {'matches': []}
-
-    # Aggregate by service
-    summary = df.groupby('service').agg({
-        'change': 'sum',
-        'curr_cost': 'sum'
-    }).reset_index()
-
-    # Apply filters
-    if pattern:
-        # Convert glob pattern to regex
-        import re
-        regex_pattern = pattern.replace('*', '.*').replace('?', '.')
-        summary = summary[summary['service'].str.contains(regex_pattern, case=False, regex=True)]
-
-    if min_cost:
-        summary = summary[summary['curr_cost'] >= min_cost]
-
-    # Sort by total cost
-    summary = summary.sort_values('curr_cost', ascending=False)
-
-    return {
-        'pattern': pattern,
-        'min_cost': min_cost,
-        'matches': summary.to_dict('records')
-    }
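
For reference, a rough sketch of how the removed module's two public entry points were shaped: both expected a boto3 Cost Explorer client plus a list of account IDs. This is illustrative only; the import path, profile name, account IDs, and thresholds below are assumptions, not taken from the diff:

    import boto3
    # Assumed import path as the module was packaged in 1.6.3; removed in 1.9.1
    from backend.algorithms.analyze import analyze_aggregated, search_services

    ce_client = boto3.Session(profile_name="my-sso-profile").client("ce")  # hypothetical SSO profile
    accounts = ["111111111111", "222222222222"]                            # hypothetical account IDs

    # Pandas summary of week-over-week changes across the last 12 weeks
    summary = analyze_aggregated(ce_client, accounts, weeks=12, analysis_type="summary")

    # Glob-style service search, e.g. EC2-related services above a $1,000 total
    matches = search_services(ce_client, accounts, weeks=12, pattern="EC2*", min_cost=1000)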