aws-cost-calculator-cli 1.6.3__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aws-cost-calculator-cli might be problematic.

@@ -0,0 +1,323 @@
+ """
+ Cost forensics module - Resource inventory and CloudTrail analysis
+ """
+ import boto3
+ from datetime import datetime, timedelta
+ from collections import defaultdict
+ import json
+
+
+ def inventory_resources(account_id, profile, region='us-west-2'):
+     """
+     Inventory AWS resources in an account
+
+     Args:
+         account_id: AWS account ID
+         profile: AWS profile name (SSO)
+         region: AWS region
+
+     Returns:
+         dict with resource inventory
+     """
+     session = boto3.Session(profile_name=profile)
+     inventory = {
+         'account_id': account_id,
+         'profile': profile,
+         'region': region,
+         'timestamp': datetime.utcnow().isoformat(),
+         'ec2_instances': [],
+         'efs_file_systems': [],
+         'load_balancers': [],
+         'dynamodb_tables': []
+     }
+
+     try:
+         # EC2 Instances
+         ec2_client = session.client('ec2', region_name=region)
+         instances_response = ec2_client.describe_instances()
+
+         for reservation in instances_response['Reservations']:
+             for instance in reservation['Instances']:
+                 if instance['State']['Name'] == 'running':
+                     name = 'N/A'
+                     for tag in instance.get('Tags', []):
+                         if tag['Key'] == 'Name':
+                             name = tag['Value']
+                             break
+
+                     inventory['ec2_instances'].append({
+                         'instance_id': instance['InstanceId'],
+                         'instance_type': instance['InstanceType'],
+                         'name': name,
+                         'state': instance['State']['Name'],
+                         'launch_time': instance['LaunchTime'].isoformat(),
+                         'availability_zone': instance['Placement']['AvailabilityZone']
+                     })
+
+         # EFS File Systems
+         efs_client = session.client('efs', region_name=region)
+         efs_response = efs_client.describe_file_systems()
+
+         total_efs_size = 0
+         for fs in efs_response['FileSystems']:
+             size_bytes = fs['SizeInBytes']['Value']
+             size_gb = size_bytes / (1024**3)
+             total_efs_size += size_gb
+
+             inventory['efs_file_systems'].append({
+                 'file_system_id': fs['FileSystemId'],
+                 'name': fs.get('Name', 'N/A'),
+                 'size_gb': round(size_gb, 2),
+                 'creation_time': fs['CreationTime'].isoformat(),
+                 'number_of_mount_targets': fs['NumberOfMountTargets']
+             })
+
+         inventory['total_efs_size_gb'] = round(total_efs_size, 2)
+
+         # Load Balancers
+         elbv2_client = session.client('elbv2', region_name=region)
+         elb_response = elbv2_client.describe_load_balancers()
+
+         for lb in elb_response['LoadBalancers']:
+             inventory['load_balancers'].append({
+                 'name': lb['LoadBalancerName'],
+                 'type': lb['Type'],
+                 'dns_name': lb['DNSName'],
+                 'scheme': lb['Scheme'],
+                 'created_time': lb['CreatedTime'].isoformat(),
+                 'availability_zones': [az['ZoneName'] for az in lb['AvailabilityZones']]
+             })
+
+         # DynamoDB Tables (only if region supports it)
+         try:
+             ddb_client = session.client('dynamodb', region_name=region)
+             tables_response = ddb_client.list_tables()
+
+             for table_name in tables_response['TableNames'][:20]:  # Limit to 20 tables
+                 table_desc = ddb_client.describe_table(TableName=table_name)
+                 table_info = table_desc['Table']
+
+                 # Get backup settings
+                 try:
+                     backup_desc = ddb_client.describe_continuous_backups(TableName=table_name)
+                     pitr_status = backup_desc['ContinuousBackupsDescription']['PointInTimeRecoveryDescription']['PointInTimeRecoveryStatus']
+                 except Exception:
+                     pitr_status = 'UNKNOWN'
+
+                 size_gb = table_info.get('TableSizeBytes', 0) / (1024**3)
+
+                 inventory['dynamodb_tables'].append({
+                     'table_name': table_name,
+                     'size_gb': round(size_gb, 2),
+                     'item_count': table_info.get('ItemCount', 0),
+                     'pitr_status': pitr_status,
+                     'created_time': table_info['CreationDateTime'].isoformat()
+                 })
+         except Exception:
+             # DynamoDB might not be available in all regions
+             pass
+
+     except Exception as e:
+         inventory['error'] = str(e)
+
+     return inventory
+
+
+ def analyze_cloudtrail(account_id, profile, start_date, end_date, region='us-west-2'):
+     """
+     Analyze CloudTrail events for an account
+
+     Args:
+         account_id: AWS account ID
+         profile: AWS profile name (SSO)
+         start_date: Start datetime
+         end_date: End datetime
+         region: AWS region
+
+     Returns:
+         dict with CloudTrail event summary
+     """
+     session = boto3.Session(profile_name=profile)
+     ct_client = session.client('cloudtrail', region_name=region)
+
+     analysis = {
+         'account_id': account_id,
+         'profile': profile,
+         'region': region,
+         'start_date': start_date.isoformat(),
+         'end_date': end_date.isoformat(),
+         'event_summary': {},
+         'write_events': [],
+         'error': None
+     }
+
+     # Events that indicate resource creation/modification
+     write_event_names = [
+         'RunInstances', 'CreateVolume', 'AttachVolume',
+         'CreateFileSystem', 'ModifyFileSystem',
+         'CreateLoadBalancer', 'ModifyLoadBalancerAttributes',
+         'CreateTable', 'UpdateTable', 'UpdateContinuousBackups',
+         'CreateBackupVault', 'StartBackupJob'
+     ]
+
+     try:
+         event_counts = defaultdict(int)
+
+         # Query CloudTrail
+         paginator = ct_client.get_paginator('lookup_events')
+
+         for page in paginator.paginate(
+             StartTime=start_date,
+             EndTime=end_date,
+             MaxResults=50,
+             PaginationConfig={'MaxItems': 200}
+         ):
+             for event in page.get('Events', []):
+                 event_name = event.get('EventName', '')
+                 event_counts[event_name] += 1
+
+                 # Capture write events
+                 if event_name in write_event_names:
+                     event_detail = json.loads(event['CloudTrailEvent'])
+
+                     analysis['write_events'].append({
+                         'time': event.get('EventTime').isoformat(),
+                         'event_name': event_name,
+                         'username': event.get('Username', 'N/A'),
+                         'resources': [
+                             {
+                                 'type': r.get('ResourceType', 'N/A'),
+                                 'name': r.get('ResourceName', 'N/A')
+                             }
+                             for r in event.get('Resources', [])[:3]
+                         ]
+                     })
+
+         # Convert to regular dict and sort
+         analysis['event_summary'] = dict(sorted(
+             event_counts.items(),
+             key=lambda x: x[1],
+             reverse=True
+         ))
+
+     except Exception as e:
+         analysis['error'] = str(e)
+
+     return analysis
+
+
+ def format_investigation_report(cost_data, inventories, cloudtrail_data=None):
+     """
+     Format investigation data into markdown report
+
+     Args:
+         cost_data: Cost analysis results from trends/drill
+         inventories: List of resource inventories
+         cloudtrail_data: List of CloudTrail analyses (optional)
+
+     Returns:
+         str: Markdown formatted report
+     """
+     report = []
+     report.append("# Cost Investigation Report")
+     report.append(f"**Generated:** {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')}")
+     report.append("")
+
+     # Cost Analysis Section
+     if cost_data:
+         report.append("## Cost Analysis")
+         report.append("")
+         # Add cost data formatting here
+         # This will be populated from trends/drill results
+
+     # Resource Inventory Section
+     if inventories:
+         report.append("## Resource Inventory")
+         report.append("")
+
+         for inv in inventories:
+             profile_name = inv.get('profile', inv['account_id'])
+             report.append(f"### Account {inv['account_id']} ({profile_name})")
+             report.append(f"**Region:** {inv['region']}")
+             report.append("")
+
+             # EC2 Instances
+             if inv['ec2_instances']:
+                 report.append(f"**EC2 Instances:** {len(inv['ec2_instances'])} running")
+                 for instance in inv['ec2_instances'][:10]:  # Show first 10
+                     report.append(f"- `{instance['instance_id']}`: {instance['instance_type']} ({instance['name']})")
+                     report.append(f"  - Launched: {instance['launch_time'][:10]}, AZ: {instance['availability_zone']}")
+                 if len(inv['ec2_instances']) > 10:
+                     report.append(f"  ... and {len(inv['ec2_instances']) - 10} more")
+                 report.append("")
+
+             # EFS File Systems
+             if inv['efs_file_systems']:
+                 total_size = inv.get('total_efs_size_gb', 0)
+                 report.append(f"**EFS File Systems:** {len(inv['efs_file_systems'])} total, {total_size:,.0f} GB")
+                 for fs in inv['efs_file_systems']:
+                     report.append(f"- `{fs['file_system_id']}` ({fs['name']}): {fs['size_gb']:,.2f} GB")
+                     report.append(f"  - Created: {fs['creation_time'][:10]}")
+                 report.append("")
+
+             # Load Balancers
+             if inv['load_balancers']:
+                 report.append(f"**Load Balancers:** {len(inv['load_balancers'])}")
+                 for lb in inv['load_balancers'][:10]:  # Show first 10
+                     report.append(f"- `{lb['name']}`: {lb['type']}")
+                     report.append(f"  - Created: {lb['created_time'][:10]}, Scheme: {lb['scheme']}")
+                 if len(inv['load_balancers']) > 10:
+                     report.append(f"  ... and {len(inv['load_balancers']) - 10} more")
+                 report.append("")
+
+             # DynamoDB Tables
+             if inv['dynamodb_tables']:
+                 report.append(f"**DynamoDB Tables:** {len(inv['dynamodb_tables'])}")
+                 for table in inv['dynamodb_tables'][:10]:
+                     report.append(f"- `{table['table_name']}`: {table['size_gb']:.2f} GB, {table['item_count']:,} items")
+                     report.append(f"  - PITR: {table['pitr_status']}, Created: {table['created_time'][:10]}")
+                 if len(inv['dynamodb_tables']) > 10:
+                     report.append(f"  ... and {len(inv['dynamodb_tables']) - 10} more")
+                 report.append("")
+
+             report.append("---")
+             report.append("")
+
+     # CloudTrail Section
+     if cloudtrail_data:
+         report.append("## CloudTrail Events")
+         report.append("")
+
+         for ct in cloudtrail_data:
+             profile_name = ct.get('profile', ct['account_id'])
+             report.append(f"### Account {ct['account_id']} ({profile_name})")
+             report.append(f"**Period:** {ct['start_date'][:10]} to {ct['end_date'][:10]}")
+             report.append("")
+
+             if ct.get('error'):
+                 report.append(f"⚠️ Error: {ct['error']}")
+                 report.append("")
+                 continue
+
+             # Write events (resource changes)
+             if ct['write_events']:
+                 report.append(f"**Resource Changes:** {len(ct['write_events'])} events")
+                 for evt in ct['write_events'][:10]:
+                     report.append(f"- `{evt['time'][:19]}` - **{evt['event_name']}**")
+                     report.append(f"  - User: {evt['username']}")
+                     if evt['resources']:
+                         for res in evt['resources']:
+                             report.append(f"    - Resource: {res['type']} - {res['name']}")
+                 report.append("")
+
+             # Event summary
+             if ct['event_summary']:
+                 report.append("**Top Events:**")
+                 for event_name, count in list(ct['event_summary'].items())[:15]:
+                     report.append(f"- {event_name}: {count}")
+                 report.append("")
+
+             report.append("---")
+             report.append("")
+
+     return "\n".join(report)
@@ -1,25 +0,0 @@
- aws_cost_calculator_cli-1.6.3.dist-info/licenses/LICENSE,sha256=cYtmQZHNGGTXOtg3T7LHDRneleaH0dHXHfxFV3WR50Y,1079
- backend/__init__.py,sha256=PnH3-V1bIUu7nKDGdfPykzx0sz3x4lsLP0OheoAqY4U,18
- backend/algorithms/__init__.py,sha256=QWrMPtDO_nVOFzKm8yI6_RXdSE0n25RQAFnpS1GsGZs,21
- backend/algorithms/analyze.py,sha256=LvYuY83vIW162km3MvxrL1xsdFdpBqSUOWm7YZ-Tdyc,8922
- backend/algorithms/drill.py,sha256=hGi-prLgZDvNMMICQc4fl3LenM7YaZ3To_Ei4LKwrdc,10543
- backend/algorithms/monthly.py,sha256=6k9F8S7djhX1wGV3-T1MZP7CvWbbfhSTEaddwCfVu5M,7932
- backend/algorithms/trends.py,sha256=k_s4ylBX50sqoiM_fwepi58HW01zz767FMJhQUPDznk,12246
- backend/handlers/__init__.py,sha256=YGc9-XVhFcT5NOvnu0Vg5qaGy0Md0J_PNj8dJIM5PhE,19
- backend/handlers/analyze.py,sha256=ULeYYMpD5VS4qBd-WvPP_OjgbSLm9VzT6BZYHrt47eE,3546
- backend/handlers/drill.py,sha256=WQbqM5RvOcJHsUpBOR6PS-BtKHqszeCZ7xZu3499jPo,3847
- backend/handlers/monthly.py,sha256=A4B-BLrWHsR9OnhsTEvbYHIATbM5fBRIf_h-liczfE0,3415
- backend/handlers/profiles.py,sha256=tSnHxvGvwH4ynR0R-WrsPgz_VMgxaWHu-SnuhmGPxcs,5107
- backend/handlers/trends.py,sha256=loNvfoc1B-nAb-dTJVLf4TnRZ-UZd_AilyA2l4YahK8,3404
- cost_calculator/__init__.py,sha256=PJeIqvWh5AYJVrJxPPkI4pJnAt37rIjasrNS0I87kaM,52
- cost_calculator/api_client.py,sha256=LUzQmveDF0X9MqAyThp9mbSzJzkOO73Pk4F7IEJjASU,2353
- cost_calculator/cli.py,sha256=zGxFsErjmZAy9v7fqGQDS8qZf4lXoir8Fel0Ka7lw3Y,42235
- cost_calculator/drill.py,sha256=hGi-prLgZDvNMMICQc4fl3LenM7YaZ3To_Ei4LKwrdc,10543
- cost_calculator/executor.py,sha256=tVyyBtXIj9OPyG-xQj8CUmyFjDhb9IVK639360dUZDc,8076
- cost_calculator/monthly.py,sha256=6k9F8S7djhX1wGV3-T1MZP7CvWbbfhSTEaddwCfVu5M,7932
- cost_calculator/trends.py,sha256=k_s4ylBX50sqoiM_fwepi58HW01zz767FMJhQUPDznk,12246
- aws_cost_calculator_cli-1.6.3.dist-info/METADATA,sha256=VNV5xwWJraHcgf44FpX1TZEGpfkSkXB98BLnkGIo8n0,11506
- aws_cost_calculator_cli-1.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- aws_cost_calculator_cli-1.6.3.dist-info/entry_points.txt,sha256=_5Qy4EcHbYVYrdgOu1E48faMHb9fLUl5VJ3djDHuJBo,47
- aws_cost_calculator_cli-1.6.3.dist-info/top_level.txt,sha256=YV8sPp9unLPDmK3ixw8-yoyVEKU3O4kskxvZAxFgIK0,24
- aws_cost_calculator_cli-1.6.3.dist-info/RECORD,,
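
Each deleted RECORD row above follows the wheel convention `path,sha256=<digest>,<size>` (PEP 376/427): the digest is the urlsafe-base64 encoding of the file's SHA-256 hash with trailing `=` padding stripped. A small sketch for recomputing a row's digest from an unpacked copy of the 1.6.3 wheel:

```python
import base64
import hashlib

def record_digest(path):
    """Recompute a wheel RECORD digest: urlsafe base64 of SHA-256, padding stripped."""
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

# If the extracted contents are unchanged, this should print
# 'PnH3-V1bIUu7nKDGdfPykzx0sz3x4lsLP0OheoAqY4U' for the 18-byte file recorded above.
print(record_digest('backend/__init__.py'))
```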
backend/__init__.py DELETED
@@ -1 +0,0 @@
- # Backend package
@@ -1 +0,0 @@
- # Algorithms package
@@ -1,272 +0,0 @@
- """
- Analysis algorithm using pandas for aggregations.
- Reuses existing algorithms and adds pandas-based analytics.
- """
- import pandas as pd
- import numpy as np
- from datetime import datetime, timedelta
- from algorithms.trends import analyze_trends
- from algorithms.drill import analyze_drill_down
-
-
- def analyze_aggregated(ce_client, accounts, weeks=12, analysis_type='summary'):
-     """
-     Perform pandas-based analysis on cost data.
-
-     Args:
-         ce_client: boto3 Cost Explorer client
-         accounts: List of account IDs
-         weeks: Number of weeks to analyze
-         analysis_type: 'summary', 'volatility', 'trends', 'multi_group'
-
-     Returns:
-         dict with analysis results
-     """
-     # Get raw data from trends
-     trends_data = analyze_trends(ce_client, accounts, weeks)
-
-     # Convert to pandas DataFrame
-     rows = []
-     for comp in trends_data['wow_comparisons']:
-         week_label = comp['curr_week']['label']
-         for item in comp['increases'] + comp['decreases']:
-             rows.append({
-                 'week': week_label,
-                 'service': item['service'],
-                 'prev_cost': item['prev_cost'],
-                 'curr_cost': item['curr_cost'],
-                 'change': item['change'],
-                 'pct_change': item['pct_change']
-             })
-
-     df = pd.DataFrame(rows)
-
-     if df.empty:
-         return {'error': 'No data available'}
-
-     # Perform requested analysis
-     if analysis_type == 'summary':
-         return _analyze_summary(df, weeks)
-     elif analysis_type == 'volatility':
-         return _analyze_volatility(df)
-     elif analysis_type == 'trends':
-         return _detect_trends(df)
-     elif analysis_type == 'multi_group':
-         return _multi_group_analysis(ce_client, accounts, weeks)
-     else:
-         return {'error': f'Unknown analysis type: {analysis_type}'}
-
-
- def _analyze_summary(df, weeks):
-     """Aggregate summary statistics across all weeks."""
-     # Group by service and aggregate
-     summary = df.groupby('service').agg({
-         'change': ['sum', 'mean', 'std', 'min', 'max', 'count'],
-         'curr_cost': ['sum', 'mean']
-     }).round(2)
-
-     # Flatten column names
-     summary.columns = ['_'.join(col).strip() for col in summary.columns.values]
-     summary = summary.reset_index()
-
-     # Calculate coefficient of variation
-     summary['volatility'] = (summary['change_std'] / summary['change_mean'].abs()).fillna(0).round(3)
-
-     # Sort by total change
-     summary = summary.sort_values('change_sum', ascending=False)
-
-     # Convert to dict
-     results = summary.to_dict('records')
-
-     # Add percentiles
-     percentiles = df.groupby('service')['change'].sum().quantile([0.5, 0.9, 0.99]).to_dict()
-
-     return {
-         'analysis_type': 'summary',
-         'weeks_analyzed': weeks,
-         'total_services': len(results),
-         'services': results[:50],  # Top 50
-         'percentiles': {
-             'p50': round(percentiles.get(0.5, 0), 2),
-             'p90': round(percentiles.get(0.9, 0), 2),
-             'p99': round(percentiles.get(0.99, 0), 2)
-         }
-     }
-
-
- def _analyze_volatility(df):
-     """Identify services with high cost volatility."""
-     # Calculate volatility metrics
-     volatility = df.groupby('service').agg({
-         'change': ['mean', 'std', 'count']
-     })
-
-     volatility.columns = ['mean_change', 'std_change', 'weeks']
-     volatility['coefficient_of_variation'] = (volatility['std_change'] / volatility['mean_change'].abs()).fillna(0)
-
-     # Only services that appear in at least 3 weeks
-     volatility = volatility[volatility['weeks'] >= 3]
-
-     # Sort by CV
-     volatility = volatility.sort_values('coefficient_of_variation', ascending=False)
-     volatility = volatility.reset_index()
-
-     # Identify outliers (z-score > 2)
-     df['z_score'] = df.groupby('service')['change'].transform(
-         lambda x: (x - x.mean()) / x.std() if x.std() > 0 else 0
-     )
-     outliers = df[df['z_score'].abs() > 2][['week', 'service', 'change', 'z_score']].to_dict('records')
-
-     return {
-         'analysis_type': 'volatility',
-         'high_volatility_services': volatility.head(20).to_dict('records'),
-         'outliers': outliers[:20]
-     }
-
-
- def _detect_trends(df):
-     """Detect services with consistent increasing/decreasing trends."""
-     # Calculate trend for each service
-     trends = []
-
-     for service in df['service'].unique():
-         service_df = df[df['service'] == service].sort_values('week')
-
-         if len(service_df) < 3:
-             continue
-
-         # Calculate linear regression slope
-         x = np.arange(len(service_df))
-         y = service_df['change'].values
-
-         if len(x) > 1:
-             slope = np.polyfit(x, y, 1)[0]
-             avg_change = service_df['change'].mean()
-
-             # Classify trend
-             if slope > avg_change * 0.1:  # Increasing by >10% on average
-                 trend_type = 'increasing'
-             elif slope < -avg_change * 0.1:  # Decreasing by >10%
-                 trend_type = 'decreasing'
-             else:
-                 trend_type = 'stable'
-
-             trends.append({
-                 'service': service,
-                 'trend': trend_type,
-                 'slope': round(slope, 2),
-                 'avg_change': round(avg_change, 2),
-                 'weeks_analyzed': len(service_df)
-             })
-
-     # Separate by trend type
-     increasing = [t for t in trends if t['trend'] == 'increasing']
-     decreasing = [t for t in trends if t['trend'] == 'decreasing']
-     stable = [t for t in trends if t['trend'] == 'stable']
-
-     # Sort by slope magnitude
-     increasing.sort(key=lambda x: x['slope'], reverse=True)
-     decreasing.sort(key=lambda x: x['slope'])
-
-     return {
-         'analysis_type': 'trend_detection',
-         'increasing_trends': increasing[:20],
-         'decreasing_trends': decreasing[:20],
-         'stable_services': len(stable)
-     }
-
-
- def _multi_group_analysis(ce_client, accounts, weeks):
-     """Multi-dimensional grouping (service + account)."""
-     # Get drill-down data for all services
-     drill_data = analyze_drill_down(ce_client, accounts, weeks)
-
-     # Convert to DataFrame
-     rows = []
-     for comp in drill_data['comparisons']:
-         week = comp['curr_week']['label']
-         for item in comp['increases'] + comp['decreases']:
-             rows.append({
-                 'week': week,
-                 'dimension': item['dimension'],  # This is account when drilling by service
-                 'change': item['change'],
-                 'curr_cost': item['curr_cost']
-             })
-
-     df = pd.DataFrame(rows)
-
-     if df.empty:
-         return {'error': 'No drill-down data available'}
-
-     # Group by dimension (account) and aggregate
-     grouped = df.groupby('dimension').agg({
-         'change': ['sum', 'mean', 'count'],
-         'curr_cost': 'sum'
-     }).round(2)
-
-     grouped.columns = ['total_change', 'avg_change', 'weeks_appeared', 'total_cost']
-     grouped = grouped.sort_values('total_change', ascending=False).reset_index()
-
-     return {
-         'analysis_type': 'multi_group',
-         'group_by': drill_data.get('group_by', 'account'),
-         'groups': grouped.head(50).to_dict('records')
-     }
-
-
- def search_services(ce_client, accounts, weeks, pattern=None, min_cost=None):
-     """
-     Search and filter services.
-
-     Args:
-         ce_client: boto3 Cost Explorer client
-         accounts: List of account IDs
-         weeks: Number of weeks
-         pattern: Service name pattern (e.g., "EC2*", "*Compute*")
-         min_cost: Minimum total cost threshold
-
-     Returns:
-         dict with matching services
-     """
-     # Get trends data
-     trends_data = analyze_trends(ce_client, accounts, weeks)
-
-     # Convert to DataFrame
-     rows = []
-     for comp in trends_data['wow_comparisons']:
-         for item in comp['increases'] + comp['decreases']:
-             rows.append({
-                 'service': item['service'],
-                 'change': item['change'],
-                 'curr_cost': item['curr_cost']
-             })
-
-     df = pd.DataFrame(rows)
-
-     if df.empty:
-         return {'matches': []}
-
-     # Aggregate by service
-     summary = df.groupby('service').agg({
-         'change': 'sum',
-         'curr_cost': 'sum'
-     }).reset_index()
-
-     # Apply filters
-     if pattern:
-         # Convert glob pattern to regex
-         import re
-         regex_pattern = pattern.replace('*', '.*').replace('?', '.')
-         summary = summary[summary['service'].str.contains(regex_pattern, case=False, regex=True)]
-
-     if min_cost:
-         summary = summary[summary['curr_cost'] >= min_cost]
-
-     # Sort by total cost
-     summary = summary.sort_values('curr_cost', ascending=False)
-
-     return {
-         'pattern': pattern,
-         'min_cost': min_cost,
-         'matches': summary.to_dict('records')
-     }
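
One detail of the deleted `search_services` worth noting: the glob-to-regex step only rewrites `*` to `.*` and `?` to `.`, and `str.contains` then performs a case-insensitive regex search rather than a full match, so the leading and trailing `*` in patterns like `*Compute*` are effectively redundant. A self-contained sketch of that matching behavior (the service names here are illustrative, not data from the package):

```python
import re

def glob_to_regex(pattern: str) -> str:
    """Mirror the deleted search_services translation: glob wildcards to regex."""
    return pattern.replace('*', '.*').replace('?', '.')

services = ['Amazon Elastic Compute Cloud - Compute', 'AmazonCloudWatch', 'Amazon Simple Storage Service']
rx = glob_to_regex('*Compute*')
print([s for s in services if re.search(rx, s, re.IGNORECASE)])
# ['Amazon Elastic Compute Cloud - Compute']
```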