argus-cloud-optimizer 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/aws/__init__.py +0 -0
  3. adapters/aws/adapter.py +85 -0
  4. adapters/aws/auth.py +57 -0
  5. adapters/aws/cloudtrail.py +83 -0
  6. adapters/aws/cloudwatch.py +732 -0
  7. adapters/aws/config.py +9 -0
  8. adapters/aws/cost_explorer.py +116 -0
  9. adapters/aws/resource_explorer.py +186 -0
  10. adapters/aws/retry.py +55 -0
  11. adapters/azure/__init__.py +0 -0
  12. adapters/azure/activity_log.py +159 -0
  13. adapters/azure/adapter.py +117 -0
  14. adapters/azure/cost_management.py +125 -0
  15. adapters/azure/monitor.py +311 -0
  16. adapters/azure/resource_graph.py +113 -0
  17. adapters/azure/retry.py +57 -0
  18. adapters/base.py +105 -0
  19. adapters/gcp/__init__.py +0 -0
  20. adapters/gcp/adapter.py +86 -0
  21. adapters/gcp/asset_inventory.py +116 -0
  22. adapters/gcp/billing.py +118 -0
  23. adapters/gcp/cloud_logging.py +93 -0
  24. adapters/gcp/cloud_monitoring.py +276 -0
  25. adapters/gcp/retry.py +46 -0
  26. ai/__init__.py +0 -0
  27. ai/anthropic.py +174 -0
  28. ai/azure_openai.py +241 -0
  29. ai/base.py +78 -0
  30. ai/bedrock.py +169 -0
  31. ai/vertexai.py +234 -0
  32. argus_cloud_optimizer-0.2.0.dist-info/METADATA +433 -0
  33. argus_cloud_optimizer-0.2.0.dist-info/RECORD +62 -0
  34. argus_cloud_optimizer-0.2.0.dist-info/WHEEL +5 -0
  35. argus_cloud_optimizer-0.2.0.dist-info/entry_points.txt +2 -0
  36. argus_cloud_optimizer-0.2.0.dist-info/licenses/LICENSE +21 -0
  37. argus_cloud_optimizer-0.2.0.dist-info/top_level.txt +4 -0
  38. core/__init__.py +0 -0
  39. core/__version__.py +1 -0
  40. core/agent/__init__.py +0 -0
  41. core/agent/loop.py +390 -0
  42. core/agent/prompts.py +317 -0
  43. core/config.py +235 -0
  44. core/log.py +69 -0
  45. core/models/__init__.py +0 -0
  46. core/models/finding.py +76 -0
  47. core/py.typed +0 -0
  48. core/reports/__init__.py +0 -0
  49. core/reports/comparison.py +49 -0
  50. core/reports/delivery.py +323 -0
  51. core/reports/export.py +111 -0
  52. core/reports/generator.py +168 -0
  53. core/reports/html.py +286 -0
  54. core/reports/multi_cloud.py +162 -0
  55. core/secrets.py +145 -0
  56. core/token_tracker.py +97 -0
  57. core/validation.py +214 -0
  58. entrypoints/__init__.py +0 -0
  59. entrypoints/aws_lambda.py +299 -0
  60. entrypoints/azure_function.py +257 -0
  61. entrypoints/cli.py +156 -0
  62. entrypoints/gcp_cloudrun.py +209 -0
ai/vertexai.py ADDED
@@ -0,0 +1,234 @@
1
+ """
2
+ AI provider backed by Google Vertex AI (Gemini models).
3
+
4
+ Authentication uses Application Default Credentials (ADC) — run:
5
+ gcloud auth application-default login
6
+
7
+ No API key needed when running on Cloud Run / GCE with the right service account.
8
+
9
+ Environment variables:
10
+ VERTEXAI_PROJECT GCP project ID (required)
11
+ VERTEXAI_LOCATION GCP region (default: us-central1)
12
+ VERTEXAI_MODEL Model name (default: gemini-1.5-pro-002)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import time
19
+ from typing import Any
20
+
21
+ import google.auth
22
+ import google.auth.transport.requests
23
+ import openai
24
+ import structlog
25
+
26
+ from ai.base import AIProvider, AIResponse, Message, Tool, ToolCall
27
+
28
+ logger = structlog.get_logger(__name__)
29
+
30
+ MAX_RETRIES = 3  # total attempts _call_with_retry makes before re-raising RateLimitError
31
+ _BASE_DELAY = 1.0  # seconds slept before the first retry; doubled after each rate-limited attempt
32
+
33
+
34
class VertexAIProvider(AIProvider):
    """
    AI provider backed by Vertex AI Gemini models.

    Requests go through Vertex AI's OpenAI-compatible endpoint using the
    ``openai`` SDK, so google-cloud-aiplatform is not needed as a dependency.
    Authentication uses Application Default Credentials: the OAuth access
    token obtained from ``google.auth.default()`` is handed to the OpenAI
    client as its API key and refreshed when it is no longer valid.

    Model: gemini-1.5-pro-002 (default) — supports function calling + large context.
    """

    DEFAULT_MODEL = "gemini-1.5-pro-002"
    DEFAULT_LOCATION = "us-central1"
    DEFAULT_MAX_TOKENS = 4096
    DEFAULT_TEMPERATURE = 0.0

    def __init__(
        self,
        project: str | None = None,
        location: str | None = None,
        model: str | None = None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        temperature: float | None = None,
    ) -> None:
        """
        Build a provider bound to one GCP project, region, and model.

        Explicit arguments take precedence over the values in
        ``get_settings().ai``; location and model finally fall back to the
        class-level defaults.

        Args:
            project: GCP project ID. Falls back to ``cfg.vertexai_project``.
            location: GCP region. Falls back to ``cfg.vertexai_location``,
                then ``DEFAULT_LOCATION``.
            model: Model name. Falls back to ``cfg.resolved_model("vertexai")``,
                then ``DEFAULT_MODEL``.
            max_tokens: ``max_tokens`` value sent with every chat request.
            temperature: Sampling temperature; ``None`` means use
                ``cfg.temperature``.

        Raises:
            EnvironmentError: If no project is supplied or configured.
        """
        # Imported lazily, as in the original implementation.
        from core.config import get_settings

        cfg = get_settings().ai
        self._project = project or cfg.vertexai_project
        if not self._project:
            raise EnvironmentError(
                "VERTEXAI_PROJECT is not set. "
                "Set it in .env or pass project= explicitly."
            )
        # The location is interpolated into the endpoint host name, so an empty
        # value must never reach the URL below; fall back to the class default.
        self._location = location or cfg.vertexai_location or self.DEFAULT_LOCATION
        self._model = model or cfg.resolved_model("vertexai") or self.DEFAULT_MODEL
        self._max_tokens = max_tokens
        self._temperature = temperature if temperature is not None else cfg.temperature

        # Use openai SDK with the Vertex AI endpoint.
        # This avoids adding google-cloud-aiplatform as a dependency.
        credentials, _ = google.auth.default(
            scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )
        # Refresh eagerly so that credentials.token is populated before it is
        # passed to the client as the API key.
        credentials.refresh(google.auth.transport.requests.Request())

        self._client = openai.OpenAI(
            base_url=(
                f"https://{self._location}-aiplatform.googleapis.com/v1beta1/"
                f"projects/{self._project}/locations/{self._location}/endpoints/openapi"
            ),
            api_key=credentials.token,
        )
        # Kept so _call_with_retry can refresh the token later.
        self._credentials = credentials

    @classmethod
    def from_env(cls) -> "VertexAIProvider":
        """Create a provider configured entirely from settings."""
        return cls()

    def chat(
        self,
        messages: list[Message],
        tools: list[Tool],
        system_prompt: str | None = None,
    ) -> AIResponse:
        """
        Send one chat-completion request and return the normalised response.

        Args:
            messages: Conversation history in the provider-neutral format.
            tools: Tools the model may call; when empty, no tool fields are sent.
            system_prompt: Optional system message placed before the history.

        Returns:
            The model's reply converted by ``_parse_response``.
        """
        kwargs: dict[str, Any] = {
            "model": self._model,
            "messages": self._build_messages(messages, system_prompt),
            "max_tokens": self._max_tokens,
            "temperature": self._temperature,
        }
        if tools:
            kwargs["tools"] = [self._to_openai_tool(t) for t in tools]
            kwargs["tool_choice"] = "auto"

        response = self._call_with_retry(kwargs)
        return self._parse_response(response)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _call_with_retry(self, kwargs: dict[str, Any]) -> Any:
        """
        Call the chat-completions endpoint, retrying on rate limiting.

        Makes up to ``MAX_RETRIES`` attempts. After each rate-limited attempt
        except the last it sleeps, starting at ``_BASE_DELAY`` seconds and
        doubling each time. Only ``openai.RateLimitError`` is retried; any
        other exception propagates immediately.

        Raises:
            openai.RateLimitError: If the final attempt is still rate limited.
        """
        delay = _BASE_DELAY
        for attempt in range(MAX_RETRIES):
            try:
                # Refresh credentials if they may have expired (1-hour TTL)
                if not self._credentials.valid:
                    self._credentials.refresh(google.auth.transport.requests.Request())
                    self._client.api_key = self._credentials.token

                return self._client.chat.completions.create(**kwargs)
            except openai.RateLimitError:
                if attempt >= MAX_RETRIES - 1:
                    raise
                logger.warning(
                    "Vertex AI rate limited (attempt %d/%d), retrying in %.1fs",
                    attempt + 1,
                    MAX_RETRIES,
                    delay,
                )
                time.sleep(delay)
                delay *= 2
        raise RuntimeError("Unreachable")  # pragma: no cover

    def _build_messages(
        self,
        messages: list[Message],
        system_prompt: str | None,
    ) -> list[dict[str, Any]]:
        """
        Convert provider-neutral messages into OpenAI chat message dicts.

        A user message carrying tool results becomes one ``"tool"`` message per
        result; any other user message becomes a single ``"user"`` message.
        Every non-user message is emitted as an ``"assistant"`` message,
        with ``tool_calls`` attached when it has any.
        """
        result: list[dict[str, Any]] = []

        if system_prompt:
            result.append({"role": "system", "content": system_prompt})

        for msg in messages:
            if msg.role == "user":
                if msg.tool_results:
                    # Each tool result is its own message in the OpenAI protocol.
                    # NOTE(review): msg.text is not forwarded for messages that
                    # carry tool results — confirm callers never set both.
                    result.extend(
                        {
                            "role": "tool",
                            "tool_call_id": tr.tool_call_id,
                            "content": tr.content,
                        }
                        for tr in msg.tool_results
                    )
                else:
                    result.append({"role": "user", "content": msg.text or ""})
                continue

            # assistant — may have text, tool_calls, or both
            assistant_msg: dict[str, Any] = {
                "role": "assistant",
                "content": msg.text or "",
            }
            tool_calls_out = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.name,
                        # The OpenAI protocol carries arguments as a JSON string.
                        "arguments": json.dumps(tc.arguments),
                    },
                }
                # Tolerate a missing tool_calls value on text-only replies.
                for tc in (msg.tool_calls or [])
            ]
            if tool_calls_out:
                assistant_msg["tool_calls"] = tool_calls_out
            result.append(assistant_msg)

        return result

    def _to_openai_tool(self, tool: Tool) -> dict[str, Any]:
        """Wrap a tool's name, description, and input schema as an OpenAI function tool."""
        return {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.input_schema,
            },
        }

    def _parse_response(self, response: Any) -> AIResponse:
        """
        Convert a chat-completions response into an ``AIResponse``.

        Only the first choice is read. The stop reason is normalised as
        follows: any tool call yields ``"tool_use"``, ``"stop"`` becomes
        ``"end_turn"``, and ``"length"`` becomes ``"max_tokens"``. Any other
        value is passed through unchanged. Token counts default to 0 when the
        response has no usage data.

        Raises:
            json.JSONDecodeError: If a tool call carries a non-empty arguments
                string that is not valid JSON.
        """
        choice = response.choices[0]
        message = choice.message
        stop_reason = choice.finish_reason  # "stop" | "tool_calls" | "length"

        text: str | None = message.content or None
        tool_calls: list[ToolCall] = []

        for tc in message.tool_calls or []:
            raw_arguments = tc.function.arguments
            tool_calls.append(
                ToolCall(
                    id=tc.id,
                    name=tc.function.name,
                    # An empty or missing payload is not valid JSON; treat it
                    # as "no arguments" instead of failing the whole response.
                    arguments=json.loads(raw_arguments) if raw_arguments else {},
                )
            )

        # Normalise finish_reason to our internal vocabulary
        if tool_calls:
            stop_reason = "tool_use"
        elif stop_reason == "stop":
            stop_reason = "end_turn"
        elif stop_reason == "length":
            stop_reason = "max_tokens"

        usage = getattr(response, "usage", None)
        return AIResponse(
            stop_reason=stop_reason,
            text=text,
            tool_calls=tool_calls,
            input_tokens=getattr(usage, "prompt_tokens", 0) if usage else 0,
            output_tokens=getattr(usage, "completion_tokens", 0) if usage else 0,
        )
@@ -0,0 +1,433 @@
1
+ Metadata-Version: 2.4
2
+ Name: argus-cloud-optimizer
3
+ Version: 0.2.0
4
+ Summary: AI-powered multi-cloud cost optimization agent
5
+ Author-email: Vamshi Siddarth Gaddam <vamshisiddarth02@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/vamshisiddarth/argus
8
+ Project-URL: Documentation, https://vamshisiddarth.github.io/argus/
9
+ Project-URL: Repository, https://github.com/vamshisiddarth/argus
10
+ Project-URL: Issues, https://github.com/vamshisiddarth/argus/issues
11
+ Keywords: cloud,cost-optimization,aws,gcp,azure,finops
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: System Administrators
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: System :: Systems Administration
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: anthropic>=0.40.0
23
+ Requires-Dist: python-dotenv>=1.0.1
24
+ Requires-Dist: PyYAML>=6.0.2
25
+ Requires-Dist: pydantic>=2.9.2
26
+ Requires-Dist: pydantic-settings>=2.6.0
27
+ Requires-Dist: python-dateutil>=2.9.0
28
+ Requires-Dist: structlog>=24.4.0
29
+ Requires-Dist: boto3>=1.35.36
30
+ Requires-Dist: botocore>=1.35.36
31
+ Requires-Dist: google-cloud-asset>=3.26.0
32
+ Requires-Dist: google-cloud-monitoring>=2.23.0
33
+ Requires-Dist: google-cloud-billing>=1.14.0
34
+ Requires-Dist: google-cloud-logging>=3.11.3
35
+ Requires-Dist: google-cloud-secret-manager>=2.20.0
36
+ Requires-Dist: azure-identity>=1.19.0
37
+ Requires-Dist: azure-mgmt-resourcegraph>=8.0.0
38
+ Requires-Dist: azure-monitor-query<2.0.0,>=1.4.0
39
+ Requires-Dist: azure-mgmt-costmanagement>=4.0.0
40
+ Requires-Dist: azure-keyvault-secrets>=4.8.0
41
+ Requires-Dist: openai>=1.55.0
42
+ Provides-Extra: export
43
+ Requires-Dist: weasyprint>=62.0; extra == "export"
44
+ Requires-Dist: python-pptx>=1.0.0; extra == "export"
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=8.3.3; extra == "dev"
47
+ Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
48
+ Requires-Dist: pytest-mock>=3.14.0; extra == "dev"
49
+ Requires-Dist: pytest-timeout>=2.3.1; extra == "dev"
50
+ Requires-Dist: moto[s3,sts]>=5.0.16; extra == "dev"
51
+ Requires-Dist: freezegun>=1.5.1; extra == "dev"
52
+ Requires-Dist: black>=24.10.0; extra == "dev"
53
+ Requires-Dist: ruff>=0.7.1; extra == "dev"
54
+ Requires-Dist: mypy>=1.13.0; extra == "dev"
55
+ Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
56
+ Requires-Dist: types-python-dateutil>=2.9.0; extra == "dev"
57
+ Requires-Dist: boto3-stubs[bedrock-runtime,ce,cloudwatch,resourceexplorer2,s3,sts]>=1.35.36; extra == "dev"
58
+ Requires-Dist: mkdocs-material>=9.5.44; extra == "dev"
59
+ Requires-Dist: mkdocs-minify-plugin>=0.8.0; extra == "dev"
60
+ Dynamic: license-file
61
+
62
+ <p align="center">
63
+ <img src="docs/assets/images/logo-full.svg" alt="Argus" height="72">
64
+ </p>
65
+
66
+ <p align="center"><strong>AI-powered cloud cost optimization agent for AWS, GCP, and Azure.</strong></p>
67
+
68
+ Argus finds idle and wasted cloud resources — stopped EC2 instances, unattached EBS volumes, orphaned Elastic IPs, underutilized RDS databases — and delivers a prioritized, AI-reasoned report to Slack every week.
69
+
70
+ [![CI](https://github.com/vamshisiddarth/argus/actions/workflows/ci.yml/badge.svg)](https://github.com/vamshisiddarth/argus/actions/workflows/ci.yml)
71
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
72
+ [![PyPI](https://img.shields.io/pypi/v/argus-cloud-optimizer.svg)](https://pypi.org/project/argus-cloud-optimizer/)
73
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
74
+ [![Docs](https://img.shields.io/badge/docs-vamshisiddarth.github.io%2Fargus-blue)](https://vamshisiddarth.github.io/argus/)
75
+
76
+ <p align="center">
77
+ <img src="docs/assets/images/slack-demo.png" alt="Argus Slack report" width="600">
78
+ </p>
79
+
80
+ ---
81
+
82
+ ## What it does
83
+
84
+ Every week (or on demand), Argus:
85
+
86
+ 1. **Discovers** every resource in your cloud account using AWS Resource Explorer / GCP Asset Inventory / Azure Resource Graph
87
+ 2. **Analyzes** each candidate — CloudWatch/Cloud Monitoring/Azure Monitor metrics, Cost Explorer/BigQuery/Cost Management cost data, and CloudTrail/Audit Log/Activity Log last-activity timestamps
88
+ 3. **Reasons** about idleness using an LLM (Claude via the Anthropic API or AWS Bedrock, Gemini via Vertex AI, or GPT-4o via Azure OpenAI) — no hardcoded thresholds
89
+ 4. **Reports** a compact digest (Slack, Microsoft Teams, or generic webhook) with top findings and a link to a full self-contained HTML report
90
+
91
+ Example Slack output:
92
+
93
+ ```
94
+ Argus — AWS Waste Report (2026-06-17)
95
+
96
+ 💸 $42.65/month estimated waste 📊 4 idle resources across 1 account
97
+
98
+ Two stopped EC2 instances and a forgotten NAT Gateway account for 72% of
99
+ total waste. One EBS volume has had no I/O in over 30 days.
100
+
101
+ Top findings
102
+ 🔴 i-0abc123def · EC2 t3.large · $28.40/mo
103
+ 🔴 nat-0def456 · NAT Gateway · $10.80/mo
104
+ 🟡 vol-orphan · EBS gp3 100GiB · $8.00/mo
105
+ 🟢 eipalloc-xyz · Elastic IP · $3.65/mo
106
+
107
+ [ 📄 Full report (HTML) ] [ vamshisiddarth/argus ]
108
+ ```
109
+
110
+ The **Full report** button links to a self-contained HTML file (S3 / GCS / Azure Blob) with a filterable/sortable table and expandable AI reasoning per finding. Works offline, no login required.
111
+
112
+ > **See a realistic example:** [`examples/sample-report-aws.json`](examples/sample-report-aws.json) — 5 findings from a real-looking AWS scan with AI-written reasoning, metrics, and cost data.
113
+
114
+ ---
115
+
116
+ ## Architecture
117
+
118
+ ```
119
+ ┌─────────────────────────────────────────────────────────┐
120
+ │ Agent Loop (ReAct) │
121
+ │ Think → Call Tool → Observe → Think → Submit │
122
+ └────────────────────┬────────────────────────────────────┘
123
+
124
+ ┌────────────┴────────────┐
125
+ ▼ ▼
126
+ CloudAdapter AIProvider
127
+ (AWS / GCP / Azure) (Bedrock / Anthropic / Vertex / Azure OpenAI)
128
+
129
+ ┌────┴──────────────────┐
130
+ │ list_resources │ Resource Explorer / Asset Inventory / Resource Graph
131
+ │ get_metrics │ CloudWatch / Cloud Monitoring / Azure Monitor
132
+ │ get_cost │ Cost Explorer / BigQuery / Cost Management
133
+ │ get_last_activity │ CloudTrail / Audit Logs / Activity Log
134
+ └───────────────────────┘
135
+ ```
136
+
137
+ **Design principle: Same brain. Different hands. Different home.**
138
+ - **Brain** = agent loop + AI reasoning (`core/`) — pure Python, zero cloud imports
139
+ - **Hands** = cloud adapters (`adapters/`) — swappable per cloud
140
+ - **Home** = entrypoints (`entrypoints/`) — Lambda / Cloud Run / Azure Function
141
+
142
+ ---
143
+
144
+ ## Quick start
145
+
146
+ ### Option A — Docker (fastest)
147
+
148
+ ```bash
149
+ docker build --build-arg CLOUD=aws -t argus .
150
+
151
+ docker run --rm \
152
+ -e ANTHROPIC_API_KEY=sk-ant-... \
153
+ -e DRY_RUN=true \
154
+ -v ~/.aws:/root/.aws:ro \
155
+ argus --cloud aws --run-now --dry-run
156
+ ```
157
+
158
+ ### Option B — Install from PyPI
159
+
160
+ **Prerequisites**
161
+ - Python 3.11+
162
+ - Cloud credentials configured (see below)
163
+ - An Anthropic API key **or** cloud-native AI access (Bedrock / Vertex AI / Azure OpenAI)
164
+
165
+ ```bash
166
+ pip install argus-cloud-optimizer
167
+ ```
168
+
169
+ One package — all three clouds included. No extras needed.
170
+
171
+ > **AWS-specific setup:** Enable [Resource Explorer](https://docs.aws.amazon.com/resource-explorer/latest/userguide/) with an **aggregator index** in `us-east-1` (or set `RESOURCE_EXPLORER_REGION` to your aggregator region). Without this, Argus cannot discover resources.
172
+
173
+ Set minimum env vars:
174
+
175
+ ```bash
176
+ export AI_PROVIDER=anthropic
177
+ export ANTHROPIC_API_KEY=sk-ant-...
178
+ export DRY_RUN=true # remove to post to Slack
179
+ ```
180
+
181
+ ```bash
182
+ argus --cloud aws --run-now --dry-run
183
+ ```
184
+
185
+ ### Option C — Clone and develop
186
+
187
+ ```bash
188
+ git clone https://github.com/vamshisiddarth/argus.git
189
+ cd argus
190
+ pip install -e ".[export,dev]"
191
+ cp .env.example .env # edit with your values
192
+ argus --cloud aws --run-now
193
+ ```
194
+
195
+ ### CLI Options
196
+
197
+ ```
198
+ argus --cloud aws|gcp|azure --run-now [options]
199
+
200
+ -V, --version Show version and exit
201
+ --dry-run Print notification payload instead of posting
202
+ --ignore-regions REGIONS Comma-separated regions to skip (e.g. ap-east-1,me-south-1)
203
+ --ai-provider PROVIDER anthropic | bedrock | vertexai | azure_openai (default: anthropic)
204
+ --accounts PATH Path to accounts.yaml for multi-account mode (AWS only)
205
+ --max-resources N Maximum resources to analyze per scan (default: 200)
206
+ --lookback-days DAYS Metrics lookback window in days (default: 90, use 14 for faster local dev)
207
+ ```
208
+
209
+ ---
210
+
211
+ ## Deploy to AWS Lambda
212
+
213
+ Uses [AWS SAM](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html) — handles packaging and upload automatically. No S3 bucket needed.
214
+
215
+ ### Single account
216
+
217
+ ```bash
218
+ make deploy-aws
219
+ # or manually:
220
+ cd deploy/aws/single-account
221
+ sam build && sam deploy --guided
222
+ ```
223
+
224
+ `sam deploy --guided` walks you through parameters (Slack webhook, region, AI provider) and saves them to `samconfig.toml` for future deploys. Subsequent deploys are just `sam deploy`.
225
+
226
+ The stack creates:
227
+ - Lambda function (runs weekly via EventBridge)
228
+ - IAM role with least-privilege read-only permissions
229
+ - S3 bucket for full JSON report storage (90-day retention)
230
+
231
+ ### Multi-account
232
+
233
+ **Hub account** (runs Argus):
234
+
235
+ ```bash
236
+ make deploy-aws-multi
237
+ # or manually:
238
+ cd deploy/aws/multi-account/hub
239
+ sam build && sam deploy --guided
240
+ ```
241
+
242
+ **Each spoke account** (read-only IAM role only — no Lambda):
243
+
244
+ ```bash
245
+ aws cloudformation deploy \
246
+ --template-file deploy/aws/multi-account/spoke-role.yaml \
247
+ --stack-name Argus-Spoke \
248
+ --capabilities CAPABILITY_IAM \
249
+ --parameter-overrides HubAccountId=<hub-account-id>
250
+ ```
251
+
252
+ The hub stack output includes the `HubRoleArn` — use it as the `HubRoleArn` parameter for spoke deployments.
253
+
254
+ ---
255
+
256
+ ## Deploy to GCP (Cloud Run)
257
+
258
+ ```bash
259
+ # Authenticate
260
+ gcloud auth application-default login
261
+
262
+ # Set your project
263
+ gcloud config set project YOUR_PROJECT_ID
264
+
265
+ # Deploy
266
+ bash deploy/gcp/deploy.sh
267
+ ```
268
+
269
+ Requires: Cloud Run API, Cloud Scheduler API, BigQuery billing export enabled.
270
+
271
+ ---
272
+
273
+ ## Deploy to Azure (Function App)
274
+
275
+ ```bash
276
+ # Authenticate
277
+ az login
278
+
279
+ # Deploy
280
+ az deployment group create \
281
+ --resource-group Argus-RG \
282
+ --template-file deploy/azure/function-app.bicep \
283
+ --parameters subscriptionIds="sub-id-1,sub-id-2" \
284
+ slackWebhookUrl="https://hooks.slack.com/services/..."
285
+ ```
286
+
287
+ ---
288
+
289
+ ## AI providers
290
+
291
+ | Provider | Use case | Setup |
292
+ |----------|----------|-------|
293
+ | Anthropic API | Local dev, any cloud | Set `ANTHROPIC_API_KEY` |
294
+ | AWS Bedrock | AWS production | IAM role — no key needed |
295
+ | Vertex AI (Gemini) | GCP production | ADC — no key needed |
296
+ | Azure OpenAI (GPT-4o) | Azure production | Managed identity — no key needed |
297
+
298
+ Set `AI_PROVIDER=anthropic|bedrock|vertexai|azure_openai` in `.env` or the deployment environment. Use `AI_MODEL` to override the model for any provider, and `AI_TEMPERATURE` to control creativity (default: `0.0`).
299
+
300
+ ---
301
+
302
+ ## Multi-account setup
303
+
304
+ Create `accounts.yaml`:
305
+
306
+ ```yaml
307
+ mode: multi
308
+
309
+ accounts:
310
+ - id: "111122223333"
311
+ name: dev
312
+ role_arn: arn:aws:iam::111122223333:role/ArgusSpokeRole
313
+ - id: "444455556666"
314
+ name: prod
315
+ role_arn: arn:aws:iam::444455556666:role/ArgusSpokeRole
316
+ ```
317
+
318
+ Then run:
319
+
320
+ ```bash
321
+ argus --cloud aws --run-now --accounts accounts.yaml
322
+ ```
323
+
324
+ ---
325
+
326
+ ## IAM permissions (AWS)
327
+
328
+ Argus needs **read-only** access. The Lambda execution role requires:
329
+
330
+ ```
331
+ resource-explorer-2:Search
332
+ resource-explorer-2:GetView
333
+ cloudwatch:GetMetricData
334
+ ce:GetCostAndUsage
335
+ ce:GetCostAndUsageWithResources
336
+ cloudtrail:LookupEvents
337
+ bedrock:InvokeModel # only if AI_PROVIDER=bedrock
338
+ sts:AssumeRole # only for multi-account mode
339
+ s3:PutObject # only if REPORT_S3_BUCKET is set
340
+ ```
341
+
342
+ No write permissions are ever requested.
343
+
344
+ > **Cost Explorer note:** `GetCostAndUsageWithResources` requires resource-level cost allocation
345
+ > to be enabled in AWS Cost Management → Preferences → Resource-level data.
346
+ > If not enabled, Argus logs a warning and continues — cost fields will show $0.00.
347
+
348
+ ---
349
+
350
+ ## Limitations & known issues
351
+
352
+ Before you invest time deploying Argus, know what it **can't** do yet:
353
+
354
+ | Area | Status | Details |
355
+ |------|--------|---------|
356
+ | **Resource discovery** | AWS: strong, GCP/Azure: adequate | AWS covers 43 resource types via Resource Explorer. GCP covers 22 asset types; Azure covers 25 via Resource Graph. Some niche resource types (e.g. AWS Glue, SageMaker endpoints) are not yet mapped. |
357
+ | **Cost accuracy** | Best-effort | AWS Cost Explorer charges $0.01/API call — Argus batches aggressively (max 2 calls/scan). GCP requires BigQuery billing export enabled. Azure cost data depends on subscription-level access. Resource-level cost allocation must be enabled in AWS for per-resource costs; without it, costs show $0.00. |
358
+ | **AI non-determinism** | By design | The AI decides what's idle — different runs may produce slightly different findings or reasoning. Set `AI_TEMPERATURE=0.0` (default) for most consistent results. |
359
+ | **LLM cost** | Configurable | A full scan of ~200 resources costs ~$0.05–$0.50 in LLM API fees depending on provider. Use `--llm-budget` to set a hard cap (default: $2.00/scan). Large estates (1000+ resources) will hit the budget limit — increase it or use `--max-resources`. |
360
+ | **AWS Resource Explorer setup** | Manual step | You must enable Resource Explorer with an **aggregator index** (typically in `us-east-1`). Without this, Argus cannot discover resources. This is a one-time setup but is easy to miss. |
361
+ | **Write actions** | None | Argus is read-only. It reports findings but never deletes, stops, or modifies resources. Remediation is manual. |
362
+ | **Multi-cloud in one scan** | Not yet | Each `argus` invocation scans one cloud. Use the merge report feature (`core/reports/multi_cloud.py`) to combine results after separate runs. |
363
+ | **Notifications** | Slack + Teams + webhook | No email. Slack/Teams delivery requires a webhook URL. |
364
+
365
+ ### Multi-cloud parity
366
+
367
+ | Capability | AWS | GCP | Azure |
368
+ |-----------|-----|-----|-------|
369
+ | Resource discovery | 43 types (Resource Explorer) | 22 types (Asset Inventory) | 25 types (Resource Graph) |
370
+ | Metrics | CloudWatch (43 types + fallback) | Cloud Monitoring (15 types + fallback) | Azure Monitor (25 types + fallback) |
371
+ | Cost data | Cost Explorer (batched) | BigQuery billing export | Cost Management API |
372
+ | Last activity | CloudTrail (90-day lookback) | Cloud Audit Logs | Activity Log / Log Analytics |
373
+ | Deployment | Lambda (SAM) | Cloud Run Job | Azure Function (Bicep) |
374
+ | Multi-account | Hub/spoke with STS | Single project only | Cross-subscription via Resource Graph |
375
+ | Secret management | Secrets Manager | Secret Manager | Key Vault |
376
+
377
+ ---
378
+
379
+ ## Running tests
380
+
381
+ ```bash
382
+ make test # unit tests only (431 tests, no cloud creds needed)
383
+ make test-integration # integration tests (32 tests — adapter contracts, report schema)
384
+ make test-all # everything (463 tests)
385
+ ```
386
+
387
+ Tests use `unittest.mock` throughout — no real AWS/GCP/Azure calls are made.
388
+
389
+ ---
390
+
391
+ ## Project structure
392
+
393
+ ```
394
+ argus/
395
+ ├── core/ # Pure Python — no cloud imports
396
+ │ ├── agent/loop.py # ReAct agent loop
397
+ │ ├── agent/prompts.py # System prompt + tool schemas
398
+ │ ├── models/finding.py # ResourceFinding dataclass
399
+ │ └── reports/ # Report generator, multi-cloud merge, export, notifications
400
+ ├── adapters/
401
+ │ ├── base.py # CloudAdapter abstract class
402
+ │ ├── aws/ # AWS adapter (Resource Explorer, CloudWatch, Cost Explorer, CloudTrail)
403
+ │ ├── gcp/ # GCP adapter (Asset Inventory, Cloud Monitoring, BigQuery, Audit Logs)
404
+ │ └── azure/ # Azure adapter (Resource Graph, Monitor, Cost Management, Activity Log)
405
+ ├── ai/
406
+ │ ├── base.py # AIProvider abstract class
407
+ │ ├── anthropic.py # Anthropic API (local dev / universal fallback)
408
+ │ ├── bedrock.py # AWS Bedrock (Converse API)
409
+ │ ├── vertexai.py # Vertex AI / Gemini (GCP)
410
+ │ └── azure_openai.py # Azure OpenAI / GPT-4o (Azure)
411
+ ├── entrypoints/
412
+ │ ├── cli.py # argus --cloud aws --run-now
413
+ │ ├── aws_lambda.py # AWS Lambda handler
414
+ │ ├── gcp_cloudrun.py # GCP Cloud Run Job handler
415
+ │ └── azure_function.py # Azure Function timer trigger
416
+ ├── deploy/
417
+ │ ├── aws/ # CloudFormation templates
418
+ │ ├── gcp/ # Cloud Run + Scheduler deploy script
419
+ │ └── azure/ # Bicep templates
420
+ └── tests/ # 463 tests, all pass offline
421
+ ```
422
+
423
+ ---
424
+
425
+ ## Contributing
426
+
427
+ See [CONTRIBUTING.md](CONTRIBUTING.md).
428
+
429
+ ---
430
+
431
+ ## License
432
+
433
+ MIT