argus-cloud-optimizer 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/aws/__init__.py +0 -0
- adapters/aws/adapter.py +85 -0
- adapters/aws/auth.py +57 -0
- adapters/aws/cloudtrail.py +83 -0
- adapters/aws/cloudwatch.py +732 -0
- adapters/aws/config.py +9 -0
- adapters/aws/cost_explorer.py +116 -0
- adapters/aws/resource_explorer.py +186 -0
- adapters/aws/retry.py +55 -0
- adapters/azure/__init__.py +0 -0
- adapters/azure/activity_log.py +159 -0
- adapters/azure/adapter.py +117 -0
- adapters/azure/cost_management.py +125 -0
- adapters/azure/monitor.py +311 -0
- adapters/azure/resource_graph.py +113 -0
- adapters/azure/retry.py +57 -0
- adapters/base.py +105 -0
- adapters/gcp/__init__.py +0 -0
- adapters/gcp/adapter.py +86 -0
- adapters/gcp/asset_inventory.py +116 -0
- adapters/gcp/billing.py +118 -0
- adapters/gcp/cloud_logging.py +93 -0
- adapters/gcp/cloud_monitoring.py +276 -0
- adapters/gcp/retry.py +46 -0
- ai/__init__.py +0 -0
- ai/anthropic.py +174 -0
- ai/azure_openai.py +241 -0
- ai/base.py +78 -0
- ai/bedrock.py +169 -0
- ai/vertexai.py +234 -0
- argus_cloud_optimizer-0.2.0.dist-info/METADATA +433 -0
- argus_cloud_optimizer-0.2.0.dist-info/RECORD +62 -0
- argus_cloud_optimizer-0.2.0.dist-info/WHEEL +5 -0
- argus_cloud_optimizer-0.2.0.dist-info/entry_points.txt +2 -0
- argus_cloud_optimizer-0.2.0.dist-info/licenses/LICENSE +21 -0
- argus_cloud_optimizer-0.2.0.dist-info/top_level.txt +4 -0
- core/__init__.py +0 -0
- core/__version__.py +1 -0
- core/agent/__init__.py +0 -0
- core/agent/loop.py +390 -0
- core/agent/prompts.py +317 -0
- core/config.py +235 -0
- core/log.py +69 -0
- core/models/__init__.py +0 -0
- core/models/finding.py +76 -0
- core/py.typed +0 -0
- core/reports/__init__.py +0 -0
- core/reports/comparison.py +49 -0
- core/reports/delivery.py +323 -0
- core/reports/export.py +111 -0
- core/reports/generator.py +168 -0
- core/reports/html.py +286 -0
- core/reports/multi_cloud.py +162 -0
- core/secrets.py +145 -0
- core/token_tracker.py +97 -0
- core/validation.py +214 -0
- entrypoints/__init__.py +0 -0
- entrypoints/aws_lambda.py +299 -0
- entrypoints/azure_function.py +257 -0
- entrypoints/cli.py +156 -0
- entrypoints/gcp_cloudrun.py +209 -0
ai/vertexai.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AI provider backed by Google Vertex AI (Gemini models).
|
|
3
|
+
|
|
4
|
+
Authentication uses Application Default Credentials (ADC) — run:
|
|
5
|
+
gcloud auth application-default login
|
|
6
|
+
|
|
7
|
+
No API key needed when running on Cloud Run / GCE with the right service account.
|
|
8
|
+
|
|
9
|
+
Environment variables:
|
|
10
|
+
VERTEXAI_PROJECT GCP project ID (required)
|
|
11
|
+
VERTEXAI_LOCATION GCP region (default: us-central1)
|
|
12
|
+
VERTEXAI_MODEL Model name (default: gemini-1.5-pro-002)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import time
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import google.auth
|
|
22
|
+
import google.auth.transport.requests
|
|
23
|
+
import openai
|
|
24
|
+
import structlog
|
|
25
|
+
|
|
26
|
+
from ai.base import AIProvider, AIResponse, Message, Tool, ToolCall
|
|
27
|
+
|
|
28
|
+
logger = structlog.get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
MAX_RETRIES = 3
|
|
31
|
+
_BASE_DELAY = 1.0
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class VertexAIProvider(AIProvider):
|
|
35
|
+
"""
|
|
36
|
+
AI provider backed by Vertex AI Gemini models.
|
|
37
|
+
Uses the google-cloud-aiplatform SDK — included via the openai compat layer
|
|
38
|
+
or directly via vertexai package. Falls back to the OpenAI-compatible
|
|
39
|
+
Vertex AI endpoint so we can reuse the openai SDK already in requirements.txt.
|
|
40
|
+
|
|
41
|
+
Model: gemini-1.5-pro-002 (default) — supports function calling + large context.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
DEFAULT_MODEL = "gemini-1.5-pro-002"
|
|
45
|
+
DEFAULT_LOCATION = "us-central1"
|
|
46
|
+
DEFAULT_MAX_TOKENS = 4096
|
|
47
|
+
DEFAULT_TEMPERATURE = 0.0
|
|
48
|
+
|
|
49
|
+
def __init__(
    self,
    project: str | None = None,
    location: str | None = None,
    model: str | None = None,
    max_tokens: int = DEFAULT_MAX_TOKENS,
    temperature: float | None = None,
) -> None:
    """
    Resolve settings, obtain Google credentials and build the OpenAI client.

    Explicit arguments win; anything left unset is read from the ``ai``
    section of ``core.config.get_settings()``.

    Args:
        project: GCP project ID. Falls back to ``cfg.vertexai_project``.
        location: GCP region. Falls back to ``cfg.vertexai_location``.
        model: Model name. Falls back to ``cfg.resolved_model("vertexai")``.
        max_tokens: Completion token limit sent with every request.
        temperature: Sampling temperature. Falls back to ``cfg.temperature``
            when ``None`` (an explicit ``0.0`` is respected).

    Raises:
        EnvironmentError: If no project is given and none is configured.
    """
    # Imported lazily, exactly as the original does, rather than at module top.
    from core.config import get_settings

    ai_settings = get_settings().ai

    self._project = project or ai_settings.vertexai_project
    if not self._project:
        raise EnvironmentError(
            "VERTEXAI_PROJECT is not set. "
            "Set it in .env or pass project= explicitly."
        )

    self._location = location or ai_settings.vertexai_location
    self._model = model or ai_settings.resolved_model("vertexai")
    self._max_tokens = max_tokens
    if temperature is None:
        self._temperature = ai_settings.temperature
    else:
        self._temperature = temperature

    # Talk to Vertex AI through its OpenAI-compatible endpoint using the
    # openai SDK, which avoids adding google-cloud-aiplatform as a dependency.
    adc_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    creds, _ = google.auth.default(scopes=adc_scopes)
    # Refresh immediately so a bearer token is available for the client below.
    creds.refresh(google.auth.transport.requests.Request())

    endpoint = (
        f"https://{self._location}-aiplatform.googleapis.com/v1beta1/"
        f"projects/{self._project}/locations/{self._location}/endpoints/openapi"
    )
    # The access token is passed where the SDK expects an API key.
    self._client = openai.OpenAI(base_url=endpoint, api_key=creds.token)
    # Kept so _call_with_retry can refresh the token once it is no longer valid.
    self._credentials = creds
|
|
86
|
+
|
|
87
|
+
@classmethod
def from_env(cls) -> "VertexAIProvider":
    """
    Alternate constructor that builds a provider with no explicit arguments.

    Every setting is therefore resolved by ``__init__`` from its defaults
    and from the configuration it loads.
    """
    provider = cls()
    return provider
|
|
90
|
+
|
|
91
|
+
def chat(
    self,
    messages: list[Message],
    tools: list[Tool],
    system_prompt: str | None = None,
) -> AIResponse:
    """
    Run one chat-completion round trip and return the normalised response.

    Args:
        messages: Conversation history in the provider-neutral format.
        tools: Tools the model may call; an empty list sends no tool fields.
        system_prompt: Optional system instruction placed first.

    Returns:
        The result of ``_parse_response`` applied to the raw API response.
    """
    request: dict[str, Any] = {
        "model": self._model,
        "messages": self._build_messages(messages, system_prompt),
        "max_tokens": self._max_tokens,
        "temperature": self._temperature,
    }

    # Tool fields are only sent when there is at least one tool to offer.
    converted_tools = [self._to_openai_tool(spec) for spec in tools] if tools else []
    if converted_tools:
        request["tools"] = converted_tools
        request["tool_choice"] = "auto"

    raw_response = self._call_with_retry(request)
    return self._parse_response(raw_response)
|
|
112
|
+
|
|
113
|
+
# ------------------------------------------------------------------
|
|
114
|
+
# Internal helpers
|
|
115
|
+
# ------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
def _call_with_retry(self, kwargs: dict[str, Any]) -> Any:
    """
    Call the chat-completions endpoint, retrying when rate limited.

    Only ``openai.RateLimitError`` is retried; any other exception propagates
    immediately. The wait starts at ``_BASE_DELAY`` seconds and doubles after
    each rate-limited attempt, for at most ``MAX_RETRIES`` attempts in total.

    Args:
        kwargs: Keyword arguments passed to ``chat.completions.create``.

    Returns:
        The raw response object from the openai SDK.

    Raises:
        openai.RateLimitError: If the final attempt is still rate limited.
    """
    backoff = _BASE_DELAY
    for attempt_no in range(1, MAX_RETRIES + 1):
        try:
            creds = self._credentials
            # The token may have expired since the client was built; refresh
            # it and hand the new token to the client before calling.
            if not creds.valid:
                creds.refresh(google.auth.transport.requests.Request())
                self._client.api_key = creds.token

            return self._client.chat.completions.create(**kwargs)
        except openai.RateLimitError:
            # Out of attempts: let the caller see the rate-limit error.
            if attempt_no >= MAX_RETRIES:
                raise
            logger.warning(
                "Vertex AI rate limited (attempt %d/%d), retrying in %.1fs",
                attempt_no,
                MAX_RETRIES,
                backoff,
            )
            time.sleep(backoff)
            backoff *= 2
    raise RuntimeError("Unreachable")  # pragma: no cover
|
|
140
|
+
|
|
141
|
+
def _build_messages(
|
|
142
|
+
self,
|
|
143
|
+
messages: list[Message],
|
|
144
|
+
system_prompt: str | None,
|
|
145
|
+
) -> list[dict[str, Any]]:
|
|
146
|
+
result: list[dict[str, Any]] = []
|
|
147
|
+
|
|
148
|
+
if system_prompt:
|
|
149
|
+
result.append({"role": "system", "content": system_prompt})
|
|
150
|
+
|
|
151
|
+
for msg in messages:
|
|
152
|
+
if msg.role == "user":
|
|
153
|
+
if msg.tool_results:
|
|
154
|
+
# Each tool result is its own message in the OpenAI protocol
|
|
155
|
+
for tr in msg.tool_results:
|
|
156
|
+
result.append(
|
|
157
|
+
{
|
|
158
|
+
"role": "tool",
|
|
159
|
+
"tool_call_id": tr.tool_call_id,
|
|
160
|
+
"content": tr.content,
|
|
161
|
+
}
|
|
162
|
+
)
|
|
163
|
+
else:
|
|
164
|
+
result.append({"role": "user", "content": msg.text or ""})
|
|
165
|
+
|
|
166
|
+
else:
|
|
167
|
+
# assistant — may have text, tool_calls, or both
|
|
168
|
+
content: list[dict[str, Any]] | str = msg.text or ""
|
|
169
|
+
tool_calls_out = []
|
|
170
|
+
for tc in msg.tool_calls:
|
|
171
|
+
tool_calls_out.append(
|
|
172
|
+
{
|
|
173
|
+
"id": tc.id,
|
|
174
|
+
"type": "function",
|
|
175
|
+
"function": {
|
|
176
|
+
"name": tc.name,
|
|
177
|
+
"arguments": json.dumps(tc.arguments),
|
|
178
|
+
},
|
|
179
|
+
}
|
|
180
|
+
)
|
|
181
|
+
assistant_msg: dict[str, Any] = {
|
|
182
|
+
"role": "assistant",
|
|
183
|
+
"content": content,
|
|
184
|
+
}
|
|
185
|
+
if tool_calls_out:
|
|
186
|
+
assistant_msg["tool_calls"] = tool_calls_out
|
|
187
|
+
result.append(assistant_msg)
|
|
188
|
+
|
|
189
|
+
return result
|
|
190
|
+
|
|
191
|
+
def _to_openai_tool(self, tool: Tool) -> dict[str, Any]:
|
|
192
|
+
return {
|
|
193
|
+
"type": "function",
|
|
194
|
+
"function": {
|
|
195
|
+
"name": tool.name,
|
|
196
|
+
"description": tool.description,
|
|
197
|
+
"parameters": tool.input_schema,
|
|
198
|
+
},
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
def _parse_response(self, response: Any) -> AIResponse:
    """
    Convert a raw chat-completions response into an ``AIResponse``.

    Only the first choice is read. The finish reason is mapped onto the
    internal vocabulary: any tool call yields ``"tool_use"``, ``"stop"``
    becomes ``"end_turn"``, ``"length"`` becomes ``"max_tokens"``, and any
    other value is passed through unchanged.

    Args:
        response: Response object returned by ``chat.completions.create``.

    Returns:
        An ``AIResponse`` with text, tool calls, stop reason and token usage.

    Raises:
        json.JSONDecodeError: If a tool call carries non-empty arguments that
            are not valid JSON.
    """
    choice = response.choices[0]
    message = choice.message
    stop_reason = choice.finish_reason  # "stop" | "tool_calls" | "length"

    # Empty content is reported as None rather than "".
    text: str | None = message.content or None

    tool_calls: list[ToolCall] = [
        ToolCall(
            id=tc.id,
            name=tc.function.name,
            # A call to a parameterless tool can arrive with an empty or
            # missing arguments string; json.loads("") would raise, so treat
            # that case as an empty argument object.
            arguments=json.loads(tc.function.arguments or "{}"),
        )
        for tc in (message.tool_calls or [])
    ]

    # Normalise finish_reason to our internal vocabulary.
    if tool_calls:
        stop_reason = "tool_use"
    elif stop_reason == "stop":
        stop_reason = "end_turn"
    elif stop_reason == "length":
        stop_reason = "max_tokens"

    usage = getattr(response, "usage", None)
    # Missing usage, or a count reported as None, is recorded as 0 tokens.
    input_tokens = (getattr(usage, "prompt_tokens", 0) or 0) if usage else 0
    output_tokens = (getattr(usage, "completion_tokens", 0) or 0) if usage else 0

    return AIResponse(
        stop_reason=stop_reason,
        text=text,
        tool_calls=tool_calls,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )
|
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: argus-cloud-optimizer
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: AI-powered multi-cloud cost optimization agent
|
|
5
|
+
Author-email: Vamshi Siddarth Gaddam <vamshisiddarth02@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/vamshisiddarth/argus
|
|
8
|
+
Project-URL: Documentation, https://vamshisiddarth.github.io/argus/
|
|
9
|
+
Project-URL: Repository, https://github.com/vamshisiddarth/argus
|
|
10
|
+
Project-URL: Issues, https://github.com/vamshisiddarth/argus/issues
|
|
11
|
+
Keywords: cloud,cost-optimization,aws,gcp,azure,finops
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: System Administrators
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: System :: Systems Administration
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: anthropic>=0.40.0
|
|
23
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
24
|
+
Requires-Dist: PyYAML>=6.0.2
|
|
25
|
+
Requires-Dist: pydantic>=2.9.2
|
|
26
|
+
Requires-Dist: pydantic-settings>=2.6.0
|
|
27
|
+
Requires-Dist: python-dateutil>=2.9.0
|
|
28
|
+
Requires-Dist: structlog>=24.4.0
|
|
29
|
+
Requires-Dist: boto3>=1.35.36
|
|
30
|
+
Requires-Dist: botocore>=1.35.36
|
|
31
|
+
Requires-Dist: google-cloud-asset>=3.26.0
|
|
32
|
+
Requires-Dist: google-cloud-monitoring>=2.23.0
|
|
33
|
+
Requires-Dist: google-cloud-billing>=1.14.0
|
|
34
|
+
Requires-Dist: google-cloud-logging>=3.11.3
|
|
35
|
+
Requires-Dist: google-cloud-secret-manager>=2.20.0
|
|
36
|
+
Requires-Dist: azure-identity>=1.19.0
|
|
37
|
+
Requires-Dist: azure-mgmt-resourcegraph>=8.0.0
|
|
38
|
+
Requires-Dist: azure-monitor-query<2.0.0,>=1.4.0
|
|
39
|
+
Requires-Dist: azure-mgmt-costmanagement>=4.0.0
|
|
40
|
+
Requires-Dist: azure-keyvault-secrets>=4.8.0
|
|
41
|
+
Requires-Dist: openai>=1.55.0
|
|
42
|
+
Provides-Extra: export
|
|
43
|
+
Requires-Dist: weasyprint>=62.0; extra == "export"
|
|
44
|
+
Requires-Dist: python-pptx>=1.0.0; extra == "export"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest>=8.3.3; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-mock>=3.14.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pytest-timeout>=2.3.1; extra == "dev"
|
|
50
|
+
Requires-Dist: moto[s3,sts]>=5.0.16; extra == "dev"
|
|
51
|
+
Requires-Dist: freezegun>=1.5.1; extra == "dev"
|
|
52
|
+
Requires-Dist: black>=24.10.0; extra == "dev"
|
|
53
|
+
Requires-Dist: ruff>=0.7.1; extra == "dev"
|
|
54
|
+
Requires-Dist: mypy>=1.13.0; extra == "dev"
|
|
55
|
+
Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
|
|
56
|
+
Requires-Dist: types-python-dateutil>=2.9.0; extra == "dev"
|
|
57
|
+
Requires-Dist: boto3-stubs[bedrock-runtime,ce,cloudwatch,resourceexplorer2,s3,sts]>=1.35.36; extra == "dev"
|
|
58
|
+
Requires-Dist: mkdocs-material>=9.5.44; extra == "dev"
|
|
59
|
+
Requires-Dist: mkdocs-minify-plugin>=0.8.0; extra == "dev"
|
|
60
|
+
Dynamic: license-file
|
|
61
|
+
|
|
62
|
+
<p align="center">
|
|
63
|
+
<img src="docs/assets/images/logo-full.svg" alt="Argus" height="72">
|
|
64
|
+
</p>
|
|
65
|
+
|
|
66
|
+
<p align="center"><strong>AI-powered cloud cost optimization agent for AWS, GCP, and Azure.</strong></p>
|
|
67
|
+
|
|
68
|
+
Argus finds idle and wasted cloud resources — stopped EC2 instances, unattached EBS volumes, orphaned Elastic IPs, underutilized RDS databases — and delivers a prioritized, AI-reasoned report to Slack every week.
|
|
69
|
+
|
|
70
|
+
[CI status](https://github.com/vamshisiddarth/argus/actions/workflows/ci.yml)
|
|
71
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
72
|
+
[PyPI package](https://pypi.org/project/argus-cloud-optimizer/)
|
|
73
|
+
[MIT License](LICENSE)
|
|
74
|
+
[Documentation](https://vamshisiddarth.github.io/argus/)
|
|
75
|
+
|
|
76
|
+
<p align="center">
|
|
77
|
+
<img src="docs/assets/images/slack-demo.png" alt="Argus Slack report" width="600">
|
|
78
|
+
</p>
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## What it does
|
|
83
|
+
|
|
84
|
+
Every week (or on demand), Argus:
|
|
85
|
+
|
|
86
|
+
1. **Discovers** every resource in your cloud account using AWS Resource Explorer / GCP Asset Inventory / Azure Resource Graph
|
|
87
|
+
2. **Analyzes** each candidate — CloudWatch/Cloud Monitoring/Azure Monitor metrics, Cost Explorer/BigQuery/Cost Management cost data, and CloudTrail/Audit Log/Activity Log last-activity timestamps
|
|
88
|
+
3. **Reasons** about idleness using an LLM (Claude via the Anthropic API or AWS Bedrock, Gemini via Vertex AI, or GPT-4o via Azure OpenAI) — no hardcoded thresholds
|
|
89
|
+
4. **Reports** a compact digest (Slack, Microsoft Teams, or generic webhook) with top findings and a link to a full self-contained HTML report
|
|
90
|
+
|
|
91
|
+
Example Slack output:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Argus — AWS Waste Report (2026-06-17)
|
|
95
|
+
|
|
96
|
+
💸 $42.65/month estimated waste 📊 4 idle resources across 1 account
|
|
97
|
+
|
|
98
|
+
Two stopped EC2 instances and a forgotten NAT Gateway account for 72% of
|
|
99
|
+
total waste. One EBS volume has had no I/O in over 30 days.
|
|
100
|
+
|
|
101
|
+
Top findings
|
|
102
|
+
🔴 i-0abc123def · EC2 t3.large · $28.40/mo
|
|
103
|
+
🔴 nat-0def456 · NAT Gateway · $10.80/mo
|
|
104
|
+
🟡 vol-orphan · EBS gp3 100GiB · $8.00/mo
|
|
105
|
+
🟢 eipalloc-xyz · Elastic IP · $3.65/mo
|
|
106
|
+
|
|
107
|
+
[ 📄 Full report (HTML) ] [ vamshisiddarth/argus ]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The **Full report** button links to a self-contained HTML file (S3 / GCS / Azure Blob) with a filterable/sortable table and expandable AI reasoning per finding. Works offline, no login required.
|
|
111
|
+
|
|
112
|
+
> **See a realistic example:** [`examples/sample-report-aws.json`](examples/sample-report-aws.json) — 5 findings from a real-looking AWS scan with AI-written reasoning, metrics, and cost data.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Architecture
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
┌─────────────────────────────────────────────────────────┐
|
|
120
|
+
│ Agent Loop (ReAct) │
|
|
121
|
+
│ Think → Call Tool → Observe → Think → Submit │
|
|
122
|
+
└────────────────────┬────────────────────────────────────┘
|
|
123
|
+
│
|
|
124
|
+
┌────────────┴────────────┐
|
|
125
|
+
▼ ▼
|
|
126
|
+
CloudAdapter AIProvider
|
|
127
|
+
(AWS / GCP / Azure) (Bedrock / Anthropic / Vertex)
|
|
128
|
+
│
|
|
129
|
+
┌────┴──────────────────┐
|
|
130
|
+
│ list_resources │ Resource Explorer / Asset Inventory / Resource Graph
|
|
131
|
+
│ get_metrics │ CloudWatch / Cloud Monitoring / Azure Monitor
|
|
132
|
+
│ get_cost │ Cost Explorer / BigQuery / Cost Management
|
|
133
|
+
│ get_last_activity │ CloudTrail / Audit Logs / Activity Log
|
|
134
|
+
└───────────────────────┘
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
**Design principle: Same brain. Different hands. Different home.**
|
|
138
|
+
- **Brain** = agent loop + AI reasoning (`core/`) — pure Python, zero cloud imports
|
|
139
|
+
- **Hands** = cloud adapters (`adapters/`) — swappable per cloud
|
|
140
|
+
- **Home** = entrypoints (`entrypoints/`) — Lambda / Cloud Run / Azure Function
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Quick start
|
|
145
|
+
|
|
146
|
+
### Option A — Docker (fastest)
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
docker build --build-arg CLOUD=aws -t argus .
|
|
150
|
+
|
|
151
|
+
docker run --rm \
|
|
152
|
+
-e ANTHROPIC_API_KEY=sk-ant-... \
|
|
153
|
+
-e DRY_RUN=true \
|
|
154
|
+
-v ~/.aws:/root/.aws:ro \
|
|
155
|
+
argus --cloud aws --run-now --dry-run
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Option B — Install from PyPI
|
|
159
|
+
|
|
160
|
+
**Prerequisites**
|
|
161
|
+
- Python 3.11+
|
|
162
|
+
- Cloud credentials configured (see below)
|
|
163
|
+
- An Anthropic API key **or** cloud-native AI access (Bedrock / Vertex AI / Azure OpenAI)
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
pip install argus-cloud-optimizer
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
One package — all three clouds included. No extras needed.
|
|
170
|
+
|
|
171
|
+
> **AWS-specific setup:** Enable [Resource Explorer](https://docs.aws.amazon.com/resource-explorer/latest/userguide/) with an **aggregator index** in `us-east-1` (or set `RESOURCE_EXPLORER_REGION` to your aggregator region). Without this, Argus cannot discover resources.
|
|
172
|
+
|
|
173
|
+
Set minimum env vars:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
export AI_PROVIDER=anthropic
|
|
177
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
178
|
+
export DRY_RUN=true # remove to post to Slack
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
argus --cloud aws --run-now --dry-run
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Option C — Clone and develop
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
git clone https://github.com/vamshisiddarth/argus.git
|
|
189
|
+
cd argus
|
|
190
|
+
pip install -e ".[export,dev]"
|
|
191
|
+
cp .env.example .env # edit with your values
|
|
192
|
+
argus --cloud aws --run-now
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### CLI Options
|
|
196
|
+
|
|
197
|
+
```
|
|
198
|
+
argus --cloud aws|gcp|azure --run-now [options]
|
|
199
|
+
|
|
200
|
+
-V, --version Show version and exit
|
|
201
|
+
--dry-run Print notification payload instead of posting
|
|
202
|
+
--ignore-regions REGIONS Comma-separated regions to skip (e.g. ap-east-1,me-south-1)
|
|
203
|
+
--ai-provider PROVIDER anthropic | bedrock | vertexai | azure_openai (default: anthropic)
|
|
204
|
+
--accounts PATH Path to accounts.yaml for multi-account mode (AWS only)
|
|
205
|
+
--max-resources N Maximum resources to analyze per scan (default: 200)
|
|
206
|
+
--lookback-days DAYS Metrics lookback window in days (default: 90, use 14 for faster local dev)
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Deploy to AWS Lambda
|
|
212
|
+
|
|
213
|
+
Uses [AWS SAM](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html) — handles packaging and upload automatically. No S3 bucket needed.
|
|
214
|
+
|
|
215
|
+
### Single account
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
make deploy-aws
|
|
219
|
+
# or manually:
|
|
220
|
+
cd deploy/aws/single-account
|
|
221
|
+
sam build && sam deploy --guided
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
`sam deploy --guided` walks you through parameters (Slack webhook, region, AI provider) and saves them to `samconfig.toml` for future deploys. Subsequent deploys are just `sam deploy`.
|
|
225
|
+
|
|
226
|
+
The stack creates:
|
|
227
|
+
- Lambda function (runs weekly via EventBridge)
|
|
228
|
+
- IAM role with least-privilege read-only permissions
|
|
229
|
+
- S3 bucket for full JSON report storage (90-day retention)
|
|
230
|
+
|
|
231
|
+
### Multi-account
|
|
232
|
+
|
|
233
|
+
**Hub account** (runs Argus):
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
make deploy-aws-multi
|
|
237
|
+
# or manually:
|
|
238
|
+
cd deploy/aws/multi-account/hub
|
|
239
|
+
sam build && sam deploy --guided
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
**Each spoke account** (read-only IAM role only — no Lambda):
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
aws cloudformation deploy \
|
|
246
|
+
--template-file deploy/aws/multi-account/spoke-role.yaml \
|
|
247
|
+
--stack-name Argus-Spoke \
|
|
248
|
+
--capabilities CAPABILITY_IAM \
|
|
249
|
+
--parameter-overrides HubAccountId=<hub-account-id>
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
The hub stack output includes the `HubRoleArn` — use it as the `HubRoleArn` parameter for spoke deployments.
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## Deploy to GCP (Cloud Run)
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
# Authenticate
|
|
260
|
+
gcloud auth application-default login
|
|
261
|
+
|
|
262
|
+
# Set your project
|
|
263
|
+
gcloud config set project YOUR_PROJECT_ID
|
|
264
|
+
|
|
265
|
+
# Deploy
|
|
266
|
+
bash deploy/gcp/deploy.sh
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Requires: Cloud Run API, Cloud Scheduler API, BigQuery billing export enabled.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Deploy to Azure (Function App)
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
# Authenticate
|
|
277
|
+
az login
|
|
278
|
+
|
|
279
|
+
# Deploy
|
|
280
|
+
az deployment group create \
|
|
281
|
+
--resource-group Argus-RG \
|
|
282
|
+
--template-file deploy/azure/function-app.bicep \
|
|
283
|
+
--parameters subscriptionIds="sub-id-1,sub-id-2" \
|
|
284
|
+
slackWebhookUrl="https://hooks.slack.com/services/..."
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## AI providers
|
|
290
|
+
|
|
291
|
+
| Provider | Use case | Setup |
|
|
292
|
+
|----------|----------|-------|
|
|
293
|
+
| Anthropic API | Local dev, any cloud | Set `ANTHROPIC_API_KEY` |
|
|
294
|
+
| AWS Bedrock | AWS production | IAM role — no key needed |
|
|
295
|
+
| Vertex AI (Gemini) | GCP production | ADC — no key needed |
|
|
296
|
+
| Azure OpenAI (GPT-4o) | Azure production | Managed identity — no key needed |
|
|
297
|
+
|
|
298
|
+
Set `AI_PROVIDER=anthropic|bedrock|vertexai|azure_openai` in `.env` or the deployment environment. Use `AI_MODEL` to override the model for any provider, and `AI_TEMPERATURE` to control creativity (default: `0.0`).
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
## Multi-account setup
|
|
303
|
+
|
|
304
|
+
Create `accounts.yaml`:
|
|
305
|
+
|
|
306
|
+
```yaml
|
|
307
|
+
mode: multi
|
|
308
|
+
|
|
309
|
+
accounts:
|
|
310
|
+
- id: "111122223333"
|
|
311
|
+
name: dev
|
|
312
|
+
role_arn: arn:aws:iam::111122223333:role/ArgusSpokeRole
|
|
313
|
+
- id: "444455556666"
|
|
314
|
+
name: prod
|
|
315
|
+
role_arn: arn:aws:iam::444455556666:role/ArgusSpokeRole
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
Then run:
|
|
319
|
+
|
|
320
|
+
```bash
|
|
321
|
+
argus --cloud aws --run-now --accounts accounts.yaml
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
---
|
|
325
|
+
|
|
326
|
+
## IAM permissions (AWS)
|
|
327
|
+
|
|
328
|
+
Argus needs **read-only** access. The Lambda execution role requires:
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
resource-explorer-2:Search
|
|
332
|
+
resource-explorer-2:GetView
|
|
333
|
+
cloudwatch:GetMetricData
|
|
334
|
+
ce:GetCostAndUsage
|
|
335
|
+
ce:GetCostAndUsageWithResources
|
|
336
|
+
cloudtrail:LookupEvents
|
|
337
|
+
bedrock:InvokeModel # only if AI_PROVIDER=bedrock
|
|
338
|
+
sts:AssumeRole # only for multi-account mode
|
|
339
|
+
s3:PutObject # only if REPORT_S3_BUCKET is set
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
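Argus never requests permission to modify or delete your resources. The only write action is the optional `s3:PutObject`, used to upload the report when `REPORT_S3_BUCKET` is set.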
|
|
343
|
+
|
|
344
|
+
> **Cost Explorer note:** `GetCostAndUsageWithResources` requires resource-level cost allocation
|
|
345
|
+
> to be enabled in AWS Cost Management → Preferences → Resource-level data.
|
|
346
|
+
> If not enabled, Argus logs a warning and continues — cost fields will show $0.00.
|
|
347
|
+
|
|
348
|
+
---
|
|
349
|
+
|
|
350
|
+
## Limitations & known issues
|
|
351
|
+
|
|
352
|
+
Before you invest time deploying Argus, know what it **can't** do yet:
|
|
353
|
+
|
|
354
|
+
| Area | Status | Details |
|
|
355
|
+
|------|--------|---------|
|
|
356
|
+
| **Resource discovery** | AWS: strong, GCP/Azure: adequate | AWS covers 43 resource types via Resource Explorer. GCP covers 22 asset types; Azure covers 25 via Resource Graph. Some niche resource types (e.g. AWS Glue, SageMaker endpoints) are not yet mapped. |
|
|
357
|
+
| **Cost accuracy** | Best-effort | AWS Cost Explorer charges $0.01/API call — Argus batches aggressively (max 2 calls/scan). GCP requires BigQuery billing export enabled. Azure cost data depends on subscription-level access. Resource-level cost allocation must be enabled in AWS for per-resource costs; without it, costs show $0.00. |
|
|
358
|
+
| **AI non-determinism** | By design | The AI decides what's idle — different runs may produce slightly different findings or reasoning. Set `AI_TEMPERATURE=0.0` (default) for most consistent results. |
|
|
359
|
+
| **LLM cost** | Configurable | A full scan of ~200 resources costs ~$0.05–$0.50 in LLM API fees depending on provider. Use `--llm-budget` to set a hard cap (default: $2.00/scan). Large estates (1000+ resources) will hit the budget limit — increase it or use `--max-resources`. |
|
|
360
|
+
| **AWS Resource Explorer setup** | Manual step | You must enable Resource Explorer with an **aggregator index** (typically in `us-east-1`). Without this, Argus cannot discover resources. This is a one-time setup but is easy to miss. |
|
|
361
|
+
| **Write actions** | None | Argus is read-only. It reports findings but never deletes, stops, or modifies resources. Remediation is manual. |
|
|
362
|
+
| **Multi-cloud in one scan** | Not yet | Each `argus` invocation scans one cloud. Use the merge report feature (`core/reports/multi_cloud.py`) to combine results after separate runs. |
|
|
363
|
+
| **Notifications** | Slack + Teams + webhook | No email. Slack/Teams delivery requires a webhook URL. |
|
|
364
|
+
|
|
365
|
+
### Multi-cloud parity
|
|
366
|
+
|
|
367
|
+
| Capability | AWS | GCP | Azure |
|
|
368
|
+
|-----------|-----|-----|-------|
|
|
369
|
+
| Resource discovery | 43 types (Resource Explorer) | 22 types (Asset Inventory) | 25 types (Resource Graph) |
|
|
370
|
+
| Metrics | CloudWatch (43 types + fallback) | Cloud Monitoring (15 types + fallback) | Azure Monitor (25 types + fallback) |
|
|
371
|
+
| Cost data | Cost Explorer (batched) | BigQuery billing export | Cost Management API |
|
|
372
|
+
| Last activity | CloudTrail (90-day lookback) | Cloud Audit Logs | Activity Log / Log Analytics |
|
|
373
|
+
| Deployment | Lambda (SAM) | Cloud Run Job | Azure Function (Bicep) |
|
|
374
|
+
| Multi-account | Hub/spoke with STS | Single project only | Cross-subscription via Resource Graph |
|
|
375
|
+
| Secret management | Secrets Manager | Secret Manager | Key Vault |
|
|
376
|
+
|
|
377
|
+
---
|
|
378
|
+
|
|
379
|
+
## Running tests
|
|
380
|
+
|
|
381
|
+
```bash
|
|
382
|
+
make test # unit tests only (431 tests, no cloud creds needed)
|
|
383
|
+
make test-integration # integration tests (32 tests — adapter contracts, report schema)
|
|
384
|
+
make test-all # everything (463 tests)
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
Tests use `unittest.mock` throughout — no real AWS/GCP/Azure calls are made.
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
## Project structure
|
|
392
|
+
|
|
393
|
+
```
|
|
394
|
+
argus/
|
|
395
|
+
├── core/ # Pure Python — no cloud imports
|
|
396
|
+
│ ├── agent/loop.py # ReAct agent loop
|
|
397
|
+
│ ├── agent/prompts.py # System prompt + tool schemas
|
|
398
|
+
│ ├── models/finding.py # ResourceFinding dataclass
|
|
399
|
+
│ └── reports/ # Report generator, multi-cloud merge, export, notifications
|
|
400
|
+
├── adapters/
|
|
401
|
+
│ ├── base.py # CloudAdapter abstract class
|
|
402
|
+
│ ├── aws/ # AWS adapter (Resource Explorer, CloudWatch, Cost Explorer, CloudTrail)
|
|
403
|
+
│ ├── gcp/ # GCP adapter (Asset Inventory, Cloud Monitoring, BigQuery, Audit Logs)
|
|
404
|
+
│ └── azure/ # Azure adapter (Resource Graph, Monitor, Cost Management, Activity Log)
|
|
405
|
+
├── ai/
|
|
406
|
+
│ ├── base.py # AIProvider abstract class
|
|
407
|
+
│ ├── anthropic.py # Anthropic API (local dev / universal fallback)
|
|
408
|
+
│ ├── bedrock.py # AWS Bedrock (Converse API)
|
|
409
|
+
│ ├── vertexai.py # Vertex AI / Gemini (GCP)
|
|
410
|
+
│ └── azure_openai.py # Azure OpenAI / GPT-4o (Azure)
|
|
411
|
+
├── entrypoints/
|
|
412
|
+
│ ├── cli.py # argus --cloud aws --run-now
|
|
413
|
+
│ ├── aws_lambda.py # AWS Lambda handler
|
|
414
|
+
│ ├── gcp_cloudrun.py # GCP Cloud Run Job handler
|
|
415
|
+
│ └── azure_function.py # Azure Function timer trigger
|
|
416
|
+
├── deploy/
|
|
417
|
+
│ ├── aws/ # CloudFormation templates
|
|
418
|
+
│ ├── gcp/ # Cloud Run + Scheduler deploy script
|
|
419
|
+
│ └── azure/ # Bicep templates
|
|
420
|
+
└── tests/ # 463 tests, all pass offline
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
---
|
|
424
|
+
|
|
425
|
+
## Contributing
|
|
426
|
+
|
|
427
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
428
|
+
|
|
429
|
+
---
|
|
430
|
+
|
|
431
|
+
## License
|
|
432
|
+
|
|
433
|
+
MIT
|