argus-cloud-optimizer 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. adapters/__init__.py +0 -0
  2. adapters/aws/__init__.py +0 -0
  3. adapters/aws/adapter.py +85 -0
  4. adapters/aws/auth.py +57 -0
  5. adapters/aws/cloudtrail.py +83 -0
  6. adapters/aws/cloudwatch.py +732 -0
  7. adapters/aws/config.py +9 -0
  8. adapters/aws/cost_explorer.py +116 -0
  9. adapters/aws/resource_explorer.py +186 -0
  10. adapters/aws/retry.py +55 -0
  11. adapters/azure/__init__.py +0 -0
  12. adapters/azure/activity_log.py +159 -0
  13. adapters/azure/adapter.py +117 -0
  14. adapters/azure/cost_management.py +125 -0
  15. adapters/azure/monitor.py +311 -0
  16. adapters/azure/resource_graph.py +113 -0
  17. adapters/azure/retry.py +57 -0
  18. adapters/base.py +105 -0
  19. adapters/gcp/__init__.py +0 -0
  20. adapters/gcp/adapter.py +86 -0
  21. adapters/gcp/asset_inventory.py +116 -0
  22. adapters/gcp/billing.py +118 -0
  23. adapters/gcp/cloud_logging.py +93 -0
  24. adapters/gcp/cloud_monitoring.py +276 -0
  25. adapters/gcp/retry.py +46 -0
  26. ai/__init__.py +0 -0
  27. ai/anthropic.py +174 -0
  28. ai/azure_openai.py +241 -0
  29. ai/base.py +78 -0
  30. ai/bedrock.py +169 -0
  31. ai/vertexai.py +234 -0
  32. argus_cloud_optimizer-0.2.0.dist-info/METADATA +433 -0
  33. argus_cloud_optimizer-0.2.0.dist-info/RECORD +62 -0
  34. argus_cloud_optimizer-0.2.0.dist-info/WHEEL +5 -0
  35. argus_cloud_optimizer-0.2.0.dist-info/entry_points.txt +2 -0
  36. argus_cloud_optimizer-0.2.0.dist-info/licenses/LICENSE +21 -0
  37. argus_cloud_optimizer-0.2.0.dist-info/top_level.txt +4 -0
  38. core/__init__.py +0 -0
  39. core/__version__.py +1 -0
  40. core/agent/__init__.py +0 -0
  41. core/agent/loop.py +390 -0
  42. core/agent/prompts.py +317 -0
  43. core/config.py +235 -0
  44. core/log.py +69 -0
  45. core/models/__init__.py +0 -0
  46. core/models/finding.py +76 -0
  47. core/py.typed +0 -0
  48. core/reports/__init__.py +0 -0
  49. core/reports/comparison.py +49 -0
  50. core/reports/delivery.py +323 -0
  51. core/reports/export.py +111 -0
  52. core/reports/generator.py +168 -0
  53. core/reports/html.py +286 -0
  54. core/reports/multi_cloud.py +162 -0
  55. core/secrets.py +145 -0
  56. core/token_tracker.py +97 -0
  57. core/validation.py +214 -0
  58. entrypoints/__init__.py +0 -0
  59. entrypoints/aws_lambda.py +299 -0
  60. entrypoints/azure_function.py +257 -0
  61. entrypoints/cli.py +156 -0
  62. entrypoints/gcp_cloudrun.py +209 -0
adapters/base.py ADDED
@@ -0,0 +1,105 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime
6
+ from typing import Any
7
+
8
+
9
@dataclass
class Resource:
    """A single cloud resource as reported by a provider's inventory API."""

    resource_id: str
    resource_type: str  # e.g. "AWS::EC2::Instance"
    cloud: str  # "aws" | "gcp" | "azure"
    region: str
    name: str | None = None
    tags: dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict (``tags`` is shared, not copied)."""
        exported = (
            "resource_id",
            "resource_type",
            "cloud",
            "region",
            "name",
            "tags",
        )
        return {key: getattr(self, key) for key in exported}
29
+
30
+
31
@dataclass
class MetricSummary:
    """Usage metrics collected for one resource over a lookback window."""

    resource_id: str
    resource_type: str
    period_days: int
    metrics: dict[str, Any]  # {"avg_cpu_pct": 1.2, "network_bytes_total": 847, ...}
    has_data: bool = True  # False if CloudWatch has no data points

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict (``metrics`` is shared, not copied)."""
        exported = (
            "resource_id",
            "resource_type",
            "period_days",
            "metrics",
            "has_data",
        )
        return {key: getattr(self, key) for key in exported}
49
+
50
+
51
class CloudAdapter(ABC):
    """
    Provider-agnostic interface to a cloud account; one subclass per provider.

    The agent loop talks to the cloud exclusively through the four abstract
    methods below and never touches raw SDK clients. Implementations must be
    read-only: they may not mutate cloud resources.
    """

    @abstractmethod
    def list_resources(
        self,
        ignore_regions: list[str] | None = None,
    ) -> list[Resource]:
        """
        Discover every resource in ALL regions except those in ignore_regions.

        Passing None or an empty list scans everything, so newly launched
        regions are picked up automatically. Implementations rely on
        Resource Explorer (AWS), Asset Inventory (GCP), or Resource Graph
        (Azure). Never hardcode resource types.
        """
        ...

    @abstractmethod
    def get_metrics(
        self,
        resource_id: str,
        resource_type: str,
        days: int = 90,
    ) -> MetricSummary:
        """
        Collect usage metrics for one resource over the last ``days`` days.

        Which metrics are relevant for a given resource type is the adapter's
        decision. The 90-day default covers quarterly usage patterns; it can
        be overridden via the METRICS_LOOKBACK_DAYS env var
        (see cloudwatch.DEFAULT_METRICS_DAYS).
        """
        ...

    @abstractmethod
    def get_cost(
        self,
        resource_ids: list[str],
        days: int = 30,
    ) -> dict[str, float]:
        """
        Map each resource ID to its estimated monthly cost in USD.

        Callers must pass resource_ids as a batch — never one call per
        resource.
        """
        ...

    @abstractmethod
    def get_last_activity(
        self,
        resource_id: str,
        resource_type: str,
    ) -> datetime | None:
        """
        Return when the resource last saw meaningful activity.

        None means no activity was found within the lookback window.
        """
        ...
File without changes
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from datetime import datetime
5
+
6
+ from adapters.base import CloudAdapter, MetricSummary, Resource
7
+ from adapters.gcp import asset_inventory, billing, cloud_logging, cloud_monitoring
8
+
9
+
10
class GCPAdapter(CloudAdapter):
    """
    CloudAdapter backed by Google Cloud.

    Each method delegates to one sibling module: Cloud Asset Inventory for
    discovery, Cloud Monitoring for metrics, the BigQuery billing export for
    cost, and Cloud Audit Logs for last activity. All API calls are read-only.

    Auth: uses Application Default Credentials (ADC).
      - Cloud Run Job: the service account attached to the job
      - Local dev: `gcloud auth application-default login`

    Usage:
        adapter = GCPAdapter(project_id="my-gcp-project")
    """

    def __init__(
        self,
        project_id: str | None = None,
        bq_billing_table: str | None = None,
    ) -> None:
        """
        Resolve the project and billing table, falling back to env vars.

        Raises:
            EnvironmentError: neither ``project_id`` nor GCP_PROJECT_ID
                provides a non-empty project ID.
        """
        project = project_id if project_id else os.environ.get("GCP_PROJECT_ID", "")
        if not project:
            raise EnvironmentError(
                "GCP_PROJECT_ID is not set. "
                "Pass project_id= or export GCP_PROJECT_ID."
            )
        self._project_id: str = project

        table = bq_billing_table
        if not table:
            table = os.environ.get("BILLING_BQ_TABLE")
        self._bq_billing_table = table

    @classmethod
    def from_env(cls) -> "GCPAdapter":
        """Convenience constructor — reads all config from env vars."""
        return cls(
            project_id=os.getenv("GCP_PROJECT_ID"),
            bq_billing_table=os.getenv("BILLING_BQ_TABLE"),
        )

    def list_resources(self, ignore_regions: list[str] | None = None) -> list[Resource]:
        """Delegate resource discovery to ``asset_inventory.list_resources``."""
        return asset_inventory.list_resources(
            project_id=self._project_id,
            ignore_regions=ignore_regions,
        )

    def get_metrics(
        self,
        resource_id: str,
        resource_type: str,
        days: int = 90,
    ) -> MetricSummary:
        """Delegate metric collection to ``cloud_monitoring.get_metrics``."""
        return cloud_monitoring.get_metrics(
            project_id=self._project_id,
            resource_id=resource_id,
            resource_type=resource_type,
            days=days,
        )

    def get_cost(
        self,
        resource_ids: list[str],
        days: int = 30,
    ) -> dict[str, float]:
        """Delegate cost lookup to ``billing.get_cost`` using the configured table."""
        return billing.get_cost(
            project_id=self._project_id,
            resource_ids=resource_ids,
            days=days,
            bq_table=self._bq_billing_table,
        )

    def get_last_activity(
        self,
        resource_id: str,
        resource_type: str,
    ) -> datetime | None:
        """Delegate the audit-log lookup to ``cloud_logging.get_last_activity``."""
        return cloud_logging.get_last_activity(
            project_id=self._project_id,
            resource_id=resource_id,
            resource_type=resource_type,
        )
@@ -0,0 +1,116 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import structlog
6
+ from google.api_core.exceptions import GoogleAPICallError, PermissionDenied
7
+ from google.cloud import asset_v1
8
+
9
+ from adapters.base import Resource
10
+ from adapters.gcp.retry import retry_on_transient
11
+
12
+ logger = structlog.get_logger(__name__)
13
+
14
# Cloud Asset Inventory types that Argus scans. An empty list would mean
# "all types", which is too noisy for cost analysis, so the list is scoped to
# resource types that have associated billing.
# NOTE(review): confirm each identifier against the Cloud Asset Inventory
# "supported asset types" list — TODO verify.
SCANNED_ASSET_TYPES: list[str] = [
    # Compute Engine
    "compute.googleapis.com/Instance",
    "compute.googleapis.com/Disk",
    "compute.googleapis.com/Address",  # static IPs
    "compute.googleapis.com/ForwardingRule",
    "compute.googleapis.com/BackendService",
    # Managed databases, containers and serverless
    "sql.googleapis.com/Instance",  # Cloud SQL
    "container.googleapis.com/Cluster",  # GKE
    "run.googleapis.com/Service",  # Cloud Run
    "cloudfunctions.googleapis.com/Function",  # Cloud Functions
    # Storage and analytics
    "storage.googleapis.com/Bucket",
    "bigquery.googleapis.com/Dataset",
    "bigquery.googleapis.com/Table",
    "redis.googleapis.com/Instance",  # Memorystore Redis
    "spanner.googleapis.com/Instance",
    "bigtable.googleapis.com/Instance",
    # Messaging and data processing
    "pubsub.googleapis.com/Topic",
    "pubsub.googleapis.com/Subscription",
    "dataflow.googleapis.com/Job",
    "dataproc.googleapis.com/Cluster",
    # ML and orchestration
    "aiplatform.googleapis.com/Endpoint",  # Vertex AI
    "composer.googleapis.com/Environment",  # Cloud Composer (Airflow)
    "notebooks.googleapis.com/Instance",  # Vertex AI Workbench
]
40
+
41
+
42
def list_resources(
    project_id: str,
    ignore_regions: list[str] | None = None,
) -> list[Resource]:
    """
    List the billable resources of a GCP project via Cloud Asset Inventory.

    Issues one paginated ``list_assets`` request restricted to
    SCANNED_ASSET_TYPES; assets that ``_parse_asset`` rejects (no resource
    payload, or located in an ignored region) are dropped.

    Raises:
        PermissionError: the API answered with PermissionDenied.
        RuntimeError: any other GoogleAPICallError.
    """
    client = asset_v1.AssetServiceClient()
    excluded_regions = set(ignore_regions or [])

    request = asset_v1.ListAssetsRequest(
        parent=f"projects/{project_id}",
        asset_types=SCANNED_ASSET_TYPES,
        content_type=asset_v1.ContentType.RESOURCE,
    )

    found: list[Resource] = []
    try:
        pager = retry_on_transient(client.list_assets, request=request, timeout=60)
        # filter(None, ...) discards the None results returned for skipped assets.
        found.extend(
            filter(None, (_parse_asset(asset, excluded_regions) for asset in pager))
        )
    except PermissionDenied as exc:
        raise PermissionError(
            f"Argus service account is missing cloudasset.assets.listAssets "
            f"permission on project {project_id}."
        ) from exc
    except GoogleAPICallError as exc:
        raise RuntimeError(f"Cloud Asset Inventory API error: {exc}") from exc

    logger.info(
        "asset_inventory_complete",
        extra={"project_id": project_id, "total": len(found)},
    )
    return found
81
+
82
+
83
+ def _parse_asset(asset: Any, ignore_set: set[str]) -> Resource | None:
84
+ resource = asset.resource
85
+ if not resource:
86
+ return None
87
+
88
+ data: dict[str, Any] = dict(resource.data)
89
+ name: str = asset.name # full resource name: //compute.googleapis.com/projects/…
90
+ asset_type: str = asset.asset_type # e.g. compute.googleapis.com/Instance
91
+ location: str = data.get("location", data.get("zone", data.get("region", "global")))
92
+
93
+ # Normalise zone (us-central1-a) to region (us-central1)
94
+ region = _to_region(location)
95
+ if region in ignore_set:
96
+ return None
97
+
98
+ labels: dict[str, str] = dict(data.get("labels", {}))
99
+ friendly_name: str | None = data.get("name") or data.get("displayName")
100
+
101
+ return Resource(
102
+ resource_id=name,
103
+ resource_type=asset_type,
104
+ cloud="gcp",
105
+ region=region,
106
+ name=friendly_name,
107
+ tags=labels,
108
+ )
109
+
110
+
111
+ def _to_region(location: str) -> str:
112
+ """Strip the zone suffix from a zone string to get the region."""
113
+ parts = location.rsplit("-", 1)
114
+ if len(parts) == 2 and len(parts[1]) == 1 and parts[1].isalpha():
115
+ return parts[0]
116
+ return location
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timedelta, timezone
4
+
5
+ import structlog
6
+
7
+ logger = structlog.get_logger(__name__)
8
+
9
+ # BigQuery dataset where Cloud Billing export is written.
10
+ # Users must enable billing export to BigQuery — this is the standard GCP cost path.
11
+ # Override via the BILLING_BQ_TABLE env var, e.g. "project.dataset.table".
12
+ _DEFAULT_TABLE = "argus_billing.gcp_billing_export_v1"
13
+
14
+
15
def get_cost(
    project_id: str,
    resource_ids: list[str],
    days: int = 30,
    bq_table: str | None = None,
) -> dict[str, float]:
    """
    Return the estimated USD cost of each resource over the last ``days`` days.

    GCP exposes billing data through two channels:
      1. Cloud Billing Budget API — account-level budgets only, no per-resource breakdown.
      2. BigQuery billing export — per-resource cost, requires export to be enabled.

    Only the BigQuery export gives per-resource figures, so that is what is
    queried. The table is taken from ``bq_table``, then the BILLING_BQ_TABLE
    env var, then the module default. If the query fails for any reason a
    warning is logged and every resource is reported as 0.0.

    ``resource_ids`` must be full GCP resource names
    (//compute.googleapis.com/projects/…); the short name is extracted for
    the BigQuery filter. An empty list returns an empty dict without querying.
    """
    if not resource_ids:
        return {}

    import os

    table = bq_table if bq_table else os.environ.get("BILLING_BQ_TABLE", _DEFAULT_TABLE)

    try:
        costs = _query_bigquery(project_id, resource_ids, days, table or "")
    except Exception as exc:  # noqa: BLE001
        # Best-effort: cost data is optional, so degrade to zeros instead of failing the scan.
        hint = (
            "Enable Cloud Billing export to BigQuery in the GCP console "
            "(Billing → Billing export → BigQuery export). "
            "Set BILLING_BQ_TABLE env var to 'project.dataset.table'."
        )
        logger.warning(
            "gcp_billing_query_failed",
            extra={
                "project_id": project_id,
                "error": str(exc),
                "hint": hint,
            },
        )
        return dict.fromkeys(resource_ids, 0.0)
    return costs
57
+
58
+
59
def _query_bigquery(
    project_id: str,
    resource_ids: list[str],
    days: int,
    bq_table: str,
) -> dict[str, float]:
    """
    Sum per-resource cost from the billing export table in BigQuery.

    Rows are matched on the last path segment of each resource ID, within a
    window of ``days`` days ending today (UTC). Resources with no matching
    row stay at 0.0; matched totals are rounded to four decimals. If two IDs
    share the same last segment, the cost is attributed to the later one.
    """
    from google.cloud import bigquery  # type: ignore[import-untyped,attr-defined]

    def _leaf(full_name: str) -> str:
        # Full: //compute.googleapis.com/projects/p/zones/z/instances/my-vm
        # Short: my-vm
        return full_name.rstrip("/").split("/")[-1]

    client = bigquery.Client(project=project_id)
    window_end = datetime.now(tz=timezone.utc).date()
    window_start = window_end - timedelta(days=days)

    short_names = [_leaf(rid) for rid in resource_ids]
    name_to_full = dict(zip(short_names, resource_ids))

    # One named parameter per short name: @name_0, @name_1, ...
    placeholders = ", ".join(f"@name_{idx}" for idx, _ in enumerate(short_names))
    query = f"""
        SELECT
            resource.name AS resource_name,
            SUM(cost) AS total_cost
        FROM `{bq_table}`
        WHERE
            DATE(usage_start_time) >= @start_date
            AND DATE(usage_end_time) <= @end_date
            AND resource.name IN ({placeholders})
        GROUP BY resource.name
    """

    params = [
        bigquery.ScalarQueryParameter("start_date", "DATE", window_start.isoformat()),
        bigquery.ScalarQueryParameter("end_date", "DATE", window_end.isoformat()),
    ]
    params.extend(
        bigquery.ScalarQueryParameter(f"name_{idx}", "STRING", short)
        for idx, short in enumerate(short_names)
    )
    job_config = bigquery.QueryJobConfig(query_parameters=params)

    costs: dict[str, float] = dict.fromkeys(resource_ids, 0.0)
    for row in client.query(query, job_config=job_config).result():
        full_id = name_to_full.get(row.resource_name)
        if full_id:
            costs[full_id] = round(float(row.total_cost), 4)

    logger.info(
        "gcp_billing_query_complete",
        extra={
            "project_id": project_id,
            "resources_queried": len(resource_ids),
            "resources_with_cost": sum(1 for amount in costs.values() if amount > 0),
        },
    )
    return costs
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timedelta, timezone
4
+
5
+ import structlog
6
+ from google.api_core.exceptions import GoogleAPICallError
7
+ from google.cloud import logging as gcp_logging
8
+
9
+ from adapters.gcp.retry import retry_on_transient
10
+
11
+ logger = structlog.get_logger(__name__)
12
+
13
+ _LOOKBACK_DAYS = 90 # Cloud Logging retention default is 30-400 days depending on tier
14
+
15
+
16
def get_last_activity(
    project_id: str,
    resource_id: str,
    resource_type: str,
) -> datetime | None:
    """
    Return the timestamp of the most recent audited activity for a GCP resource.

    Searches Cloud Audit Logs (Admin Activity + Data Access) through the
    Cloud Logging API over the last ``_LOOKBACK_DAYS`` days, newest first,
    and reads only the first entry.

    Args:
        project_id: Project whose audit logs are searched.
        resource_id: Full GCP asset name, e.g.
            //compute.googleapis.com/projects/p/zones/z/instances/my-vm
        resource_type: Asset type, used to narrow the filter to one service
            when a mapping is known.

    Returns:
        A timezone-aware timestamp, or None when no entry is found or the
        lookup fails with a GoogleAPICallError (a warning is logged).
    """
    short_name = resource_id.rstrip("/").split("/")[-1]
    service = _service_from_resource_type(resource_type)

    client = gcp_logging.Client(project=project_id)

    end_time = datetime.now(tz=timezone.utc)
    start_time = end_time - timedelta(days=_LOOKBACK_DAYS)

    # Audit log entries carry the affected resource in protoPayload.resourceName;
    # ":" is the substring operator, so the short name matches the full path.
    log_filter = (
        f'logName=("projects/{project_id}/logs/cloudaudit.googleapis.com%2Factivity" '
        f'OR "projects/{project_id}/logs/cloudaudit.googleapis.com%2Fdata_access") '
        f'AND protoPayload.resourceName:"{short_name}" '
        f'AND timestamp >= "{start_time.isoformat()}" '
        f'AND timestamp <= "{end_time.isoformat()}"'
    )
    if service:
        log_filter += f' AND protoPayload.serviceName="{service}"'

    try:
        entries = retry_on_transient(
            client.list_entries,
            filter_=log_filter,
            order_by=gcp_logging.DESCENDING,
            page_size=1,
            timeout=60,
        )
        # Results are newest-first, so only the first entry is needed.
        # Materialising the whole iterator would page through every matching
        # entry one request at a time.
        latest = next(iter(entries), None)
    except GoogleAPICallError as exc:
        logger.warning(
            "cloud_logging_lookup_failed",
            extra={"resource_id": resource_id, "error": str(exc)},
        )
        return None

    if latest is None:
        return None

    event_time: datetime = latest.timestamp
    if event_time.tzinfo is None:
        # Treat naive timestamps as UTC so callers always get an aware datetime.
        event_time = event_time.replace(tzinfo=timezone.utc)
    return event_time
72
+
73
+
74
+ def _service_from_resource_type(resource_type: str) -> str | None:
75
+ """Map GCP asset type to the Cloud Audit Log service name for tighter filtering."""
76
+ mapping: dict[str, str] = {
77
+ "compute.googleapis.com/Instance": "compute.googleapis.com",
78
+ "compute.googleapis.com/Disk": "compute.googleapis.com",
79
+ "sql.googleapis.com/Instance": "cloudsql.googleapis.com",
80
+ "container.googleapis.com/Cluster": "container.googleapis.com",
81
+ "storage.googleapis.com/Bucket": "storage.googleapis.com",
82
+ "bigquery.googleapis.com/Dataset": "bigquery.googleapis.com",
83
+ "bigquery.googleapis.com/Table": "bigquery.googleapis.com",
84
+ "run.googleapis.com/Service": "run.googleapis.com",
85
+ "cloudfunctions.googleapis.com/Function": "cloudfunctions.googleapis.com",
86
+ "pubsub.googleapis.com/Topic": "pubsub.googleapis.com",
87
+ "redis.googleapis.com/Instance": "redis.googleapis.com",
88
+ "spanner.googleapis.com/Instance": "spanner.googleapis.com",
89
+ "dataflow.googleapis.com/Job": "dataflow.googleapis.com",
90
+ "dataproc.googleapis.com/Cluster": "dataproc.googleapis.com",
91
+ "aiplatform.googleapis.com/Endpoint": "aiplatform.googleapis.com",
92
+ }
93
+ return mapping.get(resource_type)