PyPI - argus-cloud-optimizer - Versions diffs - 0.2.0__py3-none-any.whl - Mend

argus-cloud-optimizer 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

adapters/__init__.py +0 -0
adapters/aws/__init__.py +0 -0
adapters/aws/adapter.py +85 -0
adapters/aws/auth.py +57 -0
adapters/aws/cloudtrail.py +83 -0
adapters/aws/cloudwatch.py +732 -0
adapters/aws/config.py +9 -0
adapters/aws/cost_explorer.py +116 -0
adapters/aws/resource_explorer.py +186 -0
adapters/aws/retry.py +55 -0
adapters/azure/__init__.py +0 -0
adapters/azure/activity_log.py +159 -0
adapters/azure/adapter.py +117 -0
adapters/azure/cost_management.py +125 -0
adapters/azure/monitor.py +311 -0
adapters/azure/resource_graph.py +113 -0
adapters/azure/retry.py +57 -0
adapters/base.py +105 -0
adapters/gcp/__init__.py +0 -0
adapters/gcp/adapter.py +86 -0
adapters/gcp/asset_inventory.py +116 -0
adapters/gcp/billing.py +118 -0
adapters/gcp/cloud_logging.py +93 -0
adapters/gcp/cloud_monitoring.py +276 -0
adapters/gcp/retry.py +46 -0
ai/__init__.py +0 -0
ai/anthropic.py +174 -0
ai/azure_openai.py +241 -0
ai/base.py +78 -0
ai/bedrock.py +169 -0
ai/vertexai.py +234 -0
argus_cloud_optimizer-0.2.0.dist-info/METADATA +433 -0
argus_cloud_optimizer-0.2.0.dist-info/RECORD +62 -0
argus_cloud_optimizer-0.2.0.dist-info/WHEEL +5 -0
argus_cloud_optimizer-0.2.0.dist-info/entry_points.txt +2 -0
argus_cloud_optimizer-0.2.0.dist-info/licenses/LICENSE +21 -0
argus_cloud_optimizer-0.2.0.dist-info/top_level.txt +4 -0
core/__init__.py +0 -0
core/__version__.py +1 -0
core/agent/__init__.py +0 -0
core/agent/loop.py +390 -0
core/agent/prompts.py +317 -0
core/config.py +235 -0
core/log.py +69 -0
core/models/__init__.py +0 -0
core/models/finding.py +76 -0
core/py.typed +0 -0
core/reports/__init__.py +0 -0
core/reports/comparison.py +49 -0
core/reports/delivery.py +323 -0
core/reports/export.py +111 -0
core/reports/generator.py +168 -0
core/reports/html.py +286 -0
core/reports/multi_cloud.py +162 -0
core/secrets.py +145 -0
core/token_tracker.py +97 -0
core/validation.py +214 -0
entrypoints/__init__.py +0 -0
entrypoints/aws_lambda.py +299 -0
entrypoints/azure_function.py +257 -0
entrypoints/cli.py +156 -0
entrypoints/gcp_cloudrun.py +209 -0

adapters/aws/config.py ADDED Viewed

@@ -0,0 +1,9 @@
+from __future__ import annotations
+from botocore.config import Config
+# Shared botocore client configuration passed to the AWS clients created by
+# the adapters in this package.
+# NOTE(review): botocore's built-in retries are switched off here
+# (max_attempts=0), presumably because adapters/aws/retry.py retries
+# transient errors itself — confirm every call site actually goes through
+# that helper (the Resource Explorer paginator call does not appear to).
+BOTO_TIMEOUT_CONFIG = Config(
+    connect_timeout=10,  # seconds allowed to establish the connection
+    read_timeout=60,  # seconds allowed to wait for response data
+    retries={"max_attempts": 0},
+)

adapters/aws/cost_explorer.py ADDED Viewed

@@ -0,0 +1,116 @@
+from __future__ import annotations
+from datetime import datetime, timedelta, timezone
+import boto3
+import structlog
+from botocore.exceptions import ClientError
+from adapters.aws.config import BOTO_TIMEOUT_CONFIG
+from adapters.aws.retry import retry_on_transient
+logger = structlog.get_logger(__name__)
+# Cost Explorer is a global service — always us-east-1
+_CE_REGION = "us-east-1"
+def get_cost(
+    session: boto3.Session,
+    resource_ids: list[str],
+    days: int = 30,
+) -> dict[str, float]:
+    """
+    Return the UnblendedCost in USD per resource ID, summed over the
+    trailing ``days`` days (the value is NOT normalised to a calendar month).
+    Uses GetCostAndUsageWithResources which requires resource-level cost
+    allocation to be enabled in the AWS Cost Management console.
+    If the call fails (data unavailable, Cost Explorer not activated,
+    missing IAM permission, or any other ClientError) every requested ID
+    maps to 0.0 and the reason is logged — the agent will note that cost
+    data is unavailable for these resources.
+    IMPORTANT: Always batch resource_ids — all IDs go into a single query.
+    The query is followed through NextPageToken, so a large result set costs
+    one request per page; Cost Explorer charges $0.01 per request.
+    NOTE(review): AWS documents resource-level Cost Explorer data as limited
+    to a recent window (14 days at the time of writing) — confirm that the
+    default days=30 is accepted by the API.
+    Args:
+        session: boto3 session used to create the Cost Explorer client.
+        resource_ids: resource identifiers to filter and group by.
+        days: size of the look-back window ending today (UTC).
+    Returns:
+        Mapping of resource ID to accumulated cost. Every requested ID is
+        present; IDs returned by the API that were not requested are added.
+    """
+    if not resource_ids:
+        return {}
+    client = session.client("ce", region_name=_CE_REGION, config=BOTO_TIMEOUT_CONFIG)
+    end_date = datetime.now(tz=timezone.utc).date()
+    start_date = end_date - timedelta(days=days)
+    request = {
+        "TimePeriod": {
+            "Start": start_date.strftime("%Y-%m-%d"),
+            "End": end_date.strftime("%Y-%m-%d"),
+        },
+        "Granularity": "MONTHLY",
+        "Filter": {
+            "Dimensions": {
+                "Key": "RESOURCE_ID",
+                "Values": resource_ids,
+            }
+        },
+        "GroupBy": [{"Type": "DIMENSION", "Key": "RESOURCE_ID"}],
+        "Metrics": ["UnblendedCost"],
+    }
+    costs: dict[str, float] = {rid: 0.0 for rid in resource_ids}
+    try:
+        while True:
+            response = retry_on_transient(
+                client.get_cost_and_usage_with_resources,
+                **request,
+            )
+            for time_period in response.get("ResultsByTime", []):
+                for group in time_period.get("Groups", []):
+                    resource_id = group["Keys"][0]
+                    amount = float(group["Metrics"]["UnblendedCost"]["Amount"])
+                    # Accumulate across months (window may span several)
+                    # and across result pages.
+                    costs[resource_id] = costs.get(resource_id, 0.0) + amount
+            # Results are paginated; without following the token only the
+            # first page would be counted and the rest reported as 0.0.
+            next_token = response.get("NextPageToken")
+            if not next_token:
+                break
+            request["NextPageToken"] = next_token
+    except ClientError as exc:
+        code = exc.response["Error"]["Code"]
+        message = exc.response["Error"].get("Message", "")
+        if code == "DataUnavailableException":
+            logger.warning(
+                "cost_explorer_resource_granularity_disabled",
+                hint=(
+                    "Enable resource-level data in AWS Cost Management console "
+                    "(Preferences → Resource-level data)."
+                ),
+            )
+        elif (
+            code == "AccessDeniedException"
+            and "not enabled for cost explorer" in message.lower()
+        ):
+            logger.warning(
+                "cost_explorer_not_activated",
+                hint=(
+                    "Cost Explorer has not been enabled for this AWS account. "
+                    "Activate it in the AWS Cost Management console "
+                    "(takes up to 24 hours after first activation)."
+                ),
+            )
+        elif code == "AccessDeniedException":
+            logger.warning(
+                "cost_explorer_access_denied",
+                hint=(
+                    "IAM principal is missing "
+                    "ce:GetCostAndUsageWithResources permission. "
+                    "Add it to the Argus IAM role."
+                ),
+                error=str(exc),
+            )
+        else:
+            logger.error("cost_explorer_failed", error=str(exc), code=code)
+        # Any failure (even mid-pagination) degrades to "no cost data"
+        # rather than returning partial totals.
+        return {rid: 0.0 for rid in resource_ids}
+    logger.info(
+        "cost_explorer_complete",
+        resources_queried=len(resource_ids),
+        resources_with_cost=sum(1 for v in costs.values() if v > 0),
+    )
+    return costs

adapters/aws/resource_explorer.py ADDED Viewed

@@ -0,0 +1,186 @@
+from __future__ import annotations
+import json
+from typing import Any
+import boto3
+import structlog
+from botocore.exceptions import ClientError
+from adapters.aws.config import BOTO_TIMEOUT_CONFIG
+from adapters.base import Resource
+logger = structlog.get_logger(__name__)
+# Resource Explorer aggregator index lives in one region per account.
+# This is created by our CloudFormation template. Users can override
+# via RESOURCE_EXPLORER_REGION env var if their aggregator is elsewhere.
+DEFAULT_AGGREGATOR_REGION = "us-east-1"
+# -----------------------------------------------------------------------
+# Non-billable resource type filter
+# -----------------------------------------------------------------------
+# These types never appear on an AWS bill (or cost < $0.01/month and carry
+# no useful idle signal), so we strip them before the AI ever sees them.
+# This cuts token count by 50-70% on a typical account.
+#
+# Rule: when in doubt, KEEP the type (the AI can always decide it's free).
+# Only list types that are definitively free infrastructure primitives.
+# -----------------------------------------------------------------------
+# Prefixes are written in lower-case CloudFormation style
+# ("aws::service::type"); _is_billable normalises its input to this form.
+_NON_BILLABLE_PREFIXES: frozenset[str] = frozenset(
+    [
+        # IAM — all objects are $0
+        "aws::iam::",
+        # CloudFormation — stacks/stacksets are metadata, not billed resources
+        "aws::cloudformation::",
+        # SSM parameters and documents ($0 for standard tier parameters)
+        "aws::ssm::parameter",
+        "aws::ssm::document",
+        "aws::ssm::patchbaseline",
+        "aws::ssm::maintenancewindow",
+        "aws::ssm::resourcedatasync",
+        "aws::ssm::association",
+        # EC2 free infrastructure primitives
+        "aws::ec2::routetable",
+        "aws::ec2::subnet",
+        "aws::ec2::networkacl",
+        "aws::ec2::dhcpoptions",
+        "aws::ec2::internetgateway",
+        "aws::ec2::keypair",
+        "aws::ec2::placementgroup",
+        "aws::ec2::prefixlist",
+        "aws::ec2::vpcpeeringconnection",
+        # Config — rule/recorder metadata ($0)
+        "aws::config::configrule",
+        "aws::config::configurationrecorder",
+        "aws::config::deliverychannel",
+        "aws::config::conformancepack",
+        # Lambda auxiliary objects (the function itself stays)
+        "aws::lambda::eventsourcemapping",
+        "aws::lambda::layerversion",
+        # SNS subscriptions ($0 — the topic itself stays)
+        "aws::sns::subscription",
+        # CloudWatch alarms ($0.10/alarm but no idle signal)
+        "aws::cloudwatch::alarm",
+        # Events — rules stay (EventBridge charges), but event buses are
+        # treated as free
+        "aws::events::eventbus",
+        # WAF — web ACL associations are metadata
+        "aws::wafv2::webaclassociation",
+        # Tagging — resource groups are free metadata
+        "aws::resourcegroups::group",
+        # Macie, GuardDuty, SecurityHub — findings/members are metadata
+        "aws::guardduty::detector",
+        "aws::guardduty::member",
+        "aws::macie2::",
+        "aws::securityhub::hub",
+        "aws::securityhub::standard",
+        # Service Catalog — products/portfolios are metadata
+        "aws::servicecatalog::",
+        # Organizations — accounts/OUs are metadata
+        "aws::organizations::",
+        # Access Analyzer
+        "aws::accessanalyzer::analyzer",
+    ]
+)
+# str.startswith accepts a tuple, so build it once instead of per call.
+_NON_BILLABLE_PREFIX_TUPLE: tuple[str, ...] = tuple(_NON_BILLABLE_PREFIXES)
+def _is_billable(resource_type: str) -> bool:
+    """
+    Return True if a resource type could appear on an AWS bill.
+    Accepts both spellings of a resource type:
+    - CloudFormation style, e.g. "AWS::IAM::Role"
+    - Resource Explorer style, e.g. "iam:role" or "ec2:route-table"
+    Resource Explorer style is rewritten to the CloudFormation-style form
+    used by _NON_BILLABLE_PREFIXES ("service:some-type" becomes
+    "aws::service::sometype") before the prefix comparison; without this the
+    filter would never match the types Resource Explorer reports.
+    Unknown or empty types are treated as billable (when in doubt, keep).
+    """
+    normalized = resource_type.lower()
+    if not normalized.startswith("aws::"):
+        service, _, kind = normalized.partition(":")
+        # Hyphens are dropped because the prefix list spells multi-word
+        # names without separators (routetable, internetgateway, ...).
+        normalized = f"aws::{service}::{kind}".replace("-", "")
+    return not normalized.startswith(_NON_BILLABLE_PREFIX_TUPLE)
+def list_resources(
+    session: boto3.Session,
+    ignore_regions: list[str] | None = None,
+    aggregator_region: str = DEFAULT_AGGREGATOR_REGION,
+) -> list[Resource]:
+    """
+    Enumerate the account's resources through the Resource Explorer v2
+    aggregator index, across every region except those in ignore_regions.
+    A single wildcard search ("*") is paginated; each hit is dropped when
+    its region is ignored, when its type is classified as non-billable, or
+    when it cannot be parsed into a Resource.
+    Requires: a Resource Explorer aggregator index in aggregator_region
+    (the CloudFormation template is expected to create it).
+    NOTE(review): the Search API documents a cap on the total number of
+    results per query — confirm large accounts are not silently truncated.
+    Raises:
+        PermissionError: the role lacks resource-explorer-2:Search.
+        RuntimeError: the index is missing (ResourceNotFoundException or
+            ValidationException from the API).
+        ClientError: any other AWS error is re-raised unchanged.
+    """
+    skipped_regions = set(ignore_regions) if ignore_regions else set()
+    explorer = session.client(
+        "resource-explorer-2", region_name=aggregator_region, config=BOTO_TIMEOUT_CONFIG
+    )
+    found: list[Resource] = []
+    try:
+        pages = explorer.get_paginator("search").paginate(QueryString="*")
+        for page in pages:
+            for raw in page.get("Resources", []):
+                if raw.get("Region") in skipped_regions:
+                    continue
+                if not _is_billable(raw.get("ResourceType", "")):
+                    continue
+                resource = _parse_resource(raw)
+                if resource:
+                    found.append(resource)
+    except ClientError as exc:
+        error_code = exc.response["Error"]["Code"]
+        if error_code == "AccessDeniedException":
+            raise PermissionError(
+                "Argus IAM role is missing resource-explorer-2:Search permission."
+            ) from exc
+        if error_code in ("ResourceNotFoundException", "ValidationException"):
+            raise RuntimeError(
+                "No Resource Explorer aggregator index found. "
+                "Deploy the Argus CloudFormation template to create one, "
+                "or enable Resource Explorer manually in the AWS console."
+            ) from exc
+        raise
+    logger.info(
+        "resource_explorer_search_complete",
+        total=len(found),
+        ignored_regions=list(skipped_regions),
+    )
+    return found
+def _parse_resource(raw: dict[str, Any]) -> Resource | None:
+    """
+    Convert one raw Resource Explorer search hit into a Resource.
+    Returns None when the hit has no ARN or no resource type. The ARN is
+    used as the resource ID and the "Name" tag (if any) as the display name.
+    """
+    arn, resource_type = raw.get("Arn", ""), raw.get("ResourceType", "")
+    if not (arn and resource_type):
+        return None
+    tag_map = _parse_tags(raw.get("Properties", []))
+    return Resource(
+        resource_id=arn,
+        resource_type=resource_type,
+        cloud="aws",
+        region=raw.get("Region", ""),
+        name=tag_map.get("Name"),
+        tags=tag_map,
+    )
+def _parse_tags(properties: list[dict[str, Any]]) -> dict[str, str]:
+    """
+    Extract the tag mapping from a Resource Explorer ``Properties`` list.
+    Only the first property named "tags" is inspected. Its ``Data`` may be
+    either an already-decoded list of {"Key": ..., "Value": ...} dicts (the
+    SDK models the field as a document and decodes it) or the same list
+    encoded as a JSON string, e.g. '[{"Key": "Env", "Value": "prod"}]'.
+    Previously only the JSON-string form was handled, so decoded data made
+    json.loads raise TypeError and every resource ended up with no tags.
+    Returns an empty dict when there is no "tags" property or its data is
+    malformed; entries lacking "Key" or "Value" are skipped.
+    """
+    for prop in properties:
+        if prop.get("Name") != "tags":
+            continue
+        data = prop.get("Data", "[]")
+        try:
+            if isinstance(data, (str, bytes, bytearray)):
+                data = json.loads(data)
+            if not isinstance(data, list):
+                return {}
+            return {
+                t["Key"]: t["Value"]
+                for t in data
+                if isinstance(t, dict) and "Key" in t and "Value" in t
+            }
+        except (ValueError, TypeError):
+            # ValueError covers invalid JSON / undecodable bytes;
+            # TypeError covers unhashable tag keys.
+            return {}
+    return {}

adapters/aws/retry.py ADDED Viewed

@@ -0,0 +1,55 @@
+from __future__ import annotations
+import random
+import time
+from collections.abc import Callable
+from typing import Any, TypeVar
+import structlog
+from botocore.exceptions import ClientError
+logger = structlog.get_logger(__name__)
+T = TypeVar("T")
+# Total number of attempts (first call included), not additional retries.
+_MAX_RETRIES = 3
+# Initial back-off in seconds; doubled after every failed attempt.
+_BASE_DELAY = 1.0
+# AWS error codes treated as transient and therefore worth retrying.
+_RETRYABLE_CODES = frozenset(
+    {
+        "ThrottlingException",
+        "RequestLimitExceeded",
+        "TooManyRequestsException",
+        "Throttling",
+        "InternalError",
+        "ServiceUnavailable",
+    }
+)
+def retry_on_transient(
+    fn: Callable[..., T],
+    *args: Any,
+    **kwargs: Any,
+) -> T:
+    """
+    Call ``fn(*args, **kwargs)`` and retry it on transient AWS errors.
+    A ClientError whose code is in _RETRYABLE_CODES triggers another
+    attempt after an exponentially growing delay plus up to 50% random
+    jitter. Any other ClientError, or a retryable one on the final attempt,
+    is re-raised unchanged. Exceptions other than ClientError are not caught.
+    """
+    backoff = _BASE_DELAY
+    final_attempt = _MAX_RETRIES - 1
+    for attempt in range(_MAX_RETRIES):
+        try:
+            return fn(*args, **kwargs)
+        except ClientError as exc:
+            code = exc.response["Error"]["Code"]
+            if code not in _RETRYABLE_CODES or attempt >= final_attempt:
+                raise
+            pause = backoff + random.uniform(0, backoff * 0.5)  # noqa: S311
+            logger.warning(
+                "aws_transient_error_retrying",
+                error_code=code,
+                attempt=attempt + 1,
+                max_retries=_MAX_RETRIES,
+                retry_in=round(pause, 1),
+            )
+            time.sleep(pause)
+            backoff *= 2
+    raise RuntimeError("Unreachable")  # pragma: no cover

adapters/azure/__init__.py ADDED Viewed

File without changes

adapters/azure/activity_log.py ADDED Viewed

@@ -0,0 +1,159 @@
+from __future__ import annotations
+from datetime import datetime, timedelta, timezone
+from typing import Any
+import structlog
+from azure.core.exceptions import HttpResponseError
+from azure.identity import DefaultAzureCredential
+from azure.monitor.query import LogsQueryClient, LogsQueryStatus
+from adapters.azure.retry import retry_on_transient
+logger = structlog.get_logger(__name__)
+_LOOKBACK_DAYS = 90  # Azure Activity Log retention is 90 days
+def get_last_activity(
+    subscription_id: str,
+    resource_id: str,
+    resource_type: str,
+    credential: Any = None,
+) -> datetime | None:
+    """
+    Return the timestamp of the most recent activity for an Azure resource.
+    When AZURE_LOG_ANALYTICS_WORKSPACE_ID is set, the AzureActivity table is
+    queried through the Logs Query (Log Analytics) API for the newest
+    non-read operation in the last _LOOKBACK_DAYS days. When it is not set,
+    the lookup is delegated to _fallback_from_activity_log_api.
+    Returns None if:
+    - No activity is found in the look-back window
+    - The query fails or does not complete successfully
+    resource_id is the full Azure resource ID:
+    /subscriptions/{sub}/resourceGroups/{rg}/providers/{type}/{name}
+    resource_type is accepted for interface parity and is not used here.
+    credential defaults to DefaultAzureCredential() when omitted.
+    """
+    # Log Analytics workspace for the subscription — set via env var.
+    import os
+    workspace_id = os.environ.get("AZURE_LOG_ANALYTICS_WORKSPACE_ID", "")
+    if not workspace_id:
+        logger.debug(
+            "azure_activity_log_skipped",
+            extra={
+                "resource_id": resource_id,
+                "reason": "AZURE_LOG_ANALYTICS_WORKSPACE_ID not set",
+            },
+        )
+        return _fallback_from_activity_log_api(subscription_id, resource_id, credential)
+    # Build the credential and client only once we know the Log Analytics
+    # path is taken; the fallback creates its own client.
+    cred = credential or DefaultAzureCredential()
+    client = LogsQueryClient(cred, connection_timeout=10, read_timeout=60)
+    end_time = datetime.now(tz=timezone.utc)
+    start_time = end_time - timedelta(days=_LOOKBACK_DAYS)
+    # Escape backslashes and double quotes so the ID cannot terminate the
+    # KQL string literal it is embedded in.
+    kql_resource_id = resource_id.replace("\\", "\\\\").replace('"', '\\"')
+    # KQL query — finds the most recent write/action operation on this resource
+    query = f"""
+    AzureActivity
+    | where ResourceId =~ "{kql_resource_id}"
+    | where OperationNameValue !endswith "/read"
+    | order by TimeGenerated desc
+    | take 1
+    | project TimeGenerated
+    """
+    try:
+        response = retry_on_transient(
+            client.query_workspace,
+            workspace_id=workspace_id,
+            query=query,
+            timespan=(start_time, end_time),
+        )
+    except HttpResponseError as exc:
+        logger.warning(
+            "azure_log_analytics_failed",
+            extra={"resource_id": resource_id, "error": str(exc)},
+        )
+        return None
+    if response.status != LogsQueryStatus.SUCCESS:
+        return None
+    # The query projects a single column and takes one row, so the first
+    # row encountered is the answer.
+    for table in response.tables:
+        for row in table.rows:
+            event_time = row[0]
+            if isinstance(event_time, str):
+                from dateutil.parser import parse
+                event_time = parse(event_time)
+            if event_time and event_time.tzinfo is None:
+                # Treat naive timestamps as UTC.
+                event_time = event_time.replace(tzinfo=timezone.utc)
+            return event_time  # type: ignore[no-any-return]
+    return None
+def _fallback_from_activity_log_api(
+    subscription_id: str,
+    resource_id: str,
+    credential: Any,
+) -> datetime | None:
+    """
+    Direct Activity Log API fallback when Log Analytics workspace isn't configured.
+    Uses azure-mgmt-monitor to list activity-log events for the resource over
+    the last _LOOKBACK_DAYS days and returns the latest timestamp among the
+    events whose operation name does not end in "/read".
+    Returns None when azure-mgmt-monitor is not installed, when the API call
+    fails, or when no non-read event with a timestamp exists.
+    credential defaults to DefaultAzureCredential() when falsy.
+    """
+    try:
+        from azure.mgmt.monitor import (
+            MonitorManagementClient,  # type: ignore[import-untyped]
+        )
+    except ImportError:
+        return None
+    cred = credential or DefaultAzureCredential()
+    client = MonitorManagementClient(
+        cred, subscription_id, connection_timeout=10, read_timeout=60
+    )
+    end_time = datetime.now(tz=timezone.utc)
+    start_time = end_time - timedelta(days=_LOOKBACK_DAYS)
+    filter_str = (
+        f"eventTimestamp ge '{start_time.isoformat()}' "
+        f"and eventTimestamp le '{end_time.isoformat()}' "
+        f"and resourceUri eq '{resource_id}'"
+    )
+    def _fetch_events() -> list[Any]:
+        # The list operation returns a lazy pager: HTTP requests are only
+        # issued while iterating. Materialise inside the retried callable so
+        # transient failures during paging are actually retried.
+        return list(
+            client.activity_logs.list(
+                filter=filter_str,
+                select="eventTimestamp,operationName",
+            )
+        )
+    try:
+        events = retry_on_transient(_fetch_events)
+    except HttpResponseError as exc:
+        logger.warning(
+            "azure_activity_log_api_failed",
+            extra={"resource_id": resource_id, "error": str(exc)},
+        )
+        return None
+    timestamps: list[datetime] = []
+    for event in events:
+        operation = event.operation_name
+        # Filter out read-only operations
+        if not operation or str(operation.value or "").lower().endswith("/read"):
+            continue
+        stamp = event.event_timestamp
+        if stamp is None:
+            continue
+        if stamp.tzinfo is None:
+            # Treat naive timestamps as UTC so all values are comparable.
+            stamp = stamp.replace(tzinfo=timezone.utc)
+        timestamps.append(stamp)
+    # Pick the newest explicitly instead of relying on the service returning
+    # events newest-first.
+    return max(timestamps, default=None)

adapters/azure/adapter.py ADDED Viewed

@@ -0,0 +1,117 @@
+from __future__ import annotations
+import os
+from datetime import datetime
+from typing import Any
+from adapters.azure import activity_log, cost_management, monitor, resource_graph
+from adapters.base import CloudAdapter, MetricSummary, Resource
+class AzureAdapter(CloudAdapter):
+    """
+    CloudAdapter implementation for Azure.
+    Delegates to the sibling modules: resource_graph (inventory), monitor
+    (metrics), cost_management (cost) and activity_log (last activity).
+    The optional credential is forwarded untouched to each of them; when it
+    is None the called module decides which credential to use.
+    Usage:
+        adapter = AzureAdapter(subscription_ids=["sub-id-1", "sub-id-2"])
+    """
+    def __init__(
+        self,
+        subscription_ids: list[str] | None = None,
+        credential: Any = None,
+    ) -> None:
+        """
+        Store the subscription IDs and credential.
+        Falls back to the AZURE_SUBSCRIPTION_IDS env var when
+        subscription_ids is empty or None; raises EnvironmentError when no
+        subscription ID can be resolved from either source.
+        """
+        subs = subscription_ids or _parse_subscription_ids()
+        if not subs:
+            raise EnvironmentError(
+                "No Azure subscription IDs configured. "
+                "Pass subscription_ids= or set AZURE_SUBSCRIPTION_IDS "
+                "(comma-separated)."
+            )
+        self._subscription_ids = subs
+        self._credential = credential
+    def list_resources(self, ignore_regions: list[str] | None = None) -> list[Resource]:
+        """List resources in the configured subscriptions via Resource Graph."""
+        return resource_graph.list_resources(
+            subscription_ids=self._subscription_ids,
+            ignore_regions=ignore_regions,
+            credential=self._credential,
+        )
+    def get_metrics(
+        self,
+        resource_id: str,
+        resource_type: str,
+        days: int = 90,
+    ) -> MetricSummary:
+        """Fetch the metric summary for one resource from the monitor module."""
+        return monitor.get_metrics(
+            resource_id=resource_id,
+            resource_type=resource_type,
+            days=days,
+            credential=self._credential,
+        )
+    def get_cost(
+        self,
+        resource_ids: list[str],
+        days: int = 30,
+    ) -> dict[str, float]:
+        """
+        Return cost per resource ID, merged across subscriptions.
+        The cost lookup takes one subscription at a time, so the IDs are
+        bucketed by the subscription parsed from each resource ID and one
+        lookup is issued per bucket.
+        """
+        grouped: dict[str, list[str]] = {}
+        for rid in resource_ids:
+            grouped.setdefault(_subscription_from_resource_id(rid), []).append(rid)
+        totals: dict[str, float] = {}
+        for sub_id, members in grouped.items():
+            per_subscription = cost_management.get_cost(
+                subscription_id=sub_id,
+                resource_ids=members,
+                days=days,
+                credential=self._credential,
+            )
+            totals.update(per_subscription)
+        return totals
+    def get_last_activity(
+        self,
+        resource_id: str,
+        resource_type: str,
+    ) -> datetime | None:
+        """Return the resource's latest activity timestamp, or None."""
+        return activity_log.get_last_activity(
+            subscription_id=_subscription_from_resource_id(resource_id),
+            resource_id=resource_id,
+            resource_type=resource_type,
+            credential=self._credential,
+        )
+    @classmethod
+    def from_env(cls) -> AzureAdapter:
+        """Alternate constructor that takes the subscription IDs from env vars."""
+        return cls(subscription_ids=_parse_subscription_ids())
+def _parse_subscription_ids() -> list[str]:
+    """
+    Read AZURE_SUBSCRIPTION_IDS (comma-separated) from the environment.
+    Whitespace around each entry is stripped and empty entries are dropped;
+    an unset or empty variable yields an empty list.
+    """
+    ids: list[str] = []
+    for chunk in os.environ.get("AZURE_SUBSCRIPTION_IDS", "").split(","):
+        cleaned = chunk.strip()
+        if cleaned:
+            ids.append(cleaned)
+    return ids
+def _subscription_from_resource_id(resource_id: str) -> str:
+    """
+    Return the subscription ID embedded in an Azure resource ID.
+    Format: /subscriptions/{sub}/resourceGroups/{rg}/providers/...
+    The ID is lower-cased first, so the result is always lower case. An
+    empty string is returned when there is no "subscriptions" segment or
+    nothing follows it.
+    """
+    segments = resource_id.lower().split("/")
+    # Walk adjacent pairs; the segment after the first "subscriptions" wins.
+    for segment, following in zip(segments, segments[1:]):
+        if segment == "subscriptions":
+            return following
+    return ""