argus-cloud-optimizer 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/aws/__init__.py +0 -0
- adapters/aws/adapter.py +85 -0
- adapters/aws/auth.py +57 -0
- adapters/aws/cloudtrail.py +83 -0
- adapters/aws/cloudwatch.py +732 -0
- adapters/aws/config.py +9 -0
- adapters/aws/cost_explorer.py +116 -0
- adapters/aws/resource_explorer.py +186 -0
- adapters/aws/retry.py +55 -0
- adapters/azure/__init__.py +0 -0
- adapters/azure/activity_log.py +159 -0
- adapters/azure/adapter.py +117 -0
- adapters/azure/cost_management.py +125 -0
- adapters/azure/monitor.py +311 -0
- adapters/azure/resource_graph.py +113 -0
- adapters/azure/retry.py +57 -0
- adapters/base.py +105 -0
- adapters/gcp/__init__.py +0 -0
- adapters/gcp/adapter.py +86 -0
- adapters/gcp/asset_inventory.py +116 -0
- adapters/gcp/billing.py +118 -0
- adapters/gcp/cloud_logging.py +93 -0
- adapters/gcp/cloud_monitoring.py +276 -0
- adapters/gcp/retry.py +46 -0
- ai/__init__.py +0 -0
- ai/anthropic.py +174 -0
- ai/azure_openai.py +241 -0
- ai/base.py +78 -0
- ai/bedrock.py +169 -0
- ai/vertexai.py +234 -0
- argus_cloud_optimizer-0.2.0.dist-info/METADATA +433 -0
- argus_cloud_optimizer-0.2.0.dist-info/RECORD +62 -0
- argus_cloud_optimizer-0.2.0.dist-info/WHEEL +5 -0
- argus_cloud_optimizer-0.2.0.dist-info/entry_points.txt +2 -0
- argus_cloud_optimizer-0.2.0.dist-info/licenses/LICENSE +21 -0
- argus_cloud_optimizer-0.2.0.dist-info/top_level.txt +4 -0
- core/__init__.py +0 -0
- core/__version__.py +1 -0
- core/agent/__init__.py +0 -0
- core/agent/loop.py +390 -0
- core/agent/prompts.py +317 -0
- core/config.py +235 -0
- core/log.py +69 -0
- core/models/__init__.py +0 -0
- core/models/finding.py +76 -0
- core/py.typed +0 -0
- core/reports/__init__.py +0 -0
- core/reports/comparison.py +49 -0
- core/reports/delivery.py +323 -0
- core/reports/export.py +111 -0
- core/reports/generator.py +168 -0
- core/reports/html.py +286 -0
- core/reports/multi_cloud.py +162 -0
- core/secrets.py +145 -0
- core/token_tracker.py +97 -0
- core/validation.py +214 -0
- entrypoints/__init__.py +0 -0
- entrypoints/aws_lambda.py +299 -0
- entrypoints/azure_function.py +257 -0
- entrypoints/cli.py +156 -0
- entrypoints/gcp_cloudrun.py +209 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import structlog
|
|
7
|
+
from google.api_core.exceptions import GoogleAPICallError
|
|
8
|
+
from google.cloud import monitoring_v3
|
|
9
|
+
from google.protobuf.timestamp_pb2 import Timestamp
|
|
10
|
+
|
|
11
|
+
from adapters.base import MetricSummary
|
|
12
|
+
from adapters.gcp.retry import retry_on_transient
|
|
13
|
+
|
|
14
|
+
logger = structlog.get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
# Known metrics per resource type, keyed by the ``resource_type`` string that
# get_metrics() receives. Each entry is a (metric_type, stat) pair.
# stat: "mean" for utilisation, "sum" for throughput/count.
# The monitored-resource label used to filter a query down to one specific
# resource is NOT stored in this table; it is derived per resource type by
# _resource_filter().
_METRICS: dict[str, list[tuple[str, str]]] = {
    "compute.googleapis.com/Instance": [
        ("compute.googleapis.com/instance/cpu/utilization", "mean"),
        ("compute.googleapis.com/instance/network/sent_bytes_count", "sum"),
        ("compute.googleapis.com/instance/network/received_bytes_count", "sum"),
    ],
    "compute.googleapis.com/Disk": [
        ("compute.googleapis.com/instance/disk/read_ops_count", "sum"),
        ("compute.googleapis.com/instance/disk/write_ops_count", "sum"),
    ],
    "sql.googleapis.com/Instance": [
        ("cloudsql.googleapis.com/database/cpu/utilization", "mean"),
        ("cloudsql.googleapis.com/database/network/connections", "mean"),
        ("cloudsql.googleapis.com/database/network/received_bytes_count", "sum"),
    ],
    "container.googleapis.com/Cluster": [
        ("kubernetes.io/container/cpu/request_utilization", "mean"),
        ("kubernetes.io/container/memory/request_utilization", "mean"),
        ("kubernetes.io/node/cpu/allocatable_utilization", "mean"),
    ],
    "run.googleapis.com/Service": [
        ("run.googleapis.com/request_count", "sum"),
        ("run.googleapis.com/request_latencies", "mean"),
        ("run.googleapis.com/container/cpu/utilizations", "mean"),
    ],
    "cloudfunctions.googleapis.com/Function": [
        ("cloudfunctions.googleapis.com/function/execution_count", "sum"),
        ("cloudfunctions.googleapis.com/function/execution_times", "mean"),
    ],
    "storage.googleapis.com/Bucket": [
        ("storage.googleapis.com/api/request_count", "sum"),
        ("storage.googleapis.com/network/sent_bytes_count", "sum"),
    ],
    "bigquery.googleapis.com/Dataset": [
        ("bigquery.googleapis.com/storage/table_count", "mean"),
        ("bigquery.googleapis.com/storage/stored_bytes", "mean"),
    ],
    "bigquery.googleapis.com/Table": [
        ("bigquery.googleapis.com/storage/stored_bytes", "mean"),
        ("bigquery.googleapis.com/storage/row_count", "mean"),
    ],
    "redis.googleapis.com/Instance": [
        ("redis.googleapis.com/clients/connected", "mean"),
        ("redis.googleapis.com/stats/cache_hit_ratio", "mean"),
        ("redis.googleapis.com/stats/memory/usage_ratio", "mean"),
    ],
    "spanner.googleapis.com/Instance": [
        ("spanner.googleapis.com/instance/cpu/utilization", "mean"),
        ("spanner.googleapis.com/instance/session_count", "mean"),
    ],
    "pubsub.googleapis.com/Topic": [
        ("pubsub.googleapis.com/topic/send_message_operation_count", "sum"),
        ("pubsub.googleapis.com/topic/byte_cost", "sum"),
    ],
    "pubsub.googleapis.com/Subscription": [
        ("pubsub.googleapis.com/subscription/pull_message_operation_count", "sum"),
        ("pubsub.googleapis.com/subscription/num_undelivered_messages", "mean"),
    ],
    "dataflow.googleapis.com/Job": [
        ("dataflow.googleapis.com/job/data_watermark_age", "mean"),
        ("dataflow.googleapis.com/job/elapsed_time", "mean"),
        ("dataflow.googleapis.com/job/element_count", "sum"),
    ],
    "dataproc.googleapis.com/Cluster": [
        ("dataproc.googleapis.com/cluster/yarn/allocated_memory_percentage", "mean"),
        ("dataproc.googleapis.com/cluster/hdfs/storage_utilization", "mean"),
    ],
    "aiplatform.googleapis.com/Endpoint": [
        ("aiplatform.googleapis.com/prediction/online/request_count", "sum"),
        ("aiplatform.googleapis.com/prediction/online/latencies", "mean"),
    ],
}

# Alignment period passed to the Cloud Monitoring aggregation in get_metrics().
_PERIOD_SECONDS = 86400  # daily granularity
# Upper bound on how many auto-discovered metrics _discover_metrics() returns
# for a resource type that has no entry in _METRICS.
_FALLBACK_METRIC_LIMIT = 5
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def get_metrics(
    project_id: str,
    resource_id: str,
    resource_type: str,
    days: int = 90,
) -> MetricSummary:
    """
    Fetch Cloud Monitoring metrics for a GCP resource.

    Metric definitions come from ``_METRICS``; for resource types without an
    entry the function falls back to ``_discover_metrics``. Each metric is
    queried over the last ``days`` days with a daily alignment period and
    collapsed into a single number (mean or sum of the returned points,
    rounded to 4 decimals).

    Args:
        project_id: GCP project whose time series are queried.
        resource_id: Full asset name of the resource; used to build the
            per-resource filter and echoed back in the summary.
        resource_type: Resource type string used to look up metric
            definitions and the resource filter.
        days: Length of the look-back window, in days.

    Returns:
        A ``MetricSummary`` whose ``metrics`` maps each metric type to its
        aggregated value, or to ``None`` when the query failed or returned no
        points. ``has_data`` is True if at least one metric produced points.
        When no metric definitions can be found at all, ``metrics`` is empty
        and ``has_data`` is False.
    """
    metric_defs = _METRICS.get(resource_type)
    if not metric_defs:
        metric_defs = _discover_metrics(project_id, resource_id, resource_type)
    if not metric_defs:
        return MetricSummary(
            resource_id=resource_id,
            resource_type=resource_type,
            period_days=days,
            metrics={},
            has_data=False,
        )

    client = monitoring_v3.MetricServiceClient()
    project_name = f"projects/{project_id}"

    end_time = datetime.now(tz=timezone.utc)
    start_time = end_time - timedelta(days=days)

    interval = monitoring_v3.TimeInterval(
        start_time=_to_proto_timestamp(start_time),
        end_time=_to_proto_timestamp(end_time),
    )
    # NOTE(review): every metric, including the "sum" ones, is aligned with
    # ALIGN_MEAN / REDUCE_MEAN, so a "sum" result is the sum of daily means
    # rather than a raw total - confirm this is the intended semantics.
    aggregation = monitoring_v3.Aggregation(
        alignment_period={"seconds": _PERIOD_SECONDS},
        cross_series_reducer=monitoring_v3.Aggregation.Reducer.REDUCE_MEAN,
        per_series_aligner=monitoring_v3.Aggregation.Aligner.ALIGN_MEAN,
    )

    # NOTE(review): an empty resource filter leaves the query scoped only by
    # metric type (i.e. not restricted to this resource) - verify that is
    # acceptable for resource types without a dedicated filter.
    resource_filter = _resource_filter(resource_id, resource_type)
    metrics: dict[str, Any] = {}
    has_data = False

    for metric_type, stat in metric_defs:
        filter_str = f'metric.type="{metric_type}"'
        if resource_filter:
            filter_str += f" AND {resource_filter}"

        request = monitoring_v3.ListTimeSeriesRequest(
            name=project_name,
            filter=filter_str,
            interval=interval,
            aggregation=aggregation,
        )

        try:
            # list() drains the result inside the try so errors raised while
            # iterating are handled the same way as errors from the call.
            series = list(
                retry_on_transient(client.list_time_series, request=request, timeout=60)
            )
        except GoogleAPICallError as exc:
            # Best effort: one failing metric must not abort the whole summary.
            # Fields are passed as structlog key-value pairs (not a stdlib
            # ``extra=`` dict) so they land at the top level of the event.
            logger.warning(
                "cloud_monitoring_failed",
                resource_id=resource_id,
                metric=metric_type,
                error=str(exc),
            )
            metrics[metric_type] = None
            continue

        values: list[float] = [
            point.value.double_value or point.value.int64_value
            for ts in series
            for point in ts.points
        ]

        if not values:
            metrics[metric_type] = None
            continue

        has_data = True
        metrics[metric_type] = round(
            sum(values) / len(values) if stat == "mean" else sum(values), 4
        )

    return MetricSummary(
        resource_id=resource_id,
        resource_type=resource_type,
        period_days=days,
        metrics=metrics,
        has_data=has_data,
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _discover_metrics(
    project_id: str,
    resource_id: str,
    resource_type: str,
) -> list[tuple[str, str]]:
    """
    Auto-discover available metrics for unknown resource types.

    Lists metric descriptors in the project whose type starts with the service
    prefix of ``resource_type`` and returns at most ``_FALLBACK_METRIC_LIMIT``
    of them, each paired with a guessed statistic.

    Args:
        project_id: GCP project whose metric descriptors are listed.
        resource_id: Only used as context in the warning log on failure.
        resource_type: Resource type string; the part before the first "/"
            is used as the metric-type prefix.

    Returns:
        A list of ``(metric_type, stat)`` pairs where ``stat`` is "sum" when
        the metric type contains a count-like keyword and "mean" otherwise.
        On an API error the descriptors collected so far (possibly none) are
        returned.
    """
    client = monitoring_v3.MetricServiceClient()
    project_name = f"projects/{project_id}"

    # Derive a Cloud Monitoring metric type prefix from the asset type.
    # e.g. "compute.googleapis.com/SomeNewThing" -> filter on "compute.googleapis.com"
    # partition() returns the whole string when there is no "/".
    service_prefix = resource_type.partition("/")[0]

    request = monitoring_v3.ListMetricDescriptorsRequest(
        name=project_name,
        filter=f'metric.type=starts_with("{service_prefix}")',
    )

    # Substrings that mark a metric as a counter/throughput style metric.
    sum_keywords = ("count", "bytes", "requests", "operations")

    discovered: list[tuple[str, str]] = []
    try:
        # Same transient-error retry wrapper that get_metrics() applies to
        # list_time_series, so both Cloud Monitoring calls behave alike.
        descriptors = retry_on_transient(
            client.list_metric_descriptors, request=request, timeout=60
        )
        for descriptor in descriptors:
            metric_type: str = descriptor.type
            lowered = metric_type.lower()
            stat = "sum" if any(kw in lowered for kw in sum_keywords) else "mean"
            discovered.append((metric_type, stat))
            if len(discovered) >= _FALLBACK_METRIC_LIMIT:
                break
    except GoogleAPICallError as exc:
        # Best effort: discovery failure degrades to "no (more) metrics".
        # Fields are passed as structlog key-value pairs rather than a
        # stdlib-style ``extra=`` dict.
        logger.warning(
            "cloud_monitoring_list_metrics_failed",
            resource_id=resource_id,
            error=str(exc),
        )

    return discovered
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _resource_filter(resource_id: str, resource_type: str) -> str:
|
|
235
|
+
"""
|
|
236
|
+
Build a Cloud Monitoring filter string to scope metrics to a specific resource.
|
|
237
|
+
Resource IDs are full asset names, e.g.:
|
|
238
|
+
//compute.googleapis.com/projects/p/zones/z/instances/name
|
|
239
|
+
"""
|
|
240
|
+
# Extract the short resource name from the full asset name.
|
|
241
|
+
name = resource_id.rstrip("/").split("/")[-1]
|
|
242
|
+
|
|
243
|
+
match resource_type:
|
|
244
|
+
case "compute.googleapis.com/Instance":
|
|
245
|
+
return f'resource.labels.instance_id="{name}"'
|
|
246
|
+
case "sql.googleapis.com/Instance":
|
|
247
|
+
return f'resource.labels.database_id="{name}"'
|
|
248
|
+
case "container.googleapis.com/Cluster":
|
|
249
|
+
return f'resource.labels.cluster_name="{name}"'
|
|
250
|
+
case "run.googleapis.com/Service":
|
|
251
|
+
return f'resource.labels.service_name="{name}"'
|
|
252
|
+
case "cloudfunctions.googleapis.com/Function":
|
|
253
|
+
return f'resource.labels.function_name="{name}"'
|
|
254
|
+
case "storage.googleapis.com/Bucket":
|
|
255
|
+
return f'resource.labels.bucket_name="{name}"'
|
|
256
|
+
case "pubsub.googleapis.com/Topic":
|
|
257
|
+
return f'resource.labels.topic_id="{name}"'
|
|
258
|
+
case "pubsub.googleapis.com/Subscription":
|
|
259
|
+
return f'resource.labels.subscription_id="{name}"'
|
|
260
|
+
case "redis.googleapis.com/Instance":
|
|
261
|
+
return f'resource.labels.instance_id="{name}"'
|
|
262
|
+
case "spanner.googleapis.com/Instance":
|
|
263
|
+
return f'resource.labels.instance_id="{name}"'
|
|
264
|
+
case "dataflow.googleapis.com/Job":
|
|
265
|
+
return f'resource.labels.job_id="{name}"'
|
|
266
|
+
case "dataproc.googleapis.com/Cluster":
|
|
267
|
+
return f'resource.labels.cluster_name="{name}"'
|
|
268
|
+
case "aiplatform.googleapis.com/Endpoint":
|
|
269
|
+
return f'resource.labels.endpoint_id="{name}"'
|
|
270
|
+
return ""
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _to_proto_timestamp(dt: datetime) -> Timestamp:
    """Return a protobuf ``Timestamp`` populated from the datetime ``dt``."""
    proto_ts = Timestamp()
    # FromDatetime fills the message in place, so the message itself is returned.
    proto_ts.FromDatetime(dt)
    return proto_ts
|
adapters/gcp/retry.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import Any, TypeVar
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from google.api_core.exceptions import (
|
|
10
|
+
ResourceExhausted,
|
|
11
|
+
ServiceUnavailable,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
logger = structlog.get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
T = TypeVar("T")
|
|
17
|
+
|
|
18
|
+
_MAX_RETRIES = 3
|
|
19
|
+
_BASE_DELAY = 1.0
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def retry_on_transient(
    fn: Callable[..., T],
    *args: Any,
    **kwargs: Any,
) -> T:
    """
    Call ``fn(*args, **kwargs)``, retrying on transient GCP errors.

    ``ResourceExhausted`` and ``ServiceUnavailable`` trigger a retry, up to
    ``_MAX_RETRIES`` attempts in total. The wait before each retry starts at
    ``_BASE_DELAY`` seconds, doubles after every failure, and has up to 50%
    random jitter added. Any other exception propagates immediately.

    Returns:
        Whatever ``fn`` returns on its first successful attempt.

    Raises:
        ResourceExhausted, ServiceUnavailable: Re-raised when the final
            attempt still fails.
    """
    backoff = _BASE_DELAY
    final_attempt = _MAX_RETRIES - 1
    for attempt_index in range(_MAX_RETRIES):
        try:
            return fn(*args, **kwargs)
        except (ResourceExhausted, ServiceUnavailable) as exc:
            # Out of attempts: surface the original error to the caller.
            if attempt_index >= final_attempt:
                raise
            pause = backoff + random.uniform(0, backoff * 0.5)  # noqa: S311
            logger.warning(
                "gcp_transient_error_retrying",
                error_type=type(exc).__name__,
                attempt=attempt_index + 1,
                max_retries=_MAX_RETRIES,
                retry_in=round(pause, 1),
            )
            time.sleep(pause)
            backoff *= 2
    raise RuntimeError("Unreachable")  # pragma: no cover
|
ai/__init__.py
ADDED
|
File without changes
|
ai/anthropic.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import anthropic as anthropic_sdk
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from ai.base import AIProvider, AIResponse, Message, Tool, ToolCall
|
|
11
|
+
|
|
12
|
+
logger = structlog.get_logger(__name__)
|
|
13
|
+
|
|
14
|
+
_MAX_RETRIES = 3
|
|
15
|
+
_BASE_DELAY = 1.0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AnthropicProvider(AIProvider):
    """
    AI provider backed by the Anthropic direct API.
    Works on any cloud — no AWS/GCP/Azure auth needed.
    Best for local development and contributors without cloud AI access.
    Configure via ANTHROPIC_API_KEY and optionally ANTHROPIC_MODEL env vars.
    """

    DEFAULT_MODEL = "claude-sonnet-4-6"
    DEFAULT_MAX_TOKENS = 4096
    DEFAULT_TEMPERATURE = 0.0

    def __init__(
        self,
        api_key: str | None = None,
        model: str | None = None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        temperature: float | None = None,
    ) -> None:
        """
        Create the Anthropic client and resolve model settings.

        Explicit arguments win; anything left as ``None`` falls back to the
        ``ai`` section of the application settings.

        Raises:
            EnvironmentError: If no API key is passed and none is configured.
        """
        # Imported lazily, as in the original; presumably to avoid loading
        # settings at module import time - TODO confirm.
        from core.config import get_settings

        cfg = get_settings().ai
        resolved_key = api_key or cfg.anthropic_api_key
        if not resolved_key:
            raise EnvironmentError(
                "ANTHROPIC_API_KEY is not set. "
                "Export it or pass api_key= explicitly."
            )
        self._client = anthropic_sdk.Anthropic(api_key=resolved_key, timeout=60.0)
        self._model = model or cfg.resolved_model("anthropic")
        self._max_tokens = max_tokens
        # ``is not None`` so an explicit temperature of 0.0 is honoured.
        self._temperature = temperature if temperature is not None else cfg.temperature

    def chat(
        self,
        messages: list[Message],
        tools: list[Tool],
        system_prompt: str | None = None,
    ) -> AIResponse:
        """
        Send one chat turn to the Anthropic Messages API.

        Args:
            messages: Conversation history in provider-neutral form.
            tools: Tools the model may call.
            system_prompt: Optional system prompt; sent with ephemeral
                cache control when present.

        Returns:
            The model reply converted to an ``AIResponse``.
        """
        kwargs: dict[str, Any] = {
            "model": self._model,
            "max_tokens": self._max_tokens,
            "temperature": self._temperature,
            "messages": [self._to_anthropic_message(m) for m in messages],
            "tools": [self._to_anthropic_tool(t) for t in tools],
        }
        if system_prompt:
            # cache_control pins the system prompt in Anthropic's prompt cache.
            # After the first iteration it's served from cache — no reprocessing charge.
            # Requires claude-3-5-* or claude-sonnet-4-* models.
            kwargs["system"] = [
                {
                    "type": "text",
                    "text": system_prompt,
                    "cache_control": {"type": "ephemeral"},
                }
            ]

        response = self._call_with_retry(kwargs)
        return self._parse_response(response)

    # ------------------------------------------------------------------
    # Retry logic
    # ------------------------------------------------------------------

    def _call_with_retry(self, kwargs: dict[str, Any]) -> Any:
        """
        Call ``messages.create`` with retries on rate-limit and server errors.

        Makes up to ``_MAX_RETRIES`` attempts. The wait before each retry
        starts at ``_BASE_DELAY`` seconds, doubles after every failure and
        gets up to 50% random jitter. The error from the final attempt is
        re-raised; other exception types propagate immediately.
        """
        delay = _BASE_DELAY
        for attempt in range(_MAX_RETRIES):
            try:
                return self._client.messages.create(**kwargs)
            except (
                anthropic_sdk.RateLimitError,
                anthropic_sdk.InternalServerError,
            ) as exc:
                if attempt < _MAX_RETRIES - 1:
                    jitter = random.uniform(0, delay * 0.5)  # noqa: S311
                    sleep_time = delay + jitter
                    logger.warning(
                        "anthropic_api_retrying",
                        error_type=type(exc).__name__,
                        attempt=attempt + 1,
                        max_retries=_MAX_RETRIES,
                        retry_in=round(sleep_time, 1),
                    )
                    time.sleep(sleep_time)
                    delay *= 2
                else:
                    raise
        raise RuntimeError("Unreachable")  # pragma: no cover

    # ------------------------------------------------------------------
    # Internal conversion helpers
    # ------------------------------------------------------------------

    def _to_anthropic_message(self, msg: Message) -> dict[str, Any]:
        """
        Convert a provider-neutral ``Message`` into an Anthropic message dict.

        User messages carrying tool results become a list of ``tool_result``
        blocks; other user messages become plain text. Any non-user message
        is emitted as an assistant message built from its text and tool calls.
        """
        if msg.role == "user":
            if msg.tool_results:
                return {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": tr.tool_call_id,
                            "content": tr.content,
                            # Only include is_error when it is actually set.
                            **({"is_error": True} if tr.is_error else {}),
                        }
                        for tr in msg.tool_results
                    ],
                }
            return {"role": "user", "content": msg.text or ""}

        # assistant
        content: list[dict[str, Any]] = []
        if msg.text:
            content.append({"type": "text", "text": msg.text})
        for tc in msg.tool_calls:
            content.append(
                {
                    "type": "tool_use",
                    "id": tc.id,
                    "name": tc.name,
                    "input": tc.arguments,
                }
            )
        return {"role": "assistant", "content": content}

    def _to_anthropic_tool(self, tool: Tool) -> dict[str, Any]:
        """Convert a provider-neutral ``Tool`` into an Anthropic tool definition."""
        return {
            "name": tool.name,
            "description": tool.description,
            "input_schema": tool.input_schema,
        }

    def _parse_response(self, response: Any) -> AIResponse:
        """
        Convert an Anthropic response object into an ``AIResponse``.

        ``tool_use`` blocks become ``ToolCall`` entries. The text of all
        ``text`` blocks is concatenated in order, so a reply split across
        several text blocks is not truncated to its last block. ``text`` is
        ``None`` when the response has no text block. Token counts default
        to 0 when the response carries no usage information.
        """
        tool_calls: list[ToolCall] = []
        text_parts: list[str] = []

        for block in response.content:
            if block.type == "tool_use":
                tool_calls.append(
                    ToolCall(
                        id=block.id,
                        name=block.name,
                        arguments=dict(block.input),
                    )
                )
            elif block.type == "text":
                text_parts.append(block.text)

        # Keep None (not "") when there was no text block at all.
        text: str | None = "".join(text_parts) if text_parts else None

        usage = getattr(response, "usage", None)
        return AIResponse(
            stop_reason=response.stop_reason,
            text=text,
            tool_calls=tool_calls,
            input_tokens=getattr(usage, "input_tokens", 0) if usage else 0,
            output_tokens=getattr(usage, "output_tokens", 0) if usage else 0,
        )
|