squads-cli 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -21
- package/dist/chunk-266URT5W.js +915 -0
- package/dist/chunk-266URT5W.js.map +1 -0
- package/dist/chunk-7OCVIDC7.js +12 -0
- package/dist/chunk-7OCVIDC7.js.map +1 -0
- package/dist/chunk-FUHBEL3L.js +203 -0
- package/dist/chunk-FUHBEL3L.js.map +1 -0
- package/dist/cli.js +2118 -1017
- package/dist/cli.js.map +1 -1
- package/dist/memory-4PVUKIDK.js +19 -0
- package/dist/memory-4PVUKIDK.js.map +1 -0
- package/dist/sessions-UR3YGSLR.js +15 -0
- package/dist/sessions-UR3YGSLR.js.map +1 -0
- package/docker/.env.example +17 -0
- package/docker/README.md +92 -0
- package/docker/docker-compose.engram.yml +289 -0
- package/docker/docker-compose.yml +194 -0
- package/docker/init-db.sql +399 -0
- package/docker/init-engram-db.sql +148 -0
- package/docker/init-langfuse-db.sh +10 -0
- package/docker/otel-collector.yaml +34 -0
- package/docker/squads-bridge/Dockerfile +14 -0
- package/docker/squads-bridge/Dockerfile.proxy +14 -0
- package/docker/squads-bridge/anthropic_proxy.py +313 -0
- package/docker/squads-bridge/requirements.txt +7 -0
- package/docker/squads-bridge/squads_bridge.py +1457 -0
- package/docker/telemetry-ping/Dockerfile +10 -0
- package/docker/telemetry-ping/deploy.sh +69 -0
- package/docker/telemetry-ping/main.py +136 -0
- package/docker/telemetry-ping/requirements.txt +3 -0
- package/package.json +12 -2
package/docker/squads-bridge/anthropic_proxy.py
@@ -0,0 +1,313 @@
+"""
+Anthropic API Proxy
+Forwards requests to Anthropic API and captures rate limit headers.
+Stores rate limits in Redis for real-time dashboard display.
+Includes rate limiting queue to prevent parallel agents from hitting limits.
+"""
+import os
+import json
+import time
+import threading
+import requests
+from datetime import datetime
+from flask import Flask, request, Response, jsonify
+import redis
+
+app = Flask(__name__)
+
+# Configuration
+ANTHROPIC_API_URL = os.environ.get("ANTHROPIC_API_URL", "https://api.anthropic.com")
+BRIDGE_URL = os.environ.get("SQUADS_BRIDGE_URL", "http://squads-bridge:8080")
+REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")
+DEBUG_MODE = os.environ.get("DEBUG", "0") == "1"
+
+# Rate limiting configuration
+RATE_LIMIT_ENABLED = os.environ.get("RATE_LIMIT_ENABLED", "1") == "1"
+MIN_REQUESTS_REMAINING = int(os.environ.get("MIN_REQUESTS_REMAINING", "10"))  # Queue if fewer remaining
+MIN_TOKENS_REMAINING = int(os.environ.get("MIN_TOKENS_REMAINING", "10000"))  # Queue if fewer remaining
+QUEUE_WAIT_TIME = float(os.environ.get("QUEUE_WAIT_TIME", "5.0"))  # Seconds between queued requests
+
+# Thread-safe request queue
+request_lock = threading.Lock()
+last_request_time = 0
+
+# Redis connection
+redis_client = None
+try:
+    redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+    redis_client.ping()
+    print(f"[PROXY] Redis connected: {REDIS_URL}")
+except Exception as e:
+    print(f"[PROXY] Redis unavailable: {e}")
+    redis_client = None
+
+
+def extract_model_family(model: str) -> str:
+    """Extract model family from full model name."""
+    if "opus" in model.lower():
+        return "opus"
+    elif "sonnet" in model.lower():
+        return "sonnet"
+    elif "haiku" in model.lower():
+        return "haiku"
+    return "unknown"
+
+
+def store_rate_limits(model: str, headers: dict):
+    """Store rate limit headers in Redis."""
+    if not redis_client:
+        return
+
+    # Extract rate limit headers
+    rate_limits = {}
+    header_mapping = {
+        "anthropic-ratelimit-requests-limit": "requests_limit",
+        "anthropic-ratelimit-requests-remaining": "requests_remaining",
+        "anthropic-ratelimit-requests-reset": "requests_reset",
+        "anthropic-ratelimit-tokens-limit": "tokens_limit",
+        "anthropic-ratelimit-tokens-remaining": "tokens_remaining",
+        "anthropic-ratelimit-tokens-reset": "tokens_reset",
+        "anthropic-ratelimit-input-tokens-limit": "input_tokens_limit",
+        "anthropic-ratelimit-input-tokens-remaining": "input_tokens_remaining",
+        "anthropic-ratelimit-input-tokens-reset": "input_tokens_reset",
+        "anthropic-ratelimit-output-tokens-limit": "output_tokens_limit",
+        "anthropic-ratelimit-output-tokens-remaining": "output_tokens_remaining",
+        "anthropic-ratelimit-output-tokens-reset": "output_tokens_reset",
+    }
+
+    for header_name, key in header_mapping.items():
+        value = headers.get(header_name)
+        if value:
+            # Parse numeric values
+            if "remaining" in key or "limit" in key:
+                try:
+                    rate_limits[key] = int(value)
+                except ValueError:
+                    rate_limits[key] = value
+            else:
+                rate_limits[key] = value
+
+    if not rate_limits:
+        return
+
+    rate_limits["captured_at"] = datetime.now().isoformat()
+    rate_limits["model"] = model
+
+    # Store in Redis with 5 minute TTL
+    family = extract_model_family(model)
+    key = f"ratelimit:latest:{family}"
+    redis_client.set(key, json.dumps(rate_limits), ex=300)
+
+    # Also store by full model name
+    key_full = f"ratelimit:latest:{model}"
+    redis_client.set(key_full, json.dumps(rate_limits), ex=300)
+
+    if DEBUG_MODE:
+        print(f"[PROXY] Rate limits stored for {model}: {rate_limits}")
+
+
+def check_rate_limits(model: str) -> dict:
+    """Check current rate limits and return status."""
+    if not redis_client:
+        return {"should_wait": False, "reason": "no_redis"}
+
+    family = extract_model_family(model)
+    key = f"ratelimit:latest:{family}"
+    data = redis_client.get(key)
+
+    if not data:
+        return {"should_wait": False, "reason": "no_data"}
+
+    limits = json.loads(data)
+    requests_remaining = limits.get("requests_remaining", 999)
+    tokens_remaining = limits.get("tokens_remaining", 999999)
+
+    if requests_remaining < MIN_REQUESTS_REMAINING:
+        return {
+            "should_wait": True,
+            "reason": f"requests_remaining={requests_remaining} < {MIN_REQUESTS_REMAINING}",
+            "wait_until": limits.get("requests_reset"),
+        }
+
+    if tokens_remaining < MIN_TOKENS_REMAINING:
+        return {
+            "should_wait": True,
+            "reason": f"tokens_remaining={tokens_remaining} < {MIN_TOKENS_REMAINING}",
+            "wait_until": limits.get("tokens_reset"),
+        }
+
+    return {"should_wait": False, "reason": "ok"}
+
+
+def wait_for_rate_limit():
+    """Enforce minimum time between requests (queue)."""
+    global last_request_time
+
+    with request_lock:
+        now = time.time()
+        elapsed = now - last_request_time
+
+        if elapsed < QUEUE_WAIT_TIME:
+            wait_time = QUEUE_WAIT_TIME - elapsed
+            if DEBUG_MODE:
+                print(f"[PROXY] Rate limit queue: waiting {wait_time:.2f}s")
+            time.sleep(wait_time)
+
+        last_request_time = time.time()
+
+
+@app.route("/v1/messages", methods=["POST"])
+def proxy_messages():
+    """Proxy /v1/messages endpoint to Anthropic API."""
+    try:
+        # Get request data
+        body = request.get_json()
+        model = body.get("model", "unknown") if body else "unknown"
+
+        # Rate limiting check
+        if RATE_LIMIT_ENABLED:
+            limit_status = check_rate_limits(model)
+            if limit_status["should_wait"]:
+                if DEBUG_MODE:
+                    print(f"[PROXY] Rate limit warning: {limit_status['reason']}")
+                # Add extra delay when approaching limits
+                time.sleep(QUEUE_WAIT_TIME * 2)
+
+            # Enforce queue spacing
+            wait_for_rate_limit()
+
+        # Forward headers (except Host)
+        forward_headers = {
+            key: value for key, value in request.headers
+            if key.lower() not in ("host", "content-length")
+        }
+
+        # Make request to Anthropic
+        response = requests.post(
+            f"{ANTHROPIC_API_URL}/v1/messages",
+            headers=forward_headers,
+            json=body,
+            stream=True,  # Support streaming responses
+            timeout=300,
+        )
+
+        # Capture rate limit headers
+        store_rate_limits(model, dict(response.headers))
+
+        # Stream response back
+        def generate():
+            for chunk in response.iter_content(chunk_size=1024):
+                yield chunk
+
+        # Build response with original headers
+        proxy_response = Response(
+            generate(),
+            status=response.status_code,
+            content_type=response.headers.get("content-type"),
+        )
+
+        # Copy relevant headers
+        for header in ["x-request-id", "anthropic-ratelimit-requests-remaining",
+                       "anthropic-ratelimit-tokens-remaining"]:
+            if header in response.headers:
+                proxy_response.headers[header] = response.headers[header]
+
+        return proxy_response
+
+    except requests.exceptions.Timeout:
+        return jsonify({"error": "Request to Anthropic API timed out"}), 504
+    except requests.exceptions.RequestException as e:
+        return jsonify({"error": f"Proxy error: {str(e)}"}), 502
+    except Exception as e:
+        return jsonify({"error": f"Internal error: {str(e)}"}), 500
+
+
+@app.route("/v1/complete", methods=["POST"])
+def proxy_complete():
+    """Proxy /v1/complete endpoint (legacy)."""
+    try:
+        body = request.get_json()
+        model = body.get("model", "unknown") if body else "unknown"
+
+        forward_headers = {
+            key: value for key, value in request.headers
+            if key.lower() not in ("host", "content-length")
+        }
+
+        response = requests.post(
+            f"{ANTHROPIC_API_URL}/v1/complete",
+            headers=forward_headers,
+            json=body,
+            stream=True,
+            timeout=300,
+        )
+
+        store_rate_limits(model, dict(response.headers))
+
+        def generate():
+            for chunk in response.iter_content(chunk_size=1024):
+                yield chunk
+
+        return Response(
+            generate(),
+            status=response.status_code,
+            content_type=response.headers.get("content-type"),
+        )
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route("/rate-limits", methods=["GET"])
+def get_rate_limits():
+    """Get current rate limits from Redis."""
+    if not redis_client:
+        return jsonify({"error": "Redis not available"}), 503
+
+    try:
+        # Get all rate limit keys
+        keys = redis_client.keys("ratelimit:latest:*")
+        limits = {}
+
+        for key in keys:
+            family = key.split(":")[-1]
+            data = redis_client.get(key)
+            if data:
+                limits[family] = json.loads(data)
+
+        return jsonify({
+            "rate_limits": limits,
+            "source": "redis",
+            "fetched_at": datetime.now().isoformat(),
+        })
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route("/health", methods=["GET"])
+def health():
+    """Health check endpoint."""
+    status = {
+        "status": "healthy",
+        "anthropic_url": ANTHROPIC_API_URL,
+        "redis": "connected" if redis_client else "unavailable",
+    }
+
+    if redis_client:
+        try:
+            redis_client.ping()
+        except Exception:
+            status["redis"] = "error"
+            status["status"] = "degraded"
+
+    return jsonify(status), 200 if status["status"] == "healthy" else 503
+
+
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 8089))
+    print(f"[PROXY] Starting Anthropic Proxy on port {port}")
+    print(f"[PROXY] Forwarding to: {ANTHROPIC_API_URL}")
+    print(f"[PROXY] Redis: {'connected' if redis_client else 'unavailable'}")
+
+    app.run(host="0.0.0.0", port=port)
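For anyone reviewing the new proxy, a minimal smoke test of its auxiliary endpoints might look like the sketch below. It assumes the container is reachable at `localhost:8089` (the default `PORT` above); the actual port mapping lives in the compose files, not in this hunk.

```python
# Minimal sketch (not part of the package): exercising the proxy's
# auxiliary endpoints from the host. localhost:8089 is an assumption
# based on the default PORT above; adjust to your compose mapping.
import requests

base = "http://localhost:8089"

# Health check: 200 while healthy, 503 when Redis is unreachable.
print(requests.get(f"{base}/health").json())

# Captured limits per model family; empty until the proxy has
# forwarded at least one request, since the Redis keys are populated
# from response headers and expire after five minutes.
print(requests.get(f"{base}/rate-limits").json())
```

Agents would reach Anthropic through the proxy by pointing their client's base-URL override (e.g. an `ANTHROPIC_BASE_URL`-style setting, where the client supports one) at the proxy instead of `api.anthropic.com`; the `/v1/messages` and `/v1/complete` routes then forward transparently while capturing the rate-limit headers.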