squads-cli 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,313 @@
1
+ """
2
+ Anthropic API Proxy
3
+ Forwards requests to Anthropic API and captures rate limit headers.
4
+ Stores rate limits in Redis for real-time dashboard display.
5
+ Includes rate limiting queue to prevent parallel agents from hitting limits.
6
+ """
7
+ import os
8
+ import json
9
+ import time
10
+ import threading
11
+ import requests
12
+ from datetime import datetime
13
+ from flask import Flask, request, Response, jsonify
14
+ import redis
15
+
16
+ app = Flask(__name__)
17
+
18
+ # Configuration
19
+ ANTHROPIC_API_URL = os.environ.get("ANTHROPIC_API_URL", "https://api.anthropic.com")
20
+ BRIDGE_URL = os.environ.get("SQUADS_BRIDGE_URL", "http://squads-bridge:8080")
21
+ REDIS_URL = os.environ.get("REDIS_URL", "redis://redis:6379/0")
22
+ DEBUG_MODE = os.environ.get("DEBUG", "0") == "1"
23
+
24
+ # Rate limiting configuration
25
+ RATE_LIMIT_ENABLED = os.environ.get("RATE_LIMIT_ENABLED", "1") == "1"
26
+ MIN_REQUESTS_REMAINING = int(os.environ.get("MIN_REQUESTS_REMAINING", "10")) # Queue if fewer remaining
27
+ MIN_TOKENS_REMAINING = int(os.environ.get("MIN_TOKENS_REMAINING", "10000")) # Queue if fewer remaining
28
+ QUEUE_WAIT_TIME = float(os.environ.get("QUEUE_WAIT_TIME", "5.0")) # Seconds between queued requests
29
+
30
+ # Thread-safe request queue
31
+ request_lock = threading.Lock()
32
+ last_request_time = 0
33
+
34
+ # Redis connection
35
+ redis_client = None
36
+ try:
37
+ redis_client = redis.from_url(REDIS_URL, decode_responses=True)
38
+ redis_client.ping()
39
+ print(f"[PROXY] Redis connected: {REDIS_URL}")
40
+ except Exception as e:
41
+ print(f"[PROXY] Redis unavailable: {e}")
42
+ redis_client = None
43
+
44
+
45
def extract_model_family(model: str) -> str:
    """Extract the model family from a full model name.

    Args:
        model: Full model identifier, e.g. "claude-3-opus-20240229".

    Returns:
        "opus", "sonnet", or "haiku" (matched case-insensitively, in that
        priority order), or "unknown" when no family substring is present.
    """
    # Lowercase once instead of per-branch; keep the original priority order.
    lowered = model.lower()
    for family in ("opus", "sonnet", "haiku"):
        if family in lowered:
            return family
    return "unknown"
54
+
55
+
56
def store_rate_limits(model: str, headers: dict):
    """Store Anthropic rate-limit response headers in Redis.

    Numeric ``*-limit`` / ``*-remaining`` values are stored as ints (raw
    string kept if parsing fails); ``*-reset`` values are stored verbatim.
    The snapshot is written under both the model family and the full model
    name with a 5-minute TTL. No-op when Redis is unavailable or no
    rate-limit headers are present.

    Args:
        model: Model name the request was made for.
        headers: Response headers from the upstream API.
    """
    if not redis_client:
        return

    # HTTP header names are case-insensitive (RFC 7230); normalise once so
    # lookups succeed regardless of how the upstream or an intermediary
    # cased them — a plain dict lookup would silently miss e.g. HTTP/2
    # lowercased vs. canonical-cased names.
    lowered = {name.lower(): value for name, value in headers.items()}

    header_mapping = {
        "anthropic-ratelimit-requests-limit": "requests_limit",
        "anthropic-ratelimit-requests-remaining": "requests_remaining",
        "anthropic-ratelimit-requests-reset": "requests_reset",
        "anthropic-ratelimit-tokens-limit": "tokens_limit",
        "anthropic-ratelimit-tokens-remaining": "tokens_remaining",
        "anthropic-ratelimit-tokens-reset": "tokens_reset",
        "anthropic-ratelimit-input-tokens-limit": "input_tokens_limit",
        "anthropic-ratelimit-input-tokens-remaining": "input_tokens_remaining",
        "anthropic-ratelimit-input-tokens-reset": "input_tokens_reset",
        "anthropic-ratelimit-output-tokens-limit": "output_tokens_limit",
        "anthropic-ratelimit-output-tokens-remaining": "output_tokens_remaining",
        "anthropic-ratelimit-output-tokens-reset": "output_tokens_reset",
    }

    rate_limits = {}
    for header_name, key in header_mapping.items():
        value = lowered.get(header_name)
        if not value:
            continue
        if "remaining" in key or "limit" in key:
            # Counts should be numeric; fall back to the raw string if not.
            try:
                rate_limits[key] = int(value)
            except ValueError:
                rate_limits[key] = value
        else:
            # Reset values are timestamps; keep the raw string.
            rate_limits[key] = value

    if not rate_limits:
        return

    rate_limits["captured_at"] = datetime.now().isoformat()
    rate_limits["model"] = model

    # Serialize once, then store under both the family key and the
    # full-model key, each with a 5-minute TTL so stale data ages out.
    payload = json.dumps(rate_limits)
    family = extract_model_family(model)
    redis_client.set(f"ratelimit:latest:{family}", payload, ex=300)
    redis_client.set(f"ratelimit:latest:{model}", payload, ex=300)

    if DEBUG_MODE:
        print(f"[PROXY] Rate limits stored for {model}: {rate_limits}")
107
+
108
+
109
def check_rate_limits(model: str) -> dict:
    """Check the most recent rate-limit snapshot for *model*'s family.

    Returns:
        A dict with a ``should_wait`` flag and a ``reason`` string; when
        throttling is advised it also carries ``wait_until`` (the
        upstream-reported reset time, if present in the snapshot).
    """
    if not redis_client:
        return {"should_wait": False, "reason": "no_redis"}

    snapshot = redis_client.get(f"ratelimit:latest:{extract_model_family(model)}")
    if not snapshot:
        return {"should_wait": False, "reason": "no_data"}

    limits = json.loads(snapshot)
    # Missing values default high so an incomplete snapshot never throttles.
    thresholds = (
        ("requests", limits.get("requests_remaining", 999), MIN_REQUESTS_REMAINING),
        ("tokens", limits.get("tokens_remaining", 999999), MIN_TOKENS_REMAINING),
    )
    for label, remaining, floor in thresholds:
        if remaining < floor:
            return {
                "should_wait": True,
                "reason": f"{label}_remaining={remaining} < {floor}",
                "wait_until": limits.get(f"{label}_reset"),
            }

    return {"should_wait": False, "reason": "ok"}
140
+
141
+
142
def wait_for_rate_limit():
    """Enforce a minimum spacing between forwarded requests (global queue).

    Callers serialize on a shared lock; one that arrives sooner than
    QUEUE_WAIT_TIME seconds after the previous request sleeps (while holding
    the lock) until the spacing is satisfied, so parallel agents queue up
    instead of bursting.
    """
    global last_request_time

    with request_lock:
        since_last = time.time() - last_request_time
        remaining = QUEUE_WAIT_TIME - since_last
        if remaining > 0:
            if DEBUG_MODE:
                print(f"[PROXY] Rate limit queue: waiting {remaining:.2f}s")
            time.sleep(remaining)
        last_request_time = time.time()
157
+
158
+
159
+ @app.route("/v1/messages", methods=["POST"])
160
+ def proxy_messages():
161
+ """Proxy /v1/messages endpoint to Anthropic API."""
162
+ try:
163
+ # Get request data
164
+ body = request.get_json()
165
+ model = body.get("model", "unknown") if body else "unknown"
166
+
167
+ # Rate limiting check
168
+ if RATE_LIMIT_ENABLED:
169
+ limit_status = check_rate_limits(model)
170
+ if limit_status["should_wait"]:
171
+ if DEBUG_MODE:
172
+ print(f"[PROXY] Rate limit warning: {limit_status['reason']}")
173
+ # Add extra delay when approaching limits
174
+ time.sleep(QUEUE_WAIT_TIME * 2)
175
+
176
+ # Enforce queue spacing
177
+ wait_for_rate_limit()
178
+
179
+ # Forward headers (except Host)
180
+ forward_headers = {
181
+ key: value for key, value in request.headers
182
+ if key.lower() not in ("host", "content-length")
183
+ }
184
+
185
+ # Make request to Anthropic
186
+ response = requests.post(
187
+ f"{ANTHROPIC_API_URL}/v1/messages",
188
+ headers=forward_headers,
189
+ json=body,
190
+ stream=True, # Support streaming responses
191
+ timeout=300,
192
+ )
193
+
194
+ # Capture rate limit headers
195
+ store_rate_limits(model, dict(response.headers))
196
+
197
+ # Stream response back
198
+ def generate():
199
+ for chunk in response.iter_content(chunk_size=1024):
200
+ yield chunk
201
+
202
+ # Build response with original headers
203
+ proxy_response = Response(
204
+ generate(),
205
+ status=response.status_code,
206
+ content_type=response.headers.get("content-type"),
207
+ )
208
+
209
+ # Copy relevant headers
210
+ for header in ["x-request-id", "anthropic-ratelimit-requests-remaining",
211
+ "anthropic-ratelimit-tokens-remaining"]:
212
+ if header in response.headers:
213
+ proxy_response.headers[header] = response.headers[header]
214
+
215
+ return proxy_response
216
+
217
+ except requests.exceptions.Timeout:
218
+ return jsonify({"error": "Request to Anthropic API timed out"}), 504
219
+ except requests.exceptions.RequestException as e:
220
+ return jsonify({"error": f"Proxy error: {str(e)}"}), 502
221
+ except Exception as e:
222
+ return jsonify({"error": f"Internal error: {str(e)}"}), 500
223
+
224
+
225
+ @app.route("/v1/complete", methods=["POST"])
226
+ def proxy_complete():
227
+ """Proxy /v1/complete endpoint (legacy)."""
228
+ try:
229
+ body = request.get_json()
230
+ model = body.get("model", "unknown") if body else "unknown"
231
+
232
+ forward_headers = {
233
+ key: value for key, value in request.headers
234
+ if key.lower() not in ("host", "content-length")
235
+ }
236
+
237
+ response = requests.post(
238
+ f"{ANTHROPIC_API_URL}/v1/complete",
239
+ headers=forward_headers,
240
+ json=body,
241
+ stream=True,
242
+ timeout=300,
243
+ )
244
+
245
+ store_rate_limits(model, dict(response.headers))
246
+
247
+ def generate():
248
+ for chunk in response.iter_content(chunk_size=1024):
249
+ yield chunk
250
+
251
+ return Response(
252
+ generate(),
253
+ status=response.status_code,
254
+ content_type=response.headers.get("content-type"),
255
+ )
256
+
257
+ except Exception as e:
258
+ return jsonify({"error": str(e)}), 500
259
+
260
+
261
+ @app.route("/rate-limits", methods=["GET"])
262
+ def get_rate_limits():
263
+ """Get current rate limits from Redis."""
264
+ if not redis_client:
265
+ return jsonify({"error": "Redis not available"}), 503
266
+
267
+ try:
268
+ # Get all rate limit keys
269
+ keys = redis_client.keys("ratelimit:latest:*")
270
+ limits = {}
271
+
272
+ for key in keys:
273
+ family = key.split(":")[-1]
274
+ data = redis_client.get(key)
275
+ if data:
276
+ limits[family] = json.loads(data)
277
+
278
+ return jsonify({
279
+ "rate_limits": limits,
280
+ "source": "redis",
281
+ "fetched_at": datetime.now().isoformat(),
282
+ })
283
+
284
+ except Exception as e:
285
+ return jsonify({"error": str(e)}), 500
286
+
287
+
288
+ @app.route("/health", methods=["GET"])
289
+ def health():
290
+ """Health check endpoint."""
291
+ status = {
292
+ "status": "healthy",
293
+ "anthropic_url": ANTHROPIC_API_URL,
294
+ "redis": "connected" if redis_client else "unavailable",
295
+ }
296
+
297
+ if redis_client:
298
+ try:
299
+ redis_client.ping()
300
+ except Exception:
301
+ status["redis"] = "error"
302
+ status["status"] = "degraded"
303
+
304
+ return jsonify(status), 200 if status["status"] == "healthy" else 503
305
+
306
+
307
if __name__ == "__main__":
    # Development entry point. NOTE(review): gunicorn appears in the package
    # requirements — presumably the production server; this built-in Flask
    # server is for local runs. Confirm deployment setup.
    port = int(os.environ.get("PORT", 8089))
    print(f"[PROXY] Starting Anthropic Proxy on port {port}")
    print(f"[PROXY] Forwarding to: {ANTHROPIC_API_URL}")
    print(f"[PROXY] Redis: {'connected' if redis_client else 'unavailable'}")

    # Bind all interfaces so the container port mapping works.
    app.run(host="0.0.0.0", port=port)
@@ -0,0 +1,7 @@
1
+ flask>=3.0.0
2
+ gunicorn>=21.0.0
3
+ psycopg2-binary>=2.9.9
4
+ redis>=5.0.0
+ requests>=2.31.0
5
+
6
+ # Optional: Langfuse forwarding (enable via LANGFUSE_ENABLED=true)
7
+ langfuse>=2.0.0,<3.0.0