@relayplane/proxy 1.8.9 → 1.8.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -84,6 +84,13 @@ const downgrade_js_1 = require("./downgrade.js");
84
84
  const agent_tracker_js_1 = require("./agent-tracker.js");
85
85
  const version_status_js_1 = require("./utils/version-status.js");
86
86
  const signup_nudge_js_1 = require("./signup-nudge.js");
87
+ const star_nudge_js_1 = require("./star-nudge.js");
88
+ const estimate_js_1 = require("./estimate.js");
89
+ // Per-IP rate limit state for /v1/estimate (60 req/min per IP)
90
+ const estimateRateMap = new Map();
91
+ // Fix A: Purge expired rate-limit entries every 5 minutes to prevent memory leak.
92
+ // Without this, IPs that make one request and disappear stay in the map forever.
93
+ setInterval(() => (0, estimate_js_1.purgeExpiredRateLimitEntries)(estimateRateMap, Date.now()), 5 * 60 * 1000);
87
94
  const PROXY_VERSION = (() => {
88
95
  try {
89
96
  const pkgPath = path.join(__dirname, '..', 'package.json');
@@ -323,6 +330,8 @@ function sendCloudTelemetry(taskType, model, tokensIn, tokensOut, latencyMs, suc
323
330
  // Uses setImmediate to guarantee zero added latency on the response path —
324
331
  // the nudge prints to stderr only after the current I/O cycle completes.
325
332
  setImmediate(() => (0, signup_nudge_js_1.checkAndShowNudge)());
333
+ // Star nudge fires at 50 requests (separate from signup nudge at 100)
334
+ setImmediate(() => (0, star_nudge_js_1.checkAndShowStarNudge)());
326
335
  }
327
336
  catch {
328
337
  // Telemetry should never break the proxy
@@ -2484,8 +2493,9 @@ async function startProxy(config = {}) {
2484
2493
  // Load persistent history from disk
2485
2494
  loadHistoryFromDisk();
2486
2495
  (0, agent_tracker_js_1.loadAgentRegistry)();
2487
- // Check once at startup whether the signup nudge has already been shown
2496
+ // Check once at startup whether the nudges have already been shown
2488
2497
  (0, signup_nudge_js_1.initNudge)();
2498
+ (0, star_nudge_js_1.initStarNudge)();
2489
2499
  // Flush history on shutdown
2490
2500
  const handleShutdown = () => {
2491
2501
  (0, agent_tracker_js_1.flushAgentRegistry)();
@@ -3895,6 +3905,39 @@ async function startProxy(config = {}) {
3895
3905
  }
3896
3906
  return;
3897
3907
  }
3908
+ // === Pre-flight cost estimation endpoint (Pro-tier) ===
3909
+ if (req.method === 'POST' && (url === '/v1/estimate' || url.endsWith('/v1/estimate'))) {
3910
+ log('Pre-flight estimate request');
3911
+ // --- Per-IP rate limit: 60 requests/minute ---
3912
+ // Fix B: Use only the raw socket address — never x-forwarded-for.
3913
+ // x-forwarded-for is a client-controlled header and is trivially spoofed;
3914
+ // any attacker can send "X-Forwarded-For: 1.2.3.4" to bypass per-IP limits.
3915
+ // The socket remoteAddress reflects the actual TCP connection and cannot be faked.
3916
+ const clientIp = req.socket?.remoteAddress ?? 'unknown';
3917
+ const now = Date.now();
3918
+ // Fix C: Delegate rate limit logic to the testable checkEstimateRateLimit() function
3919
+ // (extracted in estimate.ts so it can be unit-tested in isolation).
3920
+ const rateLimitResult = (0, estimate_js_1.checkEstimateRateLimit)(estimateRateMap, clientIp, now);
3921
+ if (!rateLimitResult.allowed) {
3922
+ res.writeHead(429, { 'Content-Type': 'application/json', 'Retry-After': '60' });
3923
+ res.end(JSON.stringify({ error: 'rate_limit_exceeded', message: 'Too many estimate requests. Limit: 60/minute.' }));
3924
+ return;
3925
+ }
3926
+ // --- Read body with size limit (uses existing MAX_BODY_SIZE helper) ---
3927
+ let body;
3928
+ try {
3929
+ body = await readRequestBody(req);
3930
+ }
3931
+ catch (err) {
3932
+ res.writeHead(413, { 'Content-Type': 'application/json' });
3933
+ res.end(JSON.stringify({ error: 'payload_too_large', message: 'Request body too large (max 10MB)' }));
3934
+ return;
3935
+ }
3936
+ const result = (0, estimate_js_1.handleEstimateRequest)(body);
3937
+ res.writeHead(result.status, { 'Content-Type': 'application/json' });
3938
+ res.end(JSON.stringify(result.body));
3939
+ return;
3940
+ }
3898
3941
  // === Token counting endpoint ===
3899
3942
  if (req.method === 'POST' && url.includes('/v1/messages/count_tokens')) {
3900
3943
  log('Token count request');
@@ -3942,7 +3985,7 @@ async function startProxy(config = {}) {
3942
3985
  // === OpenAI-compatible /v1/chat/completions endpoint ===
3943
3986
  if (req.method !== 'POST' || !url.includes('/chat/completions')) {
3944
3987
  res.writeHead(404, { 'Content-Type': 'application/json' });
3945
- res.end(JSON.stringify({ error: 'Not found. Supported: POST /v1/messages, POST /v1/chat/completions, GET /v1/models' }));
3988
+ res.end(JSON.stringify({ error: 'Not found. Supported: POST /v1/messages, POST /v1/chat/completions, POST /v1/estimate, GET /v1/models' }));
3946
3989
  return;
3947
3990
  }
3948
3991
  // Parse request body