openclaw-autoproxy 1.0.2 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,6 +59,9 @@ export interface GatewayConfig {
59
59
  timeoutMs: number;
60
60
  upstreamBaseUrl: string;
61
61
  upstreamApiKey: string;
62
+ upstreamMaxConnections: number;
63
+ upstreamKeepAliveTimeoutMs: number;
64
+ upstreamKeepAliveMaxTimeoutMs: number;
62
65
  retryStatusCodes: Set<number>;
63
66
  globalFallbackModels: string[];
64
67
  modelFallbackMap: Record<string, string[]>;
@@ -81,6 +84,20 @@ function parseCsvList(value: string | undefined): string[] {
81
84
  .filter(Boolean);
82
85
  }
83
86
 
87
+ function parsePositiveInteger(value: string | undefined, fallback: number): number {
88
+ if (!value) {
89
+ return fallback;
90
+ }
91
+
92
+ const parsed = Number.parseInt(value, 10);
93
+
94
+ if (!Number.isInteger(parsed) || parsed <= 0) {
95
+ return fallback;
96
+ }
97
+
98
+ return parsed;
99
+ }
100
+
84
101
  function parseRetryCodes(value: string | undefined): Set<number> {
85
102
  const defaults = new Set([412, 429, 500, 502, 503, 504]);
86
103
 
@@ -404,6 +421,15 @@ function loadRouteFileConfig(): ParsedRouteFileConfig {
404
421
  const host = process.env.HOST ?? "0.0.0.0";
405
422
  const port = Number.parseInt(process.env.PORT ?? "8787", 10);
406
423
  const timeoutMs = Number.parseInt(process.env.REQUEST_TIMEOUT_MS ?? "60000", 10);
424
+ const upstreamMaxConnections = parsePositiveInteger(process.env.UPSTREAM_MAX_CONNECTIONS, 200);
425
+ const upstreamKeepAliveTimeoutMs = parsePositiveInteger(
426
+ process.env.UPSTREAM_KEEPALIVE_TIMEOUT_MS,
427
+ 60_000,
428
+ );
429
+ const upstreamKeepAliveMaxTimeoutMs = parsePositiveInteger(
430
+ process.env.UPSTREAM_KEEPALIVE_MAX_TIMEOUT_MS,
431
+ 300_000,
432
+ );
407
433
  const upstreamBaseUrl = (process.env.UPSTREAM_BASE_URL ?? "https://api.openai.com").replace(
408
434
  /\/+$/,
409
435
  "",
@@ -424,6 +450,9 @@ export const config: GatewayConfig = {
424
450
  timeoutMs,
425
451
  upstreamBaseUrl,
426
452
  upstreamApiKey: process.env.UPSTREAM_API_KEY ?? "",
453
+ upstreamMaxConnections,
454
+ upstreamKeepAliveTimeoutMs,
455
+ upstreamKeepAliveMaxTimeoutMs,
427
456
  retryStatusCodes: routeFileConfig.retryStatusCodes ?? parseRetryCodes(process.env.RETRY_STATUS_CODES),
428
457
  globalFallbackModels: parseCsvList(process.env.GLOBAL_FALLBACK_MODELS),
429
458
  modelFallbackMap: parseModelFallbackMap(process.env.MODEL_FALLBACK_MAP),
@@ -0,0 +1,166 @@
1
// Rolling retention window for load samples (12 hours).
const DEFAULT_WINDOW_MS = 12 * 60 * 60 * 1000;
// Hard cap on retained samples per model, to bound memory use.
const DEFAULT_MAX_SAMPLES_PER_MODEL = 5000;

// One observed load measurement for a model.
interface ModelLoadSample {
  // Timestamp (ms since epoch, from Date.now()) when the sample was recorded.
  at: number;
  // Observed load duration in milliseconds.
  loadMs: number;
}

// Aggregated load statistics for a single model within the reporting window.
interface ModelLoadSummary {
  model: string;
  sampleCount: number;
  avgLoadMs: number;
  p50LoadMs: number;
  p95LoadMs: number;
  minLoadMs: number;
  maxLoadMs: number;
  // ISO-8601 timestamp of the most recent sample.
  lastSeenAt: string;
}

// In-memory store: model name -> samples in insertion order (oldest first).
const modelSamples = new Map<string, ModelLoadSample[]>();
22
+ function quantileFromSorted(values: number[], q: number): number {
23
+ if (values.length === 0) {
24
+ return 0;
25
+ }
26
+
27
+ const clampedQ = Math.max(0, Math.min(1, q));
28
+ const index = Math.floor((values.length - 1) * clampedQ);
29
+ return values[index] ?? values[values.length - 1] ?? 0;
30
+ }
31
+
32
+ function roundMs(value: number): number {
33
+ return Math.round(value * 100) / 100;
34
+ }
35
+
36
+ function pruneModelSamples(samples: ModelLoadSample[], cutoffAt: number): ModelLoadSample[] {
37
+ let startIndex = 0;
38
+
39
+ while (startIndex < samples.length && samples[startIndex] && samples[startIndex].at < cutoffAt) {
40
+ startIndex += 1;
41
+ }
42
+
43
+ if (startIndex <= 0) {
44
+ return samples;
45
+ }
46
+
47
+ return samples.slice(startIndex);
48
+ }
49
+
50
+ function pruneExpiredSamples(cutoffAt: number): void {
51
+ for (const [model, samples] of modelSamples.entries()) {
52
+ const pruned = pruneModelSamples(samples, cutoffAt);
53
+
54
+ if (pruned.length === 0) {
55
+ modelSamples.delete(model);
56
+ continue;
57
+ }
58
+
59
+ if (pruned !== samples) {
60
+ modelSamples.set(model, pruned);
61
+ }
62
+ }
63
+ }
64
+
65
+ export function recordModelLoadSample(model: string | null, loadMs: number): void {
66
+ if (!model) {
67
+ return;
68
+ }
69
+
70
+ if (!Number.isFinite(loadMs) || loadMs <= 0) {
71
+ return;
72
+ }
73
+
74
+ const now = Date.now();
75
+ const sample: ModelLoadSample = {
76
+ at: now,
77
+ loadMs,
78
+ };
79
+
80
+ const existing = modelSamples.get(model) ?? [];
81
+ existing.push(sample);
82
+
83
+ if (existing.length > DEFAULT_MAX_SAMPLES_PER_MODEL) {
84
+ existing.splice(0, existing.length - DEFAULT_MAX_SAMPLES_PER_MODEL);
85
+ }
86
+
87
+ modelSamples.set(model, existing);
88
+
89
+ const cutoffAt = now - DEFAULT_WINDOW_MS;
90
+ pruneExpiredSamples(cutoffAt);
91
+ }
92
+
93
+ function summarizeModel(model: string, samples: ModelLoadSample[]): ModelLoadSummary | null {
94
+ if (samples.length === 0) {
95
+ return null;
96
+ }
97
+
98
+ const loadValues = samples.map((sample) => sample.loadMs).sort((a, b) => a - b);
99
+ const total = loadValues.reduce((acc, value) => acc + value, 0);
100
+ const avgLoadMs = total / loadValues.length;
101
+ const minLoadMs = loadValues[0] ?? 0;
102
+ const maxLoadMs = loadValues[loadValues.length - 1] ?? 0;
103
+ const latestAt = samples[samples.length - 1]?.at ?? Date.now();
104
+
105
+ return {
106
+ model,
107
+ sampleCount: samples.length,
108
+ avgLoadMs: roundMs(avgLoadMs),
109
+ p50LoadMs: roundMs(quantileFromSorted(loadValues, 0.5)),
110
+ p95LoadMs: roundMs(quantileFromSorted(loadValues, 0.95)),
111
+ minLoadMs: roundMs(minLoadMs),
112
+ maxLoadMs: roundMs(maxLoadMs),
113
+ lastSeenAt: new Date(latestAt).toISOString(),
114
+ };
115
+ }
116
+
117
+ export function getModelLoadRankingHealth(windowMs = DEFAULT_WINDOW_MS): {
118
+ windowHours: number;
119
+ rankedModels: Array<ModelLoadSummary & { rank: number }>;
120
+ } {
121
+ const normalizedWindowMs = Number.isFinite(windowMs) && windowMs > 0 ? windowMs : DEFAULT_WINDOW_MS;
122
+ const now = Date.now();
123
+ const cutoffAt = now - normalizedWindowMs;
124
+
125
+ pruneExpiredSamples(cutoffAt);
126
+
127
+ const summaries: ModelLoadSummary[] = [];
128
+
129
+ for (const [model, samples] of modelSamples.entries()) {
130
+ const filtered = pruneModelSamples(samples, cutoffAt);
131
+
132
+ if (filtered.length === 0) {
133
+ continue;
134
+ }
135
+
136
+ if (filtered !== samples) {
137
+ modelSamples.set(model, filtered);
138
+ }
139
+
140
+ const summary = summarizeModel(model, filtered);
141
+
142
+ if (summary) {
143
+ summaries.push(summary);
144
+ }
145
+ }
146
+
147
+ summaries.sort((a, b) => {
148
+ if (a.avgLoadMs !== b.avgLoadMs) {
149
+ return a.avgLoadMs - b.avgLoadMs;
150
+ }
151
+
152
+ if (a.p95LoadMs !== b.p95LoadMs) {
153
+ return a.p95LoadMs - b.p95LoadMs;
154
+ }
155
+
156
+ return b.sampleCount - a.sampleCount;
157
+ });
158
+
159
+ return {
160
+ windowHours: roundMs(normalizedWindowMs / (60 * 60 * 1000)),
161
+ rankedModels: summaries.map((entry, index) => ({
162
+ rank: index + 1,
163
+ ...entry,
164
+ })),
165
+ };
166
+ }