caplyr 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +246 -184
- package/dist/index.mjs +246 -184
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -107,6 +107,36 @@ var LogShipper = class {
|
|
|
107
107
|
}
|
|
108
108
|
};
|
|
109
109
|
|
|
110
|
+
// src/mutex.ts
|
|
111
|
+
var Mutex = class {
|
|
112
|
+
constructor() {
|
|
113
|
+
this.queue = [];
|
|
114
|
+
this.locked = false;
|
|
115
|
+
}
|
|
116
|
+
async acquire() {
|
|
117
|
+
if (!this.locked) {
|
|
118
|
+
this.locked = true;
|
|
119
|
+
return this.createRelease();
|
|
120
|
+
}
|
|
121
|
+
return new Promise((resolve) => {
|
|
122
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
createRelease() {
|
|
126
|
+
let released = false;
|
|
127
|
+
return () => {
|
|
128
|
+
if (released) return;
|
|
129
|
+
released = true;
|
|
130
|
+
const next = this.queue.shift();
|
|
131
|
+
if (next) {
|
|
132
|
+
next();
|
|
133
|
+
} else {
|
|
134
|
+
this.locked = false;
|
|
135
|
+
}
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
|
|
110
140
|
// src/heartbeat.ts
|
|
111
141
|
var Heartbeat = class {
|
|
112
142
|
constructor(config) {
|
|
@@ -124,6 +154,8 @@ var Heartbeat = class {
|
|
|
124
154
|
};
|
|
125
155
|
/** Current protection status */
|
|
126
156
|
this.status = "ACTIVE";
|
|
157
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
158
|
+
this.budgetMutex = new Mutex();
|
|
127
159
|
/** Local budget limits set via config (not from server) */
|
|
128
160
|
this.localDailyLimit = null;
|
|
129
161
|
this.localMonthlyLimit = null;
|
|
@@ -176,14 +208,18 @@ var Heartbeat = class {
|
|
|
176
208
|
const data = await res.json();
|
|
177
209
|
const localDailyUsed = this.budgetStatus.daily_used;
|
|
178
210
|
const localMonthlyUsed = this.budgetStatus.monthly_used;
|
|
211
|
+
const serverDailyUsed = Number(data.daily_used) || 0;
|
|
212
|
+
const serverMonthlyUsed = Number(data.monthly_used) || 0;
|
|
213
|
+
const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
|
|
214
|
+
const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
|
|
179
215
|
this.budgetStatus = {
|
|
180
216
|
...data,
|
|
181
217
|
// Use whichever spend is higher — server or local tracking
|
|
182
|
-
daily_used: Math.max(
|
|
183
|
-
monthly_used: Math.max(
|
|
184
|
-
//
|
|
185
|
-
daily_limit:
|
|
186
|
-
monthly_limit:
|
|
218
|
+
daily_used: Math.max(serverDailyUsed, localDailyUsed),
|
|
219
|
+
monthly_used: Math.max(serverMonthlyUsed, localMonthlyUsed),
|
|
220
|
+
// Use the stricter (lower) limit — local config takes priority if lower
|
|
221
|
+
daily_limit: this.pickStricterLimit(serverDailyLimit, this.localDailyLimit),
|
|
222
|
+
monthly_limit: this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit)
|
|
187
223
|
};
|
|
188
224
|
this.consecutiveFailures = 0;
|
|
189
225
|
const newStatus = data.kill_switch_active ? "OFF" : data.status;
|
|
@@ -200,6 +236,22 @@ var Heartbeat = class {
|
|
|
200
236
|
}
|
|
201
237
|
}
|
|
202
238
|
}
|
|
239
|
+
/**
|
|
240
|
+
* Pick the stricter (lower) of two limits.
|
|
241
|
+
* If one is null, use the other.
|
|
242
|
+
*/
|
|
243
|
+
pickStricterLimit(a, b) {
|
|
244
|
+
if (a === null) return b;
|
|
245
|
+
if (b === null) return a;
|
|
246
|
+
return Math.min(a, b);
|
|
247
|
+
}
|
|
248
|
+
/**
|
|
249
|
+
* Force an immediate heartbeat poll (useful for kill switch checks).
|
|
250
|
+
* Returns a promise that resolves when the poll completes.
|
|
251
|
+
*/
|
|
252
|
+
async forcePoll() {
|
|
253
|
+
await this.beat();
|
|
254
|
+
}
|
|
203
255
|
/**
|
|
204
256
|
* Update local budget tracking (called after each request).
|
|
205
257
|
* This provides real-time budget awareness between heartbeats.
|
|
@@ -362,18 +414,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
362
414
|
});
|
|
363
415
|
}
|
|
364
416
|
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
417
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
418
|
+
try {
|
|
419
|
+
if (config.mode === "cost_protect") {
|
|
420
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
421
|
+
blocked = true;
|
|
422
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
423
|
+
const blockError = {
|
|
424
|
+
code: "BUDGET_EXCEEDED",
|
|
425
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
426
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
427
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
428
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
429
|
+
dashboard_url: dashboardUrl
|
|
430
|
+
};
|
|
431
|
+
shipper.push({
|
|
432
|
+
id: generateId(),
|
|
433
|
+
timestamp: startTime,
|
|
434
|
+
provider: "anthropic",
|
|
435
|
+
model,
|
|
436
|
+
input_tokens: 0,
|
|
437
|
+
output_tokens: 0,
|
|
438
|
+
cost: 0,
|
|
439
|
+
latency_ms: Date.now() - startTime,
|
|
440
|
+
endpoint_tag: config.endpoint_tag,
|
|
441
|
+
downgraded: false,
|
|
442
|
+
blocked: true,
|
|
443
|
+
enforcement_reason: enforcementReason
|
|
444
|
+
});
|
|
445
|
+
throw Object.assign(new Error(blockError.message), {
|
|
446
|
+
caplyr: blockError
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
450
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
451
|
+
if (fallback && fallback !== model) {
|
|
452
|
+
originalModel = model;
|
|
453
|
+
model = fallback;
|
|
454
|
+
downgraded = true;
|
|
455
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
456
|
+
config.onEnforcement?.({
|
|
457
|
+
type: "downgrade",
|
|
458
|
+
timestamp: Date.now(),
|
|
459
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
460
|
+
original_model: originalModel,
|
|
461
|
+
fallback_model: model,
|
|
462
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
463
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
464
|
+
estimated_savings: 0
|
|
465
|
+
// Calculated after response
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
471
|
+
try {
|
|
472
|
+
const response = await target.create.call(
|
|
473
|
+
target,
|
|
474
|
+
requestParams,
|
|
475
|
+
options
|
|
476
|
+
);
|
|
477
|
+
const latency = Date.now() - startTime;
|
|
478
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
479
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
480
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
481
|
+
heartbeat.trackSpend(cost);
|
|
482
|
+
let estimatedSavings = 0;
|
|
483
|
+
if (downgraded && originalModel) {
|
|
484
|
+
const originalCost = calculateCost(
|
|
485
|
+
originalModel,
|
|
486
|
+
inputTokens,
|
|
487
|
+
outputTokens
|
|
488
|
+
);
|
|
489
|
+
estimatedSavings = originalCost - cost;
|
|
490
|
+
}
|
|
491
|
+
shipper.push({
|
|
492
|
+
id: generateId(),
|
|
493
|
+
timestamp: startTime,
|
|
494
|
+
provider: "anthropic",
|
|
495
|
+
model,
|
|
496
|
+
input_tokens: inputTokens,
|
|
497
|
+
output_tokens: outputTokens,
|
|
498
|
+
cost,
|
|
499
|
+
latency_ms: latency,
|
|
500
|
+
endpoint_tag: config.endpoint_tag,
|
|
501
|
+
downgraded,
|
|
502
|
+
original_model: originalModel,
|
|
503
|
+
blocked: false,
|
|
504
|
+
enforcement_reason: enforcementReason
|
|
505
|
+
});
|
|
506
|
+
return response;
|
|
507
|
+
} catch (err) {
|
|
508
|
+
if (err?.caplyr) throw err;
|
|
377
509
|
shipper.push({
|
|
378
510
|
id: generateId(),
|
|
379
511
|
timestamp: startTime,
|
|
@@ -384,90 +516,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
384
516
|
cost: 0,
|
|
385
517
|
latency_ms: Date.now() - startTime,
|
|
386
518
|
endpoint_tag: config.endpoint_tag,
|
|
387
|
-
downgraded
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
throw Object.assign(new Error(blockError.message), {
|
|
392
|
-
caplyr: blockError
|
|
519
|
+
downgraded,
|
|
520
|
+
original_model: originalModel,
|
|
521
|
+
blocked: false,
|
|
522
|
+
enforcement_reason: "provider_error"
|
|
393
523
|
});
|
|
524
|
+
throw err;
|
|
394
525
|
}
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
if (fallback && fallback !== model) {
|
|
398
|
-
originalModel = model;
|
|
399
|
-
model = fallback;
|
|
400
|
-
downgraded = true;
|
|
401
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
402
|
-
config.onEnforcement?.({
|
|
403
|
-
type: "downgrade",
|
|
404
|
-
timestamp: Date.now(),
|
|
405
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
406
|
-
original_model: originalModel,
|
|
407
|
-
fallback_model: model,
|
|
408
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
409
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
410
|
-
estimated_savings: 0
|
|
411
|
-
// Calculated after response
|
|
412
|
-
});
|
|
413
|
-
}
|
|
414
|
-
}
|
|
415
|
-
}
|
|
416
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
417
|
-
try {
|
|
418
|
-
const response = await target.create.call(
|
|
419
|
-
target,
|
|
420
|
-
requestParams,
|
|
421
|
-
options
|
|
422
|
-
);
|
|
423
|
-
const latency = Date.now() - startTime;
|
|
424
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
425
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
426
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
427
|
-
heartbeat.trackSpend(cost);
|
|
428
|
-
let estimatedSavings = 0;
|
|
429
|
-
if (downgraded && originalModel) {
|
|
430
|
-
const originalCost = calculateCost(
|
|
431
|
-
originalModel,
|
|
432
|
-
inputTokens,
|
|
433
|
-
outputTokens
|
|
434
|
-
);
|
|
435
|
-
estimatedSavings = originalCost - cost;
|
|
436
|
-
}
|
|
437
|
-
shipper.push({
|
|
438
|
-
id: generateId(),
|
|
439
|
-
timestamp: startTime,
|
|
440
|
-
provider: "anthropic",
|
|
441
|
-
model,
|
|
442
|
-
input_tokens: inputTokens,
|
|
443
|
-
output_tokens: outputTokens,
|
|
444
|
-
cost,
|
|
445
|
-
latency_ms: latency,
|
|
446
|
-
endpoint_tag: config.endpoint_tag,
|
|
447
|
-
downgraded,
|
|
448
|
-
original_model: originalModel,
|
|
449
|
-
blocked: false,
|
|
450
|
-
enforcement_reason: enforcementReason
|
|
451
|
-
});
|
|
452
|
-
return response;
|
|
453
|
-
} catch (err) {
|
|
454
|
-
if (err?.caplyr) throw err;
|
|
455
|
-
shipper.push({
|
|
456
|
-
id: generateId(),
|
|
457
|
-
timestamp: startTime,
|
|
458
|
-
provider: "anthropic",
|
|
459
|
-
model,
|
|
460
|
-
input_tokens: 0,
|
|
461
|
-
output_tokens: 0,
|
|
462
|
-
cost: 0,
|
|
463
|
-
latency_ms: Date.now() - startTime,
|
|
464
|
-
endpoint_tag: config.endpoint_tag,
|
|
465
|
-
downgraded,
|
|
466
|
-
original_model: originalModel,
|
|
467
|
-
blocked: false,
|
|
468
|
-
enforcement_reason: "provider_error"
|
|
469
|
-
});
|
|
470
|
-
throw err;
|
|
526
|
+
} finally {
|
|
527
|
+
release();
|
|
471
528
|
}
|
|
472
529
|
};
|
|
473
530
|
}
|
|
@@ -544,18 +601,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
544
601
|
});
|
|
545
602
|
}
|
|
546
603
|
}
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
604
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
605
|
+
try {
|
|
606
|
+
if (config.mode === "cost_protect") {
|
|
607
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
608
|
+
blocked = true;
|
|
609
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
610
|
+
const blockError = {
|
|
611
|
+
code: "BUDGET_EXCEEDED",
|
|
612
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
613
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
614
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
615
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
616
|
+
dashboard_url: dashboardUrl
|
|
617
|
+
};
|
|
618
|
+
shipper.push({
|
|
619
|
+
id: generateId2(),
|
|
620
|
+
timestamp: startTime,
|
|
621
|
+
provider: "openai",
|
|
622
|
+
model,
|
|
623
|
+
input_tokens: 0,
|
|
624
|
+
output_tokens: 0,
|
|
625
|
+
cost: 0,
|
|
626
|
+
latency_ms: Date.now() - startTime,
|
|
627
|
+
endpoint_tag: config.endpoint_tag,
|
|
628
|
+
downgraded: false,
|
|
629
|
+
blocked: true,
|
|
630
|
+
enforcement_reason: enforcementReason
|
|
631
|
+
});
|
|
632
|
+
throw Object.assign(new Error(blockError.message), {
|
|
633
|
+
caplyr: blockError
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
637
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
638
|
+
if (fallback && fallback !== model) {
|
|
639
|
+
originalModel = model;
|
|
640
|
+
model = fallback;
|
|
641
|
+
downgraded = true;
|
|
642
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
643
|
+
config.onEnforcement?.({
|
|
644
|
+
type: "downgrade",
|
|
645
|
+
timestamp: Date.now(),
|
|
646
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
647
|
+
original_model: originalModel,
|
|
648
|
+
fallback_model: model,
|
|
649
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
650
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
651
|
+
estimated_savings: 0
|
|
652
|
+
});
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
657
|
+
try {
|
|
658
|
+
const response = await target.create.call(
|
|
659
|
+
target,
|
|
660
|
+
requestParams,
|
|
661
|
+
options
|
|
662
|
+
);
|
|
663
|
+
const latency = Date.now() - startTime;
|
|
664
|
+
const usage = response?.usage;
|
|
665
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
666
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
667
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
668
|
+
heartbeat.trackSpend(cost);
|
|
669
|
+
shipper.push({
|
|
670
|
+
id: generateId2(),
|
|
671
|
+
timestamp: startTime,
|
|
672
|
+
provider: "openai",
|
|
673
|
+
model,
|
|
674
|
+
input_tokens: inputTokens,
|
|
675
|
+
output_tokens: outputTokens,
|
|
676
|
+
cost,
|
|
677
|
+
latency_ms: latency,
|
|
678
|
+
endpoint_tag: config.endpoint_tag,
|
|
679
|
+
downgraded,
|
|
680
|
+
original_model: originalModel,
|
|
681
|
+
blocked: false,
|
|
682
|
+
enforcement_reason: enforcementReason
|
|
683
|
+
});
|
|
684
|
+
return response;
|
|
685
|
+
} catch (err) {
|
|
686
|
+
if (err?.caplyr) throw err;
|
|
559
687
|
shipper.push({
|
|
560
688
|
id: generateId2(),
|
|
561
689
|
timestamp: startTime,
|
|
@@ -566,81 +694,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
566
694
|
cost: 0,
|
|
567
695
|
latency_ms: Date.now() - startTime,
|
|
568
696
|
endpoint_tag: config.endpoint_tag,
|
|
569
|
-
downgraded
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
throw Object.assign(new Error(blockError.message), {
|
|
574
|
-
caplyr: blockError
|
|
697
|
+
downgraded,
|
|
698
|
+
original_model: originalModel,
|
|
699
|
+
blocked: false,
|
|
700
|
+
enforcement_reason: "provider_error"
|
|
575
701
|
});
|
|
702
|
+
throw err;
|
|
576
703
|
}
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
if (fallback && fallback !== model) {
|
|
580
|
-
originalModel = model;
|
|
581
|
-
model = fallback;
|
|
582
|
-
downgraded = true;
|
|
583
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
584
|
-
config.onEnforcement?.({
|
|
585
|
-
type: "downgrade",
|
|
586
|
-
timestamp: Date.now(),
|
|
587
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
588
|
-
original_model: originalModel,
|
|
589
|
-
fallback_model: model,
|
|
590
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
591
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
592
|
-
estimated_savings: 0
|
|
593
|
-
});
|
|
594
|
-
}
|
|
595
|
-
}
|
|
596
|
-
}
|
|
597
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
598
|
-
try {
|
|
599
|
-
const response = await target.create.call(
|
|
600
|
-
target,
|
|
601
|
-
requestParams,
|
|
602
|
-
options
|
|
603
|
-
);
|
|
604
|
-
const latency = Date.now() - startTime;
|
|
605
|
-
const usage = response?.usage;
|
|
606
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
607
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
608
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
609
|
-
heartbeat.trackSpend(cost);
|
|
610
|
-
shipper.push({
|
|
611
|
-
id: generateId2(),
|
|
612
|
-
timestamp: startTime,
|
|
613
|
-
provider: "openai",
|
|
614
|
-
model,
|
|
615
|
-
input_tokens: inputTokens,
|
|
616
|
-
output_tokens: outputTokens,
|
|
617
|
-
cost,
|
|
618
|
-
latency_ms: latency,
|
|
619
|
-
endpoint_tag: config.endpoint_tag,
|
|
620
|
-
downgraded,
|
|
621
|
-
original_model: originalModel,
|
|
622
|
-
blocked: false,
|
|
623
|
-
enforcement_reason: enforcementReason
|
|
624
|
-
});
|
|
625
|
-
return response;
|
|
626
|
-
} catch (err) {
|
|
627
|
-
if (err?.caplyr) throw err;
|
|
628
|
-
shipper.push({
|
|
629
|
-
id: generateId2(),
|
|
630
|
-
timestamp: startTime,
|
|
631
|
-
provider: "openai",
|
|
632
|
-
model,
|
|
633
|
-
input_tokens: 0,
|
|
634
|
-
output_tokens: 0,
|
|
635
|
-
cost: 0,
|
|
636
|
-
latency_ms: Date.now() - startTime,
|
|
637
|
-
endpoint_tag: config.endpoint_tag,
|
|
638
|
-
downgraded,
|
|
639
|
-
original_model: originalModel,
|
|
640
|
-
blocked: false,
|
|
641
|
-
enforcement_reason: "provider_error"
|
|
642
|
-
});
|
|
643
|
-
throw err;
|
|
704
|
+
} finally {
|
|
705
|
+
release();
|
|
644
706
|
}
|
|
645
707
|
};
|
|
646
708
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -74,6 +74,36 @@ var LogShipper = class {
|
|
|
74
74
|
}
|
|
75
75
|
};
|
|
76
76
|
|
|
77
|
+
// src/mutex.ts
|
|
78
|
+
var Mutex = class {
|
|
79
|
+
constructor() {
|
|
80
|
+
this.queue = [];
|
|
81
|
+
this.locked = false;
|
|
82
|
+
}
|
|
83
|
+
async acquire() {
|
|
84
|
+
if (!this.locked) {
|
|
85
|
+
this.locked = true;
|
|
86
|
+
return this.createRelease();
|
|
87
|
+
}
|
|
88
|
+
return new Promise((resolve) => {
|
|
89
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
createRelease() {
|
|
93
|
+
let released = false;
|
|
94
|
+
return () => {
|
|
95
|
+
if (released) return;
|
|
96
|
+
released = true;
|
|
97
|
+
const next = this.queue.shift();
|
|
98
|
+
if (next) {
|
|
99
|
+
next();
|
|
100
|
+
} else {
|
|
101
|
+
this.locked = false;
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
};
|
|
106
|
+
|
|
77
107
|
// src/heartbeat.ts
|
|
78
108
|
var Heartbeat = class {
|
|
79
109
|
constructor(config) {
|
|
@@ -91,6 +121,8 @@ var Heartbeat = class {
|
|
|
91
121
|
};
|
|
92
122
|
/** Current protection status */
|
|
93
123
|
this.status = "ACTIVE";
|
|
124
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
125
|
+
this.budgetMutex = new Mutex();
|
|
94
126
|
/** Local budget limits set via config (not from server) */
|
|
95
127
|
this.localDailyLimit = null;
|
|
96
128
|
this.localMonthlyLimit = null;
|
|
@@ -143,14 +175,18 @@ var Heartbeat = class {
|
|
|
143
175
|
const data = await res.json();
|
|
144
176
|
const localDailyUsed = this.budgetStatus.daily_used;
|
|
145
177
|
const localMonthlyUsed = this.budgetStatus.monthly_used;
|
|
178
|
+
const serverDailyUsed = Number(data.daily_used) || 0;
|
|
179
|
+
const serverMonthlyUsed = Number(data.monthly_used) || 0;
|
|
180
|
+
const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
|
|
181
|
+
const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
|
|
146
182
|
this.budgetStatus = {
|
|
147
183
|
...data,
|
|
148
184
|
// Use whichever spend is higher — server or local tracking
|
|
149
|
-
daily_used: Math.max(
|
|
150
|
-
monthly_used: Math.max(
|
|
151
|
-
//
|
|
152
|
-
daily_limit:
|
|
153
|
-
monthly_limit:
|
|
185
|
+
daily_used: Math.max(serverDailyUsed, localDailyUsed),
|
|
186
|
+
monthly_used: Math.max(serverMonthlyUsed, localMonthlyUsed),
|
|
187
|
+
// Use the stricter (lower) limit — local config takes priority if lower
|
|
188
|
+
daily_limit: this.pickStricterLimit(serverDailyLimit, this.localDailyLimit),
|
|
189
|
+
monthly_limit: this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit)
|
|
154
190
|
};
|
|
155
191
|
this.consecutiveFailures = 0;
|
|
156
192
|
const newStatus = data.kill_switch_active ? "OFF" : data.status;
|
|
@@ -167,6 +203,22 @@ var Heartbeat = class {
|
|
|
167
203
|
}
|
|
168
204
|
}
|
|
169
205
|
}
|
|
206
|
+
/**
|
|
207
|
+
* Pick the stricter (lower) of two limits.
|
|
208
|
+
* If one is null, use the other.
|
|
209
|
+
*/
|
|
210
|
+
pickStricterLimit(a, b) {
|
|
211
|
+
if (a === null) return b;
|
|
212
|
+
if (b === null) return a;
|
|
213
|
+
return Math.min(a, b);
|
|
214
|
+
}
|
|
215
|
+
/**
|
|
216
|
+
* Force an immediate heartbeat poll (useful for kill switch checks).
|
|
217
|
+
* Returns a promise that resolves when the poll completes.
|
|
218
|
+
*/
|
|
219
|
+
async forcePoll() {
|
|
220
|
+
await this.beat();
|
|
221
|
+
}
|
|
170
222
|
/**
|
|
171
223
|
* Update local budget tracking (called after each request).
|
|
172
224
|
* This provides real-time budget awareness between heartbeats.
|
|
@@ -329,18 +381,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
329
381
|
});
|
|
330
382
|
}
|
|
331
383
|
}
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
384
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
385
|
+
try {
|
|
386
|
+
if (config.mode === "cost_protect") {
|
|
387
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
388
|
+
blocked = true;
|
|
389
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
390
|
+
const blockError = {
|
|
391
|
+
code: "BUDGET_EXCEEDED",
|
|
392
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
393
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
394
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
395
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
396
|
+
dashboard_url: dashboardUrl
|
|
397
|
+
};
|
|
398
|
+
shipper.push({
|
|
399
|
+
id: generateId(),
|
|
400
|
+
timestamp: startTime,
|
|
401
|
+
provider: "anthropic",
|
|
402
|
+
model,
|
|
403
|
+
input_tokens: 0,
|
|
404
|
+
output_tokens: 0,
|
|
405
|
+
cost: 0,
|
|
406
|
+
latency_ms: Date.now() - startTime,
|
|
407
|
+
endpoint_tag: config.endpoint_tag,
|
|
408
|
+
downgraded: false,
|
|
409
|
+
blocked: true,
|
|
410
|
+
enforcement_reason: enforcementReason
|
|
411
|
+
});
|
|
412
|
+
throw Object.assign(new Error(blockError.message), {
|
|
413
|
+
caplyr: blockError
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
417
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
418
|
+
if (fallback && fallback !== model) {
|
|
419
|
+
originalModel = model;
|
|
420
|
+
model = fallback;
|
|
421
|
+
downgraded = true;
|
|
422
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
423
|
+
config.onEnforcement?.({
|
|
424
|
+
type: "downgrade",
|
|
425
|
+
timestamp: Date.now(),
|
|
426
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
427
|
+
original_model: originalModel,
|
|
428
|
+
fallback_model: model,
|
|
429
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
430
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
431
|
+
estimated_savings: 0
|
|
432
|
+
// Calculated after response
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
438
|
+
try {
|
|
439
|
+
const response = await target.create.call(
|
|
440
|
+
target,
|
|
441
|
+
requestParams,
|
|
442
|
+
options
|
|
443
|
+
);
|
|
444
|
+
const latency = Date.now() - startTime;
|
|
445
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
446
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
447
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
448
|
+
heartbeat.trackSpend(cost);
|
|
449
|
+
let estimatedSavings = 0;
|
|
450
|
+
if (downgraded && originalModel) {
|
|
451
|
+
const originalCost = calculateCost(
|
|
452
|
+
originalModel,
|
|
453
|
+
inputTokens,
|
|
454
|
+
outputTokens
|
|
455
|
+
);
|
|
456
|
+
estimatedSavings = originalCost - cost;
|
|
457
|
+
}
|
|
458
|
+
shipper.push({
|
|
459
|
+
id: generateId(),
|
|
460
|
+
timestamp: startTime,
|
|
461
|
+
provider: "anthropic",
|
|
462
|
+
model,
|
|
463
|
+
input_tokens: inputTokens,
|
|
464
|
+
output_tokens: outputTokens,
|
|
465
|
+
cost,
|
|
466
|
+
latency_ms: latency,
|
|
467
|
+
endpoint_tag: config.endpoint_tag,
|
|
468
|
+
downgraded,
|
|
469
|
+
original_model: originalModel,
|
|
470
|
+
blocked: false,
|
|
471
|
+
enforcement_reason: enforcementReason
|
|
472
|
+
});
|
|
473
|
+
return response;
|
|
474
|
+
} catch (err) {
|
|
475
|
+
if (err?.caplyr) throw err;
|
|
344
476
|
shipper.push({
|
|
345
477
|
id: generateId(),
|
|
346
478
|
timestamp: startTime,
|
|
@@ -351,90 +483,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
351
483
|
cost: 0,
|
|
352
484
|
latency_ms: Date.now() - startTime,
|
|
353
485
|
endpoint_tag: config.endpoint_tag,
|
|
354
|
-
downgraded
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
throw Object.assign(new Error(blockError.message), {
|
|
359
|
-
caplyr: blockError
|
|
486
|
+
downgraded,
|
|
487
|
+
original_model: originalModel,
|
|
488
|
+
blocked: false,
|
|
489
|
+
enforcement_reason: "provider_error"
|
|
360
490
|
});
|
|
491
|
+
throw err;
|
|
361
492
|
}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
if (fallback && fallback !== model) {
|
|
365
|
-
originalModel = model;
|
|
366
|
-
model = fallback;
|
|
367
|
-
downgraded = true;
|
|
368
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
369
|
-
config.onEnforcement?.({
|
|
370
|
-
type: "downgrade",
|
|
371
|
-
timestamp: Date.now(),
|
|
372
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
373
|
-
original_model: originalModel,
|
|
374
|
-
fallback_model: model,
|
|
375
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
376
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
377
|
-
estimated_savings: 0
|
|
378
|
-
// Calculated after response
|
|
379
|
-
});
|
|
380
|
-
}
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
384
|
-
try {
|
|
385
|
-
const response = await target.create.call(
|
|
386
|
-
target,
|
|
387
|
-
requestParams,
|
|
388
|
-
options
|
|
389
|
-
);
|
|
390
|
-
const latency = Date.now() - startTime;
|
|
391
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
392
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
393
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
394
|
-
heartbeat.trackSpend(cost);
|
|
395
|
-
let estimatedSavings = 0;
|
|
396
|
-
if (downgraded && originalModel) {
|
|
397
|
-
const originalCost = calculateCost(
|
|
398
|
-
originalModel,
|
|
399
|
-
inputTokens,
|
|
400
|
-
outputTokens
|
|
401
|
-
);
|
|
402
|
-
estimatedSavings = originalCost - cost;
|
|
403
|
-
}
|
|
404
|
-
shipper.push({
|
|
405
|
-
id: generateId(),
|
|
406
|
-
timestamp: startTime,
|
|
407
|
-
provider: "anthropic",
|
|
408
|
-
model,
|
|
409
|
-
input_tokens: inputTokens,
|
|
410
|
-
output_tokens: outputTokens,
|
|
411
|
-
cost,
|
|
412
|
-
latency_ms: latency,
|
|
413
|
-
endpoint_tag: config.endpoint_tag,
|
|
414
|
-
downgraded,
|
|
415
|
-
original_model: originalModel,
|
|
416
|
-
blocked: false,
|
|
417
|
-
enforcement_reason: enforcementReason
|
|
418
|
-
});
|
|
419
|
-
return response;
|
|
420
|
-
} catch (err) {
|
|
421
|
-
if (err?.caplyr) throw err;
|
|
422
|
-
shipper.push({
|
|
423
|
-
id: generateId(),
|
|
424
|
-
timestamp: startTime,
|
|
425
|
-
provider: "anthropic",
|
|
426
|
-
model,
|
|
427
|
-
input_tokens: 0,
|
|
428
|
-
output_tokens: 0,
|
|
429
|
-
cost: 0,
|
|
430
|
-
latency_ms: Date.now() - startTime,
|
|
431
|
-
endpoint_tag: config.endpoint_tag,
|
|
432
|
-
downgraded,
|
|
433
|
-
original_model: originalModel,
|
|
434
|
-
blocked: false,
|
|
435
|
-
enforcement_reason: "provider_error"
|
|
436
|
-
});
|
|
437
|
-
throw err;
|
|
493
|
+
} finally {
|
|
494
|
+
release();
|
|
438
495
|
}
|
|
439
496
|
};
|
|
440
497
|
}
|
|
@@ -511,18 +568,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
511
568
|
});
|
|
512
569
|
}
|
|
513
570
|
}
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
571
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
572
|
+
try {
|
|
573
|
+
if (config.mode === "cost_protect") {
|
|
574
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
575
|
+
blocked = true;
|
|
576
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
577
|
+
const blockError = {
|
|
578
|
+
code: "BUDGET_EXCEEDED",
|
|
579
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
580
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
581
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
582
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
583
|
+
dashboard_url: dashboardUrl
|
|
584
|
+
};
|
|
585
|
+
shipper.push({
|
|
586
|
+
id: generateId2(),
|
|
587
|
+
timestamp: startTime,
|
|
588
|
+
provider: "openai",
|
|
589
|
+
model,
|
|
590
|
+
input_tokens: 0,
|
|
591
|
+
output_tokens: 0,
|
|
592
|
+
cost: 0,
|
|
593
|
+
latency_ms: Date.now() - startTime,
|
|
594
|
+
endpoint_tag: config.endpoint_tag,
|
|
595
|
+
downgraded: false,
|
|
596
|
+
blocked: true,
|
|
597
|
+
enforcement_reason: enforcementReason
|
|
598
|
+
});
|
|
599
|
+
throw Object.assign(new Error(blockError.message), {
|
|
600
|
+
caplyr: blockError
|
|
601
|
+
});
|
|
602
|
+
}
|
|
603
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
604
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
605
|
+
if (fallback && fallback !== model) {
|
|
606
|
+
originalModel = model;
|
|
607
|
+
model = fallback;
|
|
608
|
+
downgraded = true;
|
|
609
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
610
|
+
config.onEnforcement?.({
|
|
611
|
+
type: "downgrade",
|
|
612
|
+
timestamp: Date.now(),
|
|
613
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
614
|
+
original_model: originalModel,
|
|
615
|
+
fallback_model: model,
|
|
616
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
617
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
618
|
+
estimated_savings: 0
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
624
|
+
try {
|
|
625
|
+
const response = await target.create.call(
|
|
626
|
+
target,
|
|
627
|
+
requestParams,
|
|
628
|
+
options
|
|
629
|
+
);
|
|
630
|
+
const latency = Date.now() - startTime;
|
|
631
|
+
const usage = response?.usage;
|
|
632
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
633
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
634
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
635
|
+
heartbeat.trackSpend(cost);
|
|
636
|
+
shipper.push({
|
|
637
|
+
id: generateId2(),
|
|
638
|
+
timestamp: startTime,
|
|
639
|
+
provider: "openai",
|
|
640
|
+
model,
|
|
641
|
+
input_tokens: inputTokens,
|
|
642
|
+
output_tokens: outputTokens,
|
|
643
|
+
cost,
|
|
644
|
+
latency_ms: latency,
|
|
645
|
+
endpoint_tag: config.endpoint_tag,
|
|
646
|
+
downgraded,
|
|
647
|
+
original_model: originalModel,
|
|
648
|
+
blocked: false,
|
|
649
|
+
enforcement_reason: enforcementReason
|
|
650
|
+
});
|
|
651
|
+
return response;
|
|
652
|
+
} catch (err) {
|
|
653
|
+
if (err?.caplyr) throw err;
|
|
526
654
|
shipper.push({
|
|
527
655
|
id: generateId2(),
|
|
528
656
|
timestamp: startTime,
|
|
@@ -533,81 +661,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
533
661
|
cost: 0,
|
|
534
662
|
latency_ms: Date.now() - startTime,
|
|
535
663
|
endpoint_tag: config.endpoint_tag,
|
|
536
|
-
downgraded
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
throw Object.assign(new Error(blockError.message), {
|
|
541
|
-
caplyr: blockError
|
|
664
|
+
downgraded,
|
|
665
|
+
original_model: originalModel,
|
|
666
|
+
blocked: false,
|
|
667
|
+
enforcement_reason: "provider_error"
|
|
542
668
|
});
|
|
669
|
+
throw err;
|
|
543
670
|
}
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
if (fallback && fallback !== model) {
|
|
547
|
-
originalModel = model;
|
|
548
|
-
model = fallback;
|
|
549
|
-
downgraded = true;
|
|
550
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
551
|
-
config.onEnforcement?.({
|
|
552
|
-
type: "downgrade",
|
|
553
|
-
timestamp: Date.now(),
|
|
554
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
555
|
-
original_model: originalModel,
|
|
556
|
-
fallback_model: model,
|
|
557
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
558
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
559
|
-
estimated_savings: 0
|
|
560
|
-
});
|
|
561
|
-
}
|
|
562
|
-
}
|
|
563
|
-
}
|
|
564
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
565
|
-
try {
|
|
566
|
-
const response = await target.create.call(
|
|
567
|
-
target,
|
|
568
|
-
requestParams,
|
|
569
|
-
options
|
|
570
|
-
);
|
|
571
|
-
const latency = Date.now() - startTime;
|
|
572
|
-
const usage = response?.usage;
|
|
573
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
574
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
575
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
576
|
-
heartbeat.trackSpend(cost);
|
|
577
|
-
shipper.push({
|
|
578
|
-
id: generateId2(),
|
|
579
|
-
timestamp: startTime,
|
|
580
|
-
provider: "openai",
|
|
581
|
-
model,
|
|
582
|
-
input_tokens: inputTokens,
|
|
583
|
-
output_tokens: outputTokens,
|
|
584
|
-
cost,
|
|
585
|
-
latency_ms: latency,
|
|
586
|
-
endpoint_tag: config.endpoint_tag,
|
|
587
|
-
downgraded,
|
|
588
|
-
original_model: originalModel,
|
|
589
|
-
blocked: false,
|
|
590
|
-
enforcement_reason: enforcementReason
|
|
591
|
-
});
|
|
592
|
-
return response;
|
|
593
|
-
} catch (err) {
|
|
594
|
-
if (err?.caplyr) throw err;
|
|
595
|
-
shipper.push({
|
|
596
|
-
id: generateId2(),
|
|
597
|
-
timestamp: startTime,
|
|
598
|
-
provider: "openai",
|
|
599
|
-
model,
|
|
600
|
-
input_tokens: 0,
|
|
601
|
-
output_tokens: 0,
|
|
602
|
-
cost: 0,
|
|
603
|
-
latency_ms: Date.now() - startTime,
|
|
604
|
-
endpoint_tag: config.endpoint_tag,
|
|
605
|
-
downgraded,
|
|
606
|
-
original_model: originalModel,
|
|
607
|
-
blocked: false,
|
|
608
|
-
enforcement_reason: "provider_error"
|
|
609
|
-
});
|
|
610
|
-
throw err;
|
|
671
|
+
} finally {
|
|
672
|
+
release();
|
|
611
673
|
}
|
|
612
674
|
};
|
|
613
675
|
}
|