caplyr 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +221 -179
- package/dist/index.mjs +221 -179
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -107,6 +107,36 @@ var LogShipper = class {
|
|
|
107
107
|
}
|
|
108
108
|
};
|
|
109
109
|
|
|
110
|
+
// src/mutex.ts
|
|
111
|
+
var Mutex = class {
|
|
112
|
+
constructor() {
|
|
113
|
+
this.queue = [];
|
|
114
|
+
this.locked = false;
|
|
115
|
+
}
|
|
116
|
+
async acquire() {
|
|
117
|
+
if (!this.locked) {
|
|
118
|
+
this.locked = true;
|
|
119
|
+
return this.createRelease();
|
|
120
|
+
}
|
|
121
|
+
return new Promise((resolve) => {
|
|
122
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
createRelease() {
|
|
126
|
+
let released = false;
|
|
127
|
+
return () => {
|
|
128
|
+
if (released) return;
|
|
129
|
+
released = true;
|
|
130
|
+
const next = this.queue.shift();
|
|
131
|
+
if (next) {
|
|
132
|
+
next();
|
|
133
|
+
} else {
|
|
134
|
+
this.locked = false;
|
|
135
|
+
}
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
|
|
110
140
|
// src/heartbeat.ts
|
|
111
141
|
var Heartbeat = class {
|
|
112
142
|
constructor(config) {
|
|
@@ -124,6 +154,8 @@ var Heartbeat = class {
|
|
|
124
154
|
};
|
|
125
155
|
/** Current protection status */
|
|
126
156
|
this.status = "ACTIVE";
|
|
157
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
158
|
+
this.budgetMutex = new Mutex();
|
|
127
159
|
/** Local budget limits set via config (not from server) */
|
|
128
160
|
this.localDailyLimit = null;
|
|
129
161
|
this.localMonthlyLimit = null;
|
|
@@ -382,18 +414,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
382
414
|
});
|
|
383
415
|
}
|
|
384
416
|
}
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
417
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
418
|
+
try {
|
|
419
|
+
if (config.mode === "cost_protect") {
|
|
420
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
421
|
+
blocked = true;
|
|
422
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
423
|
+
const blockError = {
|
|
424
|
+
code: "BUDGET_EXCEEDED",
|
|
425
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
426
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
427
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
428
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
429
|
+
dashboard_url: dashboardUrl
|
|
430
|
+
};
|
|
431
|
+
shipper.push({
|
|
432
|
+
id: generateId(),
|
|
433
|
+
timestamp: startTime,
|
|
434
|
+
provider: "anthropic",
|
|
435
|
+
model,
|
|
436
|
+
input_tokens: 0,
|
|
437
|
+
output_tokens: 0,
|
|
438
|
+
cost: 0,
|
|
439
|
+
latency_ms: Date.now() - startTime,
|
|
440
|
+
endpoint_tag: config.endpoint_tag,
|
|
441
|
+
downgraded: false,
|
|
442
|
+
blocked: true,
|
|
443
|
+
enforcement_reason: enforcementReason
|
|
444
|
+
});
|
|
445
|
+
throw Object.assign(new Error(blockError.message), {
|
|
446
|
+
caplyr: blockError
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
450
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
451
|
+
if (fallback && fallback !== model) {
|
|
452
|
+
originalModel = model;
|
|
453
|
+
model = fallback;
|
|
454
|
+
downgraded = true;
|
|
455
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
456
|
+
config.onEnforcement?.({
|
|
457
|
+
type: "downgrade",
|
|
458
|
+
timestamp: Date.now(),
|
|
459
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
460
|
+
original_model: originalModel,
|
|
461
|
+
fallback_model: model,
|
|
462
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
463
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
464
|
+
estimated_savings: 0
|
|
465
|
+
// Calculated after response
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
471
|
+
try {
|
|
472
|
+
const response = await target.create.call(
|
|
473
|
+
target,
|
|
474
|
+
requestParams,
|
|
475
|
+
options
|
|
476
|
+
);
|
|
477
|
+
const latency = Date.now() - startTime;
|
|
478
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
479
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
480
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
481
|
+
heartbeat.trackSpend(cost);
|
|
482
|
+
let estimatedSavings = 0;
|
|
483
|
+
if (downgraded && originalModel) {
|
|
484
|
+
const originalCost = calculateCost(
|
|
485
|
+
originalModel,
|
|
486
|
+
inputTokens,
|
|
487
|
+
outputTokens
|
|
488
|
+
);
|
|
489
|
+
estimatedSavings = originalCost - cost;
|
|
490
|
+
}
|
|
491
|
+
shipper.push({
|
|
492
|
+
id: generateId(),
|
|
493
|
+
timestamp: startTime,
|
|
494
|
+
provider: "anthropic",
|
|
495
|
+
model,
|
|
496
|
+
input_tokens: inputTokens,
|
|
497
|
+
output_tokens: outputTokens,
|
|
498
|
+
cost,
|
|
499
|
+
latency_ms: latency,
|
|
500
|
+
endpoint_tag: config.endpoint_tag,
|
|
501
|
+
downgraded,
|
|
502
|
+
original_model: originalModel,
|
|
503
|
+
blocked: false,
|
|
504
|
+
enforcement_reason: enforcementReason
|
|
505
|
+
});
|
|
506
|
+
return response;
|
|
507
|
+
} catch (err) {
|
|
508
|
+
if (err?.caplyr) throw err;
|
|
397
509
|
shipper.push({
|
|
398
510
|
id: generateId(),
|
|
399
511
|
timestamp: startTime,
|
|
@@ -404,90 +516,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
404
516
|
cost: 0,
|
|
405
517
|
latency_ms: Date.now() - startTime,
|
|
406
518
|
endpoint_tag: config.endpoint_tag,
|
|
407
|
-
downgraded
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
throw Object.assign(new Error(blockError.message), {
|
|
412
|
-
caplyr: blockError
|
|
519
|
+
downgraded,
|
|
520
|
+
original_model: originalModel,
|
|
521
|
+
blocked: false,
|
|
522
|
+
enforcement_reason: "provider_error"
|
|
413
523
|
});
|
|
524
|
+
throw err;
|
|
414
525
|
}
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
if (fallback && fallback !== model) {
|
|
418
|
-
originalModel = model;
|
|
419
|
-
model = fallback;
|
|
420
|
-
downgraded = true;
|
|
421
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
422
|
-
config.onEnforcement?.({
|
|
423
|
-
type: "downgrade",
|
|
424
|
-
timestamp: Date.now(),
|
|
425
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
426
|
-
original_model: originalModel,
|
|
427
|
-
fallback_model: model,
|
|
428
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
429
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
430
|
-
estimated_savings: 0
|
|
431
|
-
// Calculated after response
|
|
432
|
-
});
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
437
|
-
try {
|
|
438
|
-
const response = await target.create.call(
|
|
439
|
-
target,
|
|
440
|
-
requestParams,
|
|
441
|
-
options
|
|
442
|
-
);
|
|
443
|
-
const latency = Date.now() - startTime;
|
|
444
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
445
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
446
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
447
|
-
heartbeat.trackSpend(cost);
|
|
448
|
-
let estimatedSavings = 0;
|
|
449
|
-
if (downgraded && originalModel) {
|
|
450
|
-
const originalCost = calculateCost(
|
|
451
|
-
originalModel,
|
|
452
|
-
inputTokens,
|
|
453
|
-
outputTokens
|
|
454
|
-
);
|
|
455
|
-
estimatedSavings = originalCost - cost;
|
|
456
|
-
}
|
|
457
|
-
shipper.push({
|
|
458
|
-
id: generateId(),
|
|
459
|
-
timestamp: startTime,
|
|
460
|
-
provider: "anthropic",
|
|
461
|
-
model,
|
|
462
|
-
input_tokens: inputTokens,
|
|
463
|
-
output_tokens: outputTokens,
|
|
464
|
-
cost,
|
|
465
|
-
latency_ms: latency,
|
|
466
|
-
endpoint_tag: config.endpoint_tag,
|
|
467
|
-
downgraded,
|
|
468
|
-
original_model: originalModel,
|
|
469
|
-
blocked: false,
|
|
470
|
-
enforcement_reason: enforcementReason
|
|
471
|
-
});
|
|
472
|
-
return response;
|
|
473
|
-
} catch (err) {
|
|
474
|
-
if (err?.caplyr) throw err;
|
|
475
|
-
shipper.push({
|
|
476
|
-
id: generateId(),
|
|
477
|
-
timestamp: startTime,
|
|
478
|
-
provider: "anthropic",
|
|
479
|
-
model,
|
|
480
|
-
input_tokens: 0,
|
|
481
|
-
output_tokens: 0,
|
|
482
|
-
cost: 0,
|
|
483
|
-
latency_ms: Date.now() - startTime,
|
|
484
|
-
endpoint_tag: config.endpoint_tag,
|
|
485
|
-
downgraded,
|
|
486
|
-
original_model: originalModel,
|
|
487
|
-
blocked: false,
|
|
488
|
-
enforcement_reason: "provider_error"
|
|
489
|
-
});
|
|
490
|
-
throw err;
|
|
526
|
+
} finally {
|
|
527
|
+
release();
|
|
491
528
|
}
|
|
492
529
|
};
|
|
493
530
|
}
|
|
@@ -564,18 +601,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
564
601
|
});
|
|
565
602
|
}
|
|
566
603
|
}
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
604
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
605
|
+
try {
|
|
606
|
+
if (config.mode === "cost_protect") {
|
|
607
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
608
|
+
blocked = true;
|
|
609
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
610
|
+
const blockError = {
|
|
611
|
+
code: "BUDGET_EXCEEDED",
|
|
612
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
613
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
614
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
615
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
616
|
+
dashboard_url: dashboardUrl
|
|
617
|
+
};
|
|
618
|
+
shipper.push({
|
|
619
|
+
id: generateId2(),
|
|
620
|
+
timestamp: startTime,
|
|
621
|
+
provider: "openai",
|
|
622
|
+
model,
|
|
623
|
+
input_tokens: 0,
|
|
624
|
+
output_tokens: 0,
|
|
625
|
+
cost: 0,
|
|
626
|
+
latency_ms: Date.now() - startTime,
|
|
627
|
+
endpoint_tag: config.endpoint_tag,
|
|
628
|
+
downgraded: false,
|
|
629
|
+
blocked: true,
|
|
630
|
+
enforcement_reason: enforcementReason
|
|
631
|
+
});
|
|
632
|
+
throw Object.assign(new Error(blockError.message), {
|
|
633
|
+
caplyr: blockError
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
637
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
638
|
+
if (fallback && fallback !== model) {
|
|
639
|
+
originalModel = model;
|
|
640
|
+
model = fallback;
|
|
641
|
+
downgraded = true;
|
|
642
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
643
|
+
config.onEnforcement?.({
|
|
644
|
+
type: "downgrade",
|
|
645
|
+
timestamp: Date.now(),
|
|
646
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
647
|
+
original_model: originalModel,
|
|
648
|
+
fallback_model: model,
|
|
649
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
650
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
651
|
+
estimated_savings: 0
|
|
652
|
+
});
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
657
|
+
try {
|
|
658
|
+
const response = await target.create.call(
|
|
659
|
+
target,
|
|
660
|
+
requestParams,
|
|
661
|
+
options
|
|
662
|
+
);
|
|
663
|
+
const latency = Date.now() - startTime;
|
|
664
|
+
const usage = response?.usage;
|
|
665
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
666
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
667
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
668
|
+
heartbeat.trackSpend(cost);
|
|
669
|
+
shipper.push({
|
|
670
|
+
id: generateId2(),
|
|
671
|
+
timestamp: startTime,
|
|
672
|
+
provider: "openai",
|
|
673
|
+
model,
|
|
674
|
+
input_tokens: inputTokens,
|
|
675
|
+
output_tokens: outputTokens,
|
|
676
|
+
cost,
|
|
677
|
+
latency_ms: latency,
|
|
678
|
+
endpoint_tag: config.endpoint_tag,
|
|
679
|
+
downgraded,
|
|
680
|
+
original_model: originalModel,
|
|
681
|
+
blocked: false,
|
|
682
|
+
enforcement_reason: enforcementReason
|
|
683
|
+
});
|
|
684
|
+
return response;
|
|
685
|
+
} catch (err) {
|
|
686
|
+
if (err?.caplyr) throw err;
|
|
579
687
|
shipper.push({
|
|
580
688
|
id: generateId2(),
|
|
581
689
|
timestamp: startTime,
|
|
@@ -586,81 +694,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
586
694
|
cost: 0,
|
|
587
695
|
latency_ms: Date.now() - startTime,
|
|
588
696
|
endpoint_tag: config.endpoint_tag,
|
|
589
|
-
downgraded
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
throw Object.assign(new Error(blockError.message), {
|
|
594
|
-
caplyr: blockError
|
|
697
|
+
downgraded,
|
|
698
|
+
original_model: originalModel,
|
|
699
|
+
blocked: false,
|
|
700
|
+
enforcement_reason: "provider_error"
|
|
595
701
|
});
|
|
702
|
+
throw err;
|
|
596
703
|
}
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
if (fallback && fallback !== model) {
|
|
600
|
-
originalModel = model;
|
|
601
|
-
model = fallback;
|
|
602
|
-
downgraded = true;
|
|
603
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
604
|
-
config.onEnforcement?.({
|
|
605
|
-
type: "downgrade",
|
|
606
|
-
timestamp: Date.now(),
|
|
607
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
608
|
-
original_model: originalModel,
|
|
609
|
-
fallback_model: model,
|
|
610
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
611
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
612
|
-
estimated_savings: 0
|
|
613
|
-
});
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
}
|
|
617
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
618
|
-
try {
|
|
619
|
-
const response = await target.create.call(
|
|
620
|
-
target,
|
|
621
|
-
requestParams,
|
|
622
|
-
options
|
|
623
|
-
);
|
|
624
|
-
const latency = Date.now() - startTime;
|
|
625
|
-
const usage = response?.usage;
|
|
626
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
627
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
628
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
629
|
-
heartbeat.trackSpend(cost);
|
|
630
|
-
shipper.push({
|
|
631
|
-
id: generateId2(),
|
|
632
|
-
timestamp: startTime,
|
|
633
|
-
provider: "openai",
|
|
634
|
-
model,
|
|
635
|
-
input_tokens: inputTokens,
|
|
636
|
-
output_tokens: outputTokens,
|
|
637
|
-
cost,
|
|
638
|
-
latency_ms: latency,
|
|
639
|
-
endpoint_tag: config.endpoint_tag,
|
|
640
|
-
downgraded,
|
|
641
|
-
original_model: originalModel,
|
|
642
|
-
blocked: false,
|
|
643
|
-
enforcement_reason: enforcementReason
|
|
644
|
-
});
|
|
645
|
-
return response;
|
|
646
|
-
} catch (err) {
|
|
647
|
-
if (err?.caplyr) throw err;
|
|
648
|
-
shipper.push({
|
|
649
|
-
id: generateId2(),
|
|
650
|
-
timestamp: startTime,
|
|
651
|
-
provider: "openai",
|
|
652
|
-
model,
|
|
653
|
-
input_tokens: 0,
|
|
654
|
-
output_tokens: 0,
|
|
655
|
-
cost: 0,
|
|
656
|
-
latency_ms: Date.now() - startTime,
|
|
657
|
-
endpoint_tag: config.endpoint_tag,
|
|
658
|
-
downgraded,
|
|
659
|
-
original_model: originalModel,
|
|
660
|
-
blocked: false,
|
|
661
|
-
enforcement_reason: "provider_error"
|
|
662
|
-
});
|
|
663
|
-
throw err;
|
|
704
|
+
} finally {
|
|
705
|
+
release();
|
|
664
706
|
}
|
|
665
707
|
};
|
|
666
708
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -74,6 +74,36 @@ var LogShipper = class {
|
|
|
74
74
|
}
|
|
75
75
|
};
|
|
76
76
|
|
|
77
|
+
// src/mutex.ts
|
|
78
|
+
var Mutex = class {
|
|
79
|
+
constructor() {
|
|
80
|
+
this.queue = [];
|
|
81
|
+
this.locked = false;
|
|
82
|
+
}
|
|
83
|
+
async acquire() {
|
|
84
|
+
if (!this.locked) {
|
|
85
|
+
this.locked = true;
|
|
86
|
+
return this.createRelease();
|
|
87
|
+
}
|
|
88
|
+
return new Promise((resolve) => {
|
|
89
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
createRelease() {
|
|
93
|
+
let released = false;
|
|
94
|
+
return () => {
|
|
95
|
+
if (released) return;
|
|
96
|
+
released = true;
|
|
97
|
+
const next = this.queue.shift();
|
|
98
|
+
if (next) {
|
|
99
|
+
next();
|
|
100
|
+
} else {
|
|
101
|
+
this.locked = false;
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
};
|
|
106
|
+
|
|
77
107
|
// src/heartbeat.ts
|
|
78
108
|
var Heartbeat = class {
|
|
79
109
|
constructor(config) {
|
|
@@ -91,6 +121,8 @@ var Heartbeat = class {
|
|
|
91
121
|
};
|
|
92
122
|
/** Current protection status */
|
|
93
123
|
this.status = "ACTIVE";
|
|
124
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
125
|
+
this.budgetMutex = new Mutex();
|
|
94
126
|
/** Local budget limits set via config (not from server) */
|
|
95
127
|
this.localDailyLimit = null;
|
|
96
128
|
this.localMonthlyLimit = null;
|
|
@@ -349,18 +381,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
349
381
|
});
|
|
350
382
|
}
|
|
351
383
|
}
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
384
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
385
|
+
try {
|
|
386
|
+
if (config.mode === "cost_protect") {
|
|
387
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
388
|
+
blocked = true;
|
|
389
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
390
|
+
const blockError = {
|
|
391
|
+
code: "BUDGET_EXCEEDED",
|
|
392
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
393
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
394
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
395
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
396
|
+
dashboard_url: dashboardUrl
|
|
397
|
+
};
|
|
398
|
+
shipper.push({
|
|
399
|
+
id: generateId(),
|
|
400
|
+
timestamp: startTime,
|
|
401
|
+
provider: "anthropic",
|
|
402
|
+
model,
|
|
403
|
+
input_tokens: 0,
|
|
404
|
+
output_tokens: 0,
|
|
405
|
+
cost: 0,
|
|
406
|
+
latency_ms: Date.now() - startTime,
|
|
407
|
+
endpoint_tag: config.endpoint_tag,
|
|
408
|
+
downgraded: false,
|
|
409
|
+
blocked: true,
|
|
410
|
+
enforcement_reason: enforcementReason
|
|
411
|
+
});
|
|
412
|
+
throw Object.assign(new Error(blockError.message), {
|
|
413
|
+
caplyr: blockError
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
417
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
418
|
+
if (fallback && fallback !== model) {
|
|
419
|
+
originalModel = model;
|
|
420
|
+
model = fallback;
|
|
421
|
+
downgraded = true;
|
|
422
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
423
|
+
config.onEnforcement?.({
|
|
424
|
+
type: "downgrade",
|
|
425
|
+
timestamp: Date.now(),
|
|
426
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
427
|
+
original_model: originalModel,
|
|
428
|
+
fallback_model: model,
|
|
429
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
430
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
431
|
+
estimated_savings: 0
|
|
432
|
+
// Calculated after response
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
438
|
+
try {
|
|
439
|
+
const response = await target.create.call(
|
|
440
|
+
target,
|
|
441
|
+
requestParams,
|
|
442
|
+
options
|
|
443
|
+
);
|
|
444
|
+
const latency = Date.now() - startTime;
|
|
445
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
446
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
447
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
448
|
+
heartbeat.trackSpend(cost);
|
|
449
|
+
let estimatedSavings = 0;
|
|
450
|
+
if (downgraded && originalModel) {
|
|
451
|
+
const originalCost = calculateCost(
|
|
452
|
+
originalModel,
|
|
453
|
+
inputTokens,
|
|
454
|
+
outputTokens
|
|
455
|
+
);
|
|
456
|
+
estimatedSavings = originalCost - cost;
|
|
457
|
+
}
|
|
458
|
+
shipper.push({
|
|
459
|
+
id: generateId(),
|
|
460
|
+
timestamp: startTime,
|
|
461
|
+
provider: "anthropic",
|
|
462
|
+
model,
|
|
463
|
+
input_tokens: inputTokens,
|
|
464
|
+
output_tokens: outputTokens,
|
|
465
|
+
cost,
|
|
466
|
+
latency_ms: latency,
|
|
467
|
+
endpoint_tag: config.endpoint_tag,
|
|
468
|
+
downgraded,
|
|
469
|
+
original_model: originalModel,
|
|
470
|
+
blocked: false,
|
|
471
|
+
enforcement_reason: enforcementReason
|
|
472
|
+
});
|
|
473
|
+
return response;
|
|
474
|
+
} catch (err) {
|
|
475
|
+
if (err?.caplyr) throw err;
|
|
364
476
|
shipper.push({
|
|
365
477
|
id: generateId(),
|
|
366
478
|
timestamp: startTime,
|
|
@@ -371,90 +483,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
371
483
|
cost: 0,
|
|
372
484
|
latency_ms: Date.now() - startTime,
|
|
373
485
|
endpoint_tag: config.endpoint_tag,
|
|
374
|
-
downgraded
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
throw Object.assign(new Error(blockError.message), {
|
|
379
|
-
caplyr: blockError
|
|
486
|
+
downgraded,
|
|
487
|
+
original_model: originalModel,
|
|
488
|
+
blocked: false,
|
|
489
|
+
enforcement_reason: "provider_error"
|
|
380
490
|
});
|
|
491
|
+
throw err;
|
|
381
492
|
}
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
if (fallback && fallback !== model) {
|
|
385
|
-
originalModel = model;
|
|
386
|
-
model = fallback;
|
|
387
|
-
downgraded = true;
|
|
388
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
389
|
-
config.onEnforcement?.({
|
|
390
|
-
type: "downgrade",
|
|
391
|
-
timestamp: Date.now(),
|
|
392
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
393
|
-
original_model: originalModel,
|
|
394
|
-
fallback_model: model,
|
|
395
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
396
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
397
|
-
estimated_savings: 0
|
|
398
|
-
// Calculated after response
|
|
399
|
-
});
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
}
|
|
403
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
404
|
-
try {
|
|
405
|
-
const response = await target.create.call(
|
|
406
|
-
target,
|
|
407
|
-
requestParams,
|
|
408
|
-
options
|
|
409
|
-
);
|
|
410
|
-
const latency = Date.now() - startTime;
|
|
411
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
412
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
413
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
414
|
-
heartbeat.trackSpend(cost);
|
|
415
|
-
let estimatedSavings = 0;
|
|
416
|
-
if (downgraded && originalModel) {
|
|
417
|
-
const originalCost = calculateCost(
|
|
418
|
-
originalModel,
|
|
419
|
-
inputTokens,
|
|
420
|
-
outputTokens
|
|
421
|
-
);
|
|
422
|
-
estimatedSavings = originalCost - cost;
|
|
423
|
-
}
|
|
424
|
-
shipper.push({
|
|
425
|
-
id: generateId(),
|
|
426
|
-
timestamp: startTime,
|
|
427
|
-
provider: "anthropic",
|
|
428
|
-
model,
|
|
429
|
-
input_tokens: inputTokens,
|
|
430
|
-
output_tokens: outputTokens,
|
|
431
|
-
cost,
|
|
432
|
-
latency_ms: latency,
|
|
433
|
-
endpoint_tag: config.endpoint_tag,
|
|
434
|
-
downgraded,
|
|
435
|
-
original_model: originalModel,
|
|
436
|
-
blocked: false,
|
|
437
|
-
enforcement_reason: enforcementReason
|
|
438
|
-
});
|
|
439
|
-
return response;
|
|
440
|
-
} catch (err) {
|
|
441
|
-
if (err?.caplyr) throw err;
|
|
442
|
-
shipper.push({
|
|
443
|
-
id: generateId(),
|
|
444
|
-
timestamp: startTime,
|
|
445
|
-
provider: "anthropic",
|
|
446
|
-
model,
|
|
447
|
-
input_tokens: 0,
|
|
448
|
-
output_tokens: 0,
|
|
449
|
-
cost: 0,
|
|
450
|
-
latency_ms: Date.now() - startTime,
|
|
451
|
-
endpoint_tag: config.endpoint_tag,
|
|
452
|
-
downgraded,
|
|
453
|
-
original_model: originalModel,
|
|
454
|
-
blocked: false,
|
|
455
|
-
enforcement_reason: "provider_error"
|
|
456
|
-
});
|
|
457
|
-
throw err;
|
|
493
|
+
} finally {
|
|
494
|
+
release();
|
|
458
495
|
}
|
|
459
496
|
};
|
|
460
497
|
}
|
|
@@ -531,18 +568,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
531
568
|
});
|
|
532
569
|
}
|
|
533
570
|
}
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
571
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
572
|
+
try {
|
|
573
|
+
if (config.mode === "cost_protect") {
|
|
574
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
575
|
+
blocked = true;
|
|
576
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
577
|
+
const blockError = {
|
|
578
|
+
code: "BUDGET_EXCEEDED",
|
|
579
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
580
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
581
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
582
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
583
|
+
dashboard_url: dashboardUrl
|
|
584
|
+
};
|
|
585
|
+
shipper.push({
|
|
586
|
+
id: generateId2(),
|
|
587
|
+
timestamp: startTime,
|
|
588
|
+
provider: "openai",
|
|
589
|
+
model,
|
|
590
|
+
input_tokens: 0,
|
|
591
|
+
output_tokens: 0,
|
|
592
|
+
cost: 0,
|
|
593
|
+
latency_ms: Date.now() - startTime,
|
|
594
|
+
endpoint_tag: config.endpoint_tag,
|
|
595
|
+
downgraded: false,
|
|
596
|
+
blocked: true,
|
|
597
|
+
enforcement_reason: enforcementReason
|
|
598
|
+
});
|
|
599
|
+
throw Object.assign(new Error(blockError.message), {
|
|
600
|
+
caplyr: blockError
|
|
601
|
+
});
|
|
602
|
+
}
|
|
603
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
604
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
605
|
+
if (fallback && fallback !== model) {
|
|
606
|
+
originalModel = model;
|
|
607
|
+
model = fallback;
|
|
608
|
+
downgraded = true;
|
|
609
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
610
|
+
config.onEnforcement?.({
|
|
611
|
+
type: "downgrade",
|
|
612
|
+
timestamp: Date.now(),
|
|
613
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
614
|
+
original_model: originalModel,
|
|
615
|
+
fallback_model: model,
|
|
616
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
617
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
618
|
+
estimated_savings: 0
|
|
619
|
+
});
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
624
|
+
try {
|
|
625
|
+
const response = await target.create.call(
|
|
626
|
+
target,
|
|
627
|
+
requestParams,
|
|
628
|
+
options
|
|
629
|
+
);
|
|
630
|
+
const latency = Date.now() - startTime;
|
|
631
|
+
const usage = response?.usage;
|
|
632
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
633
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
634
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
635
|
+
heartbeat.trackSpend(cost);
|
|
636
|
+
shipper.push({
|
|
637
|
+
id: generateId2(),
|
|
638
|
+
timestamp: startTime,
|
|
639
|
+
provider: "openai",
|
|
640
|
+
model,
|
|
641
|
+
input_tokens: inputTokens,
|
|
642
|
+
output_tokens: outputTokens,
|
|
643
|
+
cost,
|
|
644
|
+
latency_ms: latency,
|
|
645
|
+
endpoint_tag: config.endpoint_tag,
|
|
646
|
+
downgraded,
|
|
647
|
+
original_model: originalModel,
|
|
648
|
+
blocked: false,
|
|
649
|
+
enforcement_reason: enforcementReason
|
|
650
|
+
});
|
|
651
|
+
return response;
|
|
652
|
+
} catch (err) {
|
|
653
|
+
if (err?.caplyr) throw err;
|
|
546
654
|
shipper.push({
|
|
547
655
|
id: generateId2(),
|
|
548
656
|
timestamp: startTime,
|
|
@@ -553,81 +661,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
553
661
|
cost: 0,
|
|
554
662
|
latency_ms: Date.now() - startTime,
|
|
555
663
|
endpoint_tag: config.endpoint_tag,
|
|
556
|
-
downgraded
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
throw Object.assign(new Error(blockError.message), {
|
|
561
|
-
caplyr: blockError
|
|
664
|
+
downgraded,
|
|
665
|
+
original_model: originalModel,
|
|
666
|
+
blocked: false,
|
|
667
|
+
enforcement_reason: "provider_error"
|
|
562
668
|
});
|
|
669
|
+
throw err;
|
|
563
670
|
}
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
if (fallback && fallback !== model) {
|
|
567
|
-
originalModel = model;
|
|
568
|
-
model = fallback;
|
|
569
|
-
downgraded = true;
|
|
570
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
571
|
-
config.onEnforcement?.({
|
|
572
|
-
type: "downgrade",
|
|
573
|
-
timestamp: Date.now(),
|
|
574
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
575
|
-
original_model: originalModel,
|
|
576
|
-
fallback_model: model,
|
|
577
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
578
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
579
|
-
estimated_savings: 0
|
|
580
|
-
});
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
}
|
|
584
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
585
|
-
try {
|
|
586
|
-
const response = await target.create.call(
|
|
587
|
-
target,
|
|
588
|
-
requestParams,
|
|
589
|
-
options
|
|
590
|
-
);
|
|
591
|
-
const latency = Date.now() - startTime;
|
|
592
|
-
const usage = response?.usage;
|
|
593
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
594
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
595
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
596
|
-
heartbeat.trackSpend(cost);
|
|
597
|
-
shipper.push({
|
|
598
|
-
id: generateId2(),
|
|
599
|
-
timestamp: startTime,
|
|
600
|
-
provider: "openai",
|
|
601
|
-
model,
|
|
602
|
-
input_tokens: inputTokens,
|
|
603
|
-
output_tokens: outputTokens,
|
|
604
|
-
cost,
|
|
605
|
-
latency_ms: latency,
|
|
606
|
-
endpoint_tag: config.endpoint_tag,
|
|
607
|
-
downgraded,
|
|
608
|
-
original_model: originalModel,
|
|
609
|
-
blocked: false,
|
|
610
|
-
enforcement_reason: enforcementReason
|
|
611
|
-
});
|
|
612
|
-
return response;
|
|
613
|
-
} catch (err) {
|
|
614
|
-
if (err?.caplyr) throw err;
|
|
615
|
-
shipper.push({
|
|
616
|
-
id: generateId2(),
|
|
617
|
-
timestamp: startTime,
|
|
618
|
-
provider: "openai",
|
|
619
|
-
model,
|
|
620
|
-
input_tokens: 0,
|
|
621
|
-
output_tokens: 0,
|
|
622
|
-
cost: 0,
|
|
623
|
-
latency_ms: Date.now() - startTime,
|
|
624
|
-
endpoint_tag: config.endpoint_tag,
|
|
625
|
-
downgraded,
|
|
626
|
-
original_model: originalModel,
|
|
627
|
-
blocked: false,
|
|
628
|
-
enforcement_reason: "provider_error"
|
|
629
|
-
});
|
|
630
|
-
throw err;
|
|
671
|
+
} finally {
|
|
672
|
+
release();
|
|
631
673
|
}
|
|
632
674
|
};
|
|
633
675
|
}
|