caplyr 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +288 -198
- package/dist/index.mjs +288 -198
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -36,20 +36,32 @@ var LogShipper = class {
|
|
|
36
36
|
constructor(config) {
|
|
37
37
|
this.buffer = [];
|
|
38
38
|
this.timer = null;
|
|
39
|
+
// Store bound handlers so we can remove them on shutdown
|
|
40
|
+
this.processHandlers = [];
|
|
39
41
|
this.endpoint = config.endpoint ?? "https://api.caplyr.com";
|
|
40
42
|
this.apiKey = config.apiKey;
|
|
41
43
|
this.batchSize = config.batchSize ?? 10;
|
|
42
44
|
this.flushInterval = config.flushInterval ?? 3e4;
|
|
45
|
+
this.maxBufferSize = 1e3;
|
|
43
46
|
this.onError = config.onError;
|
|
44
47
|
this.timer = setInterval(() => this.flush(), this.flushInterval);
|
|
48
|
+
this.timer.unref?.();
|
|
45
49
|
if (typeof process !== "undefined" && process.on) {
|
|
46
|
-
const
|
|
50
|
+
const onBeforeExit = () => {
|
|
51
|
+
this.flush();
|
|
52
|
+
};
|
|
53
|
+
const onSignal = () => {
|
|
47
54
|
this.flush().finally(() => {
|
|
48
55
|
});
|
|
49
56
|
};
|
|
50
|
-
process.on("beforeExit",
|
|
51
|
-
process.on("SIGTERM",
|
|
52
|
-
process.on("SIGINT",
|
|
57
|
+
process.on("beforeExit", onBeforeExit);
|
|
58
|
+
process.on("SIGTERM", onSignal);
|
|
59
|
+
process.on("SIGINT", onSignal);
|
|
60
|
+
this.processHandlers = [
|
|
61
|
+
{ event: "beforeExit", handler: onBeforeExit },
|
|
62
|
+
{ event: "SIGTERM", handler: onSignal },
|
|
63
|
+
{ event: "SIGINT", handler: onSignal }
|
|
64
|
+
];
|
|
53
65
|
}
|
|
54
66
|
}
|
|
55
67
|
/**
|
|
@@ -57,6 +69,10 @@ var LogShipper = class {
|
|
|
57
69
|
* Auto-flushes when batch size is reached.
|
|
58
70
|
*/
|
|
59
71
|
push(log) {
|
|
72
|
+
if (this.buffer.length >= this.maxBufferSize) {
|
|
73
|
+
const excess = this.buffer.length - this.maxBufferSize + 1;
|
|
74
|
+
this.buffer.splice(0, excess);
|
|
75
|
+
}
|
|
60
76
|
this.buffer.push(log);
|
|
61
77
|
if (this.buffer.length >= this.batchSize) {
|
|
62
78
|
this.flush();
|
|
@@ -88,7 +104,18 @@ var LogShipper = class {
|
|
|
88
104
|
}
|
|
89
105
|
}
|
|
90
106
|
/**
|
|
91
|
-
*
|
|
107
|
+
* Remove process signal handlers registered in the constructor.
|
|
108
|
+
*/
|
|
109
|
+
removeProcessHandlers() {
|
|
110
|
+
if (typeof process !== "undefined" && process.removeListener) {
|
|
111
|
+
for (const { event, handler } of this.processHandlers) {
|
|
112
|
+
process.removeListener(event, handler);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
this.processHandlers = [];
|
|
116
|
+
}
|
|
117
|
+
/**
|
|
118
|
+
* Stop the periodic flush timer and remove signal handlers.
|
|
92
119
|
* Call this when tearing down the SDK.
|
|
93
120
|
*/
|
|
94
121
|
destroy() {
|
|
@@ -96,17 +123,53 @@ var LogShipper = class {
|
|
|
96
123
|
clearInterval(this.timer);
|
|
97
124
|
this.timer = null;
|
|
98
125
|
}
|
|
126
|
+
this.removeProcessHandlers();
|
|
99
127
|
this.flush();
|
|
100
128
|
}
|
|
129
|
+
/**
|
|
130
|
+
* Await the final log flush, stop timers, and remove signal handlers.
|
|
131
|
+
* Preferred over destroy() for clean shutdown.
|
|
132
|
+
*/
|
|
101
133
|
async shutdown() {
|
|
102
134
|
if (this.timer) {
|
|
103
135
|
clearInterval(this.timer);
|
|
104
136
|
this.timer = null;
|
|
105
137
|
}
|
|
138
|
+
this.removeProcessHandlers();
|
|
106
139
|
await this.flush();
|
|
107
140
|
}
|
|
108
141
|
};
|
|
109
142
|
|
|
143
|
+
// src/mutex.ts
|
|
144
|
+
var Mutex = class {
|
|
145
|
+
constructor() {
|
|
146
|
+
this.queue = [];
|
|
147
|
+
this.locked = false;
|
|
148
|
+
}
|
|
149
|
+
async acquire() {
|
|
150
|
+
if (!this.locked) {
|
|
151
|
+
this.locked = true;
|
|
152
|
+
return this.createRelease();
|
|
153
|
+
}
|
|
154
|
+
return new Promise((resolve) => {
|
|
155
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
createRelease() {
|
|
159
|
+
let released = false;
|
|
160
|
+
return () => {
|
|
161
|
+
if (released) return;
|
|
162
|
+
released = true;
|
|
163
|
+
const next = this.queue.shift();
|
|
164
|
+
if (next) {
|
|
165
|
+
next();
|
|
166
|
+
} else {
|
|
167
|
+
this.locked = false;
|
|
168
|
+
}
|
|
169
|
+
};
|
|
170
|
+
}
|
|
171
|
+
};
|
|
172
|
+
|
|
110
173
|
// src/heartbeat.ts
|
|
111
174
|
var Heartbeat = class {
|
|
112
175
|
constructor(config) {
|
|
@@ -124,6 +187,8 @@ var Heartbeat = class {
|
|
|
124
187
|
};
|
|
125
188
|
/** Current protection status */
|
|
126
189
|
this.status = "ACTIVE";
|
|
190
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
191
|
+
this.budgetMutex = new Mutex();
|
|
127
192
|
/** Local budget limits set via config (not from server) */
|
|
128
193
|
this.localDailyLimit = null;
|
|
129
194
|
this.localMonthlyLimit = null;
|
|
@@ -155,6 +220,7 @@ var Heartbeat = class {
|
|
|
155
220
|
start() {
|
|
156
221
|
this.beat();
|
|
157
222
|
this.timer = setInterval(() => this.beat(), this.interval);
|
|
223
|
+
this.timer.unref?.();
|
|
158
224
|
}
|
|
159
225
|
/**
|
|
160
226
|
* Send a single heartbeat and update local state.
|
|
@@ -174,21 +240,20 @@ var Heartbeat = class {
|
|
|
174
240
|
throw new Error(`Heartbeat failed: ${res.status}`);
|
|
175
241
|
}
|
|
176
242
|
const data = await res.json();
|
|
177
|
-
const localDailyUsed = this.budgetStatus.daily_used;
|
|
178
|
-
const localMonthlyUsed = this.budgetStatus.monthly_used;
|
|
179
243
|
const serverDailyUsed = Number(data.daily_used) || 0;
|
|
180
244
|
const serverMonthlyUsed = Number(data.monthly_used) || 0;
|
|
181
245
|
const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
|
|
182
246
|
const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
|
|
183
|
-
this.budgetStatus
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
247
|
+
const snapshotDaily = this.budgetStatus.daily_used;
|
|
248
|
+
const snapshotMonthly = this.budgetStatus.monthly_used;
|
|
249
|
+
const mergedDaily = Math.max(serverDailyUsed, snapshotDaily);
|
|
250
|
+
const mergedMonthly = Math.max(serverMonthlyUsed, snapshotMonthly);
|
|
251
|
+
this.budgetStatus.daily_used = mergedDaily + (this.budgetStatus.daily_used - snapshotDaily);
|
|
252
|
+
this.budgetStatus.monthly_used = mergedMonthly + (this.budgetStatus.monthly_used - snapshotMonthly);
|
|
253
|
+
this.budgetStatus.daily_limit = this.pickStricterLimit(serverDailyLimit, this.localDailyLimit);
|
|
254
|
+
this.budgetStatus.monthly_limit = this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit);
|
|
255
|
+
this.budgetStatus.status = data.status;
|
|
256
|
+
this.budgetStatus.kill_switch_active = data.kill_switch_active;
|
|
192
257
|
this.consecutiveFailures = 0;
|
|
193
258
|
const newStatus = data.kill_switch_active ? "OFF" : data.status;
|
|
194
259
|
if (newStatus !== this.status) {
|
|
@@ -382,18 +447,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
382
447
|
});
|
|
383
448
|
}
|
|
384
449
|
}
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
450
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
451
|
+
try {
|
|
452
|
+
if (config.mode === "cost_protect") {
|
|
453
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
454
|
+
blocked = true;
|
|
455
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
456
|
+
const blockError = {
|
|
457
|
+
code: "BUDGET_EXCEEDED",
|
|
458
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
459
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
460
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
461
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
462
|
+
dashboard_url: dashboardUrl
|
|
463
|
+
};
|
|
464
|
+
shipper.push({
|
|
465
|
+
id: generateId(),
|
|
466
|
+
timestamp: startTime,
|
|
467
|
+
provider: "anthropic",
|
|
468
|
+
model,
|
|
469
|
+
input_tokens: 0,
|
|
470
|
+
output_tokens: 0,
|
|
471
|
+
cost: 0,
|
|
472
|
+
latency_ms: Date.now() - startTime,
|
|
473
|
+
endpoint_tag: config.endpoint_tag,
|
|
474
|
+
downgraded: false,
|
|
475
|
+
blocked: true,
|
|
476
|
+
enforcement_reason: enforcementReason
|
|
477
|
+
});
|
|
478
|
+
throw Object.assign(new Error(blockError.message), {
|
|
479
|
+
caplyr: blockError
|
|
480
|
+
});
|
|
481
|
+
}
|
|
482
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
483
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
484
|
+
if (fallback && fallback !== model) {
|
|
485
|
+
originalModel = model;
|
|
486
|
+
model = fallback;
|
|
487
|
+
downgraded = true;
|
|
488
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
489
|
+
config.onEnforcement?.({
|
|
490
|
+
type: "downgrade",
|
|
491
|
+
timestamp: Date.now(),
|
|
492
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
493
|
+
original_model: originalModel,
|
|
494
|
+
fallback_model: model,
|
|
495
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
496
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
497
|
+
estimated_savings: 0
|
|
498
|
+
// Calculated after response
|
|
499
|
+
});
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
504
|
+
try {
|
|
505
|
+
const response = await target.create.call(
|
|
506
|
+
target,
|
|
507
|
+
requestParams,
|
|
508
|
+
options
|
|
509
|
+
);
|
|
510
|
+
const latency = Date.now() - startTime;
|
|
511
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
512
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
513
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
514
|
+
heartbeat.trackSpend(cost);
|
|
515
|
+
let estimatedSavings = 0;
|
|
516
|
+
if (downgraded && originalModel) {
|
|
517
|
+
const originalCost = calculateCost(
|
|
518
|
+
originalModel,
|
|
519
|
+
inputTokens,
|
|
520
|
+
outputTokens
|
|
521
|
+
);
|
|
522
|
+
estimatedSavings = originalCost - cost;
|
|
523
|
+
}
|
|
524
|
+
shipper.push({
|
|
525
|
+
id: generateId(),
|
|
526
|
+
timestamp: startTime,
|
|
527
|
+
provider: "anthropic",
|
|
528
|
+
model,
|
|
529
|
+
input_tokens: inputTokens,
|
|
530
|
+
output_tokens: outputTokens,
|
|
531
|
+
cost,
|
|
532
|
+
latency_ms: latency,
|
|
533
|
+
endpoint_tag: config.endpoint_tag,
|
|
534
|
+
downgraded,
|
|
535
|
+
original_model: originalModel,
|
|
536
|
+
blocked: false,
|
|
537
|
+
enforcement_reason: enforcementReason
|
|
538
|
+
});
|
|
539
|
+
return response;
|
|
540
|
+
} catch (err) {
|
|
541
|
+
if (err?.caplyr) throw err;
|
|
397
542
|
shipper.push({
|
|
398
543
|
id: generateId(),
|
|
399
544
|
timestamp: startTime,
|
|
@@ -404,90 +549,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
404
549
|
cost: 0,
|
|
405
550
|
latency_ms: Date.now() - startTime,
|
|
406
551
|
endpoint_tag: config.endpoint_tag,
|
|
407
|
-
downgraded
|
|
408
|
-
|
|
409
|
-
|
|
552
|
+
downgraded,
|
|
553
|
+
original_model: originalModel,
|
|
554
|
+
blocked: false,
|
|
555
|
+
enforcement_reason: "provider_error"
|
|
410
556
|
});
|
|
411
|
-
throw
|
|
412
|
-
caplyr: blockError
|
|
413
|
-
});
|
|
414
|
-
}
|
|
415
|
-
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
416
|
-
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
417
|
-
if (fallback && fallback !== model) {
|
|
418
|
-
originalModel = model;
|
|
419
|
-
model = fallback;
|
|
420
|
-
downgraded = true;
|
|
421
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
422
|
-
config.onEnforcement?.({
|
|
423
|
-
type: "downgrade",
|
|
424
|
-
timestamp: Date.now(),
|
|
425
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
426
|
-
original_model: originalModel,
|
|
427
|
-
fallback_model: model,
|
|
428
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
429
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
430
|
-
estimated_savings: 0
|
|
431
|
-
// Calculated after response
|
|
432
|
-
});
|
|
433
|
-
}
|
|
557
|
+
throw err;
|
|
434
558
|
}
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
try {
|
|
438
|
-
const response = await target.create.call(
|
|
439
|
-
target,
|
|
440
|
-
requestParams,
|
|
441
|
-
options
|
|
442
|
-
);
|
|
443
|
-
const latency = Date.now() - startTime;
|
|
444
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
445
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
446
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
447
|
-
heartbeat.trackSpend(cost);
|
|
448
|
-
let estimatedSavings = 0;
|
|
449
|
-
if (downgraded && originalModel) {
|
|
450
|
-
const originalCost = calculateCost(
|
|
451
|
-
originalModel,
|
|
452
|
-
inputTokens,
|
|
453
|
-
outputTokens
|
|
454
|
-
);
|
|
455
|
-
estimatedSavings = originalCost - cost;
|
|
456
|
-
}
|
|
457
|
-
shipper.push({
|
|
458
|
-
id: generateId(),
|
|
459
|
-
timestamp: startTime,
|
|
460
|
-
provider: "anthropic",
|
|
461
|
-
model,
|
|
462
|
-
input_tokens: inputTokens,
|
|
463
|
-
output_tokens: outputTokens,
|
|
464
|
-
cost,
|
|
465
|
-
latency_ms: latency,
|
|
466
|
-
endpoint_tag: config.endpoint_tag,
|
|
467
|
-
downgraded,
|
|
468
|
-
original_model: originalModel,
|
|
469
|
-
blocked: false,
|
|
470
|
-
enforcement_reason: enforcementReason
|
|
471
|
-
});
|
|
472
|
-
return response;
|
|
473
|
-
} catch (err) {
|
|
474
|
-
if (err?.caplyr) throw err;
|
|
475
|
-
shipper.push({
|
|
476
|
-
id: generateId(),
|
|
477
|
-
timestamp: startTime,
|
|
478
|
-
provider: "anthropic",
|
|
479
|
-
model,
|
|
480
|
-
input_tokens: 0,
|
|
481
|
-
output_tokens: 0,
|
|
482
|
-
cost: 0,
|
|
483
|
-
latency_ms: Date.now() - startTime,
|
|
484
|
-
endpoint_tag: config.endpoint_tag,
|
|
485
|
-
downgraded,
|
|
486
|
-
original_model: originalModel,
|
|
487
|
-
blocked: false,
|
|
488
|
-
enforcement_reason: "provider_error"
|
|
489
|
-
});
|
|
490
|
-
throw err;
|
|
559
|
+
} finally {
|
|
560
|
+
release();
|
|
491
561
|
}
|
|
492
562
|
};
|
|
493
563
|
}
|
|
@@ -564,18 +634,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
564
634
|
});
|
|
565
635
|
}
|
|
566
636
|
}
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
637
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
638
|
+
try {
|
|
639
|
+
if (config.mode === "cost_protect") {
|
|
640
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
641
|
+
blocked = true;
|
|
642
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
643
|
+
const blockError = {
|
|
644
|
+
code: "BUDGET_EXCEEDED",
|
|
645
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
646
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
647
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
648
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
649
|
+
dashboard_url: dashboardUrl
|
|
650
|
+
};
|
|
651
|
+
shipper.push({
|
|
652
|
+
id: generateId2(),
|
|
653
|
+
timestamp: startTime,
|
|
654
|
+
provider: "openai",
|
|
655
|
+
model,
|
|
656
|
+
input_tokens: 0,
|
|
657
|
+
output_tokens: 0,
|
|
658
|
+
cost: 0,
|
|
659
|
+
latency_ms: Date.now() - startTime,
|
|
660
|
+
endpoint_tag: config.endpoint_tag,
|
|
661
|
+
downgraded: false,
|
|
662
|
+
blocked: true,
|
|
663
|
+
enforcement_reason: enforcementReason
|
|
664
|
+
});
|
|
665
|
+
throw Object.assign(new Error(blockError.message), {
|
|
666
|
+
caplyr: blockError
|
|
667
|
+
});
|
|
668
|
+
}
|
|
669
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
670
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
671
|
+
if (fallback && fallback !== model) {
|
|
672
|
+
originalModel = model;
|
|
673
|
+
model = fallback;
|
|
674
|
+
downgraded = true;
|
|
675
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
676
|
+
config.onEnforcement?.({
|
|
677
|
+
type: "downgrade",
|
|
678
|
+
timestamp: Date.now(),
|
|
679
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
680
|
+
original_model: originalModel,
|
|
681
|
+
fallback_model: model,
|
|
682
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
683
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
684
|
+
estimated_savings: 0
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
690
|
+
try {
|
|
691
|
+
const response = await target.create.call(
|
|
692
|
+
target,
|
|
693
|
+
requestParams,
|
|
694
|
+
options
|
|
695
|
+
);
|
|
696
|
+
const latency = Date.now() - startTime;
|
|
697
|
+
const usage = response?.usage;
|
|
698
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
699
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
700
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
701
|
+
heartbeat.trackSpend(cost);
|
|
702
|
+
shipper.push({
|
|
703
|
+
id: generateId2(),
|
|
704
|
+
timestamp: startTime,
|
|
705
|
+
provider: "openai",
|
|
706
|
+
model,
|
|
707
|
+
input_tokens: inputTokens,
|
|
708
|
+
output_tokens: outputTokens,
|
|
709
|
+
cost,
|
|
710
|
+
latency_ms: latency,
|
|
711
|
+
endpoint_tag: config.endpoint_tag,
|
|
712
|
+
downgraded,
|
|
713
|
+
original_model: originalModel,
|
|
714
|
+
blocked: false,
|
|
715
|
+
enforcement_reason: enforcementReason
|
|
716
|
+
});
|
|
717
|
+
return response;
|
|
718
|
+
} catch (err) {
|
|
719
|
+
if (err?.caplyr) throw err;
|
|
579
720
|
shipper.push({
|
|
580
721
|
id: generateId2(),
|
|
581
722
|
timestamp: startTime,
|
|
@@ -586,81 +727,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
586
727
|
cost: 0,
|
|
587
728
|
latency_ms: Date.now() - startTime,
|
|
588
729
|
endpoint_tag: config.endpoint_tag,
|
|
589
|
-
downgraded
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
throw Object.assign(new Error(blockError.message), {
|
|
594
|
-
caplyr: blockError
|
|
730
|
+
downgraded,
|
|
731
|
+
original_model: originalModel,
|
|
732
|
+
blocked: false,
|
|
733
|
+
enforcement_reason: "provider_error"
|
|
595
734
|
});
|
|
735
|
+
throw err;
|
|
596
736
|
}
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
if (fallback && fallback !== model) {
|
|
600
|
-
originalModel = model;
|
|
601
|
-
model = fallback;
|
|
602
|
-
downgraded = true;
|
|
603
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
604
|
-
config.onEnforcement?.({
|
|
605
|
-
type: "downgrade",
|
|
606
|
-
timestamp: Date.now(),
|
|
607
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
608
|
-
original_model: originalModel,
|
|
609
|
-
fallback_model: model,
|
|
610
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
611
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
612
|
-
estimated_savings: 0
|
|
613
|
-
});
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
}
|
|
617
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
618
|
-
try {
|
|
619
|
-
const response = await target.create.call(
|
|
620
|
-
target,
|
|
621
|
-
requestParams,
|
|
622
|
-
options
|
|
623
|
-
);
|
|
624
|
-
const latency = Date.now() - startTime;
|
|
625
|
-
const usage = response?.usage;
|
|
626
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
627
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
628
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
629
|
-
heartbeat.trackSpend(cost);
|
|
630
|
-
shipper.push({
|
|
631
|
-
id: generateId2(),
|
|
632
|
-
timestamp: startTime,
|
|
633
|
-
provider: "openai",
|
|
634
|
-
model,
|
|
635
|
-
input_tokens: inputTokens,
|
|
636
|
-
output_tokens: outputTokens,
|
|
637
|
-
cost,
|
|
638
|
-
latency_ms: latency,
|
|
639
|
-
endpoint_tag: config.endpoint_tag,
|
|
640
|
-
downgraded,
|
|
641
|
-
original_model: originalModel,
|
|
642
|
-
blocked: false,
|
|
643
|
-
enforcement_reason: enforcementReason
|
|
644
|
-
});
|
|
645
|
-
return response;
|
|
646
|
-
} catch (err) {
|
|
647
|
-
if (err?.caplyr) throw err;
|
|
648
|
-
shipper.push({
|
|
649
|
-
id: generateId2(),
|
|
650
|
-
timestamp: startTime,
|
|
651
|
-
provider: "openai",
|
|
652
|
-
model,
|
|
653
|
-
input_tokens: 0,
|
|
654
|
-
output_tokens: 0,
|
|
655
|
-
cost: 0,
|
|
656
|
-
latency_ms: Date.now() - startTime,
|
|
657
|
-
endpoint_tag: config.endpoint_tag,
|
|
658
|
-
downgraded,
|
|
659
|
-
original_model: originalModel,
|
|
660
|
-
blocked: false,
|
|
661
|
-
enforcement_reason: "provider_error"
|
|
662
|
-
});
|
|
663
|
-
throw err;
|
|
737
|
+
} finally {
|
|
738
|
+
release();
|
|
664
739
|
}
|
|
665
740
|
};
|
|
666
741
|
}
|
|
@@ -723,6 +798,7 @@ function protect(client, config) {
|
|
|
723
798
|
...config
|
|
724
799
|
};
|
|
725
800
|
let shared = instances.get(resolvedConfig.apiKey);
|
|
801
|
+
const isExisting = !!shared;
|
|
726
802
|
if (!shared) {
|
|
727
803
|
const shipper2 = new LogShipper(resolvedConfig);
|
|
728
804
|
const heartbeat2 = new Heartbeat(resolvedConfig);
|
|
@@ -732,7 +808,19 @@ function protect(client, config) {
|
|
|
732
808
|
}
|
|
733
809
|
const { shipper, heartbeat } = shared;
|
|
734
810
|
if (resolvedConfig.budget) {
|
|
735
|
-
const
|
|
811
|
+
const raw = typeof resolvedConfig.budget === "number" ? { monthly: resolvedConfig.budget } : resolvedConfig.budget;
|
|
812
|
+
const budgetConfig = { ...raw };
|
|
813
|
+
if (isExisting) {
|
|
814
|
+
const current = heartbeat.budgetStatus;
|
|
815
|
+
if (budgetConfig.daily !== void 0) {
|
|
816
|
+
const existing = current.daily_limit;
|
|
817
|
+
budgetConfig.daily = existing !== null ? Math.min(existing, budgetConfig.daily) : budgetConfig.daily;
|
|
818
|
+
}
|
|
819
|
+
if (budgetConfig.monthly !== void 0) {
|
|
820
|
+
const existing = current.monthly_limit;
|
|
821
|
+
budgetConfig.monthly = existing !== null ? Math.min(existing, budgetConfig.monthly) : budgetConfig.monthly;
|
|
822
|
+
}
|
|
823
|
+
}
|
|
736
824
|
heartbeat.applyLocalLimits(budgetConfig);
|
|
737
825
|
}
|
|
738
826
|
const provider = detectProvider(client);
|
|
@@ -774,14 +862,16 @@ async function shutdown(apiKey) {
|
|
|
774
862
|
const shared = instances.get(apiKey);
|
|
775
863
|
if (shared) {
|
|
776
864
|
shared.heartbeat.destroy();
|
|
777
|
-
shared.shipper.
|
|
865
|
+
await shared.shipper.shutdown();
|
|
778
866
|
instances.delete(apiKey);
|
|
779
867
|
}
|
|
780
868
|
} else {
|
|
869
|
+
const shutdowns = [];
|
|
781
870
|
for (const [key, shared] of instances) {
|
|
782
871
|
shared.heartbeat.destroy();
|
|
783
|
-
shared.shipper.
|
|
872
|
+
shutdowns.push(shared.shipper.shutdown());
|
|
784
873
|
}
|
|
874
|
+
await Promise.all(shutdowns);
|
|
785
875
|
instances.clear();
|
|
786
876
|
}
|
|
787
877
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -3,20 +3,32 @@ var LogShipper = class {
|
|
|
3
3
|
constructor(config) {
|
|
4
4
|
this.buffer = [];
|
|
5
5
|
this.timer = null;
|
|
6
|
+
// Store bound handlers so we can remove them on shutdown
|
|
7
|
+
this.processHandlers = [];
|
|
6
8
|
this.endpoint = config.endpoint ?? "https://api.caplyr.com";
|
|
7
9
|
this.apiKey = config.apiKey;
|
|
8
10
|
this.batchSize = config.batchSize ?? 10;
|
|
9
11
|
this.flushInterval = config.flushInterval ?? 3e4;
|
|
12
|
+
this.maxBufferSize = 1e3;
|
|
10
13
|
this.onError = config.onError;
|
|
11
14
|
this.timer = setInterval(() => this.flush(), this.flushInterval);
|
|
15
|
+
this.timer.unref?.();
|
|
12
16
|
if (typeof process !== "undefined" && process.on) {
|
|
13
|
-
const
|
|
17
|
+
const onBeforeExit = () => {
|
|
18
|
+
this.flush();
|
|
19
|
+
};
|
|
20
|
+
const onSignal = () => {
|
|
14
21
|
this.flush().finally(() => {
|
|
15
22
|
});
|
|
16
23
|
};
|
|
17
|
-
process.on("beforeExit",
|
|
18
|
-
process.on("SIGTERM",
|
|
19
|
-
process.on("SIGINT",
|
|
24
|
+
process.on("beforeExit", onBeforeExit);
|
|
25
|
+
process.on("SIGTERM", onSignal);
|
|
26
|
+
process.on("SIGINT", onSignal);
|
|
27
|
+
this.processHandlers = [
|
|
28
|
+
{ event: "beforeExit", handler: onBeforeExit },
|
|
29
|
+
{ event: "SIGTERM", handler: onSignal },
|
|
30
|
+
{ event: "SIGINT", handler: onSignal }
|
|
31
|
+
];
|
|
20
32
|
}
|
|
21
33
|
}
|
|
22
34
|
/**
|
|
@@ -24,6 +36,10 @@ var LogShipper = class {
|
|
|
24
36
|
* Auto-flushes when batch size is reached.
|
|
25
37
|
*/
|
|
26
38
|
push(log) {
|
|
39
|
+
if (this.buffer.length >= this.maxBufferSize) {
|
|
40
|
+
const excess = this.buffer.length - this.maxBufferSize + 1;
|
|
41
|
+
this.buffer.splice(0, excess);
|
|
42
|
+
}
|
|
27
43
|
this.buffer.push(log);
|
|
28
44
|
if (this.buffer.length >= this.batchSize) {
|
|
29
45
|
this.flush();
|
|
@@ -55,7 +71,18 @@ var LogShipper = class {
|
|
|
55
71
|
}
|
|
56
72
|
}
|
|
57
73
|
/**
|
|
58
|
-
*
|
|
74
|
+
* Remove process signal handlers registered in the constructor.
|
|
75
|
+
*/
|
|
76
|
+
removeProcessHandlers() {
|
|
77
|
+
if (typeof process !== "undefined" && process.removeListener) {
|
|
78
|
+
for (const { event, handler } of this.processHandlers) {
|
|
79
|
+
process.removeListener(event, handler);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
this.processHandlers = [];
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Stop the periodic flush timer and remove signal handlers.
|
|
59
86
|
* Call this when tearing down the SDK.
|
|
60
87
|
*/
|
|
61
88
|
destroy() {
|
|
@@ -63,17 +90,53 @@ var LogShipper = class {
|
|
|
63
90
|
clearInterval(this.timer);
|
|
64
91
|
this.timer = null;
|
|
65
92
|
}
|
|
93
|
+
this.removeProcessHandlers();
|
|
66
94
|
this.flush();
|
|
67
95
|
}
|
|
96
|
+
/**
|
|
97
|
+
* Await the final log flush, stop timers, and remove signal handlers.
|
|
98
|
+
* Preferred over destroy() for clean shutdown.
|
|
99
|
+
*/
|
|
68
100
|
async shutdown() {
|
|
69
101
|
if (this.timer) {
|
|
70
102
|
clearInterval(this.timer);
|
|
71
103
|
this.timer = null;
|
|
72
104
|
}
|
|
105
|
+
this.removeProcessHandlers();
|
|
73
106
|
await this.flush();
|
|
74
107
|
}
|
|
75
108
|
};
|
|
76
109
|
|
|
110
|
+
// src/mutex.ts
|
|
111
|
+
var Mutex = class {
|
|
112
|
+
constructor() {
|
|
113
|
+
this.queue = [];
|
|
114
|
+
this.locked = false;
|
|
115
|
+
}
|
|
116
|
+
async acquire() {
|
|
117
|
+
if (!this.locked) {
|
|
118
|
+
this.locked = true;
|
|
119
|
+
return this.createRelease();
|
|
120
|
+
}
|
|
121
|
+
return new Promise((resolve) => {
|
|
122
|
+
this.queue.push(() => resolve(this.createRelease()));
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
createRelease() {
|
|
126
|
+
let released = false;
|
|
127
|
+
return () => {
|
|
128
|
+
if (released) return;
|
|
129
|
+
released = true;
|
|
130
|
+
const next = this.queue.shift();
|
|
131
|
+
if (next) {
|
|
132
|
+
next();
|
|
133
|
+
} else {
|
|
134
|
+
this.locked = false;
|
|
135
|
+
}
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
|
|
77
140
|
// src/heartbeat.ts
|
|
78
141
|
var Heartbeat = class {
|
|
79
142
|
constructor(config) {
|
|
@@ -91,6 +154,8 @@ var Heartbeat = class {
|
|
|
91
154
|
};
|
|
92
155
|
/** Current protection status */
|
|
93
156
|
this.status = "ACTIVE";
|
|
157
|
+
/** Mutex for serializing budget check → API call → trackSpend */
|
|
158
|
+
this.budgetMutex = new Mutex();
|
|
94
159
|
/** Local budget limits set via config (not from server) */
|
|
95
160
|
this.localDailyLimit = null;
|
|
96
161
|
this.localMonthlyLimit = null;
|
|
@@ -122,6 +187,7 @@ var Heartbeat = class {
|
|
|
122
187
|
start() {
|
|
123
188
|
this.beat();
|
|
124
189
|
this.timer = setInterval(() => this.beat(), this.interval);
|
|
190
|
+
this.timer.unref?.();
|
|
125
191
|
}
|
|
126
192
|
/**
|
|
127
193
|
* Send a single heartbeat and update local state.
|
|
@@ -141,21 +207,20 @@ var Heartbeat = class {
|
|
|
141
207
|
throw new Error(`Heartbeat failed: ${res.status}`);
|
|
142
208
|
}
|
|
143
209
|
const data = await res.json();
|
|
144
|
-
const localDailyUsed = this.budgetStatus.daily_used;
|
|
145
|
-
const localMonthlyUsed = this.budgetStatus.monthly_used;
|
|
146
210
|
const serverDailyUsed = Number(data.daily_used) || 0;
|
|
147
211
|
const serverMonthlyUsed = Number(data.monthly_used) || 0;
|
|
148
212
|
const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
|
|
149
213
|
const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
|
|
150
|
-
this.budgetStatus
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
214
|
+
const snapshotDaily = this.budgetStatus.daily_used;
|
|
215
|
+
const snapshotMonthly = this.budgetStatus.monthly_used;
|
|
216
|
+
const mergedDaily = Math.max(serverDailyUsed, snapshotDaily);
|
|
217
|
+
const mergedMonthly = Math.max(serverMonthlyUsed, snapshotMonthly);
|
|
218
|
+
this.budgetStatus.daily_used = mergedDaily + (this.budgetStatus.daily_used - snapshotDaily);
|
|
219
|
+
this.budgetStatus.monthly_used = mergedMonthly + (this.budgetStatus.monthly_used - snapshotMonthly);
|
|
220
|
+
this.budgetStatus.daily_limit = this.pickStricterLimit(serverDailyLimit, this.localDailyLimit);
|
|
221
|
+
this.budgetStatus.monthly_limit = this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit);
|
|
222
|
+
this.budgetStatus.status = data.status;
|
|
223
|
+
this.budgetStatus.kill_switch_active = data.kill_switch_active;
|
|
159
224
|
this.consecutiveFailures = 0;
|
|
160
225
|
const newStatus = data.kill_switch_active ? "OFF" : data.status;
|
|
161
226
|
if (newStatus !== this.status) {
|
|
@@ -349,18 +414,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
349
414
|
});
|
|
350
415
|
}
|
|
351
416
|
}
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
417
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
418
|
+
try {
|
|
419
|
+
if (config.mode === "cost_protect") {
|
|
420
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
421
|
+
blocked = true;
|
|
422
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
423
|
+
const blockError = {
|
|
424
|
+
code: "BUDGET_EXCEEDED",
|
|
425
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
426
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
427
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
428
|
+
retry_after: getNextResetTime(enforcementReason),
|
|
429
|
+
dashboard_url: dashboardUrl
|
|
430
|
+
};
|
|
431
|
+
shipper.push({
|
|
432
|
+
id: generateId(),
|
|
433
|
+
timestamp: startTime,
|
|
434
|
+
provider: "anthropic",
|
|
435
|
+
model,
|
|
436
|
+
input_tokens: 0,
|
|
437
|
+
output_tokens: 0,
|
|
438
|
+
cost: 0,
|
|
439
|
+
latency_ms: Date.now() - startTime,
|
|
440
|
+
endpoint_tag: config.endpoint_tag,
|
|
441
|
+
downgraded: false,
|
|
442
|
+
blocked: true,
|
|
443
|
+
enforcement_reason: enforcementReason
|
|
444
|
+
});
|
|
445
|
+
throw Object.assign(new Error(blockError.message), {
|
|
446
|
+
caplyr: blockError
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
450
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
451
|
+
if (fallback && fallback !== model) {
|
|
452
|
+
originalModel = model;
|
|
453
|
+
model = fallback;
|
|
454
|
+
downgraded = true;
|
|
455
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
456
|
+
config.onEnforcement?.({
|
|
457
|
+
type: "downgrade",
|
|
458
|
+
timestamp: Date.now(),
|
|
459
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
460
|
+
original_model: originalModel,
|
|
461
|
+
fallback_model: model,
|
|
462
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
463
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
464
|
+
estimated_savings: 0
|
|
465
|
+
// Calculated after response
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
471
|
+
try {
|
|
472
|
+
const response = await target.create.call(
|
|
473
|
+
target,
|
|
474
|
+
requestParams,
|
|
475
|
+
options
|
|
476
|
+
);
|
|
477
|
+
const latency = Date.now() - startTime;
|
|
478
|
+
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
479
|
+
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
480
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
481
|
+
heartbeat.trackSpend(cost);
|
|
482
|
+
let estimatedSavings = 0;
|
|
483
|
+
if (downgraded && originalModel) {
|
|
484
|
+
const originalCost = calculateCost(
|
|
485
|
+
originalModel,
|
|
486
|
+
inputTokens,
|
|
487
|
+
outputTokens
|
|
488
|
+
);
|
|
489
|
+
estimatedSavings = originalCost - cost;
|
|
490
|
+
}
|
|
491
|
+
shipper.push({
|
|
492
|
+
id: generateId(),
|
|
493
|
+
timestamp: startTime,
|
|
494
|
+
provider: "anthropic",
|
|
495
|
+
model,
|
|
496
|
+
input_tokens: inputTokens,
|
|
497
|
+
output_tokens: outputTokens,
|
|
498
|
+
cost,
|
|
499
|
+
latency_ms: latency,
|
|
500
|
+
endpoint_tag: config.endpoint_tag,
|
|
501
|
+
downgraded,
|
|
502
|
+
original_model: originalModel,
|
|
503
|
+
blocked: false,
|
|
504
|
+
enforcement_reason: enforcementReason
|
|
505
|
+
});
|
|
506
|
+
return response;
|
|
507
|
+
} catch (err) {
|
|
508
|
+
if (err?.caplyr) throw err;
|
|
364
509
|
shipper.push({
|
|
365
510
|
id: generateId(),
|
|
366
511
|
timestamp: startTime,
|
|
@@ -371,90 +516,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
|
|
|
371
516
|
cost: 0,
|
|
372
517
|
latency_ms: Date.now() - startTime,
|
|
373
518
|
endpoint_tag: config.endpoint_tag,
|
|
374
|
-
downgraded
|
|
375
|
-
|
|
376
|
-
|
|
519
|
+
downgraded,
|
|
520
|
+
original_model: originalModel,
|
|
521
|
+
blocked: false,
|
|
522
|
+
enforcement_reason: "provider_error"
|
|
377
523
|
});
|
|
378
|
-
throw
|
|
379
|
-
caplyr: blockError
|
|
380
|
-
});
|
|
381
|
-
}
|
|
382
|
-
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
383
|
-
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
384
|
-
if (fallback && fallback !== model) {
|
|
385
|
-
originalModel = model;
|
|
386
|
-
model = fallback;
|
|
387
|
-
downgraded = true;
|
|
388
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
389
|
-
config.onEnforcement?.({
|
|
390
|
-
type: "downgrade",
|
|
391
|
-
timestamp: Date.now(),
|
|
392
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
393
|
-
original_model: originalModel,
|
|
394
|
-
fallback_model: model,
|
|
395
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
396
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
397
|
-
estimated_savings: 0
|
|
398
|
-
// Calculated after response
|
|
399
|
-
});
|
|
400
|
-
}
|
|
524
|
+
throw err;
|
|
401
525
|
}
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
try {
|
|
405
|
-
const response = await target.create.call(
|
|
406
|
-
target,
|
|
407
|
-
requestParams,
|
|
408
|
-
options
|
|
409
|
-
);
|
|
410
|
-
const latency = Date.now() - startTime;
|
|
411
|
-
const inputTokens = response?.usage?.input_tokens ?? 0;
|
|
412
|
-
const outputTokens = response?.usage?.output_tokens ?? 0;
|
|
413
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
414
|
-
heartbeat.trackSpend(cost);
|
|
415
|
-
let estimatedSavings = 0;
|
|
416
|
-
if (downgraded && originalModel) {
|
|
417
|
-
const originalCost = calculateCost(
|
|
418
|
-
originalModel,
|
|
419
|
-
inputTokens,
|
|
420
|
-
outputTokens
|
|
421
|
-
);
|
|
422
|
-
estimatedSavings = originalCost - cost;
|
|
423
|
-
}
|
|
424
|
-
shipper.push({
|
|
425
|
-
id: generateId(),
|
|
426
|
-
timestamp: startTime,
|
|
427
|
-
provider: "anthropic",
|
|
428
|
-
model,
|
|
429
|
-
input_tokens: inputTokens,
|
|
430
|
-
output_tokens: outputTokens,
|
|
431
|
-
cost,
|
|
432
|
-
latency_ms: latency,
|
|
433
|
-
endpoint_tag: config.endpoint_tag,
|
|
434
|
-
downgraded,
|
|
435
|
-
original_model: originalModel,
|
|
436
|
-
blocked: false,
|
|
437
|
-
enforcement_reason: enforcementReason
|
|
438
|
-
});
|
|
439
|
-
return response;
|
|
440
|
-
} catch (err) {
|
|
441
|
-
if (err?.caplyr) throw err;
|
|
442
|
-
shipper.push({
|
|
443
|
-
id: generateId(),
|
|
444
|
-
timestamp: startTime,
|
|
445
|
-
provider: "anthropic",
|
|
446
|
-
model,
|
|
447
|
-
input_tokens: 0,
|
|
448
|
-
output_tokens: 0,
|
|
449
|
-
cost: 0,
|
|
450
|
-
latency_ms: Date.now() - startTime,
|
|
451
|
-
endpoint_tag: config.endpoint_tag,
|
|
452
|
-
downgraded,
|
|
453
|
-
original_model: originalModel,
|
|
454
|
-
blocked: false,
|
|
455
|
-
enforcement_reason: "provider_error"
|
|
456
|
-
});
|
|
457
|
-
throw err;
|
|
526
|
+
} finally {
|
|
527
|
+
release();
|
|
458
528
|
}
|
|
459
529
|
};
|
|
460
530
|
}
|
|
@@ -531,18 +601,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
531
601
|
});
|
|
532
602
|
}
|
|
533
603
|
}
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
604
|
+
const release = await heartbeat.budgetMutex.acquire();
|
|
605
|
+
try {
|
|
606
|
+
if (config.mode === "cost_protect") {
|
|
607
|
+
if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
|
|
608
|
+
blocked = true;
|
|
609
|
+
enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
|
|
610
|
+
const blockError = {
|
|
611
|
+
code: "BUDGET_EXCEEDED",
|
|
612
|
+
message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
|
|
613
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
614
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
615
|
+
retry_after: getNextResetTime2(enforcementReason),
|
|
616
|
+
dashboard_url: dashboardUrl
|
|
617
|
+
};
|
|
618
|
+
shipper.push({
|
|
619
|
+
id: generateId2(),
|
|
620
|
+
timestamp: startTime,
|
|
621
|
+
provider: "openai",
|
|
622
|
+
model,
|
|
623
|
+
input_tokens: 0,
|
|
624
|
+
output_tokens: 0,
|
|
625
|
+
cost: 0,
|
|
626
|
+
latency_ms: Date.now() - startTime,
|
|
627
|
+
endpoint_tag: config.endpoint_tag,
|
|
628
|
+
downgraded: false,
|
|
629
|
+
blocked: true,
|
|
630
|
+
enforcement_reason: enforcementReason
|
|
631
|
+
});
|
|
632
|
+
throw Object.assign(new Error(blockError.message), {
|
|
633
|
+
caplyr: blockError
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
|
|
637
|
+
const fallback = config.fallback ?? getDefaultFallback(model);
|
|
638
|
+
if (fallback && fallback !== model) {
|
|
639
|
+
originalModel = model;
|
|
640
|
+
model = fallback;
|
|
641
|
+
downgraded = true;
|
|
642
|
+
enforcementReason = "auto_downgrade_threshold";
|
|
643
|
+
config.onEnforcement?.({
|
|
644
|
+
type: "downgrade",
|
|
645
|
+
timestamp: Date.now(),
|
|
646
|
+
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
647
|
+
original_model: originalModel,
|
|
648
|
+
fallback_model: model,
|
|
649
|
+
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
650
|
+
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
651
|
+
estimated_savings: 0
|
|
652
|
+
});
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
const requestParams = downgraded ? { ...params, model } : params;
|
|
657
|
+
try {
|
|
658
|
+
const response = await target.create.call(
|
|
659
|
+
target,
|
|
660
|
+
requestParams,
|
|
661
|
+
options
|
|
662
|
+
);
|
|
663
|
+
const latency = Date.now() - startTime;
|
|
664
|
+
const usage = response?.usage;
|
|
665
|
+
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
666
|
+
const outputTokens = usage?.completion_tokens ?? 0;
|
|
667
|
+
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
668
|
+
heartbeat.trackSpend(cost);
|
|
669
|
+
shipper.push({
|
|
670
|
+
id: generateId2(),
|
|
671
|
+
timestamp: startTime,
|
|
672
|
+
provider: "openai",
|
|
673
|
+
model,
|
|
674
|
+
input_tokens: inputTokens,
|
|
675
|
+
output_tokens: outputTokens,
|
|
676
|
+
cost,
|
|
677
|
+
latency_ms: latency,
|
|
678
|
+
endpoint_tag: config.endpoint_tag,
|
|
679
|
+
downgraded,
|
|
680
|
+
original_model: originalModel,
|
|
681
|
+
blocked: false,
|
|
682
|
+
enforcement_reason: enforcementReason
|
|
683
|
+
});
|
|
684
|
+
return response;
|
|
685
|
+
} catch (err) {
|
|
686
|
+
if (err?.caplyr) throw err;
|
|
546
687
|
shipper.push({
|
|
547
688
|
id: generateId2(),
|
|
548
689
|
timestamp: startTime,
|
|
@@ -553,81 +694,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
|
|
|
553
694
|
cost: 0,
|
|
554
695
|
latency_ms: Date.now() - startTime,
|
|
555
696
|
endpoint_tag: config.endpoint_tag,
|
|
556
|
-
downgraded
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
throw Object.assign(new Error(blockError.message), {
|
|
561
|
-
caplyr: blockError
|
|
697
|
+
downgraded,
|
|
698
|
+
original_model: originalModel,
|
|
699
|
+
blocked: false,
|
|
700
|
+
enforcement_reason: "provider_error"
|
|
562
701
|
});
|
|
702
|
+
throw err;
|
|
563
703
|
}
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
if (fallback && fallback !== model) {
|
|
567
|
-
originalModel = model;
|
|
568
|
-
model = fallback;
|
|
569
|
-
downgraded = true;
|
|
570
|
-
enforcementReason = "auto_downgrade_threshold";
|
|
571
|
-
config.onEnforcement?.({
|
|
572
|
-
type: "downgrade",
|
|
573
|
-
timestamp: Date.now(),
|
|
574
|
-
reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
|
|
575
|
-
original_model: originalModel,
|
|
576
|
-
fallback_model: model,
|
|
577
|
-
budget_used: heartbeat.budgetStatus.monthly_used,
|
|
578
|
-
budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
|
|
579
|
-
estimated_savings: 0
|
|
580
|
-
});
|
|
581
|
-
}
|
|
582
|
-
}
|
|
583
|
-
}
|
|
584
|
-
const requestParams = downgraded ? { ...params, model } : params;
|
|
585
|
-
try {
|
|
586
|
-
const response = await target.create.call(
|
|
587
|
-
target,
|
|
588
|
-
requestParams,
|
|
589
|
-
options
|
|
590
|
-
);
|
|
591
|
-
const latency = Date.now() - startTime;
|
|
592
|
-
const usage = response?.usage;
|
|
593
|
-
const inputTokens = usage?.prompt_tokens ?? 0;
|
|
594
|
-
const outputTokens = usage?.completion_tokens ?? 0;
|
|
595
|
-
const cost = calculateCost(model, inputTokens, outputTokens);
|
|
596
|
-
heartbeat.trackSpend(cost);
|
|
597
|
-
shipper.push({
|
|
598
|
-
id: generateId2(),
|
|
599
|
-
timestamp: startTime,
|
|
600
|
-
provider: "openai",
|
|
601
|
-
model,
|
|
602
|
-
input_tokens: inputTokens,
|
|
603
|
-
output_tokens: outputTokens,
|
|
604
|
-
cost,
|
|
605
|
-
latency_ms: latency,
|
|
606
|
-
endpoint_tag: config.endpoint_tag,
|
|
607
|
-
downgraded,
|
|
608
|
-
original_model: originalModel,
|
|
609
|
-
blocked: false,
|
|
610
|
-
enforcement_reason: enforcementReason
|
|
611
|
-
});
|
|
612
|
-
return response;
|
|
613
|
-
} catch (err) {
|
|
614
|
-
if (err?.caplyr) throw err;
|
|
615
|
-
shipper.push({
|
|
616
|
-
id: generateId2(),
|
|
617
|
-
timestamp: startTime,
|
|
618
|
-
provider: "openai",
|
|
619
|
-
model,
|
|
620
|
-
input_tokens: 0,
|
|
621
|
-
output_tokens: 0,
|
|
622
|
-
cost: 0,
|
|
623
|
-
latency_ms: Date.now() - startTime,
|
|
624
|
-
endpoint_tag: config.endpoint_tag,
|
|
625
|
-
downgraded,
|
|
626
|
-
original_model: originalModel,
|
|
627
|
-
blocked: false,
|
|
628
|
-
enforcement_reason: "provider_error"
|
|
629
|
-
});
|
|
630
|
-
throw err;
|
|
704
|
+
} finally {
|
|
705
|
+
release();
|
|
631
706
|
}
|
|
632
707
|
};
|
|
633
708
|
}
|
|
@@ -690,6 +765,7 @@ function protect(client, config) {
|
|
|
690
765
|
...config
|
|
691
766
|
};
|
|
692
767
|
let shared = instances.get(resolvedConfig.apiKey);
|
|
768
|
+
const isExisting = !!shared;
|
|
693
769
|
if (!shared) {
|
|
694
770
|
const shipper2 = new LogShipper(resolvedConfig);
|
|
695
771
|
const heartbeat2 = new Heartbeat(resolvedConfig);
|
|
@@ -699,7 +775,19 @@ function protect(client, config) {
|
|
|
699
775
|
}
|
|
700
776
|
const { shipper, heartbeat } = shared;
|
|
701
777
|
if (resolvedConfig.budget) {
|
|
702
|
-
const
|
|
778
|
+
const raw = typeof resolvedConfig.budget === "number" ? { monthly: resolvedConfig.budget } : resolvedConfig.budget;
|
|
779
|
+
const budgetConfig = { ...raw };
|
|
780
|
+
if (isExisting) {
|
|
781
|
+
const current = heartbeat.budgetStatus;
|
|
782
|
+
if (budgetConfig.daily !== void 0) {
|
|
783
|
+
const existing = current.daily_limit;
|
|
784
|
+
budgetConfig.daily = existing !== null ? Math.min(existing, budgetConfig.daily) : budgetConfig.daily;
|
|
785
|
+
}
|
|
786
|
+
if (budgetConfig.monthly !== void 0) {
|
|
787
|
+
const existing = current.monthly_limit;
|
|
788
|
+
budgetConfig.monthly = existing !== null ? Math.min(existing, budgetConfig.monthly) : budgetConfig.monthly;
|
|
789
|
+
}
|
|
790
|
+
}
|
|
703
791
|
heartbeat.applyLocalLimits(budgetConfig);
|
|
704
792
|
}
|
|
705
793
|
const provider = detectProvider(client);
|
|
@@ -741,14 +829,16 @@ async function shutdown(apiKey) {
|
|
|
741
829
|
const shared = instances.get(apiKey);
|
|
742
830
|
if (shared) {
|
|
743
831
|
shared.heartbeat.destroy();
|
|
744
|
-
shared.shipper.
|
|
832
|
+
await shared.shipper.shutdown();
|
|
745
833
|
instances.delete(apiKey);
|
|
746
834
|
}
|
|
747
835
|
} else {
|
|
836
|
+
const shutdowns = [];
|
|
748
837
|
for (const [key, shared] of instances) {
|
|
749
838
|
shared.heartbeat.destroy();
|
|
750
|
-
shared.shipper.
|
|
839
|
+
shutdowns.push(shared.shipper.shutdown());
|
|
751
840
|
}
|
|
841
|
+
await Promise.all(shutdowns);
|
|
752
842
|
instances.clear();
|
|
753
843
|
}
|
|
754
844
|
}
|