caplyr 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.js +246 -184
  2. package/dist/index.mjs +246 -184
  3. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -107,6 +107,36 @@ var LogShipper = class {
107
107
  }
108
108
  };
109
109
 
110
+ // src/mutex.ts
111
+ var Mutex = class {
112
+ constructor() {
113
+ this.queue = [];
114
+ this.locked = false;
115
+ }
116
+ async acquire() {
117
+ if (!this.locked) {
118
+ this.locked = true;
119
+ return this.createRelease();
120
+ }
121
+ return new Promise((resolve) => {
122
+ this.queue.push(() => resolve(this.createRelease()));
123
+ });
124
+ }
125
+ createRelease() {
126
+ let released = false;
127
+ return () => {
128
+ if (released) return;
129
+ released = true;
130
+ const next = this.queue.shift();
131
+ if (next) {
132
+ next();
133
+ } else {
134
+ this.locked = false;
135
+ }
136
+ };
137
+ }
138
+ };
139
+
110
140
  // src/heartbeat.ts
111
141
  var Heartbeat = class {
112
142
  constructor(config) {
@@ -124,6 +154,8 @@ var Heartbeat = class {
124
154
  };
125
155
  /** Current protection status */
126
156
  this.status = "ACTIVE";
157
+ /** Mutex for serializing budget check → API call → trackSpend */
158
+ this.budgetMutex = new Mutex();
127
159
  /** Local budget limits set via config (not from server) */
128
160
  this.localDailyLimit = null;
129
161
  this.localMonthlyLimit = null;
@@ -176,14 +208,18 @@ var Heartbeat = class {
176
208
  const data = await res.json();
177
209
  const localDailyUsed = this.budgetStatus.daily_used;
178
210
  const localMonthlyUsed = this.budgetStatus.monthly_used;
211
+ const serverDailyUsed = Number(data.daily_used) || 0;
212
+ const serverMonthlyUsed = Number(data.monthly_used) || 0;
213
+ const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
214
+ const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
179
215
  this.budgetStatus = {
180
216
  ...data,
181
217
  // Use whichever spend is higher — server or local tracking
182
- daily_used: Math.max(data.daily_used ?? 0, localDailyUsed),
183
- monthly_used: Math.max(data.monthly_used ?? 0, localMonthlyUsed),
184
- // Preserve local limits if server doesn't provide them
185
- daily_limit: data.daily_limit ?? this.localDailyLimit,
186
- monthly_limit: data.monthly_limit ?? this.localMonthlyLimit
218
+ daily_used: Math.max(serverDailyUsed, localDailyUsed),
219
+ monthly_used: Math.max(serverMonthlyUsed, localMonthlyUsed),
220
+ // Use the stricter (lower) limit — local config takes priority if lower
221
+ daily_limit: this.pickStricterLimit(serverDailyLimit, this.localDailyLimit),
222
+ monthly_limit: this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit)
187
223
  };
188
224
  this.consecutiveFailures = 0;
189
225
  const newStatus = data.kill_switch_active ? "OFF" : data.status;
@@ -200,6 +236,22 @@ var Heartbeat = class {
200
236
  }
201
237
  }
202
238
  }
239
+ /**
240
+ * Pick the stricter (lower) of two limits.
241
+ * If one is null, use the other.
242
+ */
243
+ pickStricterLimit(a, b) {
244
+ if (a === null) return b;
245
+ if (b === null) return a;
246
+ return Math.min(a, b);
247
+ }
248
+ /**
249
+ * Force an immediate heartbeat poll (useful for kill switch checks).
250
+ * Returns a promise that resolves when the poll completes.
251
+ */
252
+ async forcePoll() {
253
+ await this.beat();
254
+ }
203
255
  /**
204
256
  * Update local budget tracking (called after each request).
205
257
  * This provides real-time budget awareness between heartbeats.
@@ -362,18 +414,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
362
414
  });
363
415
  }
364
416
  }
365
- if (config.mode === "cost_protect") {
366
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
367
- blocked = true;
368
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
369
- const blockError = {
370
- code: "BUDGET_EXCEEDED",
371
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
372
- budget_used: heartbeat.budgetStatus.monthly_used,
373
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
374
- retry_after: getNextResetTime(enforcementReason),
375
- dashboard_url: dashboardUrl
376
- };
417
+ const release = await heartbeat.budgetMutex.acquire();
418
+ try {
419
+ if (config.mode === "cost_protect") {
420
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
421
+ blocked = true;
422
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
423
+ const blockError = {
424
+ code: "BUDGET_EXCEEDED",
425
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
426
+ budget_used: heartbeat.budgetStatus.monthly_used,
427
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
428
+ retry_after: getNextResetTime(enforcementReason),
429
+ dashboard_url: dashboardUrl
430
+ };
431
+ shipper.push({
432
+ id: generateId(),
433
+ timestamp: startTime,
434
+ provider: "anthropic",
435
+ model,
436
+ input_tokens: 0,
437
+ output_tokens: 0,
438
+ cost: 0,
439
+ latency_ms: Date.now() - startTime,
440
+ endpoint_tag: config.endpoint_tag,
441
+ downgraded: false,
442
+ blocked: true,
443
+ enforcement_reason: enforcementReason
444
+ });
445
+ throw Object.assign(new Error(blockError.message), {
446
+ caplyr: blockError
447
+ });
448
+ }
449
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
450
+ const fallback = config.fallback ?? getDefaultFallback(model);
451
+ if (fallback && fallback !== model) {
452
+ originalModel = model;
453
+ model = fallback;
454
+ downgraded = true;
455
+ enforcementReason = "auto_downgrade_threshold";
456
+ config.onEnforcement?.({
457
+ type: "downgrade",
458
+ timestamp: Date.now(),
459
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
460
+ original_model: originalModel,
461
+ fallback_model: model,
462
+ budget_used: heartbeat.budgetStatus.monthly_used,
463
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
464
+ estimated_savings: 0
465
+ // Calculated after response
466
+ });
467
+ }
468
+ }
469
+ }
470
+ const requestParams = downgraded ? { ...params, model } : params;
471
+ try {
472
+ const response = await target.create.call(
473
+ target,
474
+ requestParams,
475
+ options
476
+ );
477
+ const latency = Date.now() - startTime;
478
+ const inputTokens = response?.usage?.input_tokens ?? 0;
479
+ const outputTokens = response?.usage?.output_tokens ?? 0;
480
+ const cost = calculateCost(model, inputTokens, outputTokens);
481
+ heartbeat.trackSpend(cost);
482
+ let estimatedSavings = 0;
483
+ if (downgraded && originalModel) {
484
+ const originalCost = calculateCost(
485
+ originalModel,
486
+ inputTokens,
487
+ outputTokens
488
+ );
489
+ estimatedSavings = originalCost - cost;
490
+ }
491
+ shipper.push({
492
+ id: generateId(),
493
+ timestamp: startTime,
494
+ provider: "anthropic",
495
+ model,
496
+ input_tokens: inputTokens,
497
+ output_tokens: outputTokens,
498
+ cost,
499
+ latency_ms: latency,
500
+ endpoint_tag: config.endpoint_tag,
501
+ downgraded,
502
+ original_model: originalModel,
503
+ blocked: false,
504
+ enforcement_reason: enforcementReason
505
+ });
506
+ return response;
507
+ } catch (err) {
508
+ if (err?.caplyr) throw err;
377
509
  shipper.push({
378
510
  id: generateId(),
379
511
  timestamp: startTime,
@@ -384,90 +516,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
384
516
  cost: 0,
385
517
  latency_ms: Date.now() - startTime,
386
518
  endpoint_tag: config.endpoint_tag,
387
- downgraded: false,
388
- blocked: true,
389
- enforcement_reason: enforcementReason
390
- });
391
- throw Object.assign(new Error(blockError.message), {
392
- caplyr: blockError
519
+ downgraded,
520
+ original_model: originalModel,
521
+ blocked: false,
522
+ enforcement_reason: "provider_error"
393
523
  });
524
+ throw err;
394
525
  }
395
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
396
- const fallback = config.fallback ?? getDefaultFallback(model);
397
- if (fallback && fallback !== model) {
398
- originalModel = model;
399
- model = fallback;
400
- downgraded = true;
401
- enforcementReason = "auto_downgrade_threshold";
402
- config.onEnforcement?.({
403
- type: "downgrade",
404
- timestamp: Date.now(),
405
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
406
- original_model: originalModel,
407
- fallback_model: model,
408
- budget_used: heartbeat.budgetStatus.monthly_used,
409
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
410
- estimated_savings: 0
411
- // Calculated after response
412
- });
413
- }
414
- }
415
- }
416
- const requestParams = downgraded ? { ...params, model } : params;
417
- try {
418
- const response = await target.create.call(
419
- target,
420
- requestParams,
421
- options
422
- );
423
- const latency = Date.now() - startTime;
424
- const inputTokens = response?.usage?.input_tokens ?? 0;
425
- const outputTokens = response?.usage?.output_tokens ?? 0;
426
- const cost = calculateCost(model, inputTokens, outputTokens);
427
- heartbeat.trackSpend(cost);
428
- let estimatedSavings = 0;
429
- if (downgraded && originalModel) {
430
- const originalCost = calculateCost(
431
- originalModel,
432
- inputTokens,
433
- outputTokens
434
- );
435
- estimatedSavings = originalCost - cost;
436
- }
437
- shipper.push({
438
- id: generateId(),
439
- timestamp: startTime,
440
- provider: "anthropic",
441
- model,
442
- input_tokens: inputTokens,
443
- output_tokens: outputTokens,
444
- cost,
445
- latency_ms: latency,
446
- endpoint_tag: config.endpoint_tag,
447
- downgraded,
448
- original_model: originalModel,
449
- blocked: false,
450
- enforcement_reason: enforcementReason
451
- });
452
- return response;
453
- } catch (err) {
454
- if (err?.caplyr) throw err;
455
- shipper.push({
456
- id: generateId(),
457
- timestamp: startTime,
458
- provider: "anthropic",
459
- model,
460
- input_tokens: 0,
461
- output_tokens: 0,
462
- cost: 0,
463
- latency_ms: Date.now() - startTime,
464
- endpoint_tag: config.endpoint_tag,
465
- downgraded,
466
- original_model: originalModel,
467
- blocked: false,
468
- enforcement_reason: "provider_error"
469
- });
470
- throw err;
526
+ } finally {
527
+ release();
471
528
  }
472
529
  };
473
530
  }
@@ -544,18 +601,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
544
601
  });
545
602
  }
546
603
  }
547
- if (config.mode === "cost_protect") {
548
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
549
- blocked = true;
550
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
551
- const blockError = {
552
- code: "BUDGET_EXCEEDED",
553
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
554
- budget_used: heartbeat.budgetStatus.monthly_used,
555
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
556
- retry_after: getNextResetTime2(enforcementReason),
557
- dashboard_url: dashboardUrl
558
- };
604
+ const release = await heartbeat.budgetMutex.acquire();
605
+ try {
606
+ if (config.mode === "cost_protect") {
607
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
608
+ blocked = true;
609
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
610
+ const blockError = {
611
+ code: "BUDGET_EXCEEDED",
612
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
613
+ budget_used: heartbeat.budgetStatus.monthly_used,
614
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
615
+ retry_after: getNextResetTime2(enforcementReason),
616
+ dashboard_url: dashboardUrl
617
+ };
618
+ shipper.push({
619
+ id: generateId2(),
620
+ timestamp: startTime,
621
+ provider: "openai",
622
+ model,
623
+ input_tokens: 0,
624
+ output_tokens: 0,
625
+ cost: 0,
626
+ latency_ms: Date.now() - startTime,
627
+ endpoint_tag: config.endpoint_tag,
628
+ downgraded: false,
629
+ blocked: true,
630
+ enforcement_reason: enforcementReason
631
+ });
632
+ throw Object.assign(new Error(blockError.message), {
633
+ caplyr: blockError
634
+ });
635
+ }
636
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
637
+ const fallback = config.fallback ?? getDefaultFallback(model);
638
+ if (fallback && fallback !== model) {
639
+ originalModel = model;
640
+ model = fallback;
641
+ downgraded = true;
642
+ enforcementReason = "auto_downgrade_threshold";
643
+ config.onEnforcement?.({
644
+ type: "downgrade",
645
+ timestamp: Date.now(),
646
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
647
+ original_model: originalModel,
648
+ fallback_model: model,
649
+ budget_used: heartbeat.budgetStatus.monthly_used,
650
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
651
+ estimated_savings: 0
652
+ });
653
+ }
654
+ }
655
+ }
656
+ const requestParams = downgraded ? { ...params, model } : params;
657
+ try {
658
+ const response = await target.create.call(
659
+ target,
660
+ requestParams,
661
+ options
662
+ );
663
+ const latency = Date.now() - startTime;
664
+ const usage = response?.usage;
665
+ const inputTokens = usage?.prompt_tokens ?? 0;
666
+ const outputTokens = usage?.completion_tokens ?? 0;
667
+ const cost = calculateCost(model, inputTokens, outputTokens);
668
+ heartbeat.trackSpend(cost);
669
+ shipper.push({
670
+ id: generateId2(),
671
+ timestamp: startTime,
672
+ provider: "openai",
673
+ model,
674
+ input_tokens: inputTokens,
675
+ output_tokens: outputTokens,
676
+ cost,
677
+ latency_ms: latency,
678
+ endpoint_tag: config.endpoint_tag,
679
+ downgraded,
680
+ original_model: originalModel,
681
+ blocked: false,
682
+ enforcement_reason: enforcementReason
683
+ });
684
+ return response;
685
+ } catch (err) {
686
+ if (err?.caplyr) throw err;
559
687
  shipper.push({
560
688
  id: generateId2(),
561
689
  timestamp: startTime,
@@ -566,81 +694,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
566
694
  cost: 0,
567
695
  latency_ms: Date.now() - startTime,
568
696
  endpoint_tag: config.endpoint_tag,
569
- downgraded: false,
570
- blocked: true,
571
- enforcement_reason: enforcementReason
572
- });
573
- throw Object.assign(new Error(blockError.message), {
574
- caplyr: blockError
697
+ downgraded,
698
+ original_model: originalModel,
699
+ blocked: false,
700
+ enforcement_reason: "provider_error"
575
701
  });
702
+ throw err;
576
703
  }
577
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
578
- const fallback = config.fallback ?? getDefaultFallback(model);
579
- if (fallback && fallback !== model) {
580
- originalModel = model;
581
- model = fallback;
582
- downgraded = true;
583
- enforcementReason = "auto_downgrade_threshold";
584
- config.onEnforcement?.({
585
- type: "downgrade",
586
- timestamp: Date.now(),
587
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
588
- original_model: originalModel,
589
- fallback_model: model,
590
- budget_used: heartbeat.budgetStatus.monthly_used,
591
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
592
- estimated_savings: 0
593
- });
594
- }
595
- }
596
- }
597
- const requestParams = downgraded ? { ...params, model } : params;
598
- try {
599
- const response = await target.create.call(
600
- target,
601
- requestParams,
602
- options
603
- );
604
- const latency = Date.now() - startTime;
605
- const usage = response?.usage;
606
- const inputTokens = usage?.prompt_tokens ?? 0;
607
- const outputTokens = usage?.completion_tokens ?? 0;
608
- const cost = calculateCost(model, inputTokens, outputTokens);
609
- heartbeat.trackSpend(cost);
610
- shipper.push({
611
- id: generateId2(),
612
- timestamp: startTime,
613
- provider: "openai",
614
- model,
615
- input_tokens: inputTokens,
616
- output_tokens: outputTokens,
617
- cost,
618
- latency_ms: latency,
619
- endpoint_tag: config.endpoint_tag,
620
- downgraded,
621
- original_model: originalModel,
622
- blocked: false,
623
- enforcement_reason: enforcementReason
624
- });
625
- return response;
626
- } catch (err) {
627
- if (err?.caplyr) throw err;
628
- shipper.push({
629
- id: generateId2(),
630
- timestamp: startTime,
631
- provider: "openai",
632
- model,
633
- input_tokens: 0,
634
- output_tokens: 0,
635
- cost: 0,
636
- latency_ms: Date.now() - startTime,
637
- endpoint_tag: config.endpoint_tag,
638
- downgraded,
639
- original_model: originalModel,
640
- blocked: false,
641
- enforcement_reason: "provider_error"
642
- });
643
- throw err;
704
+ } finally {
705
+ release();
644
706
  }
645
707
  };
646
708
  }
package/dist/index.mjs CHANGED
@@ -74,6 +74,36 @@ var LogShipper = class {
74
74
  }
75
75
  };
76
76
 
77
+ // src/mutex.ts
78
+ var Mutex = class {
79
+ constructor() {
80
+ this.queue = [];
81
+ this.locked = false;
82
+ }
83
+ async acquire() {
84
+ if (!this.locked) {
85
+ this.locked = true;
86
+ return this.createRelease();
87
+ }
88
+ return new Promise((resolve) => {
89
+ this.queue.push(() => resolve(this.createRelease()));
90
+ });
91
+ }
92
+ createRelease() {
93
+ let released = false;
94
+ return () => {
95
+ if (released) return;
96
+ released = true;
97
+ const next = this.queue.shift();
98
+ if (next) {
99
+ next();
100
+ } else {
101
+ this.locked = false;
102
+ }
103
+ };
104
+ }
105
+ };
106
+
77
107
  // src/heartbeat.ts
78
108
  var Heartbeat = class {
79
109
  constructor(config) {
@@ -91,6 +121,8 @@ var Heartbeat = class {
91
121
  };
92
122
  /** Current protection status */
93
123
  this.status = "ACTIVE";
124
+ /** Mutex for serializing budget check → API call → trackSpend */
125
+ this.budgetMutex = new Mutex();
94
126
  /** Local budget limits set via config (not from server) */
95
127
  this.localDailyLimit = null;
96
128
  this.localMonthlyLimit = null;
@@ -143,14 +175,18 @@ var Heartbeat = class {
143
175
  const data = await res.json();
144
176
  const localDailyUsed = this.budgetStatus.daily_used;
145
177
  const localMonthlyUsed = this.budgetStatus.monthly_used;
178
+ const serverDailyUsed = Number(data.daily_used) || 0;
179
+ const serverMonthlyUsed = Number(data.monthly_used) || 0;
180
+ const serverDailyLimit = data.daily_limit != null ? Number(data.daily_limit) : null;
181
+ const serverMonthlyLimit = data.monthly_limit != null ? Number(data.monthly_limit) : null;
146
182
  this.budgetStatus = {
147
183
  ...data,
148
184
  // Use whichever spend is higher — server or local tracking
149
- daily_used: Math.max(data.daily_used ?? 0, localDailyUsed),
150
- monthly_used: Math.max(data.monthly_used ?? 0, localMonthlyUsed),
151
- // Preserve local limits if server doesn't provide them
152
- daily_limit: data.daily_limit ?? this.localDailyLimit,
153
- monthly_limit: data.monthly_limit ?? this.localMonthlyLimit
185
+ daily_used: Math.max(serverDailyUsed, localDailyUsed),
186
+ monthly_used: Math.max(serverMonthlyUsed, localMonthlyUsed),
187
+ // Use the stricter (lower) limit — local config takes priority if lower
188
+ daily_limit: this.pickStricterLimit(serverDailyLimit, this.localDailyLimit),
189
+ monthly_limit: this.pickStricterLimit(serverMonthlyLimit, this.localMonthlyLimit)
154
190
  };
155
191
  this.consecutiveFailures = 0;
156
192
  const newStatus = data.kill_switch_active ? "OFF" : data.status;
@@ -167,6 +203,22 @@ var Heartbeat = class {
167
203
  }
168
204
  }
169
205
  }
206
+ /**
207
+ * Pick the stricter (lower) of two limits.
208
+ * If one is null, use the other.
209
+ */
210
+ pickStricterLimit(a, b) {
211
+ if (a === null) return b;
212
+ if (b === null) return a;
213
+ return Math.min(a, b);
214
+ }
215
+ /**
216
+ * Force an immediate heartbeat poll (useful for kill switch checks).
217
+ * Returns a promise that resolves when the poll completes.
218
+ */
219
+ async forcePoll() {
220
+ await this.beat();
221
+ }
170
222
  /**
171
223
  * Update local budget tracking (called after each request).
172
224
  * This provides real-time budget awareness between heartbeats.
@@ -329,18 +381,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
329
381
  });
330
382
  }
331
383
  }
332
- if (config.mode === "cost_protect") {
333
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
334
- blocked = true;
335
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
336
- const blockError = {
337
- code: "BUDGET_EXCEEDED",
338
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
339
- budget_used: heartbeat.budgetStatus.monthly_used,
340
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
341
- retry_after: getNextResetTime(enforcementReason),
342
- dashboard_url: dashboardUrl
343
- };
384
+ const release = await heartbeat.budgetMutex.acquire();
385
+ try {
386
+ if (config.mode === "cost_protect") {
387
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
388
+ blocked = true;
389
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
390
+ const blockError = {
391
+ code: "BUDGET_EXCEEDED",
392
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
393
+ budget_used: heartbeat.budgetStatus.monthly_used,
394
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
395
+ retry_after: getNextResetTime(enforcementReason),
396
+ dashboard_url: dashboardUrl
397
+ };
398
+ shipper.push({
399
+ id: generateId(),
400
+ timestamp: startTime,
401
+ provider: "anthropic",
402
+ model,
403
+ input_tokens: 0,
404
+ output_tokens: 0,
405
+ cost: 0,
406
+ latency_ms: Date.now() - startTime,
407
+ endpoint_tag: config.endpoint_tag,
408
+ downgraded: false,
409
+ blocked: true,
410
+ enforcement_reason: enforcementReason
411
+ });
412
+ throw Object.assign(new Error(blockError.message), {
413
+ caplyr: blockError
414
+ });
415
+ }
416
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
417
+ const fallback = config.fallback ?? getDefaultFallback(model);
418
+ if (fallback && fallback !== model) {
419
+ originalModel = model;
420
+ model = fallback;
421
+ downgraded = true;
422
+ enforcementReason = "auto_downgrade_threshold";
423
+ config.onEnforcement?.({
424
+ type: "downgrade",
425
+ timestamp: Date.now(),
426
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
427
+ original_model: originalModel,
428
+ fallback_model: model,
429
+ budget_used: heartbeat.budgetStatus.monthly_used,
430
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
431
+ estimated_savings: 0
432
+ // Calculated after response
433
+ });
434
+ }
435
+ }
436
+ }
437
+ const requestParams = downgraded ? { ...params, model } : params;
438
+ try {
439
+ const response = await target.create.call(
440
+ target,
441
+ requestParams,
442
+ options
443
+ );
444
+ const latency = Date.now() - startTime;
445
+ const inputTokens = response?.usage?.input_tokens ?? 0;
446
+ const outputTokens = response?.usage?.output_tokens ?? 0;
447
+ const cost = calculateCost(model, inputTokens, outputTokens);
448
+ heartbeat.trackSpend(cost);
449
+ let estimatedSavings = 0;
450
+ if (downgraded && originalModel) {
451
+ const originalCost = calculateCost(
452
+ originalModel,
453
+ inputTokens,
454
+ outputTokens
455
+ );
456
+ estimatedSavings = originalCost - cost;
457
+ }
458
+ shipper.push({
459
+ id: generateId(),
460
+ timestamp: startTime,
461
+ provider: "anthropic",
462
+ model,
463
+ input_tokens: inputTokens,
464
+ output_tokens: outputTokens,
465
+ cost,
466
+ latency_ms: latency,
467
+ endpoint_tag: config.endpoint_tag,
468
+ downgraded,
469
+ original_model: originalModel,
470
+ blocked: false,
471
+ enforcement_reason: enforcementReason
472
+ });
473
+ return response;
474
+ } catch (err) {
475
+ if (err?.caplyr) throw err;
344
476
  shipper.push({
345
477
  id: generateId(),
346
478
  timestamp: startTime,
@@ -351,90 +483,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
351
483
  cost: 0,
352
484
  latency_ms: Date.now() - startTime,
353
485
  endpoint_tag: config.endpoint_tag,
354
- downgraded: false,
355
- blocked: true,
356
- enforcement_reason: enforcementReason
357
- });
358
- throw Object.assign(new Error(blockError.message), {
359
- caplyr: blockError
486
+ downgraded,
487
+ original_model: originalModel,
488
+ blocked: false,
489
+ enforcement_reason: "provider_error"
360
490
  });
491
+ throw err;
361
492
  }
362
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
363
- const fallback = config.fallback ?? getDefaultFallback(model);
364
- if (fallback && fallback !== model) {
365
- originalModel = model;
366
- model = fallback;
367
- downgraded = true;
368
- enforcementReason = "auto_downgrade_threshold";
369
- config.onEnforcement?.({
370
- type: "downgrade",
371
- timestamp: Date.now(),
372
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
373
- original_model: originalModel,
374
- fallback_model: model,
375
- budget_used: heartbeat.budgetStatus.monthly_used,
376
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
377
- estimated_savings: 0
378
- // Calculated after response
379
- });
380
- }
381
- }
382
- }
383
- const requestParams = downgraded ? { ...params, model } : params;
384
- try {
385
- const response = await target.create.call(
386
- target,
387
- requestParams,
388
- options
389
- );
390
- const latency = Date.now() - startTime;
391
- const inputTokens = response?.usage?.input_tokens ?? 0;
392
- const outputTokens = response?.usage?.output_tokens ?? 0;
393
- const cost = calculateCost(model, inputTokens, outputTokens);
394
- heartbeat.trackSpend(cost);
395
- let estimatedSavings = 0;
396
- if (downgraded && originalModel) {
397
- const originalCost = calculateCost(
398
- originalModel,
399
- inputTokens,
400
- outputTokens
401
- );
402
- estimatedSavings = originalCost - cost;
403
- }
404
- shipper.push({
405
- id: generateId(),
406
- timestamp: startTime,
407
- provider: "anthropic",
408
- model,
409
- input_tokens: inputTokens,
410
- output_tokens: outputTokens,
411
- cost,
412
- latency_ms: latency,
413
- endpoint_tag: config.endpoint_tag,
414
- downgraded,
415
- original_model: originalModel,
416
- blocked: false,
417
- enforcement_reason: enforcementReason
418
- });
419
- return response;
420
- } catch (err) {
421
- if (err?.caplyr) throw err;
422
- shipper.push({
423
- id: generateId(),
424
- timestamp: startTime,
425
- provider: "anthropic",
426
- model,
427
- input_tokens: 0,
428
- output_tokens: 0,
429
- cost: 0,
430
- latency_ms: Date.now() - startTime,
431
- endpoint_tag: config.endpoint_tag,
432
- downgraded,
433
- original_model: originalModel,
434
- blocked: false,
435
- enforcement_reason: "provider_error"
436
- });
437
- throw err;
493
+ } finally {
494
+ release();
438
495
  }
439
496
  };
440
497
  }
@@ -511,18 +568,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
511
568
  });
512
569
  }
513
570
  }
514
- if (config.mode === "cost_protect") {
515
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
516
- blocked = true;
517
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
518
- const blockError = {
519
- code: "BUDGET_EXCEEDED",
520
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
521
- budget_used: heartbeat.budgetStatus.monthly_used,
522
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
523
- retry_after: getNextResetTime2(enforcementReason),
524
- dashboard_url: dashboardUrl
525
- };
571
+ const release = await heartbeat.budgetMutex.acquire();
572
+ try {
573
+ if (config.mode === "cost_protect") {
574
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
575
+ blocked = true;
576
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
577
+ const blockError = {
578
+ code: "BUDGET_EXCEEDED",
579
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
580
+ budget_used: heartbeat.budgetStatus.monthly_used,
581
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
582
+ retry_after: getNextResetTime2(enforcementReason),
583
+ dashboard_url: dashboardUrl
584
+ };
585
+ shipper.push({
586
+ id: generateId2(),
587
+ timestamp: startTime,
588
+ provider: "openai",
589
+ model,
590
+ input_tokens: 0,
591
+ output_tokens: 0,
592
+ cost: 0,
593
+ latency_ms: Date.now() - startTime,
594
+ endpoint_tag: config.endpoint_tag,
595
+ downgraded: false,
596
+ blocked: true,
597
+ enforcement_reason: enforcementReason
598
+ });
599
+ throw Object.assign(new Error(blockError.message), {
600
+ caplyr: blockError
601
+ });
602
+ }
603
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
604
+ const fallback = config.fallback ?? getDefaultFallback(model);
605
+ if (fallback && fallback !== model) {
606
+ originalModel = model;
607
+ model = fallback;
608
+ downgraded = true;
609
+ enforcementReason = "auto_downgrade_threshold";
610
+ config.onEnforcement?.({
611
+ type: "downgrade",
612
+ timestamp: Date.now(),
613
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
614
+ original_model: originalModel,
615
+ fallback_model: model,
616
+ budget_used: heartbeat.budgetStatus.monthly_used,
617
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
618
+ estimated_savings: 0
619
+ });
620
+ }
621
+ }
622
+ }
623
+ const requestParams = downgraded ? { ...params, model } : params;
624
+ try {
625
+ const response = await target.create.call(
626
+ target,
627
+ requestParams,
628
+ options
629
+ );
630
+ const latency = Date.now() - startTime;
631
+ const usage = response?.usage;
632
+ const inputTokens = usage?.prompt_tokens ?? 0;
633
+ const outputTokens = usage?.completion_tokens ?? 0;
634
+ const cost = calculateCost(model, inputTokens, outputTokens);
635
+ heartbeat.trackSpend(cost);
636
+ shipper.push({
637
+ id: generateId2(),
638
+ timestamp: startTime,
639
+ provider: "openai",
640
+ model,
641
+ input_tokens: inputTokens,
642
+ output_tokens: outputTokens,
643
+ cost,
644
+ latency_ms: latency,
645
+ endpoint_tag: config.endpoint_tag,
646
+ downgraded,
647
+ original_model: originalModel,
648
+ blocked: false,
649
+ enforcement_reason: enforcementReason
650
+ });
651
+ return response;
652
+ } catch (err) {
653
+ if (err?.caplyr) throw err;
526
654
  shipper.push({
527
655
  id: generateId2(),
528
656
  timestamp: startTime,
@@ -533,81 +661,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
533
661
  cost: 0,
534
662
  latency_ms: Date.now() - startTime,
535
663
  endpoint_tag: config.endpoint_tag,
536
- downgraded: false,
537
- blocked: true,
538
- enforcement_reason: enforcementReason
539
- });
540
- throw Object.assign(new Error(blockError.message), {
541
- caplyr: blockError
664
+ downgraded,
665
+ original_model: originalModel,
666
+ blocked: false,
667
+ enforcement_reason: "provider_error"
542
668
  });
669
+ throw err;
543
670
  }
544
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
545
- const fallback = config.fallback ?? getDefaultFallback(model);
546
- if (fallback && fallback !== model) {
547
- originalModel = model;
548
- model = fallback;
549
- downgraded = true;
550
- enforcementReason = "auto_downgrade_threshold";
551
- config.onEnforcement?.({
552
- type: "downgrade",
553
- timestamp: Date.now(),
554
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
555
- original_model: originalModel,
556
- fallback_model: model,
557
- budget_used: heartbeat.budgetStatus.monthly_used,
558
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
559
- estimated_savings: 0
560
- });
561
- }
562
- }
563
- }
564
- const requestParams = downgraded ? { ...params, model } : params;
565
- try {
566
- const response = await target.create.call(
567
- target,
568
- requestParams,
569
- options
570
- );
571
- const latency = Date.now() - startTime;
572
- const usage = response?.usage;
573
- const inputTokens = usage?.prompt_tokens ?? 0;
574
- const outputTokens = usage?.completion_tokens ?? 0;
575
- const cost = calculateCost(model, inputTokens, outputTokens);
576
- heartbeat.trackSpend(cost);
577
- shipper.push({
578
- id: generateId2(),
579
- timestamp: startTime,
580
- provider: "openai",
581
- model,
582
- input_tokens: inputTokens,
583
- output_tokens: outputTokens,
584
- cost,
585
- latency_ms: latency,
586
- endpoint_tag: config.endpoint_tag,
587
- downgraded,
588
- original_model: originalModel,
589
- blocked: false,
590
- enforcement_reason: enforcementReason
591
- });
592
- return response;
593
- } catch (err) {
594
- if (err?.caplyr) throw err;
595
- shipper.push({
596
- id: generateId2(),
597
- timestamp: startTime,
598
- provider: "openai",
599
- model,
600
- input_tokens: 0,
601
- output_tokens: 0,
602
- cost: 0,
603
- latency_ms: Date.now() - startTime,
604
- endpoint_tag: config.endpoint_tag,
605
- downgraded,
606
- original_model: originalModel,
607
- blocked: false,
608
- enforcement_reason: "provider_error"
609
- });
610
- throw err;
671
+ } finally {
672
+ release();
611
673
  }
612
674
  };
613
675
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "caplyr",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "description": "AI Cost Control Plane — budget guardrails, auto-downgrade, and kill switch for AI API calls",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",