caplyr 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/index.js +221 -179
  2. package/dist/index.mjs +221 -179
  3. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -107,6 +107,36 @@ var LogShipper = class {
107
107
  }
108
108
  };
109
109
 
110
+ // src/mutex.ts
111
+ var Mutex = class { // Promise-based lock; waiters are queued with push() and resumed with shift(), i.e. in FIFO order
111
+ constructor() {
112
+ this.queue = []; // callbacks of callers waiting for the lock, oldest first
113
+ this.locked = false; // true while some caller holds the lock
114
+ }
115
+ async acquire() { // resolves to a release function once the caller holds the lock
116
+ if (!this.locked) { // uncontended: take the lock right away
117
+ this.locked = true;
118
+ return this.createRelease();
119
+ }
120
+ return new Promise((resolve) => { // contended: park until a release() call runs this waiter's callback
121
+ this.queue.push(() => resolve(this.createRelease()));
122
+ });
123
+ }
124
+ createRelease() { // builds a one-shot release function for the current holder
125
+ let released = false;
126
+ return () => {
127
+ if (released) return; // repeated calls to the same release function are no-ops
128
+ released = true;
129
+ const next = this.queue.shift();
130
+ if (next) {
131
+ next(); // hand the lock straight to the oldest waiter; `locked` stays true
132
+ } else {
133
+ this.locked = false; // nobody waiting: mark the lock free
134
+ }
135
+ };
136
+ }
137
+ };
139
+
110
140
  // src/heartbeat.ts
111
141
  var Heartbeat = class {
112
142
  constructor(config) {
@@ -124,6 +154,8 @@ var Heartbeat = class {
124
154
  };
125
155
  /** Current protection status */
126
156
  this.status = "ACTIVE";
157
+ /** Mutex for serializing budget check → API call → trackSpend */
158
+ this.budgetMutex = new Mutex();
127
159
  /** Local budget limits set via config (not from server) */
128
160
  this.localDailyLimit = null;
129
161
  this.localMonthlyLimit = null;
@@ -382,18 +414,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
382
414
  });
383
415
  }
384
416
  }
385
- if (config.mode === "cost_protect") {
386
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
387
- blocked = true;
388
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
389
- const blockError = {
390
- code: "BUDGET_EXCEEDED",
391
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
392
- budget_used: heartbeat.budgetStatus.monthly_used,
393
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
394
- retry_after: getNextResetTime(enforcementReason),
395
- dashboard_url: dashboardUrl
396
- };
417
+ const release = await heartbeat.budgetMutex.acquire();
418
+ try {
419
+ if (config.mode === "cost_protect") {
420
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
421
+ blocked = true;
422
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
423
+ const blockError = {
424
+ code: "BUDGET_EXCEEDED",
425
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
426
+ budget_used: heartbeat.budgetStatus.monthly_used,
427
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
428
+ retry_after: getNextResetTime(enforcementReason),
429
+ dashboard_url: dashboardUrl
430
+ };
431
+ shipper.push({
432
+ id: generateId(),
433
+ timestamp: startTime,
434
+ provider: "anthropic",
435
+ model,
436
+ input_tokens: 0,
437
+ output_tokens: 0,
438
+ cost: 0,
439
+ latency_ms: Date.now() - startTime,
440
+ endpoint_tag: config.endpoint_tag,
441
+ downgraded: false,
442
+ blocked: true,
443
+ enforcement_reason: enforcementReason
444
+ });
445
+ throw Object.assign(new Error(blockError.message), {
446
+ caplyr: blockError
447
+ });
448
+ }
449
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
450
+ const fallback = config.fallback ?? getDefaultFallback(model);
451
+ if (fallback && fallback !== model) {
452
+ originalModel = model;
453
+ model = fallback;
454
+ downgraded = true;
455
+ enforcementReason = "auto_downgrade_threshold";
456
+ config.onEnforcement?.({
457
+ type: "downgrade",
458
+ timestamp: Date.now(),
459
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
460
+ original_model: originalModel,
461
+ fallback_model: model,
462
+ budget_used: heartbeat.budgetStatus.monthly_used,
463
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
464
+ estimated_savings: 0
465
+ // Calculated after response
466
+ });
467
+ }
468
+ }
469
+ }
470
+ const requestParams = downgraded ? { ...params, model } : params;
471
+ try {
472
+ const response = await target.create.call(
473
+ target,
474
+ requestParams,
475
+ options
476
+ );
477
+ const latency = Date.now() - startTime;
478
+ const inputTokens = response?.usage?.input_tokens ?? 0;
479
+ const outputTokens = response?.usage?.output_tokens ?? 0;
480
+ const cost = calculateCost(model, inputTokens, outputTokens);
481
+ heartbeat.trackSpend(cost);
482
+ let estimatedSavings = 0;
483
+ if (downgraded && originalModel) {
484
+ const originalCost = calculateCost(
485
+ originalModel,
486
+ inputTokens,
487
+ outputTokens
488
+ );
489
+ estimatedSavings = originalCost - cost;
490
+ }
491
+ shipper.push({
492
+ id: generateId(),
493
+ timestamp: startTime,
494
+ provider: "anthropic",
495
+ model,
496
+ input_tokens: inputTokens,
497
+ output_tokens: outputTokens,
498
+ cost,
499
+ latency_ms: latency,
500
+ endpoint_tag: config.endpoint_tag,
501
+ downgraded,
502
+ original_model: originalModel,
503
+ blocked: false,
504
+ enforcement_reason: enforcementReason
505
+ });
506
+ return response;
507
+ } catch (err) {
508
+ if (err?.caplyr) throw err;
397
509
  shipper.push({
398
510
  id: generateId(),
399
511
  timestamp: startTime,
@@ -404,90 +516,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
404
516
  cost: 0,
405
517
  latency_ms: Date.now() - startTime,
406
518
  endpoint_tag: config.endpoint_tag,
407
- downgraded: false,
408
- blocked: true,
409
- enforcement_reason: enforcementReason
410
- });
411
- throw Object.assign(new Error(blockError.message), {
412
- caplyr: blockError
519
+ downgraded,
520
+ original_model: originalModel,
521
+ blocked: false,
522
+ enforcement_reason: "provider_error"
413
523
  });
524
+ throw err;
414
525
  }
415
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
416
- const fallback = config.fallback ?? getDefaultFallback(model);
417
- if (fallback && fallback !== model) {
418
- originalModel = model;
419
- model = fallback;
420
- downgraded = true;
421
- enforcementReason = "auto_downgrade_threshold";
422
- config.onEnforcement?.({
423
- type: "downgrade",
424
- timestamp: Date.now(),
425
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
426
- original_model: originalModel,
427
- fallback_model: model,
428
- budget_used: heartbeat.budgetStatus.monthly_used,
429
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
430
- estimated_savings: 0
431
- // Calculated after response
432
- });
433
- }
434
- }
435
- }
436
- const requestParams = downgraded ? { ...params, model } : params;
437
- try {
438
- const response = await target.create.call(
439
- target,
440
- requestParams,
441
- options
442
- );
443
- const latency = Date.now() - startTime;
444
- const inputTokens = response?.usage?.input_tokens ?? 0;
445
- const outputTokens = response?.usage?.output_tokens ?? 0;
446
- const cost = calculateCost(model, inputTokens, outputTokens);
447
- heartbeat.trackSpend(cost);
448
- let estimatedSavings = 0;
449
- if (downgraded && originalModel) {
450
- const originalCost = calculateCost(
451
- originalModel,
452
- inputTokens,
453
- outputTokens
454
- );
455
- estimatedSavings = originalCost - cost;
456
- }
457
- shipper.push({
458
- id: generateId(),
459
- timestamp: startTime,
460
- provider: "anthropic",
461
- model,
462
- input_tokens: inputTokens,
463
- output_tokens: outputTokens,
464
- cost,
465
- latency_ms: latency,
466
- endpoint_tag: config.endpoint_tag,
467
- downgraded,
468
- original_model: originalModel,
469
- blocked: false,
470
- enforcement_reason: enforcementReason
471
- });
472
- return response;
473
- } catch (err) {
474
- if (err?.caplyr) throw err;
475
- shipper.push({
476
- id: generateId(),
477
- timestamp: startTime,
478
- provider: "anthropic",
479
- model,
480
- input_tokens: 0,
481
- output_tokens: 0,
482
- cost: 0,
483
- latency_ms: Date.now() - startTime,
484
- endpoint_tag: config.endpoint_tag,
485
- downgraded,
486
- original_model: originalModel,
487
- blocked: false,
488
- enforcement_reason: "provider_error"
489
- });
490
- throw err;
526
+ } finally {
527
+ release();
491
528
  }
492
529
  };
493
530
  }
@@ -564,18 +601,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
564
601
  });
565
602
  }
566
603
  }
567
- if (config.mode === "cost_protect") {
568
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
569
- blocked = true;
570
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
571
- const blockError = {
572
- code: "BUDGET_EXCEEDED",
573
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
574
- budget_used: heartbeat.budgetStatus.monthly_used,
575
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
576
- retry_after: getNextResetTime2(enforcementReason),
577
- dashboard_url: dashboardUrl
578
- };
604
+ const release = await heartbeat.budgetMutex.acquire();
605
+ try {
606
+ if (config.mode === "cost_protect") {
607
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
608
+ blocked = true;
609
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
610
+ const blockError = {
611
+ code: "BUDGET_EXCEEDED",
612
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
613
+ budget_used: heartbeat.budgetStatus.monthly_used,
614
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
615
+ retry_after: getNextResetTime2(enforcementReason),
616
+ dashboard_url: dashboardUrl
617
+ };
618
+ shipper.push({
619
+ id: generateId2(),
620
+ timestamp: startTime,
621
+ provider: "openai",
622
+ model,
623
+ input_tokens: 0,
624
+ output_tokens: 0,
625
+ cost: 0,
626
+ latency_ms: Date.now() - startTime,
627
+ endpoint_tag: config.endpoint_tag,
628
+ downgraded: false,
629
+ blocked: true,
630
+ enforcement_reason: enforcementReason
631
+ });
632
+ throw Object.assign(new Error(blockError.message), {
633
+ caplyr: blockError
634
+ });
635
+ }
636
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
637
+ const fallback = config.fallback ?? getDefaultFallback(model);
638
+ if (fallback && fallback !== model) {
639
+ originalModel = model;
640
+ model = fallback;
641
+ downgraded = true;
642
+ enforcementReason = "auto_downgrade_threshold";
643
+ config.onEnforcement?.({
644
+ type: "downgrade",
645
+ timestamp: Date.now(),
646
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
647
+ original_model: originalModel,
648
+ fallback_model: model,
649
+ budget_used: heartbeat.budgetStatus.monthly_used,
650
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
651
+ estimated_savings: 0
652
+ });
653
+ }
654
+ }
655
+ }
656
+ const requestParams = downgraded ? { ...params, model } : params;
657
+ try {
658
+ const response = await target.create.call(
659
+ target,
660
+ requestParams,
661
+ options
662
+ );
663
+ const latency = Date.now() - startTime;
664
+ const usage = response?.usage;
665
+ const inputTokens = usage?.prompt_tokens ?? 0;
666
+ const outputTokens = usage?.completion_tokens ?? 0;
667
+ const cost = calculateCost(model, inputTokens, outputTokens);
668
+ heartbeat.trackSpend(cost);
669
+ shipper.push({
670
+ id: generateId2(),
671
+ timestamp: startTime,
672
+ provider: "openai",
673
+ model,
674
+ input_tokens: inputTokens,
675
+ output_tokens: outputTokens,
676
+ cost,
677
+ latency_ms: latency,
678
+ endpoint_tag: config.endpoint_tag,
679
+ downgraded,
680
+ original_model: originalModel,
681
+ blocked: false,
682
+ enforcement_reason: enforcementReason
683
+ });
684
+ return response;
685
+ } catch (err) {
686
+ if (err?.caplyr) throw err;
579
687
  shipper.push({
580
688
  id: generateId2(),
581
689
  timestamp: startTime,
@@ -586,81 +694,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
586
694
  cost: 0,
587
695
  latency_ms: Date.now() - startTime,
588
696
  endpoint_tag: config.endpoint_tag,
589
- downgraded: false,
590
- blocked: true,
591
- enforcement_reason: enforcementReason
592
- });
593
- throw Object.assign(new Error(blockError.message), {
594
- caplyr: blockError
697
+ downgraded,
698
+ original_model: originalModel,
699
+ blocked: false,
700
+ enforcement_reason: "provider_error"
595
701
  });
702
+ throw err;
596
703
  }
597
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
598
- const fallback = config.fallback ?? getDefaultFallback(model);
599
- if (fallback && fallback !== model) {
600
- originalModel = model;
601
- model = fallback;
602
- downgraded = true;
603
- enforcementReason = "auto_downgrade_threshold";
604
- config.onEnforcement?.({
605
- type: "downgrade",
606
- timestamp: Date.now(),
607
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
608
- original_model: originalModel,
609
- fallback_model: model,
610
- budget_used: heartbeat.budgetStatus.monthly_used,
611
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
612
- estimated_savings: 0
613
- });
614
- }
615
- }
616
- }
617
- const requestParams = downgraded ? { ...params, model } : params;
618
- try {
619
- const response = await target.create.call(
620
- target,
621
- requestParams,
622
- options
623
- );
624
- const latency = Date.now() - startTime;
625
- const usage = response?.usage;
626
- const inputTokens = usage?.prompt_tokens ?? 0;
627
- const outputTokens = usage?.completion_tokens ?? 0;
628
- const cost = calculateCost(model, inputTokens, outputTokens);
629
- heartbeat.trackSpend(cost);
630
- shipper.push({
631
- id: generateId2(),
632
- timestamp: startTime,
633
- provider: "openai",
634
- model,
635
- input_tokens: inputTokens,
636
- output_tokens: outputTokens,
637
- cost,
638
- latency_ms: latency,
639
- endpoint_tag: config.endpoint_tag,
640
- downgraded,
641
- original_model: originalModel,
642
- blocked: false,
643
- enforcement_reason: enforcementReason
644
- });
645
- return response;
646
- } catch (err) {
647
- if (err?.caplyr) throw err;
648
- shipper.push({
649
- id: generateId2(),
650
- timestamp: startTime,
651
- provider: "openai",
652
- model,
653
- input_tokens: 0,
654
- output_tokens: 0,
655
- cost: 0,
656
- latency_ms: Date.now() - startTime,
657
- endpoint_tag: config.endpoint_tag,
658
- downgraded,
659
- original_model: originalModel,
660
- blocked: false,
661
- enforcement_reason: "provider_error"
662
- });
663
- throw err;
704
+ } finally {
705
+ release();
664
706
  }
665
707
  };
666
708
  }
package/dist/index.mjs CHANGED
@@ -74,6 +74,36 @@ var LogShipper = class {
74
74
  }
75
75
  };
76
76
 
77
+ // src/mutex.ts
78
+ var Mutex = class { // Promise-based lock; waiters are queued with push() and resumed with shift(), i.e. in FIFO order
78
+ constructor() {
79
+ this.queue = []; // callbacks of callers waiting for the lock, oldest first
80
+ this.locked = false; // true while some caller holds the lock
81
+ }
82
+ async acquire() { // resolves to a release function once the caller holds the lock
83
+ if (!this.locked) { // uncontended: take the lock right away
84
+ this.locked = true;
85
+ return this.createRelease();
86
+ }
87
+ return new Promise((resolve) => { // contended: park until a release() call runs this waiter's callback
88
+ this.queue.push(() => resolve(this.createRelease()));
89
+ });
90
+ }
91
+ createRelease() { // builds a one-shot release function for the current holder
92
+ let released = false;
93
+ return () => {
94
+ if (released) return; // repeated calls to the same release function are no-ops
95
+ released = true;
96
+ const next = this.queue.shift();
97
+ if (next) {
98
+ next(); // hand the lock straight to the oldest waiter; `locked` stays true
99
+ } else {
100
+ this.locked = false; // nobody waiting: mark the lock free
101
+ }
102
+ };
103
+ }
104
+ };
106
+
77
107
  // src/heartbeat.ts
78
108
  var Heartbeat = class {
79
109
  constructor(config) {
@@ -91,6 +121,8 @@ var Heartbeat = class {
91
121
  };
92
122
  /** Current protection status */
93
123
  this.status = "ACTIVE";
124
+ /** Mutex for serializing budget check → API call → trackSpend */
125
+ this.budgetMutex = new Mutex();
94
126
  /** Local budget limits set via config (not from server) */
95
127
  this.localDailyLimit = null;
96
128
  this.localMonthlyLimit = null;
@@ -349,18 +381,98 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
349
381
  });
350
382
  }
351
383
  }
352
- if (config.mode === "cost_protect") {
353
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
354
- blocked = true;
355
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
356
- const blockError = {
357
- code: "BUDGET_EXCEEDED",
358
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
359
- budget_used: heartbeat.budgetStatus.monthly_used,
360
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
361
- retry_after: getNextResetTime(enforcementReason),
362
- dashboard_url: dashboardUrl
363
- };
384
+ const release = await heartbeat.budgetMutex.acquire();
385
+ try {
386
+ if (config.mode === "cost_protect") {
387
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
388
+ blocked = true;
389
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
390
+ const blockError = {
391
+ code: "BUDGET_EXCEEDED",
392
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
393
+ budget_used: heartbeat.budgetStatus.monthly_used,
394
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
395
+ retry_after: getNextResetTime(enforcementReason),
396
+ dashboard_url: dashboardUrl
397
+ };
398
+ shipper.push({
399
+ id: generateId(),
400
+ timestamp: startTime,
401
+ provider: "anthropic",
402
+ model,
403
+ input_tokens: 0,
404
+ output_tokens: 0,
405
+ cost: 0,
406
+ latency_ms: Date.now() - startTime,
407
+ endpoint_tag: config.endpoint_tag,
408
+ downgraded: false,
409
+ blocked: true,
410
+ enforcement_reason: enforcementReason
411
+ });
412
+ throw Object.assign(new Error(blockError.message), {
413
+ caplyr: blockError
414
+ });
415
+ }
416
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
417
+ const fallback = config.fallback ?? getDefaultFallback(model);
418
+ if (fallback && fallback !== model) {
419
+ originalModel = model;
420
+ model = fallback;
421
+ downgraded = true;
422
+ enforcementReason = "auto_downgrade_threshold";
423
+ config.onEnforcement?.({
424
+ type: "downgrade",
425
+ timestamp: Date.now(),
426
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
427
+ original_model: originalModel,
428
+ fallback_model: model,
429
+ budget_used: heartbeat.budgetStatus.monthly_used,
430
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
431
+ estimated_savings: 0
432
+ // Calculated after response
433
+ });
434
+ }
435
+ }
436
+ }
437
+ const requestParams = downgraded ? { ...params, model } : params;
438
+ try {
439
+ const response = await target.create.call(
440
+ target,
441
+ requestParams,
442
+ options
443
+ );
444
+ const latency = Date.now() - startTime;
445
+ const inputTokens = response?.usage?.input_tokens ?? 0;
446
+ const outputTokens = response?.usage?.output_tokens ?? 0;
447
+ const cost = calculateCost(model, inputTokens, outputTokens);
448
+ heartbeat.trackSpend(cost);
449
+ let estimatedSavings = 0;
450
+ if (downgraded && originalModel) {
451
+ const originalCost = calculateCost(
452
+ originalModel,
453
+ inputTokens,
454
+ outputTokens
455
+ );
456
+ estimatedSavings = originalCost - cost;
457
+ }
458
+ shipper.push({
459
+ id: generateId(),
460
+ timestamp: startTime,
461
+ provider: "anthropic",
462
+ model,
463
+ input_tokens: inputTokens,
464
+ output_tokens: outputTokens,
465
+ cost,
466
+ latency_ms: latency,
467
+ endpoint_tag: config.endpoint_tag,
468
+ downgraded,
469
+ original_model: originalModel,
470
+ blocked: false,
471
+ enforcement_reason: enforcementReason
472
+ });
473
+ return response;
474
+ } catch (err) {
475
+ if (err?.caplyr) throw err;
364
476
  shipper.push({
365
477
  id: generateId(),
366
478
  timestamp: startTime,
@@ -371,90 +483,15 @@ function wrapAnthropic(client, config, shipper, heartbeat) {
371
483
  cost: 0,
372
484
  latency_ms: Date.now() - startTime,
373
485
  endpoint_tag: config.endpoint_tag,
374
- downgraded: false,
375
- blocked: true,
376
- enforcement_reason: enforcementReason
377
- });
378
- throw Object.assign(new Error(blockError.message), {
379
- caplyr: blockError
486
+ downgraded,
487
+ original_model: originalModel,
488
+ blocked: false,
489
+ enforcement_reason: "provider_error"
380
490
  });
491
+ throw err;
381
492
  }
382
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
383
- const fallback = config.fallback ?? getDefaultFallback(model);
384
- if (fallback && fallback !== model) {
385
- originalModel = model;
386
- model = fallback;
387
- downgraded = true;
388
- enforcementReason = "auto_downgrade_threshold";
389
- config.onEnforcement?.({
390
- type: "downgrade",
391
- timestamp: Date.now(),
392
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
393
- original_model: originalModel,
394
- fallback_model: model,
395
- budget_used: heartbeat.budgetStatus.monthly_used,
396
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
397
- estimated_savings: 0
398
- // Calculated after response
399
- });
400
- }
401
- }
402
- }
403
- const requestParams = downgraded ? { ...params, model } : params;
404
- try {
405
- const response = await target.create.call(
406
- target,
407
- requestParams,
408
- options
409
- );
410
- const latency = Date.now() - startTime;
411
- const inputTokens = response?.usage?.input_tokens ?? 0;
412
- const outputTokens = response?.usage?.output_tokens ?? 0;
413
- const cost = calculateCost(model, inputTokens, outputTokens);
414
- heartbeat.trackSpend(cost);
415
- let estimatedSavings = 0;
416
- if (downgraded && originalModel) {
417
- const originalCost = calculateCost(
418
- originalModel,
419
- inputTokens,
420
- outputTokens
421
- );
422
- estimatedSavings = originalCost - cost;
423
- }
424
- shipper.push({
425
- id: generateId(),
426
- timestamp: startTime,
427
- provider: "anthropic",
428
- model,
429
- input_tokens: inputTokens,
430
- output_tokens: outputTokens,
431
- cost,
432
- latency_ms: latency,
433
- endpoint_tag: config.endpoint_tag,
434
- downgraded,
435
- original_model: originalModel,
436
- blocked: false,
437
- enforcement_reason: enforcementReason
438
- });
439
- return response;
440
- } catch (err) {
441
- if (err?.caplyr) throw err;
442
- shipper.push({
443
- id: generateId(),
444
- timestamp: startTime,
445
- provider: "anthropic",
446
- model,
447
- input_tokens: 0,
448
- output_tokens: 0,
449
- cost: 0,
450
- latency_ms: Date.now() - startTime,
451
- endpoint_tag: config.endpoint_tag,
452
- downgraded,
453
- original_model: originalModel,
454
- blocked: false,
455
- enforcement_reason: "provider_error"
456
- });
457
- throw err;
493
+ } finally {
494
+ release();
458
495
  }
459
496
  };
460
497
  }
@@ -531,18 +568,89 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
531
568
  });
532
569
  }
533
570
  }
534
- if (config.mode === "cost_protect") {
535
- if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
536
- blocked = true;
537
- enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
538
- const blockError = {
539
- code: "BUDGET_EXCEEDED",
540
- message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
541
- budget_used: heartbeat.budgetStatus.monthly_used,
542
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
543
- retry_after: getNextResetTime2(enforcementReason),
544
- dashboard_url: dashboardUrl
545
- };
571
+ const release = await heartbeat.budgetMutex.acquire();
572
+ try {
573
+ if (config.mode === "cost_protect") {
574
+ if (heartbeat.isMonthlyBudgetExceeded() || heartbeat.isDailyBudgetExceeded()) {
575
+ blocked = true;
576
+ enforcementReason = heartbeat.isDailyBudgetExceeded() ? "daily_budget_exceeded" : "monthly_budget_exceeded";
577
+ const blockError = {
578
+ code: "BUDGET_EXCEEDED",
579
+ message: `AI budget exceeded. ${enforcementReason.replace(/_/g, " ")}.`,
580
+ budget_used: heartbeat.budgetStatus.monthly_used,
581
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
582
+ retry_after: getNextResetTime2(enforcementReason),
583
+ dashboard_url: dashboardUrl
584
+ };
585
+ shipper.push({
586
+ id: generateId2(),
587
+ timestamp: startTime,
588
+ provider: "openai",
589
+ model,
590
+ input_tokens: 0,
591
+ output_tokens: 0,
592
+ cost: 0,
593
+ latency_ms: Date.now() - startTime,
594
+ endpoint_tag: config.endpoint_tag,
595
+ downgraded: false,
596
+ blocked: true,
597
+ enforcement_reason: enforcementReason
598
+ });
599
+ throw Object.assign(new Error(blockError.message), {
600
+ caplyr: blockError
601
+ });
602
+ }
603
+ if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
604
+ const fallback = config.fallback ?? getDefaultFallback(model);
605
+ if (fallback && fallback !== model) {
606
+ originalModel = model;
607
+ model = fallback;
608
+ downgraded = true;
609
+ enforcementReason = "auto_downgrade_threshold";
610
+ config.onEnforcement?.({
611
+ type: "downgrade",
612
+ timestamp: Date.now(),
613
+ reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
614
+ original_model: originalModel,
615
+ fallback_model: model,
616
+ budget_used: heartbeat.budgetStatus.monthly_used,
617
+ budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
618
+ estimated_savings: 0
619
+ });
620
+ }
621
+ }
622
+ }
623
+ const requestParams = downgraded ? { ...params, model } : params;
624
+ try {
625
+ const response = await target.create.call(
626
+ target,
627
+ requestParams,
628
+ options
629
+ );
630
+ const latency = Date.now() - startTime;
631
+ const usage = response?.usage;
632
+ const inputTokens = usage?.prompt_tokens ?? 0;
633
+ const outputTokens = usage?.completion_tokens ?? 0;
634
+ const cost = calculateCost(model, inputTokens, outputTokens);
635
+ heartbeat.trackSpend(cost);
636
+ shipper.push({
637
+ id: generateId2(),
638
+ timestamp: startTime,
639
+ provider: "openai",
640
+ model,
641
+ input_tokens: inputTokens,
642
+ output_tokens: outputTokens,
643
+ cost,
644
+ latency_ms: latency,
645
+ endpoint_tag: config.endpoint_tag,
646
+ downgraded,
647
+ original_model: originalModel,
648
+ blocked: false,
649
+ enforcement_reason: enforcementReason
650
+ });
651
+ return response;
652
+ } catch (err) {
653
+ if (err?.caplyr) throw err;
546
654
  shipper.push({
547
655
  id: generateId2(),
548
656
  timestamp: startTime,
@@ -553,81 +661,15 @@ function wrapOpenAI(client, config, shipper, heartbeat) {
553
661
  cost: 0,
554
662
  latency_ms: Date.now() - startTime,
555
663
  endpoint_tag: config.endpoint_tag,
556
- downgraded: false,
557
- blocked: true,
558
- enforcement_reason: enforcementReason
559
- });
560
- throw Object.assign(new Error(blockError.message), {
561
- caplyr: blockError
664
+ downgraded,
665
+ original_model: originalModel,
666
+ blocked: false,
667
+ enforcement_reason: "provider_error"
562
668
  });
669
+ throw err;
563
670
  }
564
- if (heartbeat.isDowngradeThresholdReached(downgradeThreshold)) {
565
- const fallback = config.fallback ?? getDefaultFallback(model);
566
- if (fallback && fallback !== model) {
567
- originalModel = model;
568
- model = fallback;
569
- downgraded = true;
570
- enforcementReason = "auto_downgrade_threshold";
571
- config.onEnforcement?.({
572
- type: "downgrade",
573
- timestamp: Date.now(),
574
- reason: `Budget at ${Math.round(downgradeThreshold * 100)}% \u2014 downgraded ${originalModel} \u2192 ${model}`,
575
- original_model: originalModel,
576
- fallback_model: model,
577
- budget_used: heartbeat.budgetStatus.monthly_used,
578
- budget_limit: heartbeat.budgetStatus.monthly_limit ?? 0,
579
- estimated_savings: 0
580
- });
581
- }
582
- }
583
- }
584
- const requestParams = downgraded ? { ...params, model } : params;
585
- try {
586
- const response = await target.create.call(
587
- target,
588
- requestParams,
589
- options
590
- );
591
- const latency = Date.now() - startTime;
592
- const usage = response?.usage;
593
- const inputTokens = usage?.prompt_tokens ?? 0;
594
- const outputTokens = usage?.completion_tokens ?? 0;
595
- const cost = calculateCost(model, inputTokens, outputTokens);
596
- heartbeat.trackSpend(cost);
597
- shipper.push({
598
- id: generateId2(),
599
- timestamp: startTime,
600
- provider: "openai",
601
- model,
602
- input_tokens: inputTokens,
603
- output_tokens: outputTokens,
604
- cost,
605
- latency_ms: latency,
606
- endpoint_tag: config.endpoint_tag,
607
- downgraded,
608
- original_model: originalModel,
609
- blocked: false,
610
- enforcement_reason: enforcementReason
611
- });
612
- return response;
613
- } catch (err) {
614
- if (err?.caplyr) throw err;
615
- shipper.push({
616
- id: generateId2(),
617
- timestamp: startTime,
618
- provider: "openai",
619
- model,
620
- input_tokens: 0,
621
- output_tokens: 0,
622
- cost: 0,
623
- latency_ms: Date.now() - startTime,
624
- endpoint_tag: config.endpoint_tag,
625
- downgraded,
626
- original_model: originalModel,
627
- blocked: false,
628
- enforcement_reason: "provider_error"
629
- });
630
- throw err;
671
+ } finally {
672
+ release();
631
673
  }
632
674
  };
633
675
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "caplyr",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "AI Cost Control Plane — budget guardrails, auto-downgrade, and kill switch for AI API calls",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",