@usagetap/sdk 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -16
- package/dist/adapters/anthropic.cjs +943 -0
- package/dist/adapters/anthropic.cjs.map +1 -0
- package/dist/adapters/anthropic.d.cts +81 -0
- package/dist/adapters/anthropic.d.ts +81 -0
- package/dist/adapters/anthropic.mjs +940 -0
- package/dist/adapters/anthropic.mjs.map +1 -0
- package/dist/adapters/openai.cjs +601 -17
- package/dist/adapters/openai.cjs.map +1 -1
- package/dist/adapters/openai.d.cts +57 -2
- package/dist/adapters/openai.d.ts +57 -2
- package/dist/adapters/openai.mjs +601 -18
- package/dist/adapters/openai.mjs.map +1 -1
- package/dist/adapters/openrouter.cjs.map +1 -1
- package/dist/adapters/openrouter.d.cts +1 -1
- package/dist/adapters/openrouter.d.ts +1 -1
- package/dist/adapters/openrouter.mjs.map +1 -1
- package/dist/anthropic/index.cjs +943 -0
- package/dist/anthropic/index.cjs.map +1 -0
- package/dist/anthropic/index.d.cts +2 -0
- package/dist/anthropic/index.d.ts +2 -0
- package/dist/anthropic/index.mjs +940 -0
- package/dist/anthropic/index.mjs.map +1 -0
- package/dist/{client-BHNMYvlO.d.cts → client-BA-QlnRq.d.cts} +32 -1
- package/dist/{client-BHNMYvlO.d.ts → client-BA-QlnRq.d.ts} +32 -1
- package/dist/express/index.cjs +597 -17
- package/dist/express/index.cjs.map +1 -1
- package/dist/express/index.d.cts +1 -1
- package/dist/express/index.d.ts +1 -1
- package/dist/express/index.mjs +597 -17
- package/dist/express/index.mjs.map +1 -1
- package/dist/index.cjs +77 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +76 -10
- package/dist/index.mjs.map +1 -1
- package/dist/openai/index.cjs +601 -17
- package/dist/openai/index.cjs.map +1 -1
- package/dist/openai/index.d.cts +2 -2
- package/dist/openai/index.d.ts +2 -2
- package/dist/openai/index.mjs +601 -18
- package/dist/openai/index.mjs.map +1 -1
- package/package.json +21 -1
package/dist/openai/index.mjs
CHANGED
|
@@ -27,7 +27,62 @@ var UsageTapError = class extends Error {
|
|
|
27
27
|
}
|
|
28
28
|
};
|
|
29
29
|
|
|
30
|
+
// src/prompt-compression.ts
|
|
31
|
+
/**
 * Roughly estimates how many tokens a prompt input contains.
 *
 * Non-string input is first turned into text via `stableStringifyInput`.
 * Each run of Unicode letters/digits counts as one token, and every other
 * non-whitespace character counts as one token on its own.
 *
 * @param {unknown} input - Prompt text, or a value to be stringified.
 * @returns {number} Estimated token count; 0 when nothing matches.
 */
function estimatePromptTokens(input) {
  let text = input;
  if (typeof text !== "string") {
    text = stableStringifyInput(input);
  }
  const pieces = text.match(/[\p{L}\p{N}]+|[^\s]/gu);
  return pieces?.length ?? 0;
}
|
|
35
|
+
/**
 * Converts an arbitrary prompt input into text.
 *
 * Strings are returned untouched. Everything else goes through
 * `JSON.stringify`; values JSON cannot represent (it returns `undefined`)
 * fall back to `String(input)`.
 *
 * `JSON.stringify` throws on BigInt values and circular structures. Those
 * inputs also fall back to `String(input)` here so that a token estimate
 * never aborts the surrounding call.
 *
 * @param {unknown} input - Value to render as text.
 * @returns {string} Textual form of `input`.
 */
function stableStringifyInput(input) {
  if (typeof input === "string") return input;
  try {
    return JSON.stringify(input) ?? String(input);
  } catch {
    // BigInt or circular reference: JSON cannot serialize it.
    return String(input);
  }
}
|
|
39
|
+
|
|
30
40
|
// src/adapters/openai.ts
|
|
41
|
+
/**
 * Sums one numeric field over all recorded turns, counting a missing
 * (null/undefined) value as 0 so a sparse turn cannot turn a total into NaN.
 */
function sumTurnField(turns, field) {
  return turns.reduce((sum, turn) => sum + (turn[field] ?? 0), 0);
}

/**
 * In-memory accumulator for prompt-compression results.
 *
 * `history` holds one entry per recorded compression turn and `failures`
 * holds one entry per recorded failure. All getters are derived from those
 * two arrays on every read.
 */
var OpenAIPromptCompressionStats = class {
  history = [];
  failures = [];

  /** Appends a compression turn to `history`. */
  _record(turn) {
    this.history.push(turn);
  }

  /** Appends a failure record to `failures`. */
  _recordFailure(failure) {
    this.failures.push(failure);
  }

  get totalOriginalTokens() {
    return sumTurnField(this.history, "originalTokens");
  }

  get totalCompressedTokens() {
    return sumTurnField(this.history, "compressedTokens");
  }

  get totalTokensSaved() {
    return sumTurnField(this.history, "savedTokens");
  }

  get totalOriginalCharacters() {
    return sumTurnField(this.history, "originalCharacters");
  }

  get totalCompressedCharacters() {
    return sumTurnField(this.history, "compressedCharacters");
  }

  get totalCharactersSaved() {
    return sumTurnField(this.history, "savedCharacters");
  }

  /** Number of recorded turns. */
  get calls() {
    return this.history.length;
  }

  /** Number of recorded failures. */
  get telemetryFailures() {
    return this.failures.length;
  }

  /**
   * Number of turns whose techniques include "compression-error" or
   * "fallback-original". A turn without a `techniques` list counts as none.
   */
  get failOpenEvents() {
    return this.history.filter((turn) => {
      const techniques = turn.techniques ?? [];
      return techniques.includes("compression-error") || techniques.includes("fallback-original");
    }).length;
  }

  /** Saved tokens divided by original tokens; 0 when no tokens were recorded. */
  get tokenSavingsRatio() {
    const original = this.totalOriginalTokens;
    return original > 0 ? this.totalTokensSaved / original : 0;
  }

  /** Saved characters divided by original characters; 0 when none were recorded. */
  get savingsRatio() {
    const original = this.totalOriginalCharacters;
    return original > 0 ? this.totalCharactersSaved / original : 0;
  }
};
|
|
31
86
|
function createOpenAIAdapter(init) {
|
|
32
87
|
const { client, usageTap } = init;
|
|
33
88
|
return {
|
|
@@ -184,8 +239,24 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
184
239
|
}
|
|
185
240
|
const defaultContext = options.defaultContext;
|
|
186
241
|
const applyVendorHints = options.applyVendorHints !== false;
|
|
187
|
-
const
|
|
188
|
-
const
|
|
242
|
+
const defaultPromptCompression = normalizePromptCompressionOptions(options.promptCompression);
|
|
243
|
+
const promptCompressionStats = new OpenAIPromptCompressionStats();
|
|
244
|
+
const proxiedChat = client.chat ? createChatProxy(
|
|
245
|
+
client.chat,
|
|
246
|
+
usageTap,
|
|
247
|
+
defaultContext,
|
|
248
|
+
applyVendorHints,
|
|
249
|
+
defaultPromptCompression,
|
|
250
|
+
promptCompressionStats
|
|
251
|
+
) : void 0;
|
|
252
|
+
const proxiedResponses = typeof client.responses !== "undefined" ? createResponsesProxy(
|
|
253
|
+
client.responses,
|
|
254
|
+
usageTap,
|
|
255
|
+
defaultContext,
|
|
256
|
+
applyVendorHints,
|
|
257
|
+
defaultPromptCompression,
|
|
258
|
+
promptCompressionStats
|
|
259
|
+
) : void 0;
|
|
189
260
|
const handler = {
|
|
190
261
|
get(target, prop, receiver) {
|
|
191
262
|
if (prop === "chat" && proxiedChat) {
|
|
@@ -200,6 +271,9 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
200
271
|
if (prop === "pipeToResponse") {
|
|
201
272
|
return pipeToResponse;
|
|
202
273
|
}
|
|
274
|
+
if (prop === "promptCompression") {
|
|
275
|
+
return promptCompressionStats;
|
|
276
|
+
}
|
|
203
277
|
if (prop === "unwrap") {
|
|
204
278
|
return () => target;
|
|
205
279
|
}
|
|
@@ -230,6 +304,9 @@ function streamOpenAIRoute(usageTap, openai, options) {
|
|
|
230
304
|
if (requestConfig.withUsage) {
|
|
231
305
|
callOptions.withUsage = requestConfig.withUsage;
|
|
232
306
|
}
|
|
307
|
+
if (requestConfig.promptCompression !== void 0) {
|
|
308
|
+
callOptions.promptCompression = requestConfig.promptCompression;
|
|
309
|
+
}
|
|
233
310
|
const stream = await wrappedClient.chat.completions.create(
|
|
234
311
|
mergedParams,
|
|
235
312
|
Object.keys(callOptions).length ? callOptions : void 0
|
|
@@ -256,12 +333,14 @@ function streamOpenAIRoute(usageTap, openai, options) {
|
|
|
256
333
|
});
|
|
257
334
|
};
|
|
258
335
|
}
|
|
259
|
-
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
336
|
+
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
260
337
|
const completions = createChatCompletionsProxy(
|
|
261
338
|
resource.completions,
|
|
262
339
|
usageTap,
|
|
263
340
|
defaultContext,
|
|
264
|
-
applyVendorHints
|
|
341
|
+
applyVendorHints,
|
|
342
|
+
defaultPromptCompression,
|
|
343
|
+
promptCompressionStats
|
|
265
344
|
);
|
|
266
345
|
const handler = {
|
|
267
346
|
get(target, prop, receiver) {
|
|
@@ -273,7 +352,7 @@ function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
|
273
352
|
};
|
|
274
353
|
return new Proxy(resource, handler);
|
|
275
354
|
}
|
|
276
|
-
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
355
|
+
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
277
356
|
if (!resource || typeof resource !== "object") {
|
|
278
357
|
return void 0;
|
|
279
358
|
}
|
|
@@ -282,11 +361,26 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
282
361
|
}
|
|
283
362
|
const originalCreate = resource.create.bind(resource);
|
|
284
363
|
const wrappedCreate = (params, options) => {
|
|
285
|
-
const {
|
|
364
|
+
const {
|
|
365
|
+
requestOptions,
|
|
366
|
+
usageContext,
|
|
367
|
+
withUsage,
|
|
368
|
+
promptCompression
|
|
369
|
+
} = splitUsageOptions(options);
|
|
286
370
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
287
371
|
const wantsStream = isStreamingRequest(params);
|
|
288
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
289
|
-
const
|
|
372
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
373
|
+
const hintedParams = applyVendorHints ? applyResponsesVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
374
|
+
const finalParams = await compressResponsesParamsForCall({
|
|
375
|
+
params: hintedParams,
|
|
376
|
+
usageTap,
|
|
377
|
+
ctx,
|
|
378
|
+
defaultPromptCompression,
|
|
379
|
+
callPromptCompression: promptCompression,
|
|
380
|
+
stats: promptCompressionStats,
|
|
381
|
+
withUsage,
|
|
382
|
+
operation: "responses.create"
|
|
383
|
+
});
|
|
290
384
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
291
385
|
if (wantsStream) {
|
|
292
386
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -320,16 +414,31 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
320
414
|
};
|
|
321
415
|
return new Proxy(resource, handler);
|
|
322
416
|
}
|
|
323
|
-
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
417
|
+
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
324
418
|
const originalCreate = resource.create.bind(resource);
|
|
325
419
|
const streamCandidate = resource.stream;
|
|
326
420
|
const originalStream = typeof streamCandidate === "function" ? streamCandidate.bind(resource) : void 0;
|
|
327
421
|
const wrappedCreate = (params, options) => {
|
|
328
|
-
const {
|
|
422
|
+
const {
|
|
423
|
+
requestOptions,
|
|
424
|
+
usageContext,
|
|
425
|
+
withUsage,
|
|
426
|
+
promptCompression
|
|
427
|
+
} = splitUsageOptions(options);
|
|
329
428
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
330
429
|
const wantsStream = isStreamingRequest(params);
|
|
331
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
332
|
-
const
|
|
430
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
431
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
432
|
+
const finalParams = await compressChatParamsForCall({
|
|
433
|
+
params: hintedParams,
|
|
434
|
+
usageTap,
|
|
435
|
+
ctx,
|
|
436
|
+
defaultPromptCompression,
|
|
437
|
+
callPromptCompression: promptCompression,
|
|
438
|
+
stats: promptCompressionStats,
|
|
439
|
+
withUsage,
|
|
440
|
+
operation: "chat.completions.create"
|
|
441
|
+
});
|
|
333
442
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
334
443
|
if (wantsStream) {
|
|
335
444
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -354,10 +463,25 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
354
463
|
}, withUsage);
|
|
355
464
|
};
|
|
356
465
|
const wrappedStream = originalStream ? (params, options) => {
|
|
357
|
-
const {
|
|
466
|
+
const {
|
|
467
|
+
requestOptions,
|
|
468
|
+
usageContext,
|
|
469
|
+
withUsage,
|
|
470
|
+
promptCompression
|
|
471
|
+
} = splitUsageOptions(options);
|
|
358
472
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
359
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
360
|
-
const
|
|
473
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
474
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
475
|
+
const finalParams = await compressChatParamsForCall({
|
|
476
|
+
params: hintedParams,
|
|
477
|
+
usageTap,
|
|
478
|
+
ctx,
|
|
479
|
+
defaultPromptCompression,
|
|
480
|
+
callPromptCompression: promptCompression,
|
|
481
|
+
stats: promptCompressionStats,
|
|
482
|
+
withUsage,
|
|
483
|
+
operation: "chat.completions.stream"
|
|
484
|
+
});
|
|
361
485
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
362
486
|
const apiPromise = originalStream(finalParams, request);
|
|
363
487
|
const wrappedPromise = transformApiPromise(apiPromise, (rawStream) => {
|
|
@@ -386,16 +510,475 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
386
510
|
};
|
|
387
511
|
return new Proxy(resource, handler);
|
|
388
512
|
}
|
|
513
|
+
/**
 * Runs prompt compression for one chat-completions call.
 *
 * Resolves the effective compression options from the wrapper defaults and
 * the per-call override. When compression is off, the params are returned
 * as given. Otherwise the messages are compressed, the outcome is recorded
 * (stats and telemetry), and the compressed params are returned.
 *
 * @param {object} args - params, usageTap, ctx, defaultPromptCompression,
 *   callPromptCompression, stats, withUsage, operation.
 * @returns {Promise<unknown>} Params to send to the API.
 */
async function compressChatParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }
  const outcome = await compressChatParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation
  });
  return outcome.params;
}
|
|
538
|
+
/**
 * Runs prompt compression for one Responses API call.
 *
 * Resolves the effective compression options from the wrapper defaults and
 * the per-call override. When compression is off, the params are returned
 * as given. Otherwise `instructions`/`input` are compressed, the outcome is
 * recorded (stats and telemetry), and the compressed params are returned.
 *
 * @param {object} args - params, usageTap, ctx, defaultPromptCompression,
 *   callPromptCompression, stats, withUsage, operation.
 * @returns {Promise<unknown>} Params to send to the API.
 */
async function compressResponsesParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }
  const outcome = await compressResponsesParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation
  });
  return outcome.params;
}
|
|
563
|
+
/**
 * Records the result of a compression pass locally and remotely.
 *
 * Does nothing when no telemetry can be built from the outcome's segments.
 * Otherwise the turn is pushed into `stats` first, then sent through
 * `usageTap.recordPromptCompression`. A failure of that call is stored in
 * `stats` and rethrown only when `compression.failOpen` is exactly `false`.
 *
 * @param {object} args - outcome, compression, usageTap, ctx, stats,
 *   withUsage, operation.
 * @returns {Promise<void>}
 */
async function recordCompressionOutcome(args) {
  const { outcome, compression, usageTap, ctx, stats, withUsage, operation } = args;
  const telemetry = buildPromptCompressionTelemetry(outcome.segments);
  if (!telemetry) {
    return;
  }
  const { callId } = ctx.begin.data;
  stats._record({
    ...telemetry,
    callId,
    operation,
    messagesCompressed: outcome.segments.length,
    timestamp: Date.now()
  });
  try {
    const payload = { callId, promptCompression: telemetry };
    const requestOptions = promptCompressionRequestOptions(withUsage, ctx.begin.correlationId);
    await usageTap.recordPromptCompression(payload, requestOptions);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    stats._recordFailure({
      callId,
      operation,
      stage: "telemetry",
      message,
      timestamp: Date.now()
    });
    // Telemetry is best-effort unless the caller opted out of fail-open.
    if (compression.failOpen === false) {
      throw error;
    }
  }
}
|
|
597
|
+
/**
 * Compresses every message of a chat-completions params object.
 *
 * Non-object params, or params without a `messages` array, are returned
 * as given with no segments. Otherwise all messages are compressed
 * concurrently and a copy of the params carrying the new messages is
 * returned together with the collected segments.
 *
 * @returns {Promise<{params: unknown, segments: unknown[]}>}
 */
async function compressChatParams(params, usageTap, compression, signal) {
  const untouched = { params, segments: [] };
  if (!params || typeof params !== "object") {
    return untouched;
  }
  const source = cloneRecord(params);
  if (!Array.isArray(source.messages)) {
    return untouched;
  }
  const results = await Promise.all(
    source.messages.map((message) => compressOpenAIMessage(message, usageTap, compression, signal))
  );
  const messages = [];
  const segments = [];
  for (const result of results) {
    messages.push(result.value);
    segments.push(...result.segments);
  }
  return { params: { ...source, messages }, segments };
}
|
|
619
|
+
/**
 * Compresses the text carried by a Responses API params object.
 *
 * Non-object params are returned as given. On a copy of the params, a
 * string `instructions` is compressed with the "system" role, a string
 * `input` with the "user" role, and an array `input` item by item.
 *
 * @returns {Promise<{params: unknown, segments: unknown[]}>}
 */
async function compressResponsesParams(params, usageTap, compression, signal) {
  if (!params || typeof params !== "object") {
    return { params, segments: [] };
  }
  const source = cloneRecord(params);
  const segments = [];
  const squeeze = (text, role) => compressTextForRole(text, role, usageTap, compression, signal);

  if (typeof source.instructions === "string") {
    const shrunk = await squeeze(source.instructions, "system");
    if (shrunk) {
      source.instructions = shrunk.text;
      segments.push(shrunk.segment);
    }
  }

  if (typeof source.input === "string") {
    const shrunk = await squeeze(source.input, "user");
    if (shrunk) {
      source.input = shrunk.text;
      segments.push(shrunk.segment);
    }
  } else if (Array.isArray(source.input)) {
    const items = await Promise.all(
      source.input.map((item) => compressResponsesInputItem(item, usageTap, compression, signal))
    );
    source.input = items.map((entry) => entry.value);
    for (const entry of items) {
      segments.push(...entry.segments);
    }
  }

  return { params: source, segments };
}
|
|
664
|
+
/**
 * Compresses the content of a single chat message.
 *
 * The message is passed through unchanged when it is not an object record,
 * when its role does not map to a known role, when its content is neither
 * a string nor an array, or when nothing ended up being compressed.
 *
 * @returns {Promise<{value: unknown, segments: unknown[]}>}
 */
async function compressOpenAIMessage(message, usageTap, compression, signal) {
  const unchanged = { value: message, segments: [] };
  if (!isObjectRecord(message)) {
    return unchanged;
  }
  const role = mapOpenAIRole(message.role);
  if (!role) {
    return unchanged;
  }
  const { content } = message;

  if (typeof content === "string") {
    const shrunk = await compressTextForRole(content, role, usageTap, compression, signal);
    return shrunk
      ? { value: { ...message, content: shrunk.text }, segments: [shrunk.segment] }
      : unchanged;
  }

  if (!Array.isArray(content)) {
    return unchanged;
  }

  const blocks = await Promise.all(
    content.map((block) => compressOpenAITextBlock(block, role, usageTap, compression, signal))
  );
  const segments = blocks.filter((entry) => entry.segment).map((entry) => entry.segment);
  // Only rebuild the message when at least one block actually changed.
  const value = segments.length
    ? { ...message, content: blocks.map((entry) => entry.value) }
    : message;
  return { value, segments };
}
|
|
705
|
+
/**
 * Compresses one chat content block when it is a `type: "text"` block
 * with string text; any other block is returned as-is.
 *
 * @returns {Promise<{value: unknown, segment?: unknown}>} The (possibly
 *   rewritten) block, plus a segment only when compression happened.
 */
async function compressOpenAITextBlock(block, role, usageTap, compression, signal) {
  const isTextBlock = isObjectRecord(block) && block.type === "text" && typeof block.text === "string";
  if (isTextBlock) {
    const shrunk = await compressTextForRole(block.text, role, usageTap, compression, signal);
    if (shrunk) {
      return {
        value: { ...block, text: shrunk.text },
        segment: shrunk.segment
      };
    }
  }
  return { value: block };
}
|
|
724
|
+
/**
 * Compresses one item of a Responses API `input` array.
 *
 * The role comes from the item type (tool-output items) or, failing that,
 * from `item.role`. String or array `content` is compressed when a role is
 * known; a string `output` is compressed for tool-output item types.
 *
 * @returns {Promise<{value: unknown, segments: unknown[]}>}
 */
async function compressResponsesInputItem(item, usageTap, compression, signal) {
  if (!isObjectRecord(item)) {
    return { value: item, segments: [] };
  }
  const toolRole = mapResponsesItemTypeToRole(item.type);
  const role = toolRole ?? mapOpenAIRole(item.role);
  const squeeze = (text, asRole) => compressTextForRole(text, asRole, usageTap, compression, signal);
  const segments = [];
  let value = item;

  if (role && typeof item.content === "string") {
    const shrunk = await squeeze(item.content, role);
    if (shrunk) {
      value = { ...value, content: shrunk.text };
      segments.push(shrunk.segment);
    }
  } else if (role && Array.isArray(item.content)) {
    const blocks = await Promise.all(
      item.content.map((block) => compressResponsesContentBlock(block, role, usageTap, compression, signal))
    );
    for (const entry of blocks) {
      if (entry.segment) {
        segments.push(entry.segment);
      }
    }
    if (segments.length) {
      value = { ...value, content: blocks.map((entry) => entry.value) };
    }
  }

  if (toolRole && typeof item.output === "string") {
    const shrunk = await squeeze(item.output, toolRole);
    if (shrunk) {
      value = { ...value, output: shrunk.text };
      segments.push(shrunk.segment);
    }
  }

  return { value, segments };
}
|
|
777
|
+
/**
 * Compresses one content block of a Responses API input item.
 *
 * `input_text`/`text` blocks with string text are tried first. If that
 * does not produce a result and the role is "tool", a string `output`
 * field is tried. Otherwise the block is returned unchanged.
 *
 * @returns {Promise<{value: unknown, segment?: unknown}>}
 */
async function compressResponsesContentBlock(block, role, usageTap, compression, signal) {
  if (!isObjectRecord(block)) {
    return { value: block };
  }
  const squeeze = (text) => compressTextForRole(text, role, usageTap, compression, signal);

  const hasText = (block.type === "input_text" || block.type === "text") && typeof block.text === "string";
  if (hasText) {
    const shrunk = await squeeze(block.text);
    if (shrunk) {
      return { value: { ...block, text: shrunk.text }, segment: shrunk.segment };
    }
  }

  if (role === "tool" && typeof block.output === "string") {
    const shrunk = await squeeze(block.output);
    if (shrunk) {
      return { value: { ...block, output: shrunk.text }, segment: shrunk.segment };
    }
  }

  return { value: block };
}
|
|
813
|
+
/**
 * Compresses one piece of prompt text according to the settings for `role`.
 *
 * Returns `undefined` (meaning "leave the text as it is") when the text is
 * blank, when compression is disabled for the role, when the text is
 * shorter than the role's `minTokens`, or when the compressor yields no
 * compressed text at all.
 *
 * @param {string} text - Original prompt text.
 * @param {string} role - Normalized role of the text.
 * @param {object} usageTap - Client exposing `compressPromptInput`.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compressor.
 * @returns {Promise<{text: string, segment: object} | undefined>}
 */
async function compressTextForRole(text, role, usageTap, compression, signal) {
  if (!text.trim()) {
    return void 0;
  }
  const roleOptions = resolveRoleCompressionOptions(compression, role);
  if (!roleOptions) {
    return void 0;
  }
  const { minTokens } = roleOptions;
  // The estimate is only needed when a numeric threshold is configured.
  if (typeof minTokens === "number" && estimatePromptTokens(text) < minTokens) {
    return void 0;
  }
  const result = await usageTap.compressPromptInput(text, {
    provider: roleOptions.provider,
    failOpen: roleOptions.failOpen,
    tokenCompanyModel: roleOptions.tokenCompanyModel,
    tokenCompanyAggressiveness: roleOptions.tokenCompanyAggressiveness,
    tokenCompanyAppId: roleOptions.tokenCompanyAppId,
    signal
  });
  const compressedInput = result?.compressedInput;
  // Without this guard String() would put the literal "undefined"/"null"
  // into the prompt; keep the original text instead.
  if (compressedInput == null) {
    return void 0;
  }
  const compressedText = typeof compressedInput === "string" ? compressedInput : String(compressedInput);
  return {
    text: compressedText,
    segment: { role, result: { ...result, compressedInput: compressedText } }
  };
}
|
|
839
|
+
/**
 * Normalizes a user-supplied prompt-compression setting.
 *
 * - `true` becomes an empty options object (compression on, all defaults).
 * - Any falsy value, or an object with `enabled === false`, becomes
 *   `undefined` (compression off).
 * - Any other value is returned unchanged.
 *
 * @param {boolean | object | undefined} options
 * @returns {object | undefined}
 */
function normalizePromptCompressionOptions(options) {
  if (options === true) {
    return {};
  }
  const disabled = !options || options.enabled === false;
  return disabled ? void 0 : options;
}
|
|
851
|
+
/**
 * Combines the wrapper-level compression defaults with a per-call override.
 *
 * - `false` disables compression for the call.
 * - `undefined` or `null` means "no override": the defaults apply. (A
 *   `null` override used to throw a TypeError when its `roles` was read.)
 * - `true` enables compression with the defaults, or with an empty options
 *   object when there are none.
 * - An object is shallow-merged over the defaults. `roles` is taken from
 *   the override when it is set there, otherwise from the defaults. The
 *   merged result is normalized, so `enabled: false` still disables.
 *
 * @param {object | undefined} defaults - Already-normalized defaults.
 * @param {boolean | object | null | undefined} override - Per-call setting.
 * @returns {object | undefined} Effective options, or `undefined` when off.
 */
function resolveEffectivePromptCompressionOptions(defaults, override) {
  if (override === false) {
    return void 0;
  }
  if (override == null) {
    return defaults;
  }
  if (override === true) {
    return defaults ?? {};
  }
  const merged = {
    ...defaults ?? {},
    ...override,
    roles: override.roles ?? defaults?.roles
  };
  return normalizePromptCompressionOptions(merged);
}
|
|
868
|
+
/**
 * Works out the compression settings that apply to one role.
 *
 * When `compression.roles` is set, only roles listed there are compressed.
 * When it is not set, every role except "assistant" is compressed. A role
 * setting of `false`, or an object with `enabled === false`, turns the
 * role off. Per-role `provider`, `minTokens` and
 * `tokenCompanyAggressiveness` take precedence over the top-level values.
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Normalized role name.
 * @returns {object | undefined} Options for the role, or `undefined` when
 *   the role must not be compressed.
 */
function resolveRoleCompressionOptions(compression, role) {
  const setting = compression.roles?.[role];
  const rolesConfigured = compression.roles !== void 0;
  const excluded = rolesConfigured ? setting === void 0 : role === "assistant";
  if (excluded || setting === false) {
    return void 0;
  }
  const perRole = typeof setting === "object" ? setting : void 0;
  if (perRole?.enabled === false) {
    return void 0;
  }
  const provider = perRole?.provider ?? compression.provider;
  const minTokens = perRole?.minTokens ?? compression.minTokens;
  const tokenCompanyAggressiveness =
    perRole?.tokenCompanyAggressiveness ?? resolveTokenCompanyAggressiveness(compression, role);
  return {
    provider,
    minTokens,
    failOpen: compression.failOpen,
    tokenCompanyModel: compression.tokenCompanyModel,
    tokenCompanyAggressiveness,
    tokenCompanyAppId: compression.tokenCompanyAppId
  };
}
|
|
893
|
+
/**
 * Reads the top-level `tokenCompanyAggressiveness` setting for a role.
 *
 * A number applies to every role; otherwise the setting is treated as a
 * per-role map and the entry for `role` is returned (possibly `undefined`).
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Normalized role name.
 * @returns {number | undefined}
 */
function resolveTokenCompanyAggressiveness(compression, role) {
  const { tokenCompanyAggressiveness: level } = compression;
  return typeof level === "number" ? level : level?.[role];
}
|
|
899
|
+
/**
 * Aggregates per-segment compression results into one telemetry record.
 *
 * Returns `undefined` when there are no segments. Character and token
 * counts are summed across segments; a count missing from a segment is
 * treated as 0 so one sparse result cannot turn the totals into NaN.
 * Saved amounts are clamped at 0. `techniques` lists "openai-wrapper", one
 * `role:<role>` tag per distinct role, every technique reported by the
 * segments, and "mixed-providers" when more than one provider took part.
 *
 * @param {Array<{role: string, result: object}>} segments
 * @returns {object | undefined} Telemetry record; `provider` is the first
 *   segment's provider, or "heuristic" when that is not set.
 */
function buildPromptCompressionTelemetry(segments) {
  if (!segments.length) {
    return void 0;
  }
  let originalCharacters = 0;
  let compressedCharacters = 0;
  let originalTokens = 0;
  let compressedTokens = 0;
  for (const { result } of segments) {
    originalCharacters += result.originalCharacters ?? 0;
    compressedCharacters += result.compressedCharacters ?? 0;
    originalTokens += result.originalTokens ?? 0;
    compressedTokens += result.compressedTokens ?? 0;
  }
  const savedCharacters = Math.max(0, originalCharacters - compressedCharacters);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);
  const providers = dedupeStrings(segments.map((segment) => segment.result.provider));
  const roles = dedupeStrings(segments.map((segment) => `role:${segment.role}`));
  const techniques = dedupeStrings([
    "openai-wrapper",
    ...roles,
    // A result without a technique list contributes nothing.
    ...segments.flatMap((segment) => segment.result.techniques ?? []),
    ...providers.length > 1 ? ["mixed-providers"] : []
  ]);
  return {
    provider: segments[0].result.provider ?? "heuristic",
    originalCharacters,
    compressedCharacters,
    savedCharacters,
    originalTokens,
    compressedTokens,
    savedTokens,
    tokenSavingsRatio: originalTokens > 0 ? savedTokens / originalTokens : 0,
    savingsRatio: originalCharacters > 0 ? savedCharacters / originalCharacters : 0,
    techniques
  };
}
|
|
942
|
+
/**
 * Builds the request options for the prompt-compression telemetry call.
 *
 * Copies `signal`, `headers` and `retries` from the caller's `withUsage`
 * options (each `undefined` when `withUsage` is missing) and attaches the
 * correlation id.
 *
 * @param {object | undefined} withUsage - Per-call usage options.
 * @param {string | undefined} correlationId - Correlation id of the call.
 * @returns {{signal: unknown, headers: unknown, retries: unknown, correlationId: unknown}}
 */
function promptCompressionRequestOptions(withUsage, correlationId) {
  const { signal, headers, retries } = withUsage ?? {};
  return { signal, headers, retries, correlationId };
}
|
|
950
|
+
/**
 * Maps an OpenAI message role onto one of the compression roles.
 *
 * "system" and "developer" map to "system", "tool" and "function" map to
 * "tool", "user" and "assistant" map to themselves, and anything else
 * yields `undefined`.
 *
 * @param {unknown} role
 * @returns {"system" | "user" | "tool" | "assistant" | undefined}
 */
function mapOpenAIRole(role) {
  switch (role) {
    case "system":
    case "developer":
      return "system";
    case "user":
      return "user";
    case "tool":
    case "function":
      return "tool";
    case "assistant":
      return "assistant";
    default:
      return void 0;
  }
}
|
|
965
|
+
/**
 * Maps a Responses API input-item type onto a compression role.
 *
 * The item types "function_call_output", "tool_result" and
 * "computer_call_output" map to "tool"; every other type yields
 * `undefined`.
 *
 * @param {unknown} type
 * @returns {"tool" | undefined}
 */
function mapResponsesItemTypeToRole(type) {
  switch (type) {
    case "function_call_output":
    case "tool_result":
    case "computer_call_output":
      return "tool";
    default:
      return void 0;
  }
}
|
|
389
971
|
/**
 * Separates the wrapper-specific keys from the options passed to a call.
 *
 * `usageTap`, `withUsage` and `promptCompression` are pulled out; whatever
 * remains is cloned into `requestOptions` (left `undefined` when nothing
 * remains). Non-object input yields an empty object.
 *
 * @param {unknown} options - Second argument given to the wrapped method.
 * @returns {{requestOptions?: object, usageContext?: unknown, withUsage?: unknown, promptCompression?: unknown}}
 */
function splitUsageOptions(options) {
  if (options === null || typeof options !== "object") {
    return {};
  }
  const { usageTap: usageContext, withUsage, promptCompression, ...rest } = options;
  const hasRequestOptions = Object.keys(rest).length > 0;
  return {
    requestOptions: hasRequestOptions ? cloneRequestOptions(rest) : void 0,
    usageContext,
    withUsage,
    promptCompression
  };
}
|
|
401
984
|
function resolveBeginRequest(defaults, override) {
|
|
@@ -764,6 +1347,6 @@ function createOpenRouterAdapter(init) {
|
|
|
764
1347
|
return createOpenAIAdapter(init);
|
|
765
1348
|
}
|
|
766
1349
|
|
|
767
|
-
export { createOpenAIAdapter, createOpenRouterAdapter, pipeToResponse, streamOpenAIRoute, toNextResponse, wrapOpenAI };
|
|
1350
|
+
export { OpenAIPromptCompressionStats, createOpenAIAdapter, createOpenRouterAdapter, pipeToResponse, streamOpenAIRoute, toNextResponse, wrapOpenAI };
|
|
768
1351
|
//# sourceMappingURL=index.mjs.map
|
|
769
1352
|
//# sourceMappingURL=index.mjs.map
|