@usagetap/sdk 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -16
- package/dist/adapters/anthropic.cjs +943 -0
- package/dist/adapters/anthropic.cjs.map +1 -0
- package/dist/adapters/anthropic.d.cts +81 -0
- package/dist/adapters/anthropic.d.ts +81 -0
- package/dist/adapters/anthropic.mjs +940 -0
- package/dist/adapters/anthropic.mjs.map +1 -0
- package/dist/adapters/openai.cjs +601 -17
- package/dist/adapters/openai.cjs.map +1 -1
- package/dist/adapters/openai.d.cts +57 -2
- package/dist/adapters/openai.d.ts +57 -2
- package/dist/adapters/openai.mjs +601 -18
- package/dist/adapters/openai.mjs.map +1 -1
- package/dist/adapters/openrouter.cjs.map +1 -1
- package/dist/adapters/openrouter.d.cts +1 -1
- package/dist/adapters/openrouter.d.ts +1 -1
- package/dist/adapters/openrouter.mjs.map +1 -1
- package/dist/anthropic/index.cjs +943 -0
- package/dist/anthropic/index.cjs.map +1 -0
- package/dist/anthropic/index.d.cts +2 -0
- package/dist/anthropic/index.d.ts +2 -0
- package/dist/anthropic/index.mjs +940 -0
- package/dist/anthropic/index.mjs.map +1 -0
- package/dist/{client-BHNMYvlO.d.cts → client-BA-QlnRq.d.cts} +32 -1
- package/dist/{client-BHNMYvlO.d.ts → client-BA-QlnRq.d.ts} +32 -1
- package/dist/express/index.cjs +597 -17
- package/dist/express/index.cjs.map +1 -1
- package/dist/express/index.d.cts +1 -1
- package/dist/express/index.d.ts +1 -1
- package/dist/express/index.mjs +597 -17
- package/dist/express/index.mjs.map +1 -1
- package/dist/index.cjs +77 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +76 -10
- package/dist/index.mjs.map +1 -1
- package/dist/openai/index.cjs +601 -17
- package/dist/openai/index.cjs.map +1 -1
- package/dist/openai/index.d.cts +2 -2
- package/dist/openai/index.d.ts +2 -2
- package/dist/openai/index.mjs +601 -18
- package/dist/openai/index.mjs.map +1 -1
- package/package.json +21 -1
package/dist/adapters/openai.cjs
CHANGED
|
@@ -29,7 +29,62 @@ var UsageTapError = class extends Error {
|
|
|
29
29
|
}
|
|
30
30
|
};
|
|
31
31
|
|
|
32
|
+
// src/prompt-compression.ts
|
|
33
|
+
/**
 * Estimates how many tokens a prompt input contains.
 *
 * Strings are measured as-is; any other value is first turned into text by
 * `stableStringifyInput`. Every run of Unicode letters/digits counts as one
 * token, and every remaining non-whitespace character counts as one token.
 *
 * @param {*} input - Prompt text, or a value to be stringified first.
 * @returns {number} Estimated token count; 0 when nothing matches.
 */
function estimatePromptTokens(input) {
  let text;
  if (typeof input === "string") {
    text = input;
  } else {
    text = stableStringifyInput(input);
  }
  // String.prototype.match with a global regex yields null when there is no match.
  const pieces = text.match(/[\p{L}\p{N}]+|[^\s]/gu);
  return pieces === null ? 0 : pieces.length;
}
|
|
37
|
+
/**
 * Converts an arbitrary prompt input into text.
 *
 * Strings are returned unchanged. Other values are JSON-encoded; when JSON
 * encoding yields `undefined` (e.g. for `undefined` or a function) the value
 * is coerced with `String()`.
 *
 * @param {*} input - Value to convert.
 * @returns {string} Text form of `input`; this function never throws.
 */
function stableStringifyInput(input) {
  if (typeof input === "string") return input;
  try {
    return JSON.stringify(input) ?? String(input);
  } catch {
    // JSON.stringify throws on BigInt values and circular structures (and
    // String() throws on null-prototype objects). Fall back to a coercion
    // that cannot throw so callers always get text back.
    return typeof input === "bigint" ? input.toString() : Object.prototype.toString.call(input);
  }
}
|
|
41
|
+
|
|
32
42
|
// src/adapters/openai.ts
|
|
43
|
+
/**
 * In-memory accumulator for prompt-compression activity of one wrapped client.
 *
 * `history` holds one entry per recorded compression turn and `failures` one
 * entry per recorded failure. All totals and ratios are derived on demand
 * from `history`; missing numeric fields on a turn count as 0 so a single
 * incomplete entry cannot turn a total into NaN.
 */
var OpenAIPromptCompressionStats = class {
  history = [];
  failures = [];

  /** Appends a compression turn to `history`. */
  _record(turn) {
    this.history.push(turn);
  }

  /** Appends a failure entry to `failures`. */
  _recordFailure(failure) {
    this.failures.push(failure);
  }

  /** @returns {number} Sum of `originalTokens` over all turns. */
  get totalOriginalTokens() {
    return this.history.reduce((sum, turn) => sum + (turn.originalTokens ?? 0), 0);
  }

  /** @returns {number} Sum of `compressedTokens` over all turns. */
  get totalCompressedTokens() {
    return this.history.reduce((sum, turn) => sum + (turn.compressedTokens ?? 0), 0);
  }

  /** @returns {number} Sum of `savedTokens` over all turns. */
  get totalTokensSaved() {
    return this.history.reduce((sum, turn) => sum + (turn.savedTokens ?? 0), 0);
  }

  /** @returns {number} Sum of `originalCharacters` over all turns. */
  get totalOriginalCharacters() {
    return this.history.reduce((sum, turn) => sum + (turn.originalCharacters ?? 0), 0);
  }

  /** @returns {number} Sum of `compressedCharacters` over all turns. */
  get totalCompressedCharacters() {
    return this.history.reduce((sum, turn) => sum + (turn.compressedCharacters ?? 0), 0);
  }

  /** @returns {number} Sum of `savedCharacters` over all turns. */
  get totalCharactersSaved() {
    return this.history.reduce((sum, turn) => sum + (turn.savedCharacters ?? 0), 0);
  }

  /** @returns {number} Number of recorded turns. */
  get calls() {
    return this.history.length;
  }

  /** @returns {number} Number of recorded failures. */
  get telemetryFailures() {
    return this.failures.length;
  }

  /**
   * @returns {number} Number of turns whose `techniques` list contains
   * "compression-error" or "fallback-original".
   */
  get failOpenEvents() {
    return this.history.filter((turn) => {
      // A turn without a techniques list simply does not count.
      const techniques = turn.techniques;
      if (!Array.isArray(techniques)) {
        return false;
      }
      return techniques.includes("compression-error") || techniques.includes("fallback-original");
    }).length;
  }

  /** @returns {number} Saved tokens divided by original tokens; 0 when no tokens were recorded. */
  get tokenSavingsRatio() {
    const original = this.totalOriginalTokens;
    return original > 0 ? this.totalTokensSaved / original : 0;
  }

  /** @returns {number} Saved characters divided by original characters; 0 when none were recorded. */
  get savingsRatio() {
    const original = this.totalOriginalCharacters;
    return original > 0 ? this.totalCharactersSaved / original : 0;
  }
};
|
|
33
88
|
function createOpenAIAdapter(init) {
|
|
34
89
|
const { client, usageTap } = init;
|
|
35
90
|
return {
|
|
@@ -186,8 +241,24 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
186
241
|
}
|
|
187
242
|
const defaultContext = options.defaultContext;
|
|
188
243
|
const applyVendorHints = options.applyVendorHints !== false;
|
|
189
|
-
const
|
|
190
|
-
const
|
|
244
|
+
const defaultPromptCompression = normalizePromptCompressionOptions(options.promptCompression);
|
|
245
|
+
const promptCompressionStats = new OpenAIPromptCompressionStats();
|
|
246
|
+
const proxiedChat = client.chat ? createChatProxy(
|
|
247
|
+
client.chat,
|
|
248
|
+
usageTap,
|
|
249
|
+
defaultContext,
|
|
250
|
+
applyVendorHints,
|
|
251
|
+
defaultPromptCompression,
|
|
252
|
+
promptCompressionStats
|
|
253
|
+
) : void 0;
|
|
254
|
+
const proxiedResponses = typeof client.responses !== "undefined" ? createResponsesProxy(
|
|
255
|
+
client.responses,
|
|
256
|
+
usageTap,
|
|
257
|
+
defaultContext,
|
|
258
|
+
applyVendorHints,
|
|
259
|
+
defaultPromptCompression,
|
|
260
|
+
promptCompressionStats
|
|
261
|
+
) : void 0;
|
|
191
262
|
const handler = {
|
|
192
263
|
get(target, prop, receiver) {
|
|
193
264
|
if (prop === "chat" && proxiedChat) {
|
|
@@ -202,6 +273,9 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
202
273
|
if (prop === "pipeToResponse") {
|
|
203
274
|
return pipeToResponse;
|
|
204
275
|
}
|
|
276
|
+
if (prop === "promptCompression") {
|
|
277
|
+
return promptCompressionStats;
|
|
278
|
+
}
|
|
205
279
|
if (prop === "unwrap") {
|
|
206
280
|
return () => target;
|
|
207
281
|
}
|
|
@@ -232,6 +306,9 @@ function streamOpenAIRoute(usageTap, openai, options) {
|
|
|
232
306
|
if (requestConfig.withUsage) {
|
|
233
307
|
callOptions.withUsage = requestConfig.withUsage;
|
|
234
308
|
}
|
|
309
|
+
if (requestConfig.promptCompression !== void 0) {
|
|
310
|
+
callOptions.promptCompression = requestConfig.promptCompression;
|
|
311
|
+
}
|
|
235
312
|
const stream = await wrappedClient.chat.completions.create(
|
|
236
313
|
mergedParams,
|
|
237
314
|
Object.keys(callOptions).length ? callOptions : void 0
|
|
@@ -258,12 +335,14 @@ function streamOpenAIRoute(usageTap, openai, options) {
|
|
|
258
335
|
});
|
|
259
336
|
};
|
|
260
337
|
}
|
|
261
|
-
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
338
|
+
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
262
339
|
const completions = createChatCompletionsProxy(
|
|
263
340
|
resource.completions,
|
|
264
341
|
usageTap,
|
|
265
342
|
defaultContext,
|
|
266
|
-
applyVendorHints
|
|
343
|
+
applyVendorHints,
|
|
344
|
+
defaultPromptCompression,
|
|
345
|
+
promptCompressionStats
|
|
267
346
|
);
|
|
268
347
|
const handler = {
|
|
269
348
|
get(target, prop, receiver) {
|
|
@@ -275,7 +354,7 @@ function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
|
275
354
|
};
|
|
276
355
|
return new Proxy(resource, handler);
|
|
277
356
|
}
|
|
278
|
-
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
357
|
+
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
279
358
|
if (!resource || typeof resource !== "object") {
|
|
280
359
|
return void 0;
|
|
281
360
|
}
|
|
@@ -284,11 +363,26 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
284
363
|
}
|
|
285
364
|
const originalCreate = resource.create.bind(resource);
|
|
286
365
|
const wrappedCreate = (params, options) => {
|
|
287
|
-
const {
|
|
366
|
+
const {
|
|
367
|
+
requestOptions,
|
|
368
|
+
usageContext,
|
|
369
|
+
withUsage,
|
|
370
|
+
promptCompression
|
|
371
|
+
} = splitUsageOptions(options);
|
|
288
372
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
289
373
|
const wantsStream = isStreamingRequest(params);
|
|
290
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
291
|
-
const
|
|
374
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
375
|
+
const hintedParams = applyVendorHints ? applyResponsesVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
376
|
+
const finalParams = await compressResponsesParamsForCall({
|
|
377
|
+
params: hintedParams,
|
|
378
|
+
usageTap,
|
|
379
|
+
ctx,
|
|
380
|
+
defaultPromptCompression,
|
|
381
|
+
callPromptCompression: promptCompression,
|
|
382
|
+
stats: promptCompressionStats,
|
|
383
|
+
withUsage,
|
|
384
|
+
operation: "responses.create"
|
|
385
|
+
});
|
|
292
386
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
293
387
|
if (wantsStream) {
|
|
294
388
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -322,16 +416,31 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
322
416
|
};
|
|
323
417
|
return new Proxy(resource, handler);
|
|
324
418
|
}
|
|
325
|
-
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
419
|
+
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
326
420
|
const originalCreate = resource.create.bind(resource);
|
|
327
421
|
const streamCandidate = resource.stream;
|
|
328
422
|
const originalStream = typeof streamCandidate === "function" ? streamCandidate.bind(resource) : void 0;
|
|
329
423
|
const wrappedCreate = (params, options) => {
|
|
330
|
-
const {
|
|
424
|
+
const {
|
|
425
|
+
requestOptions,
|
|
426
|
+
usageContext,
|
|
427
|
+
withUsage,
|
|
428
|
+
promptCompression
|
|
429
|
+
} = splitUsageOptions(options);
|
|
331
430
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
332
431
|
const wantsStream = isStreamingRequest(params);
|
|
333
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
334
|
-
const
|
|
432
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
433
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
434
|
+
const finalParams = await compressChatParamsForCall({
|
|
435
|
+
params: hintedParams,
|
|
436
|
+
usageTap,
|
|
437
|
+
ctx,
|
|
438
|
+
defaultPromptCompression,
|
|
439
|
+
callPromptCompression: promptCompression,
|
|
440
|
+
stats: promptCompressionStats,
|
|
441
|
+
withUsage,
|
|
442
|
+
operation: "chat.completions.create"
|
|
443
|
+
});
|
|
335
444
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
336
445
|
if (wantsStream) {
|
|
337
446
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -356,10 +465,25 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
356
465
|
}, withUsage);
|
|
357
466
|
};
|
|
358
467
|
const wrappedStream = originalStream ? (params, options) => {
|
|
359
|
-
const {
|
|
468
|
+
const {
|
|
469
|
+
requestOptions,
|
|
470
|
+
usageContext,
|
|
471
|
+
withUsage,
|
|
472
|
+
promptCompression
|
|
473
|
+
} = splitUsageOptions(options);
|
|
360
474
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
361
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
362
|
-
const
|
|
475
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
476
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
477
|
+
const finalParams = await compressChatParamsForCall({
|
|
478
|
+
params: hintedParams,
|
|
479
|
+
usageTap,
|
|
480
|
+
ctx,
|
|
481
|
+
defaultPromptCompression,
|
|
482
|
+
callPromptCompression: promptCompression,
|
|
483
|
+
stats: promptCompressionStats,
|
|
484
|
+
withUsage,
|
|
485
|
+
operation: "chat.completions.stream"
|
|
486
|
+
});
|
|
363
487
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
364
488
|
const apiPromise = originalStream(finalParams, request);
|
|
365
489
|
const wrappedPromise = transformApiPromise(apiPromise, (rawStream) => {
|
|
@@ -388,16 +512,475 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
388
512
|
};
|
|
389
513
|
return new Proxy(resource, handler);
|
|
390
514
|
}
|
|
515
|
+
/**
 * Applies prompt compression to the params of one chat-completions call.
 *
 * Resolves the effective compression options from the wrapper defaults and
 * the per-call override. When compression is off the params are returned
 * untouched; otherwise the params are compressed, the outcome is recorded,
 * and the compressed params are returned.
 *
 * @param {object} args - `params`, `usageTap`, `ctx`, `defaultPromptCompression`,
 *   `callPromptCompression`, `stats`, `withUsage` and `operation`.
 * @returns {Promise<*>} The params to send to the provider.
 */
async function compressChatParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }

  const outcome = await compressChatParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation
  });
  return outcome.params;
}
|
|
540
|
+
/**
 * Applies prompt compression to the params of one Responses API call.
 *
 * Resolves the effective compression options from the wrapper defaults and
 * the per-call override. When compression is off the params are returned
 * untouched; otherwise the params are compressed, the outcome is recorded,
 * and the compressed params are returned.
 *
 * @param {object} args - `params`, `usageTap`, `ctx`, `defaultPromptCompression`,
 *   `callPromptCompression`, `stats`, `withUsage` and `operation`.
 * @returns {Promise<*>} The params to send to the provider.
 */
async function compressResponsesParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }

  const outcome = await compressResponsesParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation
  });
  return outcome.params;
}
|
|
565
|
+
/**
 * Records the result of compressing one call, locally and remotely.
 *
 * Builds a telemetry summary from the outcome's segments (nothing happens
 * when there is none), stores a turn in `args.stats`, then reports the
 * summary through `usageTap.recordPromptCompression`. A reporting error is
 * stored as a "telemetry" failure and is rethrown only when
 * `args.compression.failOpen` is exactly `false`.
 *
 * @param {object} args - `outcome`, `compression`, `usageTap`, `ctx`, `stats`,
 *   `withUsage` and `operation`.
 * @returns {Promise<void>}
 */
async function recordCompressionOutcome(args) {
  const { outcome, ctx, stats, operation } = args;
  const summary = buildPromptCompressionTelemetry(outcome.segments);
  if (!summary) {
    return;
  }

  const callId = ctx.begin.data.callId;
  stats._record({
    ...summary,
    callId,
    operation,
    messagesCompressed: outcome.segments.length,
    timestamp: Date.now()
  });

  try {
    const payload = { callId, promptCompression: summary };
    const requestOptions = promptCompressionRequestOptions(args.withUsage, ctx.begin.correlationId);
    await args.usageTap.recordPromptCompression(payload, requestOptions);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    stats._recordFailure({
      callId,
      operation,
      stage: "telemetry",
      message,
      timestamp: Date.now()
    });
    const mustPropagate = args.compression.failOpen === false;
    if (mustPropagate) {
      throw error;
    }
  }
}
|
|
599
|
+
/**
 * Compresses every message of a chat-completions params object.
 *
 * Non-object params, or params without a `messages` array, are handed back
 * unchanged with no segments. Otherwise a clone of the params is returned
 * whose `messages` are the per-message results, together with all segments
 * those messages produced.
 *
 * @param {*} params - Chat-completions request params.
 * @param {object} usageTap - Client passed through to the per-message compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the per-message compressor.
 * @returns {Promise<{params: *, segments: Array}>}
 */
async function compressChatParams(params, usageTap, compression, signal) {
  const untouched = { params, segments: [] };
  if (typeof params !== "object" || !params) {
    return untouched;
  }

  const copy = cloneRecord(params);
  if (!Array.isArray(copy.messages)) {
    return untouched;
  }

  // All messages are compressed concurrently; order is preserved by Promise.all.
  const pending = copy.messages.map(
    (message) => compressOpenAIMessage(message, usageTap, compression, signal)
  );
  const results = await Promise.all(pending);
  const messages = results.map((entry) => entry.value);
  const segments = results.flatMap((entry) => entry.segments);
  return { params: { ...copy, messages }, segments };
}
|
|
621
|
+
/**
 * Compresses the text-bearing fields of a Responses API params object.
 *
 * Non-object params are handed back unchanged. Otherwise a clone is made and,
 * in order: a string `instructions` is compressed with role "system"; a
 * string `input` is compressed with role "user", or each element of an array
 * `input` is compressed as an input item.
 *
 * @param {*} params - Responses API request params.
 * @param {object} usageTap - Client passed through to the text compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the text compressor.
 * @returns {Promise<{params: *, segments: Array}>}
 */
async function compressResponsesParams(params, usageTap, compression, signal) {
  if (typeof params !== "object" || !params) {
    return { params, segments: [] };
  }

  const squeeze = (text, role) => compressTextForRole(text, role, usageTap, compression, signal);
  const draft = cloneRecord(params);
  const collected = [];

  if (typeof draft.instructions === "string") {
    const outcome = await squeeze(draft.instructions, "system");
    if (outcome) {
      draft.instructions = outcome.text;
      collected.push(outcome.segment);
    }
  }

  const input = draft.input;
  if (typeof input === "string") {
    const outcome = await squeeze(input, "user");
    if (outcome) {
      draft.input = outcome.text;
      collected.push(outcome.segment);
    }
  } else if (Array.isArray(input)) {
    const items = await Promise.all(
      input.map((item) => compressResponsesInputItem(item, usageTap, compression, signal))
    );
    draft.input = items.map((entry) => entry.value);
    collected.push(...items.flatMap((entry) => entry.segments));
  }

  return { params: draft, segments: collected };
}
|
|
666
|
+
/**
 * Compresses the content of a single chat message.
 *
 * The message is returned as-is when it is not an object record, when its
 * role does not map to a known compression role, or when its content is
 * neither a string nor an array. String content is compressed directly;
 * array content is compressed block by block, and the message is only
 * rebuilt when at least one block produced a segment.
 *
 * @param {*} message - One entry of `params.messages`.
 * @param {object} usageTap - Client passed through to the text compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the text compressor.
 * @returns {Promise<{value: *, segments: Array}>}
 */
async function compressOpenAIMessage(message, usageTap, compression, signal) {
  if (!isObjectRecord(message)) {
    return { value: message, segments: [] };
  }
  const role = mapOpenAIRole(message.role);
  if (!role) {
    return { value: message, segments: [] };
  }

  const { content } = message;

  if (typeof content === "string") {
    const outcome = await compressTextForRole(content, role, usageTap, compression, signal);
    return outcome
      ? { value: { ...message, content: outcome.text }, segments: [outcome.segment] }
      : { value: message, segments: [] };
  }

  if (!Array.isArray(content)) {
    return { value: message, segments: [] };
  }

  const blocks = await Promise.all(
    content.map((block) => compressOpenAITextBlock(block, role, usageTap, compression, signal))
  );
  const segments = [];
  for (const entry of blocks) {
    if (entry.segment) {
      segments.push(entry.segment);
    }
  }
  if (!segments.length) {
    return { value: message, segments };
  }
  return {
    value: { ...message, content: blocks.map((entry) => entry.value) },
    segments
  };
}
|
|
707
|
+
/**
 * Compresses one content block of a chat message.
 *
 * Only object records with `type === "text"` and a string `text` are
 * considered; anything else, or a block the compressor declines, is returned
 * unchanged without a segment.
 *
 * @param {*} block - One entry of a message's content array.
 * @param {string} role - Compression role of the owning message.
 * @param {object} usageTap - Client passed through to the text compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the text compressor.
 * @returns {Promise<{value: *, segment?: object}>}
 */
async function compressOpenAITextBlock(block, role, usageTap, compression, signal) {
  const isTextBlock = isObjectRecord(block) && block.type === "text" && typeof block.text === "string";
  if (!isTextBlock) {
    return { value: block };
  }

  const outcome = await compressTextForRole(block.text, role, usageTap, compression, signal);
  if (outcome) {
    return {
      value: { ...block, text: outcome.text },
      segment: outcome.segment
    };
  }
  return { value: block };
}
|
|
726
|
+
/**
 * Compresses one element of a Responses API `input` array.
 *
 * The role comes from the item's `type` when it denotes a tool output,
 * otherwise from the item's `role`. With a role, string `content` is
 * compressed directly and array `content` block by block (the array is only
 * replaced when a block produced a segment). Independently, a tool-output
 * item with a string `output` has that output compressed.
 *
 * @param {*} item - One entry of `params.input`.
 * @param {object} usageTap - Client passed through to the text compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the text compressor.
 * @returns {Promise<{value: *, segments: Array}>}
 */
async function compressResponsesInputItem(item, usageTap, compression, signal) {
  if (!isObjectRecord(item)) {
    return { value: item, segments: [] };
  }

  const toolRole = mapResponsesItemTypeToRole(item.type);
  const role = toolRole ?? mapOpenAIRole(item.role);
  const squeeze = (text, asRole) => compressTextForRole(text, asRole, usageTap, compression, signal);
  const segments = [];
  let updated = item;

  if (role && typeof item.content === "string") {
    const outcome = await squeeze(item.content, role);
    if (outcome) {
      updated = { ...updated, content: outcome.text };
      segments.push(outcome.segment);
    }
  } else if (role && Array.isArray(item.content)) {
    const blocks = await Promise.all(
      item.content.map(
        (block) => compressResponsesContentBlock(block, role, usageTap, compression, signal)
      )
    );
    for (const entry of blocks) {
      if (entry.segment) {
        segments.push(entry.segment);
      }
    }
    if (segments.length) {
      updated = { ...updated, content: blocks.map((entry) => entry.value) };
    }
  }

  if (toolRole && typeof item.output === "string") {
    const outcome = await squeeze(item.output, toolRole);
    if (outcome) {
      updated = { ...updated, output: outcome.text };
      segments.push(outcome.segment);
    }
  }

  return { value: updated, segments };
}
|
|
779
|
+
/**
 * Compresses one content block of a Responses API input item.
 *
 * Tries, in order: the `text` of an "input_text"/"text" block, then the
 * `output` of a block owned by a "tool" role. The first field the compressor
 * accepts is replaced; if none is accepted the block is returned unchanged
 * without a segment.
 *
 * @param {*} block - One entry of an item's content array.
 * @param {string} role - Compression role of the owning item.
 * @param {object} usageTap - Client passed through to the text compressor.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Passed through to the text compressor.
 * @returns {Promise<{value: *, segment?: object}>}
 */
async function compressResponsesContentBlock(block, role, usageTap, compression, signal) {
  if (!isObjectRecord(block)) {
    return { value: block };
  }

  const fields = [];
  const isTextType = block.type === "input_text" || block.type === "text";
  if (isTextType && typeof block.text === "string") {
    fields.push("text");
  }
  if (role === "tool" && typeof block.output === "string") {
    fields.push("output");
  }

  for (const field of fields) {
    const outcome = await compressTextForRole(block[field], role, usageTap, compression, signal);
    if (outcome) {
      return {
        value: { ...block, [field]: outcome.text },
        segment: outcome.segment
      };
    }
  }
  return { value: block };
}
|
|
815
|
+
/**
 * Compresses one piece of text on behalf of a given role.
 *
 * Returns `undefined` (meaning "leave the text alone") when the text is
 * blank, when compression is not enabled for the role, or when the text is
 * shorter than the role's `minTokens`. Otherwise the text is sent to
 * `usageTap.compressPromptInput` with the role's resolved options.
 *
 * @param {string} text - Text to compress.
 * @param {string} role - Compression role the text belongs to.
 * @param {object} usageTap - Client exposing `compressPromptInput`.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression request.
 * @returns {Promise<{text: string, segment: {role: string, result: object}}|undefined>}
 */
async function compressTextForRole(text, role, usageTap, compression, signal) {
  if (!text.trim()) {
    return void 0;
  }
  const roleOptions = resolveRoleCompressionOptions(compression, role);
  if (!roleOptions) {
    return void 0;
  }
  const { minTokens } = roleOptions;
  if (typeof minTokens === "number" && estimatePromptTokens(text) < minTokens) {
    return void 0;
  }

  const result = await usageTap.compressPromptInput(text, {
    provider: roleOptions.provider,
    failOpen: roleOptions.failOpen,
    tokenCompanyModel: roleOptions.tokenCompanyModel,
    tokenCompanyAggressiveness: roleOptions.tokenCompanyAggressiveness,
    tokenCompanyAppId: roleOptions.tokenCompanyAppId,
    signal
  });

  const rawOutput = result.compressedInput;
  let compressedText;
  if (typeof rawOutput === "string") {
    compressedText = rawOutput;
  } else if (rawOutput === null || rawOutput === void 0) {
    // String(undefined) would put the literal word "undefined" into the
    // prompt; keep the caller's original text when no output came back.
    compressedText = text;
  } else {
    compressedText = String(rawOutput);
  }

  return {
    text: compressedText,
    segment: { role, result: { ...result, compressedInput: compressedText } }
  };
}
|
|
841
|
+
/**
 * Normalizes a user-supplied prompt-compression setting.
 *
 * @param {*} options - `true`, a falsy value, or an options object.
 * @returns {object|undefined} `{}` for `true`; `undefined` for a falsy value
 *   or an object with `enabled === false`; otherwise the object itself.
 */
function normalizePromptCompressionOptions(options) {
  if (options === true) {
    return {};
  }
  const disabled = !options || options.enabled === false;
  return disabled ? void 0 : options;
}
|
|
853
|
+
/**
 * Combines wrapper-level compression defaults with a per-call override.
 *
 * - `false` disables compression for the call.
 * - `undefined` or `null` means "not specified": the defaults apply as-is.
 * - `true` enables compression with the defaults (or `{}` when there are none).
 * - An object is shallow-merged over the defaults; `roles` comes from the
 *   override when it sets one, otherwise from the defaults. The merged
 *   object is then normalized, so `enabled: false` still disables.
 *
 * @param {object|undefined} defaults - Normalized wrapper-level options.
 * @param {boolean|object|null|undefined} override - Per-call setting.
 * @returns {object|undefined} Effective options, or `undefined` when disabled.
 */
function resolveEffectivePromptCompressionOptions(defaults, override) {
  if (override === false) {
    return void 0;
  }
  // A null override can be forwarded by callers that only check for
  // `undefined`; reading `.roles` on it would throw, so treat it as unset.
  if (override === void 0 || override === null) {
    return defaults;
  }
  if (override === true) {
    return defaults ?? {};
  }
  const merged = {
    ...(defaults ?? {}),
    ...override,
    roles: override.roles ?? defaults?.roles
  };
  return normalizePromptCompressionOptions(merged);
}
|
|
870
|
+
/**
 * Resolves the compression options that apply to one role.
 *
 * When `compression.roles` is set, only roles listed there are compressed.
 * When it is not set, every role except "assistant" is compressed. A role
 * setting of `false`, or an object with `enabled === false`, disables the
 * role. Role-level `provider`, `minTokens` and `tokenCompanyAggressiveness`
 * take precedence over the top-level values.
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Role to resolve.
 * @returns {object|undefined} Options for the role, or `undefined` when the
 *   role must not be compressed.
 */
function resolveRoleCompressionOptions(compression, role) {
  const roleMap = compression.roles;
  const setting = roleMap?.[role];

  if (roleMap === void 0) {
    // No explicit role list: assistant text is left alone.
    if (role === "assistant") {
      return void 0;
    }
  } else if (setting === void 0) {
    // Explicit role list that does not mention this role.
    return void 0;
  }

  if (setting === false) {
    return void 0;
  }
  const perRole = typeof setting === "object" ? setting : void 0;
  if (perRole?.enabled === false) {
    return void 0;
  }

  const provider = perRole?.provider ?? compression.provider;
  const minTokens = perRole?.minTokens ?? compression.minTokens;
  const tokenCompanyAggressiveness =
    perRole?.tokenCompanyAggressiveness ?? resolveTokenCompanyAggressiveness(compression, role);
  return {
    provider,
    minTokens,
    failOpen: compression.failOpen,
    tokenCompanyModel: compression.tokenCompanyModel,
    tokenCompanyAggressiveness,
    tokenCompanyAppId: compression.tokenCompanyAppId
  };
}
|
|
895
|
+
/**
 * Reads the top-level `tokenCompanyAggressiveness` setting for a role.
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Role to look up.
 * @returns {*} The setting itself when it is a number; otherwise the entry
 *   stored under `role` (or `undefined` when the setting is absent).
 */
function resolveTokenCompanyAggressiveness(compression, role) {
  const { tokenCompanyAggressiveness: level } = compression;
  return typeof level === "number" ? level : level?.[role];
}
|
|
901
|
+
/**
 * Aggregates per-segment compression results into one telemetry record.
 *
 * Character and token counts are summed over all segments (a count missing
 * from a segment contributes 0, so one incomplete result cannot turn a total
 * into NaN). Savings are clamped at 0. `techniques` lists "openai-wrapper",
 * one `role:<role>` tag per distinct role, every technique reported by the
 * segments, and "mixed-providers" when more than one provider was involved.
 * `provider` is taken from the first segment.
 *
 * @param {Array<{role: string, result: object}>} segments - Compressed segments.
 * @returns {object|undefined} Telemetry record, or `undefined` when there are
 *   no segments.
 */
function buildPromptCompressionTelemetry(segments) {
  if (!segments.length) {
    return void 0;
  }

  const total = (field) => segments.reduce((sum, segment) => sum + (segment.result[field] ?? 0), 0);
  const originalCharacters = total("originalCharacters");
  const compressedCharacters = total("compressedCharacters");
  const originalTokens = total("originalTokens");
  const compressedTokens = total("compressedTokens");
  const savedCharacters = Math.max(0, originalCharacters - compressedCharacters);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);

  const providers = dedupeStrings(segments.map((segment) => segment.result.provider));
  const roles = dedupeStrings(segments.map((segment) => `role:${segment.role}`));
  const techniques = dedupeStrings([
    "openai-wrapper",
    ...roles,
    // A result without a techniques list contributes nothing.
    ...segments.flatMap((segment) => segment.result.techniques ?? []),
    ...(providers.length > 1 ? ["mixed-providers"] : [])
  ]);

  return {
    provider: segments[0].result.provider ?? "heuristic",
    originalCharacters,
    compressedCharacters,
    savedCharacters,
    originalTokens,
    compressedTokens,
    savedTokens,
    tokenSavingsRatio: originalTokens > 0 ? savedTokens / originalTokens : 0,
    savingsRatio: originalCharacters > 0 ? savedCharacters / originalCharacters : 0,
    techniques
  };
}
|
|
944
|
+
/**
 * Builds the request options used when reporting compression telemetry.
 *
 * @param {object|undefined} withUsage - Per-call usage options; its `signal`,
 *   `headers` and `retries` are copied when present.
 * @param {*} correlationId - Correlation id attached to the request.
 * @returns {{signal: *, headers: *, retries: *, correlationId: *}}
 */
function promptCompressionRequestOptions(withUsage, correlationId) {
  const { signal, headers, retries } = withUsage ?? {};
  return { signal, headers, retries, correlationId };
}
|
|
952
|
+
/**
 * Maps an OpenAI message role onto a compression role.
 *
 * @param {*} role - Role string taken from a message or input item.
 * @returns {"system"|"user"|"tool"|"assistant"|undefined} "system" for
 *   system/developer, "tool" for tool/function, "user" and "assistant" as-is,
 *   and `undefined` for anything else.
 */
function mapOpenAIRole(role) {
  switch (role) {
    case "system":
    case "developer":
      return "system";
    case "user":
      return "user";
    case "tool":
    case "function":
      return "tool";
    case "assistant":
      return "assistant";
    default:
      return void 0;
  }
}
|
|
967
|
+
/**
 * Maps a Responses API input-item type onto a compression role.
 *
 * @param {*} type - The item's `type` field.
 * @returns {"tool"|undefined} "tool" for "function_call_output",
 *   "tool_result" and "computer_call_output"; `undefined` otherwise.
 */
function mapResponsesItemTypeToRole(type) {
  switch (type) {
    case "function_call_output":
    case "tool_result":
    case "computer_call_output":
      return "tool";
    default:
      return void 0;
  }
}
|
|
391
973
|
/**
 * Separates UsageTap-specific keys from the options passed to a wrapped call.
 *
 * `usageTap`, `withUsage` and `promptCompression` are pulled out; whatever
 * remains is cloned into `requestOptions` (left `undefined` when nothing
 * remains).
 *
 * @param {*} options - Second argument of a wrapped `create`/`stream` call.
 * @returns {object} `{}` for a non-object; otherwise
 *   `{ requestOptions, usageContext, withUsage, promptCompression }`.
 */
function splitUsageOptions(options) {
  if (typeof options !== "object" || !options) {
    return {};
  }
  const {
    usageTap: usageContext,
    withUsage,
    promptCompression,
    ...passthrough
  } = options;

  let requestOptions;
  if (Object.keys(passthrough).length) {
    requestOptions = cloneRequestOptions(passthrough);
  }
  return { requestOptions, usageContext, withUsage, promptCompression };
}
|
|
403
986
|
function resolveBeginRequest(defaults, override) {
|
|
@@ -761,6 +1344,7 @@ function isIteratorResult(value) {
|
|
|
761
1344
|
return isObjectRecord(value) && "done" in value;
|
|
762
1345
|
}
|
|
763
1346
|
|
|
1347
|
+
exports.OpenAIPromptCompressionStats = OpenAIPromptCompressionStats;
|
|
764
1348
|
exports.createOpenAIAdapter = createOpenAIAdapter;
|
|
765
1349
|
exports.pipeToResponse = pipeToResponse;
|
|
766
1350
|
exports.streamOpenAIRoute = streamOpenAIRoute;
|