@usagetap/sdk 0.10.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -22
- package/dist/adapters/anthropic.cjs +943 -0
- package/dist/adapters/anthropic.cjs.map +1 -0
- package/dist/adapters/anthropic.d.cts +81 -0
- package/dist/adapters/anthropic.d.ts +81 -0
- package/dist/adapters/anthropic.mjs +940 -0
- package/dist/adapters/anthropic.mjs.map +1 -0
- package/dist/adapters/openai.cjs +601 -17
- package/dist/adapters/openai.cjs.map +1 -1
- package/dist/adapters/openai.d.cts +57 -2
- package/dist/adapters/openai.d.ts +57 -2
- package/dist/adapters/openai.mjs +601 -18
- package/dist/adapters/openai.mjs.map +1 -1
- package/dist/adapters/openrouter.cjs.map +1 -1
- package/dist/adapters/openrouter.d.cts +1 -1
- package/dist/adapters/openrouter.d.ts +1 -1
- package/dist/adapters/openrouter.mjs.map +1 -1
- package/dist/anthropic/index.cjs +943 -0
- package/dist/anthropic/index.cjs.map +1 -0
- package/dist/anthropic/index.d.cts +2 -0
- package/dist/anthropic/index.d.ts +2 -0
- package/dist/anthropic/index.mjs +940 -0
- package/dist/anthropic/index.mjs.map +1 -0
- package/dist/{client-DEbk0Q2l.d.cts → client-BA-QlnRq.d.cts} +95 -1
- package/dist/{client-DEbk0Q2l.d.ts → client-BA-QlnRq.d.ts} +95 -1
- package/dist/express/index.cjs +597 -17
- package/dist/express/index.cjs.map +1 -1
- package/dist/express/index.d.cts +1 -1
- package/dist/express/index.d.ts +1 -1
- package/dist/express/index.mjs +597 -17
- package/dist/express/index.mjs.map +1 -1
- package/dist/index.cjs +586 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +581 -2
- package/dist/index.mjs.map +1 -1
- package/dist/openai/index.cjs +601 -17
- package/dist/openai/index.cjs.map +1 -1
- package/dist/openai/index.d.cts +2 -2
- package/dist/openai/index.d.ts +2 -2
- package/dist/openai/index.mjs +601 -18
- package/dist/openai/index.mjs.map +1 -1
- package/package.json +22 -2
package/dist/express/index.mjs
CHANGED
|
@@ -27,7 +27,62 @@ var UsageTapError = class extends Error {
|
|
|
27
27
|
}
|
|
28
28
|
};
|
|
29
29
|
|
|
30
|
+
// src/prompt-compression.ts
|
|
31
|
+
function estimatePromptTokens(input) {
|
|
32
|
+
const text = typeof input === "string" ? input : stableStringifyInput(input);
|
|
33
|
+
return text.match(/[\p{L}\p{N}]+|[^\s]/gu)?.length ?? 0;
|
|
34
|
+
}
|
|
35
|
+
function stableStringifyInput(input) {
|
|
36
|
+
if (typeof input === "string") return input;
|
|
37
|
+
return JSON.stringify(input) ?? String(input);
|
|
38
|
+
}
|
|
39
|
+
|
|
30
40
|
// src/adapters/openai.ts
|
|
41
|
+
var OpenAIPromptCompressionStats = class {
|
|
42
|
+
history = [];
|
|
43
|
+
failures = [];
|
|
44
|
+
_record(turn) {
|
|
45
|
+
this.history.push(turn);
|
|
46
|
+
}
|
|
47
|
+
_recordFailure(failure) {
|
|
48
|
+
this.failures.push(failure);
|
|
49
|
+
}
|
|
50
|
+
get totalOriginalTokens() {
|
|
51
|
+
return this.history.reduce((sum, turn) => sum + (turn.originalTokens ?? 0), 0);
|
|
52
|
+
}
|
|
53
|
+
get totalCompressedTokens() {
|
|
54
|
+
return this.history.reduce((sum, turn) => sum + (turn.compressedTokens ?? 0), 0);
|
|
55
|
+
}
|
|
56
|
+
get totalTokensSaved() {
|
|
57
|
+
return this.history.reduce((sum, turn) => sum + (turn.savedTokens ?? 0), 0);
|
|
58
|
+
}
|
|
59
|
+
get totalOriginalCharacters() {
|
|
60
|
+
return this.history.reduce((sum, turn) => sum + turn.originalCharacters, 0);
|
|
61
|
+
}
|
|
62
|
+
get totalCompressedCharacters() {
|
|
63
|
+
return this.history.reduce((sum, turn) => sum + turn.compressedCharacters, 0);
|
|
64
|
+
}
|
|
65
|
+
get totalCharactersSaved() {
|
|
66
|
+
return this.history.reduce((sum, turn) => sum + turn.savedCharacters, 0);
|
|
67
|
+
}
|
|
68
|
+
get calls() {
|
|
69
|
+
return this.history.length;
|
|
70
|
+
}
|
|
71
|
+
get telemetryFailures() {
|
|
72
|
+
return this.failures.length;
|
|
73
|
+
}
|
|
74
|
+
get failOpenEvents() {
|
|
75
|
+
return this.history.filter(
|
|
76
|
+
(turn) => turn.techniques.includes("compression-error") || turn.techniques.includes("fallback-original")
|
|
77
|
+
).length;
|
|
78
|
+
}
|
|
79
|
+
get tokenSavingsRatio() {
|
|
80
|
+
return this.totalOriginalTokens > 0 ? this.totalTokensSaved / this.totalOriginalTokens : 0;
|
|
81
|
+
}
|
|
82
|
+
get savingsRatio() {
|
|
83
|
+
return this.totalOriginalCharacters > 0 ? this.totalCharactersSaved / this.totalOriginalCharacters : 0;
|
|
84
|
+
}
|
|
85
|
+
};
|
|
31
86
|
function toNextResponse(stream, options = {}) {
|
|
32
87
|
const mode = options.mode ?? "text";
|
|
33
88
|
const headers = new Headers(options.headers ?? {});
|
|
@@ -125,8 +180,24 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
125
180
|
}
|
|
126
181
|
const defaultContext = options.defaultContext;
|
|
127
182
|
const applyVendorHints = options.applyVendorHints !== false;
|
|
128
|
-
const
|
|
129
|
-
const
|
|
183
|
+
const defaultPromptCompression = normalizePromptCompressionOptions(options.promptCompression);
|
|
184
|
+
const promptCompressionStats = new OpenAIPromptCompressionStats();
|
|
185
|
+
const proxiedChat = client.chat ? createChatProxy(
|
|
186
|
+
client.chat,
|
|
187
|
+
usageTap,
|
|
188
|
+
defaultContext,
|
|
189
|
+
applyVendorHints,
|
|
190
|
+
defaultPromptCompression,
|
|
191
|
+
promptCompressionStats
|
|
192
|
+
) : void 0;
|
|
193
|
+
const proxiedResponses = typeof client.responses !== "undefined" ? createResponsesProxy(
|
|
194
|
+
client.responses,
|
|
195
|
+
usageTap,
|
|
196
|
+
defaultContext,
|
|
197
|
+
applyVendorHints,
|
|
198
|
+
defaultPromptCompression,
|
|
199
|
+
promptCompressionStats
|
|
200
|
+
) : void 0;
|
|
130
201
|
const handler = {
|
|
131
202
|
get(target, prop, receiver) {
|
|
132
203
|
if (prop === "chat" && proxiedChat) {
|
|
@@ -141,6 +212,9 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
141
212
|
if (prop === "pipeToResponse") {
|
|
142
213
|
return pipeToResponse;
|
|
143
214
|
}
|
|
215
|
+
if (prop === "promptCompression") {
|
|
216
|
+
return promptCompressionStats;
|
|
217
|
+
}
|
|
144
218
|
if (prop === "unwrap") {
|
|
145
219
|
return () => target;
|
|
146
220
|
}
|
|
@@ -149,12 +223,14 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
149
223
|
};
|
|
150
224
|
return new Proxy(client, handler);
|
|
151
225
|
}
|
|
152
|
-
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
226
|
+
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
153
227
|
const completions = createChatCompletionsProxy(
|
|
154
228
|
resource.completions,
|
|
155
229
|
usageTap,
|
|
156
230
|
defaultContext,
|
|
157
|
-
applyVendorHints
|
|
231
|
+
applyVendorHints,
|
|
232
|
+
defaultPromptCompression,
|
|
233
|
+
promptCompressionStats
|
|
158
234
|
);
|
|
159
235
|
const handler = {
|
|
160
236
|
get(target, prop, receiver) {
|
|
@@ -166,7 +242,7 @@ function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
|
166
242
|
};
|
|
167
243
|
return new Proxy(resource, handler);
|
|
168
244
|
}
|
|
169
|
-
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
245
|
+
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
170
246
|
if (!resource || typeof resource !== "object") {
|
|
171
247
|
return void 0;
|
|
172
248
|
}
|
|
@@ -175,11 +251,26 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
175
251
|
}
|
|
176
252
|
const originalCreate = resource.create.bind(resource);
|
|
177
253
|
const wrappedCreate = (params, options) => {
|
|
178
|
-
const {
|
|
254
|
+
const {
|
|
255
|
+
requestOptions,
|
|
256
|
+
usageContext,
|
|
257
|
+
withUsage: withUsage2,
|
|
258
|
+
promptCompression
|
|
259
|
+
} = splitUsageOptions(options);
|
|
179
260
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
180
261
|
const wantsStream = isStreamingRequest(params);
|
|
181
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
182
|
-
const
|
|
262
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
263
|
+
const hintedParams = applyVendorHints ? applyResponsesVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
264
|
+
const finalParams = await compressResponsesParamsForCall({
|
|
265
|
+
params: hintedParams,
|
|
266
|
+
usageTap,
|
|
267
|
+
ctx,
|
|
268
|
+
defaultPromptCompression,
|
|
269
|
+
callPromptCompression: promptCompression,
|
|
270
|
+
stats: promptCompressionStats,
|
|
271
|
+
withUsage: withUsage2,
|
|
272
|
+
operation: "responses.create"
|
|
273
|
+
});
|
|
183
274
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
184
275
|
if (wantsStream) {
|
|
185
276
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -213,16 +304,31 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
213
304
|
};
|
|
214
305
|
return new Proxy(resource, handler);
|
|
215
306
|
}
|
|
216
|
-
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
307
|
+
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
217
308
|
const originalCreate = resource.create.bind(resource);
|
|
218
309
|
const streamCandidate = resource.stream;
|
|
219
310
|
const originalStream = typeof streamCandidate === "function" ? streamCandidate.bind(resource) : void 0;
|
|
220
311
|
const wrappedCreate = (params, options) => {
|
|
221
|
-
const {
|
|
312
|
+
const {
|
|
313
|
+
requestOptions,
|
|
314
|
+
usageContext,
|
|
315
|
+
withUsage: withUsage2,
|
|
316
|
+
promptCompression
|
|
317
|
+
} = splitUsageOptions(options);
|
|
222
318
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
223
319
|
const wantsStream = isStreamingRequest(params);
|
|
224
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
225
|
-
const
|
|
320
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
321
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
322
|
+
const finalParams = await compressChatParamsForCall({
|
|
323
|
+
params: hintedParams,
|
|
324
|
+
usageTap,
|
|
325
|
+
ctx,
|
|
326
|
+
defaultPromptCompression,
|
|
327
|
+
callPromptCompression: promptCompression,
|
|
328
|
+
stats: promptCompressionStats,
|
|
329
|
+
withUsage: withUsage2,
|
|
330
|
+
operation: "chat.completions.create"
|
|
331
|
+
});
|
|
226
332
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
227
333
|
if (wantsStream) {
|
|
228
334
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -247,10 +353,25 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
247
353
|
}, withUsage2);
|
|
248
354
|
};
|
|
249
355
|
const wrappedStream = originalStream ? (params, options) => {
|
|
250
|
-
const {
|
|
356
|
+
const {
|
|
357
|
+
requestOptions,
|
|
358
|
+
usageContext,
|
|
359
|
+
withUsage: withUsage2,
|
|
360
|
+
promptCompression
|
|
361
|
+
} = splitUsageOptions(options);
|
|
251
362
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
252
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
253
|
-
const
|
|
363
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
364
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
365
|
+
const finalParams = await compressChatParamsForCall({
|
|
366
|
+
params: hintedParams,
|
|
367
|
+
usageTap,
|
|
368
|
+
ctx,
|
|
369
|
+
defaultPromptCompression,
|
|
370
|
+
callPromptCompression: promptCompression,
|
|
371
|
+
stats: promptCompressionStats,
|
|
372
|
+
withUsage: withUsage2,
|
|
373
|
+
operation: "chat.completions.stream"
|
|
374
|
+
});
|
|
254
375
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
255
376
|
const apiPromise = originalStream(finalParams, request);
|
|
256
377
|
const wrappedPromise = transformApiPromise(apiPromise, (rawStream) => {
|
|
@@ -279,16 +400,475 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
279
400
|
};
|
|
280
401
|
return new Proxy(resource, handler);
|
|
281
402
|
}
|
|
403
|
+
async function compressChatParamsForCall(args) {
|
|
404
|
+
const compression = resolveEffectivePromptCompressionOptions(
|
|
405
|
+
args.defaultPromptCompression,
|
|
406
|
+
args.callPromptCompression
|
|
407
|
+
);
|
|
408
|
+
if (!compression) {
|
|
409
|
+
return args.params;
|
|
410
|
+
}
|
|
411
|
+
const outcome = await compressChatParams(
|
|
412
|
+
args.params,
|
|
413
|
+
args.usageTap,
|
|
414
|
+
compression,
|
|
415
|
+
args.withUsage?.signal
|
|
416
|
+
);
|
|
417
|
+
await recordCompressionOutcome({
|
|
418
|
+
outcome,
|
|
419
|
+
compression,
|
|
420
|
+
usageTap: args.usageTap,
|
|
421
|
+
ctx: args.ctx,
|
|
422
|
+
stats: args.stats,
|
|
423
|
+
withUsage: args.withUsage,
|
|
424
|
+
operation: args.operation
|
|
425
|
+
});
|
|
426
|
+
return outcome.params;
|
|
427
|
+
}
|
|
428
|
+
async function compressResponsesParamsForCall(args) {
|
|
429
|
+
const compression = resolveEffectivePromptCompressionOptions(
|
|
430
|
+
args.defaultPromptCompression,
|
|
431
|
+
args.callPromptCompression
|
|
432
|
+
);
|
|
433
|
+
if (!compression) {
|
|
434
|
+
return args.params;
|
|
435
|
+
}
|
|
436
|
+
const outcome = await compressResponsesParams(
|
|
437
|
+
args.params,
|
|
438
|
+
args.usageTap,
|
|
439
|
+
compression,
|
|
440
|
+
args.withUsage?.signal
|
|
441
|
+
);
|
|
442
|
+
await recordCompressionOutcome({
|
|
443
|
+
outcome,
|
|
444
|
+
compression,
|
|
445
|
+
usageTap: args.usageTap,
|
|
446
|
+
ctx: args.ctx,
|
|
447
|
+
stats: args.stats,
|
|
448
|
+
withUsage: args.withUsage,
|
|
449
|
+
operation: args.operation
|
|
450
|
+
});
|
|
451
|
+
return outcome.params;
|
|
452
|
+
}
|
|
453
|
+
async function recordCompressionOutcome(args) {
|
|
454
|
+
const telemetry = buildPromptCompressionTelemetry(args.outcome.segments);
|
|
455
|
+
if (!telemetry) {
|
|
456
|
+
return;
|
|
457
|
+
}
|
|
458
|
+
const turn = {
|
|
459
|
+
...telemetry,
|
|
460
|
+
callId: args.ctx.begin.data.callId,
|
|
461
|
+
operation: args.operation,
|
|
462
|
+
messagesCompressed: args.outcome.segments.length,
|
|
463
|
+
timestamp: Date.now()
|
|
464
|
+
};
|
|
465
|
+
args.stats._record(turn);
|
|
466
|
+
try {
|
|
467
|
+
await args.usageTap.recordPromptCompression(
|
|
468
|
+
{
|
|
469
|
+
callId: args.ctx.begin.data.callId,
|
|
470
|
+
promptCompression: telemetry
|
|
471
|
+
},
|
|
472
|
+
promptCompressionRequestOptions(args.withUsage, args.ctx.begin.correlationId)
|
|
473
|
+
);
|
|
474
|
+
} catch (error) {
|
|
475
|
+
args.stats._recordFailure({
|
|
476
|
+
callId: args.ctx.begin.data.callId,
|
|
477
|
+
operation: args.operation,
|
|
478
|
+
stage: "telemetry",
|
|
479
|
+
message: error instanceof Error ? error.message : String(error),
|
|
480
|
+
timestamp: Date.now()
|
|
481
|
+
});
|
|
482
|
+
if (args.compression.failOpen === false) {
|
|
483
|
+
throw error;
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
async function compressChatParams(params, usageTap, compression, signal) {
|
|
488
|
+
if (!params || typeof params !== "object") {
|
|
489
|
+
return { params, segments: [] };
|
|
490
|
+
}
|
|
491
|
+
const source = cloneRecord(params);
|
|
492
|
+
const messages = Array.isArray(source.messages) ? source.messages : void 0;
|
|
493
|
+
if (!messages) {
|
|
494
|
+
return { params, segments: [] };
|
|
495
|
+
}
|
|
496
|
+
const messageResults = await Promise.all(
|
|
497
|
+
messages.map(
|
|
498
|
+
(message) => compressOpenAIMessage(message, usageTap, compression, signal)
|
|
499
|
+
)
|
|
500
|
+
);
|
|
501
|
+
return {
|
|
502
|
+
params: {
|
|
503
|
+
...source,
|
|
504
|
+
messages: messageResults.map((result) => result.value)
|
|
505
|
+
},
|
|
506
|
+
segments: messageResults.flatMap((result) => result.segments)
|
|
507
|
+
};
|
|
508
|
+
}
|
|
509
|
+
async function compressResponsesParams(params, usageTap, compression, signal) {
|
|
510
|
+
if (!params || typeof params !== "object") {
|
|
511
|
+
return { params, segments: [] };
|
|
512
|
+
}
|
|
513
|
+
const source = cloneRecord(params);
|
|
514
|
+
const segments = [];
|
|
515
|
+
if (typeof source.instructions === "string") {
|
|
516
|
+
const compressed = await compressTextForRole(
|
|
517
|
+
source.instructions,
|
|
518
|
+
"system",
|
|
519
|
+
usageTap,
|
|
520
|
+
compression,
|
|
521
|
+
signal
|
|
522
|
+
);
|
|
523
|
+
if (compressed) {
|
|
524
|
+
source.instructions = compressed.text;
|
|
525
|
+
segments.push(compressed.segment);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
if (typeof source.input === "string") {
|
|
529
|
+
const compressed = await compressTextForRole(
|
|
530
|
+
source.input,
|
|
531
|
+
"user",
|
|
532
|
+
usageTap,
|
|
533
|
+
compression,
|
|
534
|
+
signal
|
|
535
|
+
);
|
|
536
|
+
if (compressed) {
|
|
537
|
+
source.input = compressed.text;
|
|
538
|
+
segments.push(compressed.segment);
|
|
539
|
+
}
|
|
540
|
+
} else if (Array.isArray(source.input)) {
|
|
541
|
+
const inputResults = await Promise.all(
|
|
542
|
+
source.input.map(
|
|
543
|
+
(item) => compressResponsesInputItem(item, usageTap, compression, signal)
|
|
544
|
+
)
|
|
545
|
+
);
|
|
546
|
+
source.input = inputResults.map((result) => result.value);
|
|
547
|
+
segments.push(...inputResults.flatMap((result) => result.segments));
|
|
548
|
+
}
|
|
549
|
+
return {
|
|
550
|
+
params: source,
|
|
551
|
+
segments
|
|
552
|
+
};
|
|
553
|
+
}
|
|
554
|
+
async function compressOpenAIMessage(message, usageTap, compression, signal) {
|
|
555
|
+
if (!isObjectRecord(message)) {
|
|
556
|
+
return { value: message, segments: [] };
|
|
557
|
+
}
|
|
558
|
+
const role = mapOpenAIRole(message.role);
|
|
559
|
+
if (!role) {
|
|
560
|
+
return { value: message, segments: [] };
|
|
561
|
+
}
|
|
562
|
+
const content = message.content;
|
|
563
|
+
if (typeof content === "string") {
|
|
564
|
+
const compressed = await compressTextForRole(
|
|
565
|
+
content,
|
|
566
|
+
role,
|
|
567
|
+
usageTap,
|
|
568
|
+
compression,
|
|
569
|
+
signal
|
|
570
|
+
);
|
|
571
|
+
if (!compressed) {
|
|
572
|
+
return { value: message, segments: [] };
|
|
573
|
+
}
|
|
574
|
+
return {
|
|
575
|
+
value: { ...message, content: compressed.text },
|
|
576
|
+
segments: [compressed.segment]
|
|
577
|
+
};
|
|
578
|
+
}
|
|
579
|
+
if (Array.isArray(content)) {
|
|
580
|
+
const blockResults = await Promise.all(
|
|
581
|
+
content.map(
|
|
582
|
+
(block) => compressOpenAITextBlock(block, role, usageTap, compression, signal)
|
|
583
|
+
)
|
|
584
|
+
);
|
|
585
|
+
const segments = blockResults.flatMap(
|
|
586
|
+
(result) => result.segment ? [result.segment] : []
|
|
587
|
+
);
|
|
588
|
+
return {
|
|
589
|
+
value: segments.length ? { ...message, content: blockResults.map((result) => result.value) } : message,
|
|
590
|
+
segments
|
|
591
|
+
};
|
|
592
|
+
}
|
|
593
|
+
return { value: message, segments: [] };
|
|
594
|
+
}
|
|
595
|
+
async function compressOpenAITextBlock(block, role, usageTap, compression, signal) {
|
|
596
|
+
if (!isObjectRecord(block) || block.type !== "text" || typeof block.text !== "string") {
|
|
597
|
+
return { value: block };
|
|
598
|
+
}
|
|
599
|
+
const compressed = await compressTextForRole(
|
|
600
|
+
block.text,
|
|
601
|
+
role,
|
|
602
|
+
usageTap,
|
|
603
|
+
compression,
|
|
604
|
+
signal
|
|
605
|
+
);
|
|
606
|
+
if (!compressed) {
|
|
607
|
+
return { value: block };
|
|
608
|
+
}
|
|
609
|
+
return {
|
|
610
|
+
value: { ...block, text: compressed.text },
|
|
611
|
+
segment: compressed.segment
|
|
612
|
+
};
|
|
613
|
+
}
|
|
614
|
+
async function compressResponsesInputItem(item, usageTap, compression, signal) {
|
|
615
|
+
if (!isObjectRecord(item)) {
|
|
616
|
+
return { value: item, segments: [] };
|
|
617
|
+
}
|
|
618
|
+
const specialToolRole = mapResponsesItemTypeToRole(item.type);
|
|
619
|
+
const role = specialToolRole ?? mapOpenAIRole(item.role);
|
|
620
|
+
const segments = [];
|
|
621
|
+
let next = item;
|
|
622
|
+
if (role && typeof item.content === "string") {
|
|
623
|
+
const compressed = await compressTextForRole(
|
|
624
|
+
item.content,
|
|
625
|
+
role,
|
|
626
|
+
usageTap,
|
|
627
|
+
compression,
|
|
628
|
+
signal
|
|
629
|
+
);
|
|
630
|
+
if (compressed) {
|
|
631
|
+
next = { ...next, content: compressed.text };
|
|
632
|
+
segments.push(compressed.segment);
|
|
633
|
+
}
|
|
634
|
+
} else if (role && Array.isArray(item.content)) {
|
|
635
|
+
const contentResults = await Promise.all(
|
|
636
|
+
item.content.map(
|
|
637
|
+
(block) => compressResponsesContentBlock(block, role, usageTap, compression, signal)
|
|
638
|
+
)
|
|
639
|
+
);
|
|
640
|
+
segments.push(
|
|
641
|
+
...contentResults.flatMap(
|
|
642
|
+
(result) => result.segment ? [result.segment] : []
|
|
643
|
+
)
|
|
644
|
+
);
|
|
645
|
+
if (segments.length) {
|
|
646
|
+
next = {
|
|
647
|
+
...next,
|
|
648
|
+
content: contentResults.map((result) => result.value)
|
|
649
|
+
};
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
if (specialToolRole && typeof item.output === "string") {
|
|
653
|
+
const compressed = await compressTextForRole(
|
|
654
|
+
item.output,
|
|
655
|
+
specialToolRole,
|
|
656
|
+
usageTap,
|
|
657
|
+
compression,
|
|
658
|
+
signal
|
|
659
|
+
);
|
|
660
|
+
if (compressed) {
|
|
661
|
+
next = { ...next, output: compressed.text };
|
|
662
|
+
segments.push(compressed.segment);
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
return { value: next, segments };
|
|
666
|
+
}
|
|
667
|
+
async function compressResponsesContentBlock(block, role, usageTap, compression, signal) {
|
|
668
|
+
if (!isObjectRecord(block)) {
|
|
669
|
+
return { value: block };
|
|
670
|
+
}
|
|
671
|
+
if ((block.type === "input_text" || block.type === "text") && typeof block.text === "string") {
|
|
672
|
+
const compressed = await compressTextForRole(
|
|
673
|
+
block.text,
|
|
674
|
+
role,
|
|
675
|
+
usageTap,
|
|
676
|
+
compression,
|
|
677
|
+
signal
|
|
678
|
+
);
|
|
679
|
+
if (compressed) {
|
|
680
|
+
return {
|
|
681
|
+
value: { ...block, text: compressed.text },
|
|
682
|
+
segment: compressed.segment
|
|
683
|
+
};
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
if (role === "tool" && typeof block.output === "string") {
|
|
687
|
+
const compressed = await compressTextForRole(
|
|
688
|
+
block.output,
|
|
689
|
+
role,
|
|
690
|
+
usageTap,
|
|
691
|
+
compression,
|
|
692
|
+
signal
|
|
693
|
+
);
|
|
694
|
+
if (compressed) {
|
|
695
|
+
return {
|
|
696
|
+
value: { ...block, output: compressed.text },
|
|
697
|
+
segment: compressed.segment
|
|
698
|
+
};
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
return { value: block };
|
|
702
|
+
}
|
|
703
|
+
async function compressTextForRole(text, role, usageTap, compression, signal) {
|
|
704
|
+
if (!text.trim()) {
|
|
705
|
+
return void 0;
|
|
706
|
+
}
|
|
707
|
+
const roleOptions = resolveRoleCompressionOptions(compression, role);
|
|
708
|
+
if (!roleOptions) {
|
|
709
|
+
return void 0;
|
|
710
|
+
}
|
|
711
|
+
const estimatedTokens = estimatePromptTokens(text);
|
|
712
|
+
if (typeof roleOptions.minTokens === "number" && estimatedTokens < roleOptions.minTokens) {
|
|
713
|
+
return void 0;
|
|
714
|
+
}
|
|
715
|
+
const result = await usageTap.compressPromptInput(text, {
|
|
716
|
+
provider: roleOptions.provider,
|
|
717
|
+
failOpen: roleOptions.failOpen,
|
|
718
|
+
tokenCompanyModel: roleOptions.tokenCompanyModel,
|
|
719
|
+
tokenCompanyAggressiveness: roleOptions.tokenCompanyAggressiveness,
|
|
720
|
+
tokenCompanyAppId: roleOptions.tokenCompanyAppId,
|
|
721
|
+
signal
|
|
722
|
+
});
|
|
723
|
+
const compressedText = typeof result.compressedInput === "string" ? result.compressedInput : String(result.compressedInput);
|
|
724
|
+
return {
|
|
725
|
+
text: compressedText,
|
|
726
|
+
segment: { role, result: { ...result, compressedInput: compressedText } }
|
|
727
|
+
};
|
|
728
|
+
}
|
|
729
|
+
function normalizePromptCompressionOptions(options) {
|
|
730
|
+
if (!options) {
|
|
731
|
+
return void 0;
|
|
732
|
+
}
|
|
733
|
+
if (options === true) {
|
|
734
|
+
return {};
|
|
735
|
+
}
|
|
736
|
+
if (options.enabled === false) {
|
|
737
|
+
return void 0;
|
|
738
|
+
}
|
|
739
|
+
return options;
|
|
740
|
+
}
|
|
741
|
+
function resolveEffectivePromptCompressionOptions(defaults, override) {
|
|
742
|
+
if (override === false) {
|
|
743
|
+
return void 0;
|
|
744
|
+
}
|
|
745
|
+
if (override === void 0) {
|
|
746
|
+
return defaults;
|
|
747
|
+
}
|
|
748
|
+
if (override === true) {
|
|
749
|
+
return defaults ?? {};
|
|
750
|
+
}
|
|
751
|
+
const merged = {
|
|
752
|
+
...defaults ?? {},
|
|
753
|
+
...override,
|
|
754
|
+
roles: override.roles ?? defaults?.roles
|
|
755
|
+
};
|
|
756
|
+
return normalizePromptCompressionOptions(merged);
|
|
757
|
+
}
|
|
758
|
+
function resolveRoleCompressionOptions(compression, role) {
|
|
759
|
+
const hasExplicitRoles = compression.roles !== void 0;
|
|
760
|
+
const setting = compression.roles?.[role];
|
|
761
|
+
if (hasExplicitRoles && setting === void 0) {
|
|
762
|
+
return void 0;
|
|
763
|
+
}
|
|
764
|
+
if (!hasExplicitRoles && role === "assistant") {
|
|
765
|
+
return void 0;
|
|
766
|
+
}
|
|
767
|
+
if (setting === false) {
|
|
768
|
+
return void 0;
|
|
769
|
+
}
|
|
770
|
+
const roleOptions = typeof setting === "object" ? setting : void 0;
|
|
771
|
+
if (roleOptions?.enabled === false) {
|
|
772
|
+
return void 0;
|
|
773
|
+
}
|
|
774
|
+
return {
|
|
775
|
+
provider: roleOptions?.provider ?? compression.provider,
|
|
776
|
+
minTokens: roleOptions?.minTokens ?? compression.minTokens,
|
|
777
|
+
failOpen: compression.failOpen,
|
|
778
|
+
tokenCompanyModel: compression.tokenCompanyModel,
|
|
779
|
+
tokenCompanyAggressiveness: roleOptions?.tokenCompanyAggressiveness ?? resolveTokenCompanyAggressiveness(compression, role),
|
|
780
|
+
tokenCompanyAppId: compression.tokenCompanyAppId
|
|
781
|
+
};
|
|
782
|
+
}
|
|
783
|
+
function resolveTokenCompanyAggressiveness(compression, role) {
|
|
784
|
+
if (typeof compression.tokenCompanyAggressiveness === "number") {
|
|
785
|
+
return compression.tokenCompanyAggressiveness;
|
|
786
|
+
}
|
|
787
|
+
return compression.tokenCompanyAggressiveness?.[role];
|
|
788
|
+
}
|
|
789
|
+
function buildPromptCompressionTelemetry(segments) {
|
|
790
|
+
if (!segments.length) {
|
|
791
|
+
return void 0;
|
|
792
|
+
}
|
|
793
|
+
const originalCharacters = segments.reduce(
|
|
794
|
+
(sum, segment) => sum + segment.result.originalCharacters,
|
|
795
|
+
0
|
|
796
|
+
);
|
|
797
|
+
const compressedCharacters = segments.reduce(
|
|
798
|
+
(sum, segment) => sum + segment.result.compressedCharacters,
|
|
799
|
+
0
|
|
800
|
+
);
|
|
801
|
+
const originalTokens = segments.reduce(
|
|
802
|
+
(sum, segment) => sum + segment.result.originalTokens,
|
|
803
|
+
0
|
|
804
|
+
);
|
|
805
|
+
const compressedTokens = segments.reduce(
|
|
806
|
+
(sum, segment) => sum + segment.result.compressedTokens,
|
|
807
|
+
0
|
|
808
|
+
);
|
|
809
|
+
const savedCharacters = Math.max(0, originalCharacters - compressedCharacters);
|
|
810
|
+
const savedTokens = Math.max(0, originalTokens - compressedTokens);
|
|
811
|
+
const providers = dedupeStrings(segments.map((segment) => segment.result.provider));
|
|
812
|
+
const roles = dedupeStrings(segments.map((segment) => `role:${segment.role}`));
|
|
813
|
+
const techniques = dedupeStrings([
|
|
814
|
+
"openai-wrapper",
|
|
815
|
+
...roles,
|
|
816
|
+
...segments.flatMap((segment) => segment.result.techniques),
|
|
817
|
+
...providers.length > 1 ? ["mixed-providers"] : []
|
|
818
|
+
]);
|
|
819
|
+
return {
|
|
820
|
+
provider: segments[0]?.result.provider ?? "heuristic",
|
|
821
|
+
originalCharacters,
|
|
822
|
+
compressedCharacters,
|
|
823
|
+
savedCharacters,
|
|
824
|
+
originalTokens,
|
|
825
|
+
compressedTokens,
|
|
826
|
+
savedTokens,
|
|
827
|
+
tokenSavingsRatio: originalTokens > 0 ? savedTokens / originalTokens : 0,
|
|
828
|
+
savingsRatio: originalCharacters > 0 ? savedCharacters / originalCharacters : 0,
|
|
829
|
+
techniques
|
|
830
|
+
};
|
|
831
|
+
}
|
|
832
|
+
function promptCompressionRequestOptions(withUsage2, correlationId) {
|
|
833
|
+
return {
|
|
834
|
+
signal: withUsage2?.signal,
|
|
835
|
+
headers: withUsage2?.headers,
|
|
836
|
+
retries: withUsage2?.retries,
|
|
837
|
+
correlationId
|
|
838
|
+
};
|
|
839
|
+
}
|
|
840
|
+
function mapOpenAIRole(role) {
|
|
841
|
+
if (role === "system" || role === "developer") {
|
|
842
|
+
return "system";
|
|
843
|
+
}
|
|
844
|
+
if (role === "user") {
|
|
845
|
+
return "user";
|
|
846
|
+
}
|
|
847
|
+
if (role === "tool" || role === "function") {
|
|
848
|
+
return "tool";
|
|
849
|
+
}
|
|
850
|
+
if (role === "assistant") {
|
|
851
|
+
return "assistant";
|
|
852
|
+
}
|
|
853
|
+
return void 0;
|
|
854
|
+
}
|
|
855
|
+
function mapResponsesItemTypeToRole(type) {
|
|
856
|
+
if (type === "function_call_output" || type === "tool_result" || type === "computer_call_output") {
|
|
857
|
+
return "tool";
|
|
858
|
+
}
|
|
859
|
+
return void 0;
|
|
860
|
+
}
|
|
282
861
|
function splitUsageOptions(options) {
|
|
283
862
|
if (!options || typeof options !== "object") {
|
|
284
863
|
return {};
|
|
285
864
|
}
|
|
286
|
-
const { usageTap, withUsage: withUsage2, ...rest } = options;
|
|
865
|
+
const { usageTap, withUsage: withUsage2, promptCompression, ...rest } = options;
|
|
287
866
|
const requestOptions = Object.keys(rest).length ? cloneRequestOptions(rest) : void 0;
|
|
288
867
|
return {
|
|
289
868
|
requestOptions,
|
|
290
869
|
usageContext: usageTap,
|
|
291
|
-
withUsage: withUsage2
|
|
870
|
+
withUsage: withUsage2,
|
|
871
|
+
promptCompression
|
|
292
872
|
};
|
|
293
873
|
}
|
|
294
874
|
function resolveBeginRequest(defaults, override) {
|