@usagetap/sdk 0.10.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -22
- package/dist/adapters/anthropic.cjs +943 -0
- package/dist/adapters/anthropic.cjs.map +1 -0
- package/dist/adapters/anthropic.d.cts +81 -0
- package/dist/adapters/anthropic.d.ts +81 -0
- package/dist/adapters/anthropic.mjs +940 -0
- package/dist/adapters/anthropic.mjs.map +1 -0
- package/dist/adapters/openai.cjs +601 -17
- package/dist/adapters/openai.cjs.map +1 -1
- package/dist/adapters/openai.d.cts +57 -2
- package/dist/adapters/openai.d.ts +57 -2
- package/dist/adapters/openai.mjs +601 -18
- package/dist/adapters/openai.mjs.map +1 -1
- package/dist/adapters/openrouter.cjs.map +1 -1
- package/dist/adapters/openrouter.d.cts +1 -1
- package/dist/adapters/openrouter.d.ts +1 -1
- package/dist/adapters/openrouter.mjs.map +1 -1
- package/dist/anthropic/index.cjs +943 -0
- package/dist/anthropic/index.cjs.map +1 -0
- package/dist/anthropic/index.d.cts +2 -0
- package/dist/anthropic/index.d.ts +2 -0
- package/dist/anthropic/index.mjs +940 -0
- package/dist/anthropic/index.mjs.map +1 -0
- package/dist/{client-DEbk0Q2l.d.cts → client-BA-QlnRq.d.cts} +95 -1
- package/dist/{client-DEbk0Q2l.d.ts → client-BA-QlnRq.d.ts} +95 -1
- package/dist/express/index.cjs +597 -17
- package/dist/express/index.cjs.map +1 -1
- package/dist/express/index.d.cts +1 -1
- package/dist/express/index.d.ts +1 -1
- package/dist/express/index.mjs +597 -17
- package/dist/express/index.mjs.map +1 -1
- package/dist/index.cjs +586 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.mjs +581 -2
- package/dist/index.mjs.map +1 -1
- package/dist/openai/index.cjs +601 -17
- package/dist/openai/index.cjs.map +1 -1
- package/dist/openai/index.d.cts +2 -2
- package/dist/openai/index.d.ts +2 -2
- package/dist/openai/index.mjs +601 -18
- package/dist/openai/index.mjs.map +1 -1
- package/package.json +22 -2
package/dist/express/index.cjs
CHANGED
|
@@ -29,7 +29,62 @@ var UsageTapError = class extends Error {
|
|
|
29
29
|
}
|
|
30
30
|
};
|
|
31
31
|
|
|
32
|
+
// src/prompt-compression.ts
|
|
33
|
+
/**
 * Produces a rough token count for a prompt input.
 *
 * Non-string inputs are first turned into text via `stableStringifyInput`.
 * Each run of Unicode letters/digits counts as one token, and every other
 * non-whitespace character counts as one token on its own.
 *
 * @param {*} input - Prompt text, or a value that will be serialized to text.
 * @returns {number} Number of matched pieces; 0 when nothing matches.
 */
function estimatePromptTokens(input) {
  let serialized = input;
  if (typeof serialized !== "string") {
    serialized = stableStringifyInput(serialized);
  }
  const pieces = serialized.match(/[\p{L}\p{N}]+|[^\s]/gu);
  return pieces === null ? 0 : pieces.length;
}
|
|
37
|
+
/**
 * Converts an arbitrary prompt input into text.
 *
 * Strings are returned unchanged. Other values are JSON-serialized; when
 * `JSON.stringify` yields `undefined` (e.g. for `undefined` or a function)
 * the value is coerced with `String()` instead.
 *
 * `JSON.stringify` throws on cyclic structures and on BigInt values. Rather
 * than letting that escape, the value is reduced to a plain string form so
 * callers always get text back.
 *
 * @param {*} input - Value to serialize.
 * @returns {string} Text representation of `input`.
 */
function stableStringifyInput(input) {
  if (typeof input === "string") return input;
  try {
    return JSON.stringify(input) ?? String(input);
  } catch {
    // Objects may lack a usable toString (null prototype), so use the
    // intrinsic tag form for them; primitives such as BigInt coerce safely.
    return typeof input === "object" && input !== null ? Object.prototype.toString.call(input) : String(input);
  }
}
|
|
41
|
+
|
|
32
42
|
// src/adapters/openai.ts
|
|
43
|
+
/**
 * In-memory accumulator for prompt-compression activity of one wrapped client.
 *
 * `history` holds one entry per recorded compression turn and `failures`
 * holds one entry per recorded failure. All aggregate getters are computed
 * from those arrays on each access.
 *
 * Numeric fields missing from a turn are treated as 0 in every total, so a
 * partially populated turn cannot turn an aggregate into NaN.
 */
var OpenAIPromptCompressionStats = class {
  history = [];
  failures = [];

  /** Appends a compression turn to `history`. */
  _record(turn) {
    this.history.push(turn);
  }

  /** Appends a failure record to `failures`. */
  _recordFailure(failure) {
    this.failures.push(failure);
  }

  /** Sum of `originalTokens` over all recorded turns. */
  get totalOriginalTokens() {
    return this.history.reduce((sum, turn) => sum + (turn.originalTokens ?? 0), 0);
  }

  /** Sum of `compressedTokens` over all recorded turns. */
  get totalCompressedTokens() {
    return this.history.reduce((sum, turn) => sum + (turn.compressedTokens ?? 0), 0);
  }

  /** Sum of `savedTokens` over all recorded turns. */
  get totalTokensSaved() {
    return this.history.reduce((sum, turn) => sum + (turn.savedTokens ?? 0), 0);
  }

  /** Sum of `originalCharacters` over all recorded turns. */
  get totalOriginalCharacters() {
    return this.history.reduce((sum, turn) => sum + (turn.originalCharacters ?? 0), 0);
  }

  /** Sum of `compressedCharacters` over all recorded turns. */
  get totalCompressedCharacters() {
    return this.history.reduce((sum, turn) => sum + (turn.compressedCharacters ?? 0), 0);
  }

  /** Sum of `savedCharacters` over all recorded turns. */
  get totalCharactersSaved() {
    return this.history.reduce((sum, turn) => sum + (turn.savedCharacters ?? 0), 0);
  }

  /** Number of recorded turns. */
  get calls() {
    return this.history.length;
  }

  /** Number of recorded failures. */
  get telemetryFailures() {
    return this.failures.length;
  }

  /**
   * Number of turns whose `techniques` list contains "compression-error" or
   * "fallback-original". Turns without a `techniques` list are not counted.
   */
  get failOpenEvents() {
    return this.history.filter(
      (turn) => turn.techniques?.includes("compression-error") || turn.techniques?.includes("fallback-original")
    ).length;
  }

  /** Saved tokens divided by original tokens; 0 when no original tokens were recorded. */
  get tokenSavingsRatio() {
    return this.totalOriginalTokens > 0 ? this.totalTokensSaved / this.totalOriginalTokens : 0;
  }

  /** Saved characters divided by original characters; 0 when no original characters were recorded. */
  get savingsRatio() {
    return this.totalOriginalCharacters > 0 ? this.totalCharactersSaved / this.totalOriginalCharacters : 0;
  }
};
|
|
33
88
|
function toNextResponse(stream, options = {}) {
|
|
34
89
|
const mode = options.mode ?? "text";
|
|
35
90
|
const headers = new Headers(options.headers ?? {});
|
|
@@ -127,8 +182,24 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
127
182
|
}
|
|
128
183
|
const defaultContext = options.defaultContext;
|
|
129
184
|
const applyVendorHints = options.applyVendorHints !== false;
|
|
130
|
-
const
|
|
131
|
-
const
|
|
185
|
+
const defaultPromptCompression = normalizePromptCompressionOptions(options.promptCompression);
|
|
186
|
+
const promptCompressionStats = new OpenAIPromptCompressionStats();
|
|
187
|
+
const proxiedChat = client.chat ? createChatProxy(
|
|
188
|
+
client.chat,
|
|
189
|
+
usageTap,
|
|
190
|
+
defaultContext,
|
|
191
|
+
applyVendorHints,
|
|
192
|
+
defaultPromptCompression,
|
|
193
|
+
promptCompressionStats
|
|
194
|
+
) : void 0;
|
|
195
|
+
const proxiedResponses = typeof client.responses !== "undefined" ? createResponsesProxy(
|
|
196
|
+
client.responses,
|
|
197
|
+
usageTap,
|
|
198
|
+
defaultContext,
|
|
199
|
+
applyVendorHints,
|
|
200
|
+
defaultPromptCompression,
|
|
201
|
+
promptCompressionStats
|
|
202
|
+
) : void 0;
|
|
132
203
|
const handler = {
|
|
133
204
|
get(target, prop, receiver) {
|
|
134
205
|
if (prop === "chat" && proxiedChat) {
|
|
@@ -143,6 +214,9 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
143
214
|
if (prop === "pipeToResponse") {
|
|
144
215
|
return pipeToResponse;
|
|
145
216
|
}
|
|
217
|
+
if (prop === "promptCompression") {
|
|
218
|
+
return promptCompressionStats;
|
|
219
|
+
}
|
|
146
220
|
if (prop === "unwrap") {
|
|
147
221
|
return () => target;
|
|
148
222
|
}
|
|
@@ -151,12 +225,14 @@ function wrapOpenAI(client, usageTap, options = {}) {
|
|
|
151
225
|
};
|
|
152
226
|
return new Proxy(client, handler);
|
|
153
227
|
}
|
|
154
|
-
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
228
|
+
function createChatProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
155
229
|
const completions = createChatCompletionsProxy(
|
|
156
230
|
resource.completions,
|
|
157
231
|
usageTap,
|
|
158
232
|
defaultContext,
|
|
159
|
-
applyVendorHints
|
|
233
|
+
applyVendorHints,
|
|
234
|
+
defaultPromptCompression,
|
|
235
|
+
promptCompressionStats
|
|
160
236
|
);
|
|
161
237
|
const handler = {
|
|
162
238
|
get(target, prop, receiver) {
|
|
@@ -168,7 +244,7 @@ function createChatProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
|
168
244
|
};
|
|
169
245
|
return new Proxy(resource, handler);
|
|
170
246
|
}
|
|
171
|
-
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
247
|
+
function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
172
248
|
if (!resource || typeof resource !== "object") {
|
|
173
249
|
return void 0;
|
|
174
250
|
}
|
|
@@ -177,11 +253,26 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
177
253
|
}
|
|
178
254
|
const originalCreate = resource.create.bind(resource);
|
|
179
255
|
const wrappedCreate = (params, options) => {
|
|
180
|
-
const {
|
|
256
|
+
const {
|
|
257
|
+
requestOptions,
|
|
258
|
+
usageContext,
|
|
259
|
+
withUsage: withUsage2,
|
|
260
|
+
promptCompression
|
|
261
|
+
} = splitUsageOptions(options);
|
|
181
262
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
182
263
|
const wantsStream = isStreamingRequest(params);
|
|
183
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
184
|
-
const
|
|
264
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
265
|
+
const hintedParams = applyVendorHints ? applyResponsesVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
266
|
+
const finalParams = await compressResponsesParamsForCall({
|
|
267
|
+
params: hintedParams,
|
|
268
|
+
usageTap,
|
|
269
|
+
ctx,
|
|
270
|
+
defaultPromptCompression,
|
|
271
|
+
callPromptCompression: promptCompression,
|
|
272
|
+
stats: promptCompressionStats,
|
|
273
|
+
withUsage: withUsage2,
|
|
274
|
+
operation: "responses.create"
|
|
275
|
+
});
|
|
185
276
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
186
277
|
if (wantsStream) {
|
|
187
278
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -215,16 +306,31 @@ function createResponsesProxy(resource, usageTap, defaultContext, applyVendorHin
|
|
|
215
306
|
};
|
|
216
307
|
return new Proxy(resource, handler);
|
|
217
308
|
}
|
|
218
|
-
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints) {
|
|
309
|
+
function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVendorHints, defaultPromptCompression, promptCompressionStats) {
|
|
219
310
|
const originalCreate = resource.create.bind(resource);
|
|
220
311
|
const streamCandidate = resource.stream;
|
|
221
312
|
const originalStream = typeof streamCandidate === "function" ? streamCandidate.bind(resource) : void 0;
|
|
222
313
|
const wrappedCreate = (params, options) => {
|
|
223
|
-
const {
|
|
314
|
+
const {
|
|
315
|
+
requestOptions,
|
|
316
|
+
usageContext,
|
|
317
|
+
withUsage: withUsage2,
|
|
318
|
+
promptCompression
|
|
319
|
+
} = splitUsageOptions(options);
|
|
224
320
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
225
321
|
const wantsStream = isStreamingRequest(params);
|
|
226
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
227
|
-
const
|
|
322
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
323
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
324
|
+
const finalParams = await compressChatParamsForCall({
|
|
325
|
+
params: hintedParams,
|
|
326
|
+
usageTap,
|
|
327
|
+
ctx,
|
|
328
|
+
defaultPromptCompression,
|
|
329
|
+
callPromptCompression: promptCompression,
|
|
330
|
+
stats: promptCompressionStats,
|
|
331
|
+
withUsage: withUsage2,
|
|
332
|
+
operation: "chat.completions.create"
|
|
333
|
+
});
|
|
228
334
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
229
335
|
if (wantsStream) {
|
|
230
336
|
const apiPromise2 = originalCreate(finalParams, request);
|
|
@@ -249,10 +355,25 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
249
355
|
}, withUsage2);
|
|
250
356
|
};
|
|
251
357
|
const wrappedStream = originalStream ? (params, options) => {
|
|
252
|
-
const {
|
|
358
|
+
const {
|
|
359
|
+
requestOptions,
|
|
360
|
+
usageContext,
|
|
361
|
+
withUsage: withUsage2,
|
|
362
|
+
promptCompression
|
|
363
|
+
} = splitUsageOptions(options);
|
|
253
364
|
const beginRequest = resolveBeginRequest(defaultContext, usageContext);
|
|
254
|
-
return usageTap.withUsage(beginRequest, (ctx) => {
|
|
255
|
-
const
|
|
365
|
+
return usageTap.withUsage(beginRequest, async (ctx) => {
|
|
366
|
+
const hintedParams = applyVendorHints ? applyChatVendorHints(params, ctx.begin.data.vendorHints) : params;
|
|
367
|
+
const finalParams = await compressChatParamsForCall({
|
|
368
|
+
params: hintedParams,
|
|
369
|
+
usageTap,
|
|
370
|
+
ctx,
|
|
371
|
+
defaultPromptCompression,
|
|
372
|
+
callPromptCompression: promptCompression,
|
|
373
|
+
stats: promptCompressionStats,
|
|
374
|
+
withUsage: withUsage2,
|
|
375
|
+
operation: "chat.completions.stream"
|
|
376
|
+
});
|
|
256
377
|
const request = attachCorrelationHeader(requestOptions, ctx.begin.correlationId);
|
|
257
378
|
const apiPromise = originalStream(finalParams, request);
|
|
258
379
|
const wrappedPromise = transformApiPromise(apiPromise, (rawStream) => {
|
|
@@ -281,16 +402,475 @@ function createChatCompletionsProxy(resource, usageTap, defaultContext, applyVen
|
|
|
281
402
|
};
|
|
282
403
|
return new Proxy(resource, handler);
|
|
283
404
|
}
|
|
405
|
+
/**
 * Applies prompt compression to chat-completion params for a single call.
 *
 * Resolves the effective compression settings from the wrapper defaults and
 * the per-call override. When compression is off, the params are returned
 * untouched. Otherwise the messages are compressed, the outcome is recorded
 * (stats + telemetry), and the compressed params are returned.
 *
 * @param {object} args - Bundle with `params`, `usageTap`, `ctx`,
 *   `defaultPromptCompression`, `callPromptCompression`, `stats`,
 *   `withUsage` and `operation`.
 * @returns {Promise<*>} The params to send to the API.
 */
async function compressChatParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }

  const outcome = await compressChatParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation,
  });
  return outcome.params;
}
|
|
430
|
+
/**
 * Applies prompt compression to Responses-API params for a single call.
 *
 * Resolves the effective compression settings from the wrapper defaults and
 * the per-call override. When compression is off, the params are returned
 * untouched. Otherwise the instructions/input are compressed, the outcome is
 * recorded (stats + telemetry), and the compressed params are returned.
 *
 * @param {object} args - Bundle with `params`, `usageTap`, `ctx`,
 *   `defaultPromptCompression`, `callPromptCompression`, `stats`,
 *   `withUsage` and `operation`.
 * @returns {Promise<*>} The params to send to the API.
 */
async function compressResponsesParamsForCall(args) {
  const { params, usageTap, ctx, stats, withUsage, operation } = args;
  const compression = resolveEffectivePromptCompressionOptions(
    args.defaultPromptCompression,
    args.callPromptCompression
  );
  if (!compression) {
    return params;
  }

  const outcome = await compressResponsesParams(params, usageTap, compression, withUsage?.signal);
  await recordCompressionOutcome({
    outcome,
    compression,
    usageTap,
    ctx,
    stats,
    withUsage,
    operation,
  });
  return outcome.params;
}
|
|
455
|
+
/**
 * Records the result of one compression pass.
 *
 * Builds aggregate telemetry from the outcome's segments; when there is none
 * (no segments), nothing is recorded. Otherwise the turn is appended to the
 * local stats first and then reported through
 * `usageTap.recordPromptCompression`. A reporting error is stored as a
 * "telemetry" failure on the stats object and rethrown only when
 * `compression.failOpen` is explicitly `false`.
 *
 * @param {object} args - Bundle with `outcome`, `compression`, `usageTap`,
 *   `ctx`, `stats`, `withUsage` and `operation`.
 * @returns {Promise<void>}
 */
async function recordCompressionOutcome(args) {
  const { segments } = args.outcome;
  const telemetry = buildPromptCompressionTelemetry(segments);
  if (!telemetry) {
    return;
  }

  const { operation, stats } = args;
  const { callId } = args.ctx.begin.data;
  stats._record({
    ...telemetry,
    callId,
    operation,
    messagesCompressed: segments.length,
    timestamp: Date.now(),
  });

  try {
    const payload = { callId, promptCompression: telemetry };
    const requestOptions = promptCompressionRequestOptions(args.withUsage, args.ctx.begin.correlationId);
    await args.usageTap.recordPromptCompression(payload, requestOptions);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    stats._recordFailure({
      callId,
      operation,
      stage: "telemetry",
      message,
      timestamp: Date.now(),
    });
    // Only an explicit failOpen === false makes telemetry errors fatal.
    if (args.compression.failOpen === false) {
      throw error;
    }
  }
}
|
|
489
|
+
/**
 * Compresses every message of a chat-completions request.
 *
 * Params that are not an object, or that carry no `messages` array, are
 * passed back as-is with an empty segment list. Otherwise all messages are
 * compressed concurrently and a copy of the params with the new messages is
 * returned alongside the collected segments.
 *
 * @param {*} params - Chat-completion request params.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression calls.
 * @returns {Promise<{params: *, segments: Array}>}
 */
async function compressChatParams(params, usageTap, compression, signal) {
  const untouched = { params, segments: [] };
  if (!params || typeof params !== "object") {
    return untouched;
  }

  const source = cloneRecord(params);
  if (!Array.isArray(source.messages)) {
    return untouched;
  }

  const perMessage = await Promise.all(
    source.messages.map((message) => compressOpenAIMessage(message, usageTap, compression, signal))
  );
  const messages = perMessage.map(({ value }) => value);
  const segments = perMessage.flatMap((entry) => entry.segments);
  return { params: { ...source, messages }, segments };
}
|
|
511
|
+
/**
 * Compresses the text-bearing parts of a Responses-API request.
 *
 * Params that are not an object are passed back as-is. Otherwise a clone is
 * made and, in order: a string `instructions` is compressed with the
 * "system" role; then a string `input` is compressed with the "user" role,
 * or an array `input` has each item compressed concurrently.
 *
 * @param {*} params - Responses request params.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression calls.
 * @returns {Promise<{params: *, segments: Array}>}
 */
async function compressResponsesParams(params, usageTap, compression, signal) {
  if (!params || typeof params !== "object") {
    return { params, segments: [] };
  }

  const squeeze = (text, role) => compressTextForRole(text, role, usageTap, compression, signal);
  const source = cloneRecord(params);
  const segments = [];

  if (typeof source.instructions === "string") {
    const shrunk = await squeeze(source.instructions, "system");
    if (shrunk) {
      source.instructions = shrunk.text;
      segments.push(shrunk.segment);
    }
  }

  if (typeof source.input === "string") {
    const shrunk = await squeeze(source.input, "user");
    if (shrunk) {
      source.input = shrunk.text;
      segments.push(shrunk.segment);
    }
  } else if (Array.isArray(source.input)) {
    const perItem = await Promise.all(
      source.input.map((item) => compressResponsesInputItem(item, usageTap, compression, signal))
    );
    source.input = perItem.map(({ value }) => value);
    segments.push(...perItem.flatMap((entry) => entry.segments));
  }

  return { params: source, segments };
}
|
|
556
|
+
/**
 * Compresses the content of a single chat message.
 *
 * The message is returned unchanged (with no segments) when it is not an
 * object record, when its role does not map to a known compression role,
 * when its content is neither a string nor an array, or when nothing was
 * compressed. String content is compressed directly; array content has each
 * block compressed concurrently.
 *
 * @param {*} message - One entry of the `messages` array.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression calls.
 * @returns {Promise<{value: *, segments: Array}>}
 */
async function compressOpenAIMessage(message, usageTap, compression, signal) {
  const unchanged = { value: message, segments: [] };
  if (!isObjectRecord(message)) {
    return unchanged;
  }
  const role = mapOpenAIRole(message.role);
  if (!role) {
    return unchanged;
  }

  const { content } = message;
  if (Array.isArray(content)) {
    const parts = await Promise.all(
      content.map((block) => compressOpenAITextBlock(block, role, usageTap, compression, signal))
    );
    const segments = parts.filter((part) => part.segment).map((part) => part.segment);
    if (!segments.length) {
      return { value: message, segments };
    }
    return {
      value: { ...message, content: parts.map((part) => part.value) },
      segments,
    };
  }

  if (typeof content !== "string") {
    return unchanged;
  }
  const shrunk = await compressTextForRole(content, role, usageTap, compression, signal);
  if (!shrunk) {
    return unchanged;
  }
  return {
    value: { ...message, content: shrunk.text },
    segments: [shrunk.segment],
  };
}
|
|
597
|
+
/**
 * Compresses one content block of a chat message.
 *
 * Only object blocks with `type === "text"` and a string `text` are
 * candidates; everything else is returned as `{ value: block }`. When
 * compression yields a result, a copy of the block with the new text is
 * returned together with its segment.
 *
 * @param {*} block - Content block from a message's `content` array.
 * @param {string} role - Compression role of the owning message.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression call.
 * @returns {Promise<{value: *, segment?: object}>}
 */
async function compressOpenAITextBlock(block, role, usageTap, compression, signal) {
  const isTextBlock = isObjectRecord(block) && block.type === "text" && typeof block.text === "string";
  const shrunk = isTextBlock
    ? await compressTextForRole(block.text, role, usageTap, compression, signal)
    : void 0;
  if (!shrunk) {
    return { value: block };
  }
  return {
    value: { ...block, text: shrunk.text },
    segment: shrunk.segment,
  };
}
|
|
616
|
+
/**
 * Compresses one item of a Responses-API `input` array.
 *
 * The role comes from the item's `type` when that maps to a tool role,
 * otherwise from its `role`. With a role, string `content` is compressed
 * directly and array `content` block by block. Independently, items whose
 * `type` maps to a tool role also get a string `output` compressed.
 * Non-object items are returned unchanged.
 *
 * @param {*} item - Entry of the `input` array.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression calls.
 * @returns {Promise<{value: *, segments: Array}>}
 */
async function compressResponsesInputItem(item, usageTap, compression, signal) {
  if (!isObjectRecord(item)) {
    return { value: item, segments: [] };
  }

  const squeeze = (text, asRole) => compressTextForRole(text, asRole, usageTap, compression, signal);
  const specialToolRole = mapResponsesItemTypeToRole(item.type);
  const role = specialToolRole ?? mapOpenAIRole(item.role);
  const segments = [];
  let next = item;

  if (role) {
    const { content } = item;
    if (typeof content === "string") {
      const shrunk = await squeeze(content, role);
      if (shrunk) {
        next = { ...next, content: shrunk.text };
        segments.push(shrunk.segment);
      }
    } else if (Array.isArray(content)) {
      const blocks = await Promise.all(
        content.map((block) => compressResponsesContentBlock(block, role, usageTap, compression, signal))
      );
      for (const { segment } of blocks) {
        if (segment) {
          segments.push(segment);
        }
      }
      // Only rebuild the content array when at least one block changed.
      if (segments.length) {
        next = { ...next, content: blocks.map(({ value }) => value) };
      }
    }
  }

  if (specialToolRole && typeof item.output === "string") {
    const shrunk = await squeeze(item.output, specialToolRole);
    if (shrunk) {
      next = { ...next, output: shrunk.text };
      segments.push(shrunk.segment);
    }
  }

  return { value: next, segments };
}
|
|
669
|
+
/**
 * Compresses one content block of a Responses-API input item.
 *
 * Two fields are candidates, tried in this order: `text` (for blocks of
 * type "input_text" or "text" with a string `text`) and `output` (for the
 * "tool" role with a string `output`). The first field that actually gets
 * compressed wins; if none does, the block is returned unchanged.
 *
 * @param {*} block - Content block from an item's `content` array.
 * @param {string} role - Compression role of the owning item.
 * @param {object} usageTap - Client used to perform the compression.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to the compression calls.
 * @returns {Promise<{value: *, segment?: object}>}
 */
async function compressResponsesContentBlock(block, role, usageTap, compression, signal) {
  if (!isObjectRecord(block)) {
    return { value: block };
  }

  const candidateFields = [];
  const isTextual = block.type === "input_text" || block.type === "text";
  if (isTextual && typeof block.text === "string") {
    candidateFields.push("text");
  }
  if (role === "tool" && typeof block.output === "string") {
    candidateFields.push("output");
  }

  for (const field of candidateFields) {
    const shrunk = await compressTextForRole(block[field], role, usageTap, compression, signal);
    if (shrunk) {
      return {
        value: { ...block, [field]: shrunk.text },
        segment: shrunk.segment,
      };
    }
  }
  return { value: block };
}
|
|
705
|
+
/**
 * Compresses one piece of text on behalf of a role.
 *
 * Resolves to `undefined` (meaning "leave the text alone") when the text is
 * blank, when compression is not enabled for the role, or when the
 * estimated token count is below the role's numeric `minTokens`. Otherwise
 * `usageTap.compressPromptInput` is called and its `compressedInput` is
 * coerced to a string.
 *
 * @param {string} text - Text to compress.
 * @param {string} role - Compression role ("system", "user", "tool", "assistant").
 * @param {object} usageTap - Client exposing `compressPromptInput`.
 * @param {object} compression - Effective compression options.
 * @param {AbortSignal} [signal] - Forwarded to `compressPromptInput`.
 * @returns {Promise<{text: string, segment: {role: string, result: object}}|undefined>}
 */
async function compressTextForRole(text, role, usageTap, compression, signal) {
  if (text.trim() === "") {
    return void 0;
  }
  const roleOptions = resolveRoleCompressionOptions(compression, role);
  if (!roleOptions) {
    return void 0;
  }

  const { minTokens } = roleOptions;
  const estimatedTokens = estimatePromptTokens(text);
  if (typeof minTokens === "number" && estimatedTokens < minTokens) {
    return void 0;
  }

  const { provider, failOpen, tokenCompanyModel, tokenCompanyAggressiveness, tokenCompanyAppId } = roleOptions;
  const result = await usageTap.compressPromptInput(text, {
    provider,
    failOpen,
    tokenCompanyModel,
    tokenCompanyAggressiveness,
    tokenCompanyAppId,
    signal,
  });

  const raw = result.compressedInput;
  const compressedText = typeof raw === "string" ? raw : String(raw);
  return {
    text: compressedText,
    segment: { role, result: { ...result, compressedInput: compressedText } },
  };
}
|
|
731
|
+
/**
 * Normalizes a user-supplied prompt-compression setting.
 *
 * - `true` becomes an empty options object.
 * - Any falsy value, or an object with `enabled === false`, becomes
 *   `undefined` (compression off).
 * - Anything else is returned as given.
 *
 * @param {*} options - Raw `promptCompression` setting.
 * @returns {object|undefined} Options object, or `undefined` when disabled.
 */
function normalizePromptCompressionOptions(options) {
  if (options === true) {
    return {};
  }
  const isDisabled = !options || options.enabled === false;
  return isDisabled ? void 0 : options;
}
|
|
743
|
+
/**
 * Combines wrapper-level compression defaults with a per-call override.
 *
 * - `false` override: compression is off for this call.
 * - `undefined` or `null` override: the defaults apply unchanged. (`null`
 *   is treated as "not specified" so it does not hit the property access
 *   below and throw a TypeError.)
 * - `true` override: the defaults apply, or an empty options object when
 *   there are no defaults.
 * - Object override: shallow-merged over the defaults; `roles` comes from
 *   the override when it defines one, otherwise from the defaults. The
 *   merged object is then normalized, so `enabled: false` disables the call.
 *
 * @param {object|undefined} defaults - Normalized wrapper-level options.
 * @param {boolean|object|null|undefined} override - Per-call setting.
 * @returns {object|undefined} Effective options, or `undefined` when off.
 */
function resolveEffectivePromptCompressionOptions(defaults, override) {
  if (override === false) {
    return void 0;
  }
  if (override === void 0 || override === null) {
    return defaults;
  }
  if (override === true) {
    return defaults ?? {};
  }
  const merged = {
    ...(defaults ?? {}),
    ...override,
    roles: override.roles ?? defaults?.roles,
  };
  return normalizePromptCompressionOptions(merged);
}
|
|
760
|
+
/**
 * Works out the compression settings that apply to one role.
 *
 * Returns `undefined` (role not compressed) when:
 * - `compression.roles` is defined but has no entry for the role;
 * - `compression.roles` is not defined and the role is "assistant";
 * - the role's entry is `false` or an object with `enabled === false`.
 *
 * Otherwise `provider`, `minTokens` and `tokenCompanyAggressiveness` prefer
 * the role's own object entry and fall back to the top-level options, while
 * `failOpen`, `tokenCompanyModel` and `tokenCompanyAppId` always come from
 * the top level.
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Compression role to resolve.
 * @returns {object|undefined} Per-role options, or `undefined` when disabled.
 */
function resolveRoleCompressionOptions(compression, role) {
  const roleTable = compression.roles;
  const setting = roleTable?.[role];

  if (roleTable !== void 0) {
    // An explicit roles table is an allow-list: unlisted roles are skipped.
    if (setting === void 0) {
      return void 0;
    }
  } else if (role === "assistant") {
    return void 0;
  }
  if (setting === false) {
    return void 0;
  }

  const perRole = typeof setting === "object" ? setting : void 0;
  if (perRole?.enabled === false) {
    return void 0;
  }

  const provider = perRole?.provider ?? compression.provider;
  const minTokens = perRole?.minTokens ?? compression.minTokens;
  const tokenCompanyAggressiveness =
    perRole?.tokenCompanyAggressiveness ?? resolveTokenCompanyAggressiveness(compression, role);
  return {
    provider,
    minTokens,
    failOpen: compression.failOpen,
    tokenCompanyModel: compression.tokenCompanyModel,
    tokenCompanyAggressiveness,
    tokenCompanyAppId: compression.tokenCompanyAppId,
  };
}
|
|
785
|
+
/**
 * Reads the top-level `tokenCompanyAggressiveness` setting for a role.
 *
 * The setting may be a single number (returned for every role) or a
 * per-role lookup, in which case the entry for `role` is returned.
 *
 * @param {object} compression - Effective compression options.
 * @param {string} role - Compression role to look up.
 * @returns {*} The number, the role's entry, or `undefined` when unset.
 */
function resolveTokenCompanyAggressiveness(compression, role) {
  const level = compression.tokenCompanyAggressiveness;
  return typeof level === "number" ? level : level?.[role];
}
|
|
791
|
+
/**
 * Aggregates per-segment compression results into one telemetry record.
 *
 * Returns `undefined` when there are no segments. Character and token counts
 * are summed across segments; the saved amounts are clamped at 0. The
 * `techniques` list always starts with "openai-wrapper", followed by a
 * `role:<role>` tag per distinct role, every technique reported by the
 * segments, and "mixed-providers" when more than one provider was involved.
 * The reported `provider` is the first segment's provider.
 *
 * @param {Array<{role: string, result: object}>} segments - Compressed segments.
 * @returns {object|undefined} Telemetry record, or `undefined` for no segments.
 */
function buildPromptCompressionTelemetry(segments) {
  if (!segments.length) {
    return void 0;
  }

  let originalCharacters = 0;
  let compressedCharacters = 0;
  let originalTokens = 0;
  let compressedTokens = 0;
  segments.forEach(({ result }) => {
    originalCharacters += result.originalCharacters;
    compressedCharacters += result.compressedCharacters;
    originalTokens += result.originalTokens;
    compressedTokens += result.compressedTokens;
  });
  const savedCharacters = Math.max(0, originalCharacters - compressedCharacters);
  const savedTokens = Math.max(0, originalTokens - compressedTokens);

  const providers = dedupeStrings(segments.map(({ result }) => result.provider));
  const roles = dedupeStrings(segments.map(({ role }) => `role:${role}`));
  const reportedTechniques = segments.flatMap(({ result }) => result.techniques);
  const providerMix = providers.length > 1 ? ["mixed-providers"] : [];
  const techniques = dedupeStrings(["openai-wrapper", ...roles, ...reportedTechniques, ...providerMix]);

  const tokenSavingsRatio = originalTokens > 0 ? savedTokens / originalTokens : 0;
  const savingsRatio = originalCharacters > 0 ? savedCharacters / originalCharacters : 0;
  return {
    provider: segments[0]?.result.provider ?? "heuristic",
    originalCharacters,
    compressedCharacters,
    savedCharacters,
    originalTokens,
    compressedTokens,
    savedTokens,
    tokenSavingsRatio,
    savingsRatio,
    techniques,
  };
}
|
|
834
|
+
/**
 * Builds the request options used when reporting compression telemetry.
 *
 * Copies `signal`, `headers` and `retries` from the caller's `withUsage`
 * options (each `undefined` when `withUsage` is absent) and attaches the
 * correlation id.
 *
 * @param {object} [withUsage2] - Per-call `withUsage` options.
 * @param {*} correlationId - Correlation id of the current call.
 * @returns {{signal: *, headers: *, retries: *, correlationId: *}}
 */
function promptCompressionRequestOptions(withUsage2, correlationId) {
  const { signal, headers, retries } = withUsage2 ?? {};
  return { signal, headers, retries, correlationId };
}
|
|
842
|
+
/**
 * Maps an OpenAI message role onto a compression role.
 *
 * "system" and "developer" map to "system"; "tool" and "function" map to
 * "tool"; "user" and "assistant" map to themselves. Any other value yields
 * `undefined`.
 *
 * @param {*} role - Role value taken from a message or input item.
 * @returns {"system"|"user"|"tool"|"assistant"|undefined}
 */
function mapOpenAIRole(role) {
  switch (role) {
    case "system":
    case "developer":
      return "system";
    case "user":
      return "user";
    case "tool":
    case "function":
      return "tool";
    case "assistant":
      return "assistant";
    default:
      return void 0;
  }
}
|
|
857
|
+
/**
 * Maps a Responses-API input item type onto a compression role.
 *
 * The types "function_call_output", "tool_result" and
 * "computer_call_output" map to "tool"; every other value yields
 * `undefined`.
 *
 * @param {*} type - The item's `type` field.
 * @returns {"tool"|undefined}
 */
function mapResponsesItemTypeToRole(type) {
  const toolOutputTypes = ["function_call_output", "tool_result", "computer_call_output"];
  return toolOutputTypes.includes(type) ? "tool" : void 0;
}
|
|
284
863
|
/**
 * Separates wrapper-specific keys from the options passed to an API call.
 *
 * `usageTap`, `withUsage` and `promptCompression` are pulled out; whatever
 * remains is cloned into `requestOptions` (or left `undefined` when nothing
 * remains). Non-object input yields an empty object.
 *
 * @param {*} options - Second argument passed to the wrapped API method.
 * @returns {{requestOptions?: object, usageContext?: *, withUsage?: *, promptCompression?: *}}
 */
function splitUsageOptions(options) {
  if (!options || typeof options !== "object") {
    return {};
  }
  const { usageTap: usageContext, withUsage: withUsage2, promptCompression, ...rest } = options;
  const hasRequestOptions = Object.keys(rest).length;
  return {
    requestOptions: hasRequestOptions ? cloneRequestOptions(rest) : void 0,
    usageContext,
    withUsage: withUsage2,
    promptCompression,
  };
}
|
|
296
876
|
function resolveBeginRequest(defaults, override) {
|