@ljoukov/llm 4.1.1 → 5.0.2

This diff shows the contents of package versions that have been publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
package/dist/index.js CHANGED
@@ -4053,6 +4053,71 @@ var files = {
  content: filesContent
  };
 
+ // src/telemetry.ts
+ var telemetryState = getRuntimeSingleton(
+ /* @__PURE__ */ Symbol.for("@ljoukov/llm.telemetryState"),
+ () => ({
+ configuredTelemetry: void 0
+ })
+ );
+ function configureTelemetry(telemetry = void 0) {
+ telemetryState.configuredTelemetry = telemetry === void 0 || telemetry === false ? void 0 : telemetry;
+ }
+ function resetTelemetry() {
+ telemetryState.configuredTelemetry = void 0;
+ }
+ function isPromiseLike2(value) {
+ return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
+ }
+ function resolveTelemetrySelection(telemetry) {
+ if (telemetry === false) {
+ return void 0;
+ }
+ if (telemetry !== void 0) {
+ return telemetry;
+ }
+ return telemetryState.configuredTelemetry;
+ }
+ function createTelemetrySession(telemetry) {
+ const config = resolveTelemetrySelection(telemetry);
+ if (!config) {
+ return void 0;
+ }
+ const pending = /* @__PURE__ */ new Set();
+ const trackPromise = (promise) => {
+ pending.add(promise);
+ promise.finally(() => {
+ pending.delete(promise);
+ });
+ };
+ const emit = (event) => {
+ try {
+ const output = config.sink.emit(event);
+ if (isPromiseLike2(output)) {
+ const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
+ trackPromise(task);
+ }
+ } catch {
+ }
+ };
+ const flush = async () => {
+ while (pending.size > 0) {
+ await Promise.allSettled([...pending]);
+ }
+ if (typeof config.sink.flush === "function") {
+ try {
+ await config.sink.flush();
+ } catch {
+ }
+ }
+ };
+ return {
+ includeStreamEvents: config.includeStreamEvents === true,
+ emit,
+ flush
+ };
+ }
+
  // src/llm.ts
  var toolCallContextStorage = getRuntimeSingleton(
  /* @__PURE__ */ Symbol.for("@ljoukov/llm.toolCallContextStorage"),
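The telemetry plumbing added above resolves a per-call telemetry option first and falls back to a process-wide default registered with configureTelemetry; a session wraps the sink, tracks any promise returned by emit, and awaits the backlog in flush. A minimal consumer sketch, inferred from the dist code above (the TelemetryEvent type and the console sink are illustrative, not the package's published typings):

    import { configureTelemetry, resetTelemetry } from "@ljoukov/llm";

    type TelemetryEvent = { type: string } & Record<string, unknown>;

    const consoleSink = {
      // emit may return a promise; the session tracks it so flush() can await it.
      emit(event: TelemetryEvent): void {
        console.log(`[llm] ${event.type}`, event);
      },
      // Optional; awaited (with errors swallowed) when the session flushes.
      async flush(): Promise<void> {},
    };

    // Process-wide default for calls that pass no per-call telemetry option.
    configureTelemetry({ sink: consoleSink, includeStreamEvents: false });

    // Passing false (or calling resetTelemetry()) clears the default again.
    configureTelemetry(false);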
@@ -5639,6 +5704,65 @@ function mergeTokenUpdates(current, next) {
  toolUsePromptTokens: next.toolUsePromptTokens ?? current.toolUsePromptTokens
  };
  }
+ function sumUsageValue(current, next) {
+ if (typeof next !== "number" || !Number.isFinite(next)) {
+ return current;
+ }
+ const normalizedNext = Math.max(0, next);
+ if (typeof current !== "number" || !Number.isFinite(current)) {
+ return normalizedNext;
+ }
+ return Math.max(0, current) + normalizedNext;
+ }
+ function sumUsageTokens(current, next) {
+ if (!next) {
+ return current;
+ }
+ return {
+ promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
+ cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
+ responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
+ responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
+ thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
+ totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
+ toolUsePromptTokens: sumUsageValue(current?.toolUsePromptTokens, next.toolUsePromptTokens)
+ };
+ }
+ function countInlineImagesInContent(content) {
+ if (!content) {
+ return 0;
+ }
+ let count = 0;
+ for (const part of content.parts) {
+ if (part.type === "inlineData" && isInlineImageMime(part.mimeType)) {
+ count += 1;
+ }
+ }
+ return count;
+ }
+ function createLlmTelemetryEmitter(params) {
+ const session = createTelemetrySession(params.telemetry);
+ const callId = randomBytes(8).toString("hex");
+ return {
+ includeStreamEvents: session?.includeStreamEvents === true,
+ emit: (event) => {
+ if (!session) {
+ return;
+ }
+ session.emit({
+ ...event,
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ callId,
+ operation: params.operation,
+ provider: params.provider,
+ model: params.model
+ });
+ },
+ flush: async () => {
+ await session?.flush();
+ }
+ };
+ }
  function toMaybeNumber(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
  return value;
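sumUsageValue ignores non-finite counts and clamps negatives to zero before adding, so a malformed usage report from one provider cannot corrupt a running total. An illustration with made-up numbers:

    sumUsageTokens(
      { promptTokens: 100, responseTokens: 40 },
      { promptTokens: 25, responseTokens: -5, thinkingTokens: NaN }
    );
    // promptTokens: 125; responseTokens: 40 (the -5 clamps to 0);
    // thinkingTokens stays undefined (NaN is skipped); other fields undefined.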
@@ -5859,6 +5983,23 @@ function toOpenAiToolOutput(value) {
  }
  return mergeToolOutput(value);
  }
+ function toChatGptToolOutput(value) {
+ const toolOutput = toOpenAiToolOutput(value);
+ if (typeof toolOutput === "string") {
+ return toolOutput;
+ }
+ return toolOutput.map((item) => {
+ if (item.type !== "input_image") {
+ return item;
+ }
+ return {
+ type: "input_image",
+ ...item.file_id ? { file_id: item.file_id } : {},
+ ...item.image_url ? { image_url: item.image_url } : {},
+ ...item.detail ? { detail: item.detail } : {}
+ };
+ });
+ }
  function toGeminiToolOutputItems(value) {
  if (isLlmToolOutputContentItem(value)) {
  return [value];
@@ -7048,6 +7189,10 @@ async function runTextCall(params) {
  let responseRole;
  let latestUsage;
  let responseImages = 0;
+ const pushEvent = (event) => {
+ queue.push(event);
+ params.onEvent?.(event);
+ };
  const pushDelta = (channel, text) => {
  if (!text) {
  return;
@@ -7058,7 +7203,7 @@ async function runTextCall(params) {
  } else {
  callLogger?.appendResponseDelta(text);
  }
- queue.push({ type: "delta", channel, text });
+ pushEvent({ type: "delta", channel, text });
  };
  const pushInline = (data, mimeType) => {
  if (!data) {
@@ -7128,7 +7273,7 @@ async function runTextCall(params) {
  }
  case "response.refusal.delta": {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  break;
  }
  default:
@@ -7137,7 +7282,7 @@ async function runTextCall(params) {
  }
  const finalResponse = await stream.finalResponse();
  modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  if (finalResponse.error) {
  const message = typeof finalResponse.error.message === "string" ? finalResponse.error.message : "OpenAI response failed";
  throw new Error(message);
@@ -7201,11 +7346,11 @@ async function runTextCall(params) {
  });
  blocked = blocked || result2.blocked;
  if (blocked) {
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  if (result2.model) {
  modelVersion = providerInfo.serviceTier ? request.model : `chatgpt-${result2.model}`;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  }
  latestUsage = extractChatGptUsageTokens(result2.usage);
  const fallbackText = typeof result2.text === "string" ? result2.text : "";
@@ -7243,11 +7388,11 @@ async function runTextCall(params) {
  { signal }
  );
  modelVersion = typeof response.model === "string" ? response.model : request.model;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
  if (choice?.finish_reason === "content_filter") {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  const textOutput = extractFireworksMessageText(
  choice?.message
@@ -7289,11 +7434,11 @@ async function runTextCall(params) {
  for await (const chunk of stream) {
  if (chunk.modelVersion) {
  modelVersion = chunk.modelVersion;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  }
  if (chunk.promptFeedback?.blockReason) {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  latestUsage = mergeTokenUpdates(
  latestUsage,
@@ -7306,7 +7451,7 @@ async function runTextCall(params) {
  const primary = candidates[0];
  if (primary && isModerationFinish(primary.finishReason)) {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  for (const candidate of candidates) {
  const candidateContent = candidate.content;
@@ -7343,7 +7488,7 @@ async function runTextCall(params) {
  imageSize: request.imageSize
  });
  if (latestUsage) {
- queue.push({ type: "usage", usage: latestUsage, costUsd, modelVersion });
+ pushEvent({ type: "usage", usage: latestUsage, costUsd, modelVersion });
  }
  callLogger?.complete({
  responseText: text,
@@ -7397,18 +7542,76 @@ async function runTextCall(params) {
  });
  return result;
  }
- function streamText(request) {
+ function startTextStream(request, operation) {
  const queue = createAsyncQueue();
  const abortController = new AbortController();
+ const provider = resolveProvider(request.model).provider;
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation,
+ provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ telemetry.emit({
+ type: "llm.call.started",
+ inputMode: typeof request.input === "string" ? "string" : "messages",
+ toolCount: request.tools?.length ?? 0,
+ responseModalities: request.responseModalities
+ });
  const result = (async () => {
+ let uploadMetrics = emptyFileUploadMetrics();
  try {
- const output = await runTextCall({ request, queue, abortController });
+ let output;
+ await collectFileUploadMetrics(async () => {
+ try {
+ output = await runTextCall({
+ request,
+ queue,
+ abortController,
+ onEvent: telemetry.includeStreamEvents ? (event) => {
+ telemetry.emit({ type: "llm.call.stream", event });
+ } : void 0
+ });
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
+ }
+ });
+ if (!output) {
+ throw new Error("LLM text call returned no result.");
+ }
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ modelVersion: output.modelVersion,
+ blocked: output.blocked,
+ usage: output.usage,
+ costUsd: output.costUsd,
+ outputTextChars: output.text.length,
+ thoughtChars: output.thoughts.length,
+ responseImages: countInlineImagesInContent(output.content),
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
  queue.close();
  return output;
  } catch (error) {
  const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
  queue.fail(err);
  throw err;
+ } finally {
+ await telemetry.flush();
  }
  })();
  return {
@@ -7417,8 +7620,11 @@ function streamText(request) {
  abort: () => abortController.abort()
  };
  }
+ function streamText(request) {
+ return startTextStream(request, "streamText");
+ }
  async function generateText(request) {
- const call = streamText(request);
+ const call = startTextStream(request, "generateText");
  for await (const _event of call.events) {
  }
  return await call.result;
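streamText and generateText are now thin wrappers over startTextStream, which brackets every call with llm.call.started and llm.call.completed events (plus one llm.call.stream mirror per stream event when includeStreamEvents is set) and always flushes the sink in a finally block. A hedged usage sketch; the option names come from the diff, while the model id and sink object are placeholders:

    const call = streamText({
      model: "gpt-5.2",
      input: "Say hello",
      // Per-call override; telemetry: false suppresses emission even when a
      // global sink was installed with configureTelemetry.
      telemetry: { sink: consoleSink, includeStreamEvents: true },
    });
    for await (const event of call.events) {
      // "delta" / "model" / "blocked" / "usage" events, each also mirrored
      // to the sink as { type: "llm.call.stream", event }.
    }
    const result = await call.result; // resolving emits "llm.call.completed"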
@@ -7444,9 +7650,26 @@ function buildJsonSchemaConfig(request) {
  } : void 0;
  return { providerInfo, responseJsonSchema, openAiTextFormat };
  }
- function streamJson(request) {
+ function startJsonStream(request, operation) {
  const queue = createAsyncQueue();
  const abortController = new AbortController();
+ const provider = resolveProvider(request.model).provider;
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation,
+ provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
+ const streamMode = request.streamMode ?? "partial";
+ telemetry.emit({
+ type: "llm.call.started",
+ inputMode: typeof request.input === "string" ? "string" : "messages",
+ toolCount: request.tools?.length ?? 0,
+ maxAttempts,
+ streamMode
+ });
  const resolveAbortSignal = () => {
  if (!request.signal) {
  return abortController.signal;
@@ -7465,135 +7688,155 @@ function streamJson(request) {
  return abortController.signal;
  };
  const result = (async () => {
- const signal = resolveAbortSignal();
- const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
- const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
- const streamMode = request.streamMode ?? "partial";
- const failures = [];
- let openAiTextFormatForAttempt = openAiTextFormat;
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- let rawText = "";
- let lastPartial = "";
- try {
- const call = streamText({
- model: request.model,
- input: request.input,
- instructions: request.instructions,
- tools: request.tools,
- responseMimeType: request.responseMimeType ?? "application/json",
- responseJsonSchema,
- thinkingLevel: request.thinkingLevel,
- ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
- signal
- });
+ let uploadMetrics = emptyFileUploadMetrics();
+ let attemptsUsed = 0;
+ try {
+ let output;
+ await collectFileUploadMetrics(async () => {
  try {
- for await (const event of call.events) {
- queue.push(event);
- if (event.type === "delta" && event.channel === "response") {
- rawText += event.text;
- if (streamMode === "partial") {
- const partial = parsePartialJsonFromLlmText(rawText);
- if (partial !== null) {
- const serialized = JSON.stringify(partial);
- if (serialized !== lastPartial) {
- lastPartial = serialized;
- queue.push({
- type: "json",
- stage: "partial",
- value: partial
- });
+ const signal = resolveAbortSignal();
+ const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
+ const failures = [];
+ let openAiTextFormatForAttempt = openAiTextFormat;
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+ attemptsUsed = attempt;
+ let rawText = "";
+ let lastPartial = "";
+ try {
+ const call = streamText({
+ model: request.model,
+ input: request.input,
+ instructions: request.instructions,
+ tools: request.tools,
+ responseMimeType: request.responseMimeType ?? "application/json",
+ responseJsonSchema,
+ thinkingLevel: request.thinkingLevel,
+ ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
+ telemetry: false,
+ signal
+ });
+ try {
+ for await (const event of call.events) {
+ queue.push(event);
+ if (telemetry.includeStreamEvents) {
+ telemetry.emit({ type: "llm.call.stream", event });
+ }
+ if (event.type === "delta" && event.channel === "response") {
+ rawText += event.text;
+ if (streamMode === "partial") {
+ const partial = parsePartialJsonFromLlmText(rawText);
+ if (partial !== null) {
+ const serialized = JSON.stringify(partial);
+ if (serialized !== lastPartial) {
+ lastPartial = serialized;
+ queue.push({
+ type: "json",
+ stage: "partial",
+ value: partial
+ });
+ }
+ }
+ }
  }
  }
+ } catch (streamError) {
+ await call.result.catch(() => void 0);
+ throw streamError;
+ }
+ const result2 = await call.result;
+ rawText = rawText || result2.text;
+ const cleanedText = normalizeJsonText(rawText);
+ const repairedText = escapeNewlinesInStrings(cleanedText);
+ const payload = JSON.parse(repairedText);
+ const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
+ const parsed = request.schema.parse(normalized);
+ queue.push({ type: "json", stage: "final", value: parsed });
+ output = { value: parsed, rawText, result: result2 };
+ return;
+ } catch (error) {
+ const handled = error instanceof Error ? error : new Error(String(error));
+ failures.push({ attempt, rawText, error: handled });
+ if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
+ openAiTextFormatForAttempt = void 0;
+ }
+ if (attempt >= maxAttempts) {
+ throw new LlmJsonCallError(
+ `LLM JSON call failed after ${attempt} attempt(s)`,
+ failures
+ );
  }
  }
  }
- } catch (streamError) {
- await call.result.catch(() => void 0);
- throw streamError;
- }
- const result2 = await call.result;
- rawText = rawText || result2.text;
- const cleanedText = normalizeJsonText(rawText);
- const repairedText = escapeNewlinesInStrings(cleanedText);
- const payload = JSON.parse(repairedText);
- const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
- const parsed = request.schema.parse(normalized);
- queue.push({ type: "json", stage: "final", value: parsed });
- queue.close();
- return { value: parsed, rawText, result: result2 };
- } catch (error) {
- const handled = error instanceof Error ? error : new Error(String(error));
- failures.push({ attempt, rawText, error: handled });
- if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
- openAiTextFormatForAttempt = void 0;
- }
- if (attempt >= maxAttempts) {
- throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+ throw new LlmJsonCallError("LLM JSON call failed", failures);
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
  }
- }
+ });
+ if (!output) {
+ throw new Error("LLM JSON call returned no result.");
+ }
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ modelVersion: output.result.modelVersion,
+ blocked: output.result.blocked,
+ usage: output.result.usage,
+ costUsd: output.result.costUsd,
+ rawTextChars: output.rawText.length,
+ attempts: attemptsUsed,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
+ queue.close();
+ return output;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
+ queue.fail(err);
+ throw err;
+ } finally {
+ await telemetry.flush();
  }
- throw new LlmJsonCallError("LLM JSON call failed", failures);
- })().catch((error) => {
- const err = error instanceof Error ? error : new Error(String(error));
- queue.fail(err);
- throw err;
- });
+ })();
  return {
  events: queue.iterable,
  result,
  abort: () => abortController.abort()
  };
  }
+ function streamJson(request) {
+ return startJsonStream(request, "streamJson");
+ }
  async function generateJson(request) {
- const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
- const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
- let openAiTextFormatForAttempt = openAiTextFormat;
- const failures = [];
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- let rawText = "";
- try {
- const call = streamText({
- model: request.model,
- input: request.input,
- instructions: request.instructions,
- tools: request.tools,
- responseMimeType: request.responseMimeType ?? "application/json",
- responseJsonSchema,
- thinkingLevel: request.thinkingLevel,
- ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
- signal: request.signal
- });
- try {
- for await (const event of call.events) {
- request.onEvent?.(event);
- if (event.type === "delta" && event.channel === "response") {
- rawText += event.text;
- }
- }
- } catch (streamError) {
- await call.result.catch(() => void 0);
- throw streamError;
- }
- const result = await call.result;
- rawText = rawText || result.text;
- const cleanedText = normalizeJsonText(rawText);
- const repairedText = escapeNewlinesInStrings(cleanedText);
- const payload = JSON.parse(repairedText);
- const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
- const parsed = request.schema.parse(normalized);
- return { value: parsed, rawText, result };
- } catch (error) {
- const handled = error instanceof Error ? error : new Error(String(error));
- failures.push({ attempt, rawText, error: handled });
- if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
- openAiTextFormatForAttempt = void 0;
- }
- if (attempt >= maxAttempts) {
- throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+ const call = startJsonStream(
+ {
+ ...request,
+ streamMode: "final"
+ },
+ "generateJson"
+ );
+ try {
+ for await (const event of call.events) {
+ if (event.type !== "json") {
+ request.onEvent?.(event);
  }
  }
+ } catch (streamError) {
+ await call.result.catch(() => void 0);
+ throw streamError;
  }
- throw new LlmJsonCallError("LLM JSON call failed", failures);
+ return await call.result;
  }
  var DEFAULT_TOOL_LOOP_MAX_STEPS = 8;
  function resolveToolLoopContents(input) {
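Note that startJsonStream passes telemetry: false to its inner streamText call, so a JSON call emits exactly one telemetry span regardless of how many parse attempts it consumes; the retry count surfaces as attempts on llm.call.completed, and maxAttempts/streamMode on llm.call.started. A sketch under the same assumptions as above (OkSchema is a hypothetical zod schema; schema.parse is the only contract the code relies on):

    const { value, rawText, result } = await generateJson({
      model: "gpt-5.2",
      input: 'Return {"ok": true}',
      schema: OkSchema, // hypothetical, e.g. z.object({ ok: z.boolean() })
      maxAttempts: 2,
    });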
@@ -8441,7 +8684,7 @@ async function runToolLoop(request) {
  toolOutputs.push({
  type: "custom_tool_call_output",
  call_id: entry.ids.callId,
- output: toOpenAiToolOutput(outputPayload)
+ output: toChatGptToolOutput(outputPayload)
  });
  } else {
  toolOutputs.push({
@@ -8455,7 +8698,7 @@ async function runToolLoop(request) {
  toolOutputs.push({
  type: "function_call_output",
  call_id: entry.ids.callId,
- output: toOpenAiToolOutput(outputPayload)
+ output: toChatGptToolOutput(outputPayload)
  });
  }
  }
@@ -9209,7 +9452,10 @@ function streamToolLoop(request) {
  abort: () => abortController.abort()
  };
  }
- var IMAGE_GRADE_SCHEMA = z3.enum(["pass", "fail"]);
+ var IMAGE_GRADE_VALUE_SCHEMA = z3.enum(["pass", "fail"]);
+ var IMAGE_GRADE_SCHEMA = z3.object({
+ grade: IMAGE_GRADE_VALUE_SCHEMA
+ });
  async function gradeGeneratedImage(params) {
  const parts = [
  {
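The grade schema changes from a bare z3.enum to an object wrapper, so the grading model must now answer {"grade":"pass"} rather than the bare JSON string "pass"; a top-level object is the safer shape for providers whose structured-output modes reject non-object roots. Behaviorally:

    IMAGE_GRADE_SCHEMA.parse({ grade: "pass" }); // 5.x shape: ok
    IMAGE_GRADE_SCHEMA.parse("pass");            // old 4.x shape: now throws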
@@ -9220,7 +9466,7 @@ async function gradeGeneratedImage(params) {
  "Image prompt to grade:",
  params.imagePrompt,
  "",
- 'Respond with the JSON string "pass" or "fail".'
+ 'Respond with JSON like {"grade":"pass"} or {"grade":"fail"}.'
  ].join("\n")
  },
  {
@@ -9229,12 +9475,13 @@ async function gradeGeneratedImage(params) {
  mimeType: params.image.mimeType ?? "image/png"
  }
  ];
- const { value } = await generateJson({
+ const { value, result } = await generateJson({
  model: params.model,
  input: [{ role: "user", content: parts }],
- schema: IMAGE_GRADE_SCHEMA
+ schema: IMAGE_GRADE_SCHEMA,
+ telemetry: false
  });
- return value;
+ return { grade: value.grade, result };
  }
  async function generateImages(request) {
  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
@@ -9254,6 +9501,19 @@ async function generateImages(request) {
  if (!gradingPrompt) {
  throw new Error("imageGradingPrompt must be a non-empty string");
  }
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation: "generateImages",
+ provider: resolveProvider(request.model).provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ telemetry.emit({
+ type: "llm.call.started",
+ imagePromptCount: promptList.length,
+ styleImageCount: request.styleImages?.length ?? 0,
+ maxAttempts
+ });
  const addText = (parts, text) => {
  const lastPart = parts[parts.length - 1];
  if (lastPart !== void 0 && lastPart.type === "text") {
@@ -9311,6 +9571,9 @@ async function generateImages(request) {
  const inputMessages = [{ role: "user", content: buildInitialPromptParts() }];
  const orderedEntries = [...promptEntries];
  const resolvedImages = /* @__PURE__ */ new Map();
+ let totalCostUsd = 0;
+ let totalUsage;
+ let attemptsUsed = 0;
  const removeResolvedEntries = (resolved) => {
  if (resolved.size === 0) {
  return;
@@ -9325,70 +9588,118 @@ async function generateImages(request) {
  }
  }
  };
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- const result = await generateText({
- model: request.model,
- input: inputMessages,
- responseModalities: ["IMAGE", "TEXT"],
- imageAspectRatio: request.imageAspectRatio,
- imageSize: request.imageSize ?? "2K"
- });
- if (result.blocked || !result.content) {
- continue;
- }
- const images = extractImages(result.content);
- if (images.length > 0 && promptEntries.length > 0) {
- const assignedCount = Math.min(images.length, promptEntries.length);
- const pendingAssignments = promptEntries.slice(0, assignedCount);
- const assignedImages = images.slice(0, assignedCount);
- const gradeResults = await Promise.all(
- pendingAssignments.map(
- (entry, index) => gradeGeneratedImage({
- gradingPrompt,
- imagePrompt: entry.prompt,
- image: (() => {
- const image = assignedImages[index];
- if (!image) {
- throw new Error("Image generation returned fewer images than expected.");
+ let uploadMetrics = emptyFileUploadMetrics();
+ try {
+ await collectFileUploadMetrics(async () => {
+ try {
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+ attemptsUsed = attempt;
+ const result = await generateText({
+ model: request.model,
+ input: inputMessages,
+ responseModalities: ["IMAGE", "TEXT"],
+ imageAspectRatio: request.imageAspectRatio,
+ imageSize: request.imageSize ?? "2K",
+ telemetry: false
+ });
+ totalCostUsd += result.costUsd;
+ totalUsage = sumUsageTokens(totalUsage, result.usage);
+ if (result.blocked || !result.content) {
+ continue;
+ }
+ const images = extractImages(result.content);
+ if (images.length > 0 && promptEntries.length > 0) {
+ const assignedCount = Math.min(images.length, promptEntries.length);
+ const pendingAssignments = promptEntries.slice(0, assignedCount);
+ const assignedImages = images.slice(0, assignedCount);
+ const gradeResults = await Promise.all(
+ pendingAssignments.map(
+ (entry, index) => gradeGeneratedImage({
+ gradingPrompt,
+ imagePrompt: entry.prompt,
+ image: (() => {
+ const image = assignedImages[index];
+ if (!image) {
+ throw new Error("Image generation returned fewer images than expected.");
+ }
+ return image;
+ })(),
+ model: "gpt-5.2"
+ })
+ )
+ );
+ const passedEntries = /* @__PURE__ */ new Set();
+ for (let i = 0; i < gradeResults.length; i += 1) {
+ const gradeResult = gradeResults[i];
+ const entry = pendingAssignments[i];
+ const image = assignedImages[i];
+ if (!gradeResult || !entry || !image) {
+ continue;
  }
- return image;
- })(),
- model: "gpt-5.2"
- })
- )
- );
- const passedEntries = /* @__PURE__ */ new Set();
- for (let i = 0; i < gradeResults.length; i += 1) {
- const grade = gradeResults[i];
- const entry = pendingAssignments[i];
- const image = assignedImages[i];
- if (!grade || !entry || !image) {
- continue;
- }
- if (grade === "pass") {
- resolvedImages.set(entry.index, image);
- passedEntries.add(entry.index);
+ totalCostUsd += gradeResult.result.costUsd;
+ totalUsage = sumUsageTokens(totalUsage, gradeResult.result.usage);
+ if (gradeResult.grade === "pass") {
+ resolvedImages.set(entry.index, image);
+ passedEntries.add(entry.index);
+ }
+ }
+ removeResolvedEntries(passedEntries);
+ }
+ if (promptEntries.length === 0) {
+ break;
+ }
+ inputMessages.push({
+ role: "assistant",
+ content: result.content.parts
+ });
+ inputMessages.push({
+ role: "user",
+ content: buildContinuationPromptParts(promptEntries)
+ });
  }
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
  }
- removeResolvedEntries(passedEntries);
- }
- if (promptEntries.length === 0) {
- break;
- }
- inputMessages.push({
- role: "assistant",
- content: result.content.parts
  });
- inputMessages.push({ role: "user", content: buildContinuationPromptParts(promptEntries) });
- }
- const orderedImages = [];
- for (const entry of orderedEntries) {
- const image = resolvedImages.get(entry.index);
- if (image) {
- orderedImages.push(image);
+ const orderedImages = [];
+ for (const entry of orderedEntries) {
+ const image = resolvedImages.get(entry.index);
+ if (image) {
+ orderedImages.push(image);
+ }
  }
+ const outputImages = orderedImages.slice(0, numImages);
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ usage: totalUsage,
+ costUsd: totalCostUsd,
+ imageCount: outputImages.length,
+ attempts: attemptsUsed,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
+ return outputImages;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ usage: totalUsage,
+ costUsd: totalCostUsd,
+ attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
+ throw err;
+ } finally {
+ await telemetry.flush();
  }
- return orderedImages.slice(0, numImages);
  }
  async function generateImageInBatches(request) {
  const {
@@ -12039,7 +12350,7 @@ function isNoEntError(error) {
 
  // src/agent.ts
  async function runAgentLoop(request) {
- const telemetry = createAgentTelemetrySession(request.telemetry);
+ const telemetry = createTelemetrySession(request.telemetry);
  const logging = createRootAgentLoggingSession(request);
  try {
  return await runWithAgentLoggingSession(logging, async () => {
@@ -12125,7 +12436,7 @@ async function runAgentLoopInternal(request, context) {
  logging: _logging,
  ...toolLoopRequest
  } = request;
- const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
+ const telemetrySession = context.telemetry ?? createTelemetrySession(telemetry);
  const loggingSession = context.logging;
  const runId = randomRunId();
  const startedAtMs = Date.now();
@@ -12188,15 +12499,15 @@ async function runAgentLoopInternal(request, context) {
  ].join(" ")
  );
  const sourceOnEvent = toolLoopRequestWithSteering.onEvent;
- const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
+ const includeStreamEvents = telemetrySession?.includeStreamEvents === true;
  const streamEventLogger = loggingSession ? createAgentStreamEventLogger({
  append: (line) => {
  loggingSession.logLine(`[agent:${runId}] ${line}`);
  }
  }) : void 0;
- const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
+ const wrappedOnEvent = sourceOnEvent || includeStreamEvents ? (event) => {
  sourceOnEvent?.(event);
- if (includeLlmStreamEvents) {
+ if (includeStreamEvents) {
  emitTelemetry({ type: "agent.run.stream", event });
  }
  streamEventLogger?.appendEvent(event);
@@ -12434,7 +12745,7 @@ function countToolCalls(result) {
  }
  return count;
  }
- function sumUsageValue(current, next) {
+ function sumUsageValue2(current, next) {
  if (typeof next !== "number" || !Number.isFinite(next)) {
  return current;
  }
@@ -12452,20 +12763,17 @@ function summarizeResultUsage(result) {
  continue;
  }
  summary = {
- promptTokens: sumUsageValue(summary?.promptTokens, usage.promptTokens),
- cachedTokens: sumUsageValue(summary?.cachedTokens, usage.cachedTokens),
- responseTokens: sumUsageValue(summary?.responseTokens, usage.responseTokens),
- responseImageTokens: sumUsageValue(summary?.responseImageTokens, usage.responseImageTokens),
- thinkingTokens: sumUsageValue(summary?.thinkingTokens, usage.thinkingTokens),
- totalTokens: sumUsageValue(summary?.totalTokens, usage.totalTokens),
- toolUsePromptTokens: sumUsageValue(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
+ promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
+ cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
+ responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
+ responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
+ thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
+ totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
+ toolUsePromptTokens: sumUsageValue2(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
  };
  }
  return summary;
  }
- function isPromiseLike2(value) {
- return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
- }
  function resolveAgentLoggingSelection(value) {
  if (value === false) {
  return void 0;
@@ -12499,60 +12807,6 @@ function createRootAgentLoggingSession(request) {
  mirrorToConsole: selected.mirrorToConsole !== false
  });
  }
- function isAgentTelemetrySink(value) {
- return typeof value === "object" && value !== null && typeof value.emit === "function";
- }
- function resolveTelemetrySelection(telemetry) {
- if (!telemetry) {
- return void 0;
- }
- if (isAgentTelemetrySink(telemetry)) {
- return { sink: telemetry };
- }
- if (isAgentTelemetrySink(telemetry.sink)) {
- return telemetry;
- }
- throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
- }
- function createAgentTelemetrySession(telemetry) {
- const config = resolveTelemetrySelection(telemetry);
- if (!config) {
- return void 0;
- }
- const pending = /* @__PURE__ */ new Set();
- const trackPromise = (promise) => {
- pending.add(promise);
- promise.finally(() => {
- pending.delete(promise);
- });
- };
- const emit = (event) => {
- try {
- const output = config.sink.emit(event);
- if (isPromiseLike2(output)) {
- const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
- trackPromise(task);
- }
- } catch {
- }
- };
- const flush = async () => {
- while (pending.size > 0) {
- await Promise.allSettled([...pending]);
- }
- if (typeof config.sink.flush === "function") {
- try {
- await config.sink.flush();
- } catch {
- }
- }
- };
- return {
- includeLlmStreamEvents: config.includeLlmStreamEvents === true,
- emit,
- flush
- };
- }
  function createAgentTelemetryEmitter(params) {
  return (event) => {
  if (!params.session) {
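The agent-specific createAgentTelemetrySession is deleted in favor of the shared createTelemetrySession shown earlier. Two behavioral consequences for callers, both visible in this hunk and the renames above: a bare sink is no longer auto-wrapped into { sink } (its emit calls now fail silently inside the session's try/catch, so pass { sink } explicitly), and the includeLlmStreamEvents flag is now includeStreamEvents. A migration sketch (args elided):

    // 4.x
    runAgentLoop({ ...args, telemetry: { sink, includeLlmStreamEvents: true } });
    // 5.x
    runAgentLoop({ ...args, telemetry: { sink, includeStreamEvents: true } });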
@@ -13245,6 +13499,7 @@ export {
  applyPatch,
  configureGemini,
  configureModelConcurrency,
+ configureTelemetry,
  convertGooglePartsToLlmParts,
  createApplyPatchTool,
  createCodexApplyPatchTool,
@@ -13293,6 +13548,7 @@ export {
  parseJsonFromLlmText,
  refreshChatGptOauthToken,
  resetModelConcurrencyConfig,
+ resetTelemetry,
  resolveFilesystemToolProfile,
  resolveFireworksModelId,
  runAgentLoop,
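Taken together, the new public surface in 5.x is the configureTelemetry/resetTelemetry pair plus the per-call telemetry option. A typical test-suite guard, sketched with jest/vitest-style hooks (the hooks and recordingSink are assumptions, not part of this package):

    import { configureTelemetry, resetTelemetry } from "@ljoukov/llm";

    beforeEach(() => configureTelemetry({ sink: recordingSink })); // test double
    afterEach(() => resetTelemetry()); // drop the process-wide default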