@ljoukov/llm 4.1.1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4053,6 +4053,71 @@ var files = {
   content: filesContent
 };
 
+// src/telemetry.ts
+var telemetryState = getRuntimeSingleton(
+  /* @__PURE__ */ Symbol.for("@ljoukov/llm.telemetryState"),
+  () => ({
+    configuredTelemetry: void 0
+  })
+);
+function configureTelemetry(telemetry = void 0) {
+  telemetryState.configuredTelemetry = telemetry === void 0 || telemetry === false ? void 0 : telemetry;
+}
+function resetTelemetry() {
+  telemetryState.configuredTelemetry = void 0;
+}
+function isPromiseLike2(value) {
+  return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
+}
+function resolveTelemetrySelection(telemetry) {
+  if (telemetry === false) {
+    return void 0;
+  }
+  if (telemetry !== void 0) {
+    return telemetry;
+  }
+  return telemetryState.configuredTelemetry;
+}
+function createTelemetrySession(telemetry) {
+  const config = resolveTelemetrySelection(telemetry);
+  if (!config) {
+    return void 0;
+  }
+  const pending = /* @__PURE__ */ new Set();
+  const trackPromise = (promise) => {
+    pending.add(promise);
+    promise.finally(() => {
+      pending.delete(promise);
+    });
+  };
+  const emit = (event) => {
+    try {
+      const output = config.sink.emit(event);
+      if (isPromiseLike2(output)) {
+        const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
+        trackPromise(task);
+      }
+    } catch {
+    }
+  };
+  const flush = async () => {
+    while (pending.size > 0) {
+      await Promise.allSettled([...pending]);
+    }
+    if (typeof config.sink.flush === "function") {
+      try {
+        await config.sink.flush();
+      } catch {
+      }
+    }
+  };
+  return {
+    includeStreamEvents: config.includeStreamEvents === true,
+    emit,
+    flush
+  };
+}
+
 // src/llm.ts
 var toolCallContextStorage = getRuntimeSingleton(
   /* @__PURE__ */ Symbol.for("@ljoukov/llm.toolCallContextStorage"),
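The new src/telemetry.ts module introduces a process-wide telemetry default: `configureTelemetry` stores a config (passing `false` or `undefined` clears it), `resetTelemetry` clears it outright, and `createTelemetrySession` resolves a per-call value against that default, tracking any promise returned by `sink.emit` so that `flush` can await all pending emissions. A minimal wiring sketch inferred from this diff; the `consoleSink` object is a hypothetical example, and the session only requires `emit(event)` plus an optional `flush()`:

```js
import { configureTelemetry, resetTelemetry } from "@ljoukov/llm";

// Hypothetical sink: only emit(event) is required; flush() is optional.
const consoleSink = {
  emit(event) {
    // emit may return a promise; the session tracks it so flush() can await it.
    console.log(`[telemetry] ${event.type}`, event);
  },
  async flush() {
    // Drain any buffered exporter here.
  }
};

// Process-wide default. includeStreamEvents: true would additionally forward
// per-chunk "llm.call.stream" events to the sink.
configureTelemetry({ sink: consoleSink, includeStreamEvents: false });

// Later (e.g. between tests), drop the default again:
resetTelemetry();
```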
@@ -5639,6 +5704,65 @@ function mergeTokenUpdates(current, next) {
     toolUsePromptTokens: next.toolUsePromptTokens ?? current.toolUsePromptTokens
   };
 }
+function sumUsageValue(current, next) {
+  if (typeof next !== "number" || !Number.isFinite(next)) {
+    return current;
+  }
+  const normalizedNext = Math.max(0, next);
+  if (typeof current !== "number" || !Number.isFinite(current)) {
+    return normalizedNext;
+  }
+  return Math.max(0, current) + normalizedNext;
+}
+function sumUsageTokens(current, next) {
+  if (!next) {
+    return current;
+  }
+  return {
+    promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
+    cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
+    responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
+    responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
+    thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
+    totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
+    toolUsePromptTokens: sumUsageValue(current?.toolUsePromptTokens, next.toolUsePromptTokens)
+  };
+}
+function countInlineImagesInContent(content) {
+  if (!content) {
+    return 0;
+  }
+  let count = 0;
+  for (const part of content.parts) {
+    if (part.type === "inlineData" && isInlineImageMime(part.mimeType)) {
+      count += 1;
+    }
+  }
+  return count;
+}
+function createLlmTelemetryEmitter(params) {
+  const session = createTelemetrySession(params.telemetry);
+  const callId = randomBytes(8).toString("hex");
+  return {
+    includeStreamEvents: session?.includeStreamEvents === true,
+    emit: (event) => {
+      if (!session) {
+        return;
+      }
+      session.emit({
+        ...event,
+        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+        callId,
+        operation: params.operation,
+        provider: params.provider,
+        model: params.model
+      });
+    },
+    flush: async () => {
+      await session?.flush();
+    }
+  };
+}
 function toMaybeNumber(value) {
   if (typeof value === "number" && Number.isFinite(value)) {
     return value;
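Every public call now goes through `createLlmTelemetryEmitter`, which opens one session per call, mints a random eight-byte hex `callId`, and stamps each event with an ISO timestamp plus the call's `operation`, `provider`, and `model` before forwarding it to the sink. A sketch of the enriched envelope a sink receives (field values are illustrative):

```js
// Illustrative event as seen by sink.emit(); the first three fields come from
// the call site, the rest are added by the emitter for every event.
const exampleEvent = {
  type: "llm.call.started",
  inputMode: "messages", // "string" | "messages"
  toolCount: 2,
  timestamp: "2025-01-01T00:00:00.000Z", // new Date().toISOString()
  callId: "9f86d081884c7d65", // randomBytes(8).toString("hex")
  operation: "generateText", // streamText | generateText | streamJson | generateJson | generateImages
  provider: "gemini", // from resolveProvider(request.model)
  model: "gemini-2.0-flash" // illustrative model id
};
```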
@@ -7048,6 +7172,10 @@ async function runTextCall(params) {
   let responseRole;
   let latestUsage;
   let responseImages = 0;
+  const pushEvent = (event) => {
+    queue.push(event);
+    params.onEvent?.(event);
+  };
   const pushDelta = (channel, text) => {
     if (!text) {
       return;
@@ -7058,7 +7186,7 @@ async function runTextCall(params) {
     } else {
       callLogger?.appendResponseDelta(text);
     }
-    queue.push({ type: "delta", channel, text });
+    pushEvent({ type: "delta", channel, text });
   };
   const pushInline = (data, mimeType) => {
     if (!data) {
@@ -7128,7 +7256,7 @@ async function runTextCall(params) {
         }
         case "response.refusal.delta": {
           blocked = true;
-          queue.push({ type: "blocked" });
+          pushEvent({ type: "blocked" });
           break;
         }
         default:
@@ -7137,7 +7265,7 @@ async function runTextCall(params) {
     }
     const finalResponse = await stream.finalResponse();
     modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
-    queue.push({ type: "model", modelVersion });
+    pushEvent({ type: "model", modelVersion });
     if (finalResponse.error) {
       const message = typeof finalResponse.error.message === "string" ? finalResponse.error.message : "OpenAI response failed";
       throw new Error(message);
@@ -7201,11 +7329,11 @@ async function runTextCall(params) {
     });
     blocked = blocked || result2.blocked;
     if (blocked) {
-      queue.push({ type: "blocked" });
+      pushEvent({ type: "blocked" });
     }
     if (result2.model) {
       modelVersion = providerInfo.serviceTier ? request.model : `chatgpt-${result2.model}`;
-      queue.push({ type: "model", modelVersion });
+      pushEvent({ type: "model", modelVersion });
     }
     latestUsage = extractChatGptUsageTokens(result2.usage);
     const fallbackText = typeof result2.text === "string" ? result2.text : "";
@@ -7243,11 +7371,11 @@ async function runTextCall(params) {
       { signal }
     );
     modelVersion = typeof response.model === "string" ? response.model : request.model;
-    queue.push({ type: "model", modelVersion });
+    pushEvent({ type: "model", modelVersion });
     const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
     if (choice?.finish_reason === "content_filter") {
       blocked = true;
-      queue.push({ type: "blocked" });
+      pushEvent({ type: "blocked" });
     }
     const textOutput = extractFireworksMessageText(
       choice?.message
@@ -7289,11 +7417,11 @@ async function runTextCall(params) {
     for await (const chunk of stream) {
       if (chunk.modelVersion) {
         modelVersion = chunk.modelVersion;
-        queue.push({ type: "model", modelVersion });
+        pushEvent({ type: "model", modelVersion });
       }
       if (chunk.promptFeedback?.blockReason) {
         blocked = true;
-        queue.push({ type: "blocked" });
+        pushEvent({ type: "blocked" });
       }
       latestUsage = mergeTokenUpdates(
         latestUsage,
@@ -7306,7 +7434,7 @@ async function runTextCall(params) {
       const primary = candidates[0];
       if (primary && isModerationFinish(primary.finishReason)) {
         blocked = true;
-        queue.push({ type: "blocked" });
+        pushEvent({ type: "blocked" });
       }
       for (const candidate of candidates) {
         const candidateContent = candidate.content;
@@ -7343,7 +7471,7 @@ async function runTextCall(params) {
     imageSize: request.imageSize
   });
   if (latestUsage) {
-    queue.push({ type: "usage", usage: latestUsage, costUsd, modelVersion });
+    pushEvent({ type: "usage", usage: latestUsage, costUsd, modelVersion });
   }
   callLogger?.complete({
     responseText: text,
@@ -7397,18 +7525,76 @@ async function runTextCall(params) {
   });
   return result;
 }
-function streamText(request) {
+function startTextStream(request, operation) {
   const queue = createAsyncQueue();
   const abortController = new AbortController();
+  const provider = resolveProvider(request.model).provider;
+  const telemetry = createLlmTelemetryEmitter({
+    telemetry: request.telemetry,
+    operation,
+    provider,
+    model: request.model
+  });
+  const startedAtMs = Date.now();
+  telemetry.emit({
+    type: "llm.call.started",
+    inputMode: typeof request.input === "string" ? "string" : "messages",
+    toolCount: request.tools?.length ?? 0,
+    responseModalities: request.responseModalities
+  });
   const result = (async () => {
+    let uploadMetrics = emptyFileUploadMetrics();
     try {
-      const output = await runTextCall({ request, queue, abortController });
+      let output;
+      await collectFileUploadMetrics(async () => {
+        try {
+          output = await runTextCall({
+            request,
+            queue,
+            abortController,
+            onEvent: telemetry.includeStreamEvents ? (event) => {
+              telemetry.emit({ type: "llm.call.stream", event });
+            } : void 0
+          });
+        } finally {
+          uploadMetrics = getCurrentFileUploadMetrics();
+        }
+      });
+      if (!output) {
+        throw new Error("LLM text call returned no result.");
+      }
+      telemetry.emit({
+        type: "llm.call.completed",
+        success: true,
+        durationMs: Math.max(0, Date.now() - startedAtMs),
+        modelVersion: output.modelVersion,
+        blocked: output.blocked,
+        usage: output.usage,
+        costUsd: output.costUsd,
+        outputTextChars: output.text.length,
+        thoughtChars: output.thoughts.length,
+        responseImages: countInlineImagesInContent(output.content),
+        uploadCount: uploadMetrics.count,
+        uploadBytes: uploadMetrics.totalBytes,
+        uploadLatencyMs: uploadMetrics.totalLatencyMs
+      });
       queue.close();
       return output;
     } catch (error) {
       const err = error instanceof Error ? error : new Error(String(error));
+      telemetry.emit({
+        type: "llm.call.completed",
+        success: false,
+        durationMs: Math.max(0, Date.now() - startedAtMs),
+        uploadCount: uploadMetrics.count,
+        uploadBytes: uploadMetrics.totalBytes,
+        uploadLatencyMs: uploadMetrics.totalLatencyMs,
+        error: err.message
+      });
       queue.fail(err);
       throw err;
+    } finally {
+      await telemetry.flush();
     }
   })();
   return {
@@ -7417,8 +7603,11 @@ function streamText(request) {
     abort: () => abortController.abort()
   };
 }
+function streamText(request) {
+  return startTextStream(request, "streamText");
+}
 async function generateText(request) {
-  const call = streamText(request);
+  const call = startTextStream(request, "generateText");
   for await (const _event of call.events) {
   }
   return await call.result;
@@ -7444,9 +7633,26 @@ function buildJsonSchemaConfig(request) {
   } : void 0;
   return { providerInfo, responseJsonSchema, openAiTextFormat };
 }
-function streamJson(request) {
+function startJsonStream(request, operation) {
   const queue = createAsyncQueue();
   const abortController = new AbortController();
+  const provider = resolveProvider(request.model).provider;
+  const telemetry = createLlmTelemetryEmitter({
+    telemetry: request.telemetry,
+    operation,
+    provider,
+    model: request.model
+  });
+  const startedAtMs = Date.now();
+  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
+  const streamMode = request.streamMode ?? "partial";
+  telemetry.emit({
+    type: "llm.call.started",
+    inputMode: typeof request.input === "string" ? "string" : "messages",
+    toolCount: request.tools?.length ?? 0,
+    maxAttempts,
+    streamMode
+  });
   const resolveAbortSignal = () => {
     if (!request.signal) {
       return abortController.signal;
@@ -7465,135 +7671,155 @@ function streamJson(request) {
     return abortController.signal;
   };
   const result = (async () => {
-    const signal = resolveAbortSignal();
-    const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
-    const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
-    const streamMode = request.streamMode ?? "partial";
-    const failures = [];
-    let openAiTextFormatForAttempt = openAiTextFormat;
-    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
-      let rawText = "";
-      let lastPartial = "";
-      try {
-        const call = streamText({
-          model: request.model,
-          input: request.input,
-          instructions: request.instructions,
-          tools: request.tools,
-          responseMimeType: request.responseMimeType ?? "application/json",
-          responseJsonSchema,
-          thinkingLevel: request.thinkingLevel,
-          ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
-          signal
-        });
+    let uploadMetrics = emptyFileUploadMetrics();
+    let attemptsUsed = 0;
+    try {
+      let output;
+      await collectFileUploadMetrics(async () => {
         try {
-          for await (const event of call.events) {
-            queue.push(event);
-            if (event.type === "delta" && event.channel === "response") {
-              rawText += event.text;
-              if (streamMode === "partial") {
-                const partial = parsePartialJsonFromLlmText(rawText);
-                if (partial !== null) {
-                  const serialized = JSON.stringify(partial);
-                  if (serialized !== lastPartial) {
-                    lastPartial = serialized;
-                    queue.push({
-                      type: "json",
-                      stage: "partial",
-                      value: partial
-                    });
+          const signal = resolveAbortSignal();
+          const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
+          const failures = [];
+          let openAiTextFormatForAttempt = openAiTextFormat;
+          for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+            attemptsUsed = attempt;
+            let rawText = "";
+            let lastPartial = "";
+            try {
+              const call = streamText({
+                model: request.model,
+                input: request.input,
+                instructions: request.instructions,
+                tools: request.tools,
+                responseMimeType: request.responseMimeType ?? "application/json",
+                responseJsonSchema,
+                thinkingLevel: request.thinkingLevel,
+                ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
+                telemetry: false,
+                signal
+              });
+              try {
+                for await (const event of call.events) {
+                  queue.push(event);
+                  if (telemetry.includeStreamEvents) {
+                    telemetry.emit({ type: "llm.call.stream", event });
+                  }
+                  if (event.type === "delta" && event.channel === "response") {
+                    rawText += event.text;
+                    if (streamMode === "partial") {
+                      const partial = parsePartialJsonFromLlmText(rawText);
+                      if (partial !== null) {
+                        const serialized = JSON.stringify(partial);
+                        if (serialized !== lastPartial) {
+                          lastPartial = serialized;
+                          queue.push({
+                            type: "json",
+                            stage: "partial",
+                            value: partial
+                          });
+                        }
+                      }
+                    }
                   }
                 }
+              } catch (streamError) {
+                await call.result.catch(() => void 0);
+                throw streamError;
+              }
+              const result2 = await call.result;
+              rawText = rawText || result2.text;
+              const cleanedText = normalizeJsonText(rawText);
+              const repairedText = escapeNewlinesInStrings(cleanedText);
+              const payload = JSON.parse(repairedText);
+              const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
+              const parsed = request.schema.parse(normalized);
+              queue.push({ type: "json", stage: "final", value: parsed });
+              output = { value: parsed, rawText, result: result2 };
+              return;
+            } catch (error) {
+              const handled = error instanceof Error ? error : new Error(String(error));
+              failures.push({ attempt, rawText, error: handled });
+              if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
+                openAiTextFormatForAttempt = void 0;
+              }
+              if (attempt >= maxAttempts) {
+                throw new LlmJsonCallError(
+                  `LLM JSON call failed after ${attempt} attempt(s)`,
+                  failures
+                );
              }
            }
          }
-        } catch (streamError) {
-          await call.result.catch(() => void 0);
-          throw streamError;
-        }
-        const result2 = await call.result;
-        rawText = rawText || result2.text;
-        const cleanedText = normalizeJsonText(rawText);
-        const repairedText = escapeNewlinesInStrings(cleanedText);
-        const payload = JSON.parse(repairedText);
-        const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
-        const parsed = request.schema.parse(normalized);
-        queue.push({ type: "json", stage: "final", value: parsed });
-        queue.close();
-        return { value: parsed, rawText, result: result2 };
-      } catch (error) {
-        const handled = error instanceof Error ? error : new Error(String(error));
-        failures.push({ attempt, rawText, error: handled });
-        if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
-          openAiTextFormatForAttempt = void 0;
-        }
-        if (attempt >= maxAttempts) {
-          throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+          throw new LlmJsonCallError("LLM JSON call failed", failures);
+        } finally {
+          uploadMetrics = getCurrentFileUploadMetrics();
        }
-      }
+      });
+      if (!output) {
+        throw new Error("LLM JSON call returned no result.");
+      }
+      telemetry.emit({
+        type: "llm.call.completed",
+        success: true,
+        durationMs: Math.max(0, Date.now() - startedAtMs),
+        modelVersion: output.result.modelVersion,
+        blocked: output.result.blocked,
+        usage: output.result.usage,
+        costUsd: output.result.costUsd,
+        rawTextChars: output.rawText.length,
+        attempts: attemptsUsed,
+        uploadCount: uploadMetrics.count,
+        uploadBytes: uploadMetrics.totalBytes,
+        uploadLatencyMs: uploadMetrics.totalLatencyMs
+      });
+      queue.close();
+      return output;
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      telemetry.emit({
+        type: "llm.call.completed",
+        success: false,
+        durationMs: Math.max(0, Date.now() - startedAtMs),
+        attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+        uploadCount: uploadMetrics.count,
+        uploadBytes: uploadMetrics.totalBytes,
+        uploadLatencyMs: uploadMetrics.totalLatencyMs,
+        error: err.message
+      });
+      queue.fail(err);
+      throw err;
+    } finally {
+      await telemetry.flush();
    }
-    throw new LlmJsonCallError("LLM JSON call failed", failures);
-  })().catch((error) => {
-    const err = error instanceof Error ? error : new Error(String(error));
-    queue.fail(err);
-    throw err;
-  });
+  })();
   return {
     events: queue.iterable,
     result,
     abort: () => abortController.abort()
   };
 }
+function streamJson(request) {
+  return startJsonStream(request, "streamJson");
+}
 async function generateJson(request) {
-  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
-  const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
-  let openAiTextFormatForAttempt = openAiTextFormat;
-  const failures = [];
-  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
-    let rawText = "";
-    try {
-      const call = streamText({
-        model: request.model,
-        input: request.input,
-        instructions: request.instructions,
-        tools: request.tools,
-        responseMimeType: request.responseMimeType ?? "application/json",
-        responseJsonSchema,
-        thinkingLevel: request.thinkingLevel,
-        ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
-        signal: request.signal
-      });
-      try {
-        for await (const event of call.events) {
-          request.onEvent?.(event);
-          if (event.type === "delta" && event.channel === "response") {
-            rawText += event.text;
-          }
-        }
-      } catch (streamError) {
-        await call.result.catch(() => void 0);
-        throw streamError;
-      }
-      const result = await call.result;
-      rawText = rawText || result.text;
-      const cleanedText = normalizeJsonText(rawText);
-      const repairedText = escapeNewlinesInStrings(cleanedText);
-      const payload = JSON.parse(repairedText);
-      const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
-      const parsed = request.schema.parse(normalized);
-      return { value: parsed, rawText, result };
-    } catch (error) {
-      const handled = error instanceof Error ? error : new Error(String(error));
-      failures.push({ attempt, rawText, error: handled });
-      if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
-        openAiTextFormatForAttempt = void 0;
-      }
-      if (attempt >= maxAttempts) {
-        throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+  const call = startJsonStream(
+    {
+      ...request,
+      streamMode: "final"
+    },
+    "generateJson"
+  );
+  try {
+    for await (const event of call.events) {
+      if (event.type !== "json") {
+        request.onEvent?.(event);
      }
    }
+  } catch (streamError) {
+    await call.result.catch(() => void 0);
+    throw streamError;
  }
-  throw new LlmJsonCallError("LLM JSON call failed", failures);
+  return await call.result;
 }
 var DEFAULT_TOOL_LOOP_MAX_STEPS = 8;
 function resolveToolLoopContents(input) {
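Note how double counting is avoided on the JSON path: each retry's inner `streamText` call is created with `telemetry: false`, so a sink sees exactly one `llm.call.started`/`llm.call.completed` pair per `streamJson`/`generateJson` call, with `attempts` reporting how many tries it took. The same flag works as a per-call opt-out in user code; a sketch, assuming `generateJson` is exported and that schemas are zod (as the `z3.object`/`z3.enum` calls in this bundle suggest):

```js
import { generateJson } from "@ljoukov/llm";
import { z } from "zod";

// telemetry: false suppresses events for this one call even when a
// process-wide default was set via configureTelemetry().
// Model id and schema are illustrative.
const { value } = await generateJson({
  model: "gemini-2.0-flash",
  input: 'Reply with {"ok": true} as JSON.',
  schema: z.object({ ok: z.boolean() }),
  telemetry: false
});
```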
@@ -9209,7 +9435,10 @@ function streamToolLoop(request) {
     abort: () => abortController.abort()
   };
 }
-var IMAGE_GRADE_SCHEMA = z3.enum(["pass", "fail"]);
+var IMAGE_GRADE_VALUE_SCHEMA = z3.enum(["pass", "fail"]);
+var IMAGE_GRADE_SCHEMA = z3.object({
+  grade: IMAGE_GRADE_VALUE_SCHEMA
+});
 async function gradeGeneratedImage(params) {
   const parts = [
     {
@@ -9220,7 +9449,7 @@ async function gradeGeneratedImage(params) {
         "Image prompt to grade:",
         params.imagePrompt,
         "",
-        'Respond with the JSON string "pass" or "fail".'
+        'Respond with JSON like {"grade":"pass"} or {"grade":"fail"}.'
       ].join("\n")
     },
     {
@@ -9229,12 +9458,13 @@ async function gradeGeneratedImage(params) {
       mimeType: params.image.mimeType ?? "image/png"
     }
   ];
-  const { value } = await generateJson({
+  const { value, result } = await generateJson({
     model: params.model,
     input: [{ role: "user", content: parts }],
-    schema: IMAGE_GRADE_SCHEMA
+    schema: IMAGE_GRADE_SCHEMA,
+    telemetry: false
   });
-  return value;
+  return { grade: value.grade, result };
 }
 async function generateImages(request) {
   const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
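The grading change doubles as a compatibility fix: 4.1.1 asked the model for a bare JSON string, which strict structured-output modes cannot express (OpenAI's `json_schema` response format, for example, requires an object at the schema root), while 5.0.0 wraps the verdict in an object and returns the grading call's `result` so the caller can fold its cost and usage into the totals. A sketch of the shapes involved:

```js
// Responses accepted by IMAGE_GRADE_SCHEMA in 5.0.0:
const pass = { grade: "pass" };
const fail = { grade: "fail" };
// 4.1.1 instead expected the bare JSON string "pass" or "fail".
```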
@@ -9254,6 +9484,19 @@ async function generateImages(request) {
   if (!gradingPrompt) {
     throw new Error("imageGradingPrompt must be a non-empty string");
   }
+  const telemetry = createLlmTelemetryEmitter({
+    telemetry: request.telemetry,
+    operation: "generateImages",
+    provider: resolveProvider(request.model).provider,
+    model: request.model
+  });
+  const startedAtMs = Date.now();
+  telemetry.emit({
+    type: "llm.call.started",
+    imagePromptCount: promptList.length,
+    styleImageCount: request.styleImages?.length ?? 0,
+    maxAttempts
+  });
   const addText = (parts, text) => {
     const lastPart = parts[parts.length - 1];
     if (lastPart !== void 0 && lastPart.type === "text") {
@@ -9311,6 +9554,9 @@ async function generateImages(request) {
   const inputMessages = [{ role: "user", content: buildInitialPromptParts() }];
   const orderedEntries = [...promptEntries];
   const resolvedImages = /* @__PURE__ */ new Map();
+  let totalCostUsd = 0;
+  let totalUsage;
+  let attemptsUsed = 0;
   const removeResolvedEntries = (resolved) => {
     if (resolved.size === 0) {
       return;
@@ -9325,70 +9571,118 @@ async function generateImages(request) {
       }
     }
   };
-  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
-    const result = await generateText({
-      model: request.model,
-      input: inputMessages,
-      responseModalities: ["IMAGE", "TEXT"],
-      imageAspectRatio: request.imageAspectRatio,
-      imageSize: request.imageSize ?? "2K"
-    });
-    if (result.blocked || !result.content) {
-      continue;
-    }
-    const images = extractImages(result.content);
-    if (images.length > 0 && promptEntries.length > 0) {
-      const assignedCount = Math.min(images.length, promptEntries.length);
-      const pendingAssignments = promptEntries.slice(0, assignedCount);
-      const assignedImages = images.slice(0, assignedCount);
-      const gradeResults = await Promise.all(
-        pendingAssignments.map(
-          (entry, index) => gradeGeneratedImage({
-            gradingPrompt,
-            imagePrompt: entry.prompt,
-            image: (() => {
-              const image = assignedImages[index];
-              if (!image) {
-                throw new Error("Image generation returned fewer images than expected.");
+  let uploadMetrics = emptyFileUploadMetrics();
+  try {
+    await collectFileUploadMetrics(async () => {
+      try {
+        for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+          attemptsUsed = attempt;
+          const result = await generateText({
+            model: request.model,
+            input: inputMessages,
+            responseModalities: ["IMAGE", "TEXT"],
+            imageAspectRatio: request.imageAspectRatio,
+            imageSize: request.imageSize ?? "2K",
+            telemetry: false
+          });
+          totalCostUsd += result.costUsd;
+          totalUsage = sumUsageTokens(totalUsage, result.usage);
+          if (result.blocked || !result.content) {
+            continue;
+          }
+          const images = extractImages(result.content);
+          if (images.length > 0 && promptEntries.length > 0) {
+            const assignedCount = Math.min(images.length, promptEntries.length);
+            const pendingAssignments = promptEntries.slice(0, assignedCount);
+            const assignedImages = images.slice(0, assignedCount);
+            const gradeResults = await Promise.all(
+              pendingAssignments.map(
+                (entry, index) => gradeGeneratedImage({
+                  gradingPrompt,
+                  imagePrompt: entry.prompt,
+                  image: (() => {
+                    const image = assignedImages[index];
+                    if (!image) {
+                      throw new Error("Image generation returned fewer images than expected.");
+                    }
+                    return image;
+                  })(),
+                  model: "gpt-5.2"
+                })
+              )
+            );
+            const passedEntries = /* @__PURE__ */ new Set();
+            for (let i = 0; i < gradeResults.length; i += 1) {
+              const gradeResult = gradeResults[i];
+              const entry = pendingAssignments[i];
+              const image = assignedImages[i];
+              if (!gradeResult || !entry || !image) {
+                continue;
               }
-              return image;
-            })(),
-            model: "gpt-5.2"
-          })
-        )
-      );
-      const passedEntries = /* @__PURE__ */ new Set();
-      for (let i = 0; i < gradeResults.length; i += 1) {
-        const grade = gradeResults[i];
-        const entry = pendingAssignments[i];
-        const image = assignedImages[i];
-        if (!grade || !entry || !image) {
-          continue;
-        }
-        if (grade === "pass") {
-          resolvedImages.set(entry.index, image);
-          passedEntries.add(entry.index);
+              totalCostUsd += gradeResult.result.costUsd;
+              totalUsage = sumUsageTokens(totalUsage, gradeResult.result.usage);
+              if (gradeResult.grade === "pass") {
+                resolvedImages.set(entry.index, image);
+                passedEntries.add(entry.index);
+              }
+            }
+            removeResolvedEntries(passedEntries);
+          }
+          if (promptEntries.length === 0) {
+            break;
+          }
+          inputMessages.push({
+            role: "assistant",
+            content: result.content.parts
+          });
+          inputMessages.push({
+            role: "user",
+            content: buildContinuationPromptParts(promptEntries)
+          });
        }
+      } finally {
+        uploadMetrics = getCurrentFileUploadMetrics();
      }
-      removeResolvedEntries(passedEntries);
-    }
-    if (promptEntries.length === 0) {
-      break;
-    }
-    inputMessages.push({
-      role: "assistant",
-      content: result.content.parts
     });
-    inputMessages.push({ role: "user", content: buildContinuationPromptParts(promptEntries) });
-  }
-  const orderedImages = [];
-  for (const entry of orderedEntries) {
-    const image = resolvedImages.get(entry.index);
-    if (image) {
-      orderedImages.push(image);
+    const orderedImages = [];
+    for (const entry of orderedEntries) {
+      const image = resolvedImages.get(entry.index);
+      if (image) {
+        orderedImages.push(image);
+      }
    }
+    const outputImages = orderedImages.slice(0, numImages);
+    telemetry.emit({
+      type: "llm.call.completed",
+      success: true,
+      durationMs: Math.max(0, Date.now() - startedAtMs),
+      usage: totalUsage,
+      costUsd: totalCostUsd,
+      imageCount: outputImages.length,
+      attempts: attemptsUsed,
+      uploadCount: uploadMetrics.count,
+      uploadBytes: uploadMetrics.totalBytes,
+      uploadLatencyMs: uploadMetrics.totalLatencyMs
+    });
+    return outputImages;
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    telemetry.emit({
+      type: "llm.call.completed",
+      success: false,
+      durationMs: Math.max(0, Date.now() - startedAtMs),
+      usage: totalUsage,
+      costUsd: totalCostUsd,
+      attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+      uploadCount: uploadMetrics.count,
+      uploadBytes: uploadMetrics.totalBytes,
+      uploadLatencyMs: uploadMetrics.totalLatencyMs,
+      error: err.message
+    });
+    throw err;
+  } finally {
+    await telemetry.flush();
   }
-  return orderedImages.slice(0, numImages);
 }
 async function generateImageInBatches(request) {
   const {
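Because every internal `generateText` and grading `generateJson` call inside `generateImages` runs with `telemetry: false`, a sink observes the whole image batch as a single call whose `usage` and `costUsd` are the running totals built above via `sumUsageTokens`. A filtering-sink sketch, using only event fields emitted in this diff:

```js
// Sketch: log only completed image-generation batches.
const imageSink = {
  emit(event) {
    if (event.type === "llm.call.completed" && event.operation === "generateImages") {
      // imageCount, attempts, costUsd and usage aggregate every internal call.
      console.log(event.imageCount, event.attempts, event.costUsd, event.usage);
    }
  }
};
```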
@@ -12039,7 +12333,7 @@ function isNoEntError(error) {
 
 // src/agent.ts
 async function runAgentLoop(request) {
-  const telemetry = createAgentTelemetrySession(request.telemetry);
+  const telemetry = createTelemetrySession(request.telemetry);
   const logging = createRootAgentLoggingSession(request);
   try {
     return await runWithAgentLoggingSession(logging, async () => {
@@ -12125,7 +12419,7 @@ async function runAgentLoopInternal(request, context) {
     logging: _logging,
     ...toolLoopRequest
   } = request;
-  const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
+  const telemetrySession = context.telemetry ?? createTelemetrySession(telemetry);
   const loggingSession = context.logging;
   const runId = randomRunId();
   const startedAtMs = Date.now();
@@ -12188,15 +12482,15 @@ async function runAgentLoopInternal(request, context) {
     ].join(" ")
   );
   const sourceOnEvent = toolLoopRequestWithSteering.onEvent;
-  const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
+  const includeStreamEvents = telemetrySession?.includeStreamEvents === true;
   const streamEventLogger = loggingSession ? createAgentStreamEventLogger({
     append: (line) => {
       loggingSession.logLine(`[agent:${runId}] ${line}`);
     }
   }) : void 0;
-  const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
+  const wrappedOnEvent = sourceOnEvent || includeStreamEvents ? (event) => {
     sourceOnEvent?.(event);
-    if (includeLlmStreamEvents) {
+    if (includeStreamEvents) {
       emitTelemetry({ type: "agent.run.stream", event });
     }
     streamEventLogger?.appendEvent(event);
@@ -12434,7 +12728,7 @@ function countToolCalls(result) {
   }
   return count;
 }
-function sumUsageValue(current, next) {
+function sumUsageValue2(current, next) {
   if (typeof next !== "number" || !Number.isFinite(next)) {
     return current;
   }
@@ -12452,20 +12746,17 @@ function summarizeResultUsage(result) {
       continue;
     }
     summary = {
-      promptTokens: sumUsageValue(summary?.promptTokens, usage.promptTokens),
-      cachedTokens: sumUsageValue(summary?.cachedTokens, usage.cachedTokens),
-      responseTokens: sumUsageValue(summary?.responseTokens, usage.responseTokens),
-      responseImageTokens: sumUsageValue(summary?.responseImageTokens, usage.responseImageTokens),
-      thinkingTokens: sumUsageValue(summary?.thinkingTokens, usage.thinkingTokens),
-      totalTokens: sumUsageValue(summary?.totalTokens, usage.totalTokens),
-      toolUsePromptTokens: sumUsageValue(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
+      promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
+      cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
+      responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
+      responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
+      thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
+      totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
+      toolUsePromptTokens: sumUsageValue2(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
     };
   }
   return summary;
 }
-function isPromiseLike2(value) {
-  return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
-}
 function resolveAgentLoggingSelection(value) {
   if (value === false) {
     return void 0;
@@ -12499,60 +12790,6 @@ function createRootAgentLoggingSession(request) {
     mirrorToConsole: selected.mirrorToConsole !== false
   });
 }
-function isAgentTelemetrySink(value) {
-  return typeof value === "object" && value !== null && typeof value.emit === "function";
-}
-function resolveTelemetrySelection(telemetry) {
-  if (!telemetry) {
-    return void 0;
-  }
-  if (isAgentTelemetrySink(telemetry)) {
-    return { sink: telemetry };
-  }
-  if (isAgentTelemetrySink(telemetry.sink)) {
-    return telemetry;
-  }
-  throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
-}
-function createAgentTelemetrySession(telemetry) {
-  const config = resolveTelemetrySelection(telemetry);
-  if (!config) {
-    return void 0;
-  }
-  const pending = /* @__PURE__ */ new Set();
-  const trackPromise = (promise) => {
-    pending.add(promise);
-    promise.finally(() => {
-      pending.delete(promise);
-    });
-  };
-  const emit = (event) => {
-    try {
-      const output = config.sink.emit(event);
-      if (isPromiseLike2(output)) {
-        const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
-        trackPromise(task);
-      }
-    } catch {
-    }
-  };
-  const flush = async () => {
-    while (pending.size > 0) {
-      await Promise.allSettled([...pending]);
-    }
-    if (typeof config.sink.flush === "function") {
-      try {
-        await config.sink.flush();
-      } catch {
-      }
-    }
-  };
-  return {
-    includeLlmStreamEvents: config.includeLlmStreamEvents === true,
-    emit,
-    flush
-  };
-}
 function createAgentTelemetryEmitter(params) {
   return (event) => {
     if (!params.session) {
@@ -13245,6 +13482,7 @@ export {
   applyPatch,
   configureGemini,
   configureModelConcurrency,
+  configureTelemetry,
   convertGooglePartsToLlmParts,
   createApplyPatchTool,
   createCodexApplyPatchTool,
@@ -13293,6 +13531,7 @@ export {
   parseJsonFromLlmText,
   refreshChatGptOauthToken,
   resetModelConcurrencyConfig,
+  resetTelemetry,
   resolveFilesystemToolProfile,
   resolveFireworksModelId,
   runAgentLoop,