@ljoukov/llm 4.1.1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -53,6 +53,7 @@ __export(index_exports, {
  applyPatch: () => applyPatch,
  configureGemini: () => configureGemini,
  configureModelConcurrency: () => configureModelConcurrency,
+ configureTelemetry: () => configureTelemetry,
  convertGooglePartsToLlmParts: () => convertGooglePartsToLlmParts,
  createApplyPatchTool: () => createApplyPatchTool,
  createCodexApplyPatchTool: () => createCodexApplyPatchTool,
@@ -101,6 +102,7 @@ __export(index_exports, {
  parseJsonFromLlmText: () => parseJsonFromLlmText,
  refreshChatGptOauthToken: () => refreshChatGptOauthToken,
  resetModelConcurrencyConfig: () => resetModelConcurrencyConfig,
+ resetTelemetry: () => resetTelemetry,
  resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
  resolveFireworksModelId: () => resolveFireworksModelId,
  runAgentLoop: () => runAgentLoop,
@@ -4165,6 +4167,71 @@ var files = {
  content: filesContent
  };

+ // src/telemetry.ts
+ var telemetryState = getRuntimeSingleton(
+ /* @__PURE__ */ Symbol.for("@ljoukov/llm.telemetryState"),
+ () => ({
+ configuredTelemetry: void 0
+ })
+ );
+ function configureTelemetry(telemetry = void 0) {
+ telemetryState.configuredTelemetry = telemetry === void 0 || telemetry === false ? void 0 : telemetry;
+ }
+ function resetTelemetry() {
+ telemetryState.configuredTelemetry = void 0;
+ }
+ function isPromiseLike2(value) {
+ return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
+ }
+ function resolveTelemetrySelection(telemetry) {
+ if (telemetry === false) {
+ return void 0;
+ }
+ if (telemetry !== void 0) {
+ return telemetry;
+ }
+ return telemetryState.configuredTelemetry;
+ }
+ function createTelemetrySession(telemetry) {
+ const config = resolveTelemetrySelection(telemetry);
+ if (!config) {
+ return void 0;
+ }
+ const pending = /* @__PURE__ */ new Set();
+ const trackPromise = (promise) => {
+ pending.add(promise);
+ promise.finally(() => {
+ pending.delete(promise);
+ });
+ };
+ const emit = (event) => {
+ try {
+ const output = config.sink.emit(event);
+ if (isPromiseLike2(output)) {
+ const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
+ trackPromise(task);
+ }
+ } catch {
+ }
+ };
+ const flush = async () => {
+ while (pending.size > 0) {
+ await Promise.allSettled([...pending]);
+ }
+ if (typeof config.sink.flush === "function") {
+ try {
+ await config.sink.flush();
+ } catch {
+ }
+ }
+ };
+ return {
+ includeStreamEvents: config.includeStreamEvents === true,
+ emit,
+ flush
+ };
+ }
+
  // src/llm.ts
  var toolCallContextStorage = getRuntimeSingleton(
  /* @__PURE__ */ Symbol.for("@ljoukov/llm.toolCallContextStorage"),
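Note: the new src/telemetry.ts module above introduces a process-wide telemetry default. A minimal usage sketch, inferred from createTelemetrySession: a sink needs emit(event), flush() is optional, and promise-returning emits are tracked so flush() can await them (the published typings may name things differently):

```ts
import { configureTelemetry, resetTelemetry } from "@ljoukov/llm";

// Hypothetical in-memory sink for illustration.
const events: unknown[] = [];
const sink = {
  emit(event: unknown): void {
    events.push(event); // may also return a Promise; flush() awaits pending emits
  },
  async flush(): Promise<void> {
    // e.g. persist `events` to durable storage
  },
};

// Becomes the default for any call that does not pass its own `telemetry`
// option; `telemetry: false` on a call opts out, and configureTelemetry()
// with no argument clears the default (per the configureTelemetry body above).
configureTelemetry({ sink, includeStreamEvents: false });

// Later:
resetTelemetry(); // back to the "no telemetry" default
```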
@@ -5751,6 +5818,65 @@ function mergeTokenUpdates(current, next) {
  toolUsePromptTokens: next.toolUsePromptTokens ?? current.toolUsePromptTokens
  };
  }
+ function sumUsageValue(current, next) {
+ if (typeof next !== "number" || !Number.isFinite(next)) {
+ return current;
+ }
+ const normalizedNext = Math.max(0, next);
+ if (typeof current !== "number" || !Number.isFinite(current)) {
+ return normalizedNext;
+ }
+ return Math.max(0, current) + normalizedNext;
+ }
+ function sumUsageTokens(current, next) {
+ if (!next) {
+ return current;
+ }
+ return {
+ promptTokens: sumUsageValue(current?.promptTokens, next.promptTokens),
+ cachedTokens: sumUsageValue(current?.cachedTokens, next.cachedTokens),
+ responseTokens: sumUsageValue(current?.responseTokens, next.responseTokens),
+ responseImageTokens: sumUsageValue(current?.responseImageTokens, next.responseImageTokens),
+ thinkingTokens: sumUsageValue(current?.thinkingTokens, next.thinkingTokens),
+ totalTokens: sumUsageValue(current?.totalTokens, next.totalTokens),
+ toolUsePromptTokens: sumUsageValue(current?.toolUsePromptTokens, next.toolUsePromptTokens)
+ };
+ }
+ function countInlineImagesInContent(content) {
+ if (!content) {
+ return 0;
+ }
+ let count = 0;
+ for (const part of content.parts) {
+ if (part.type === "inlineData" && isInlineImageMime(part.mimeType)) {
+ count += 1;
+ }
+ }
+ return count;
+ }
+ function createLlmTelemetryEmitter(params) {
+ const session = createTelemetrySession(params.telemetry);
+ const callId = (0, import_node_crypto2.randomBytes)(8).toString("hex");
+ return {
+ includeStreamEvents: session?.includeStreamEvents === true,
+ emit: (event) => {
+ if (!session) {
+ return;
+ }
+ session.emit({
+ ...event,
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+ callId,
+ operation: params.operation,
+ provider: params.provider,
+ model: params.model
+ });
+ },
+ flush: async () => {
+ await session?.flush();
+ }
+ };
+ }
  function toMaybeNumber(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
  return value;
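Note: createLlmTelemetryEmitter stamps every event with a per-call envelope before it reaches the sink, and sumUsageValue/sumUsageTokens treat non-finite fields as absent and clamp negatives to zero before adding. An illustrative event shape, reconstructed from the emitter above (field names match the diff; the type itself is not a published one):

```ts
// Reconstructed for illustration only.
type LlmTelemetryEnvelope = {
  type: "llm.call.started" | "llm.call.stream" | "llm.call.completed";
  timestamp: string; // ISO-8601, stamped per event
  callId: string;    // 8 random bytes as hex, shared by all events of one call
  operation: string; // "streamText" | "generateJson" | "generateImages" | ...
  provider: string;
  model: string;
  [extra: string]: unknown; // event-specific fields: usage, costUsd, attempts...
};
```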
@@ -7160,6 +7286,10 @@ async function runTextCall(params) {
  let responseRole;
  let latestUsage;
  let responseImages = 0;
+ const pushEvent = (event) => {
+ queue.push(event);
+ params.onEvent?.(event);
+ };
  const pushDelta = (channel, text) => {
  if (!text) {
  return;
@@ -7170,7 +7300,7 @@ async function runTextCall(params) {
  } else {
  callLogger?.appendResponseDelta(text);
  }
- queue.push({ type: "delta", channel, text });
+ pushEvent({ type: "delta", channel, text });
  };
  const pushInline = (data, mimeType) => {
  if (!data) {
@@ -7240,7 +7370,7 @@ async function runTextCall(params) {
  }
  case "response.refusal.delta": {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  break;
  }
  default:
@@ -7249,7 +7379,7 @@ async function runTextCall(params) {
  }
  const finalResponse = await stream.finalResponse();
  modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  if (finalResponse.error) {
  const message = typeof finalResponse.error.message === "string" ? finalResponse.error.message : "OpenAI response failed";
  throw new Error(message);
@@ -7313,11 +7443,11 @@ async function runTextCall(params) {
  });
  blocked = blocked || result2.blocked;
  if (blocked) {
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  if (result2.model) {
  modelVersion = providerInfo.serviceTier ? request.model : `chatgpt-${result2.model}`;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  }
  latestUsage = extractChatGptUsageTokens(result2.usage);
  const fallbackText = typeof result2.text === "string" ? result2.text : "";
@@ -7355,11 +7485,11 @@ async function runTextCall(params) {
  { signal }
  );
  modelVersion = typeof response.model === "string" ? response.model : request.model;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
  if (choice?.finish_reason === "content_filter") {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  const textOutput = extractFireworksMessageText(
  choice?.message
@@ -7401,11 +7531,11 @@ async function runTextCall(params) {
  for await (const chunk of stream) {
  if (chunk.modelVersion) {
  modelVersion = chunk.modelVersion;
- queue.push({ type: "model", modelVersion });
+ pushEvent({ type: "model", modelVersion });
  }
  if (chunk.promptFeedback?.blockReason) {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  latestUsage = mergeTokenUpdates(
  latestUsage,
@@ -7418,7 +7548,7 @@ async function runTextCall(params) {
  const primary = candidates[0];
  if (primary && isModerationFinish(primary.finishReason)) {
  blocked = true;
- queue.push({ type: "blocked" });
+ pushEvent({ type: "blocked" });
  }
  for (const candidate of candidates) {
  const candidateContent = candidate.content;
@@ -7455,7 +7585,7 @@ async function runTextCall(params) {
  imageSize: request.imageSize
  });
  if (latestUsage) {
- queue.push({ type: "usage", usage: latestUsage, costUsd, modelVersion });
+ pushEvent({ type: "usage", usage: latestUsage, costUsd, modelVersion });
  }
  callLogger?.complete({
  responseText: text,
@@ -7509,18 +7639,76 @@ async function runTextCall(params) {
  });
  return result;
  }
- function streamText(request) {
+ function startTextStream(request, operation) {
  const queue = createAsyncQueue();
  const abortController = new AbortController();
+ const provider = resolveProvider(request.model).provider;
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation,
+ provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ telemetry.emit({
+ type: "llm.call.started",
+ inputMode: typeof request.input === "string" ? "string" : "messages",
+ toolCount: request.tools?.length ?? 0,
+ responseModalities: request.responseModalities
+ });
  const result = (async () => {
+ let uploadMetrics = emptyFileUploadMetrics();
  try {
- const output = await runTextCall({ request, queue, abortController });
+ let output;
+ await collectFileUploadMetrics(async () => {
+ try {
+ output = await runTextCall({
+ request,
+ queue,
+ abortController,
+ onEvent: telemetry.includeStreamEvents ? (event) => {
+ telemetry.emit({ type: "llm.call.stream", event });
+ } : void 0
+ });
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
+ }
+ });
+ if (!output) {
+ throw new Error("LLM text call returned no result.");
+ }
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ modelVersion: output.modelVersion,
+ blocked: output.blocked,
+ usage: output.usage,
+ costUsd: output.costUsd,
+ outputTextChars: output.text.length,
+ thoughtChars: output.thoughts.length,
+ responseImages: countInlineImagesInContent(output.content),
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
  queue.close();
  return output;
  } catch (error) {
  const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
  queue.fail(err);
  throw err;
+ } finally {
+ await telemetry.flush();
  }
  })();
  return {
@@ -7529,8 +7717,11 @@ function streamText(request) {
  abort: () => abortController.abort()
  };
  }
+ function streamText(request) {
+ return startTextStream(request, "streamText");
+ }
  async function generateText(request) {
- const call = streamText(request);
+ const call = startTextStream(request, "generateText");
  for await (const _event of call.events) {
  }
  return await call.result;
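Note: streamText and generateText are now thin wrappers over startTextStream, which brackets every call with llm.call.started / llm.call.completed and, when includeStreamEvents is set, mirrors each queued stream event as llm.call.stream. A hedged sketch of the per-call override (the model id is illustrative):

```ts
import { streamText } from "@ljoukov/llm";

const call = streamText({
  model: "gemini-2.5-flash", // illustrative model id
  input: "Summarize the 5.0.0 changes.",
  // Per-call override: wins over the configureTelemetry() default.
  telemetry: {
    sink: { emit: (event: unknown) => console.log(JSON.stringify(event)) },
    includeStreamEvents: true, // also emit { type: "llm.call.stream", event }
  },
});
for await (const event of call.events) {
  // "delta" | "model" | "blocked" | "usage" events, unchanged from 4.x
}
const result = await call.result; // llm.call.completed emitted, sink flushed
console.log(result.text);
```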
@@ -7556,9 +7747,26 @@ function buildJsonSchemaConfig(request) {
  } : void 0;
  return { providerInfo, responseJsonSchema, openAiTextFormat };
  }
- function streamJson(request) {
+ function startJsonStream(request, operation) {
  const queue = createAsyncQueue();
  const abortController = new AbortController();
+ const provider = resolveProvider(request.model).provider;
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation,
+ provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
+ const streamMode = request.streamMode ?? "partial";
+ telemetry.emit({
+ type: "llm.call.started",
+ inputMode: typeof request.input === "string" ? "string" : "messages",
+ toolCount: request.tools?.length ?? 0,
+ maxAttempts,
+ streamMode
+ });
  const resolveAbortSignal = () => {
  if (!request.signal) {
  return abortController.signal;
@@ -7577,135 +7785,155 @@ function streamJson(request) {
  return abortController.signal;
  };
  const result = (async () => {
- const signal = resolveAbortSignal();
- const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
- const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
- const streamMode = request.streamMode ?? "partial";
- const failures = [];
- let openAiTextFormatForAttempt = openAiTextFormat;
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- let rawText = "";
- let lastPartial = "";
- try {
- const call = streamText({
- model: request.model,
- input: request.input,
- instructions: request.instructions,
- tools: request.tools,
- responseMimeType: request.responseMimeType ?? "application/json",
- responseJsonSchema,
- thinkingLevel: request.thinkingLevel,
- ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
- signal
- });
+ let uploadMetrics = emptyFileUploadMetrics();
+ let attemptsUsed = 0;
+ try {
+ let output;
+ await collectFileUploadMetrics(async () => {
  try {
- for await (const event of call.events) {
- queue.push(event);
- if (event.type === "delta" && event.channel === "response") {
- rawText += event.text;
- if (streamMode === "partial") {
- const partial = parsePartialJsonFromLlmText(rawText);
- if (partial !== null) {
- const serialized = JSON.stringify(partial);
- if (serialized !== lastPartial) {
- lastPartial = serialized;
- queue.push({
- type: "json",
- stage: "partial",
- value: partial
- });
+ const signal = resolveAbortSignal();
+ const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
+ const failures = [];
+ let openAiTextFormatForAttempt = openAiTextFormat;
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+ attemptsUsed = attempt;
+ let rawText = "";
+ let lastPartial = "";
+ try {
+ const call = streamText({
+ model: request.model,
+ input: request.input,
+ instructions: request.instructions,
+ tools: request.tools,
+ responseMimeType: request.responseMimeType ?? "application/json",
+ responseJsonSchema,
+ thinkingLevel: request.thinkingLevel,
+ ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
+ telemetry: false,
+ signal
+ });
+ try {
+ for await (const event of call.events) {
+ queue.push(event);
+ if (telemetry.includeStreamEvents) {
+ telemetry.emit({ type: "llm.call.stream", event });
+ }
+ if (event.type === "delta" && event.channel === "response") {
+ rawText += event.text;
+ if (streamMode === "partial") {
+ const partial = parsePartialJsonFromLlmText(rawText);
+ if (partial !== null) {
+ const serialized = JSON.stringify(partial);
+ if (serialized !== lastPartial) {
+ lastPartial = serialized;
+ queue.push({
+ type: "json",
+ stage: "partial",
+ value: partial
+ });
+ }
+ }
+ }
  }
  }
+ } catch (streamError) {
+ await call.result.catch(() => void 0);
+ throw streamError;
+ }
+ const result2 = await call.result;
+ rawText = rawText || result2.text;
+ const cleanedText = normalizeJsonText(rawText);
+ const repairedText = escapeNewlinesInStrings(cleanedText);
+ const payload = JSON.parse(repairedText);
+ const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
+ const parsed = request.schema.parse(normalized);
+ queue.push({ type: "json", stage: "final", value: parsed });
+ output = { value: parsed, rawText, result: result2 };
+ return;
+ } catch (error) {
+ const handled = error instanceof Error ? error : new Error(String(error));
+ failures.push({ attempt, rawText, error: handled });
+ if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
+ openAiTextFormatForAttempt = void 0;
+ }
+ if (attempt >= maxAttempts) {
+ throw new LlmJsonCallError(
+ `LLM JSON call failed after ${attempt} attempt(s)`,
+ failures
+ );
  }
  }
  }
- } catch (streamError) {
- await call.result.catch(() => void 0);
- throw streamError;
- }
- const result2 = await call.result;
- rawText = rawText || result2.text;
- const cleanedText = normalizeJsonText(rawText);
- const repairedText = escapeNewlinesInStrings(cleanedText);
- const payload = JSON.parse(repairedText);
- const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
- const parsed = request.schema.parse(normalized);
- queue.push({ type: "json", stage: "final", value: parsed });
- queue.close();
- return { value: parsed, rawText, result: result2 };
- } catch (error) {
- const handled = error instanceof Error ? error : new Error(String(error));
- failures.push({ attempt, rawText, error: handled });
- if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
- openAiTextFormatForAttempt = void 0;
- }
- if (attempt >= maxAttempts) {
- throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+ throw new LlmJsonCallError("LLM JSON call failed", failures);
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
  }
- }
+ });
+ if (!output) {
+ throw new Error("LLM JSON call returned no result.");
+ }
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ modelVersion: output.result.modelVersion,
+ blocked: output.result.blocked,
+ usage: output.result.usage,
+ costUsd: output.result.costUsd,
+ rawTextChars: output.rawText.length,
+ attempts: attemptsUsed,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
+ queue.close();
+ return output;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
+ queue.fail(err);
+ throw err;
+ } finally {
+ await telemetry.flush();
  }
- throw new LlmJsonCallError("LLM JSON call failed", failures);
- })().catch((error) => {
- const err = error instanceof Error ? error : new Error(String(error));
- queue.fail(err);
- throw err;
- });
+ })();
  return {
  events: queue.iterable,
  result,
  abort: () => abortController.abort()
  };
  }
+ function streamJson(request) {
+ return startJsonStream(request, "streamJson");
+ }
  async function generateJson(request) {
- const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 2));
- const { providerInfo, responseJsonSchema, openAiTextFormat } = buildJsonSchemaConfig(request);
- let openAiTextFormatForAttempt = openAiTextFormat;
- const failures = [];
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- let rawText = "";
- try {
- const call = streamText({
- model: request.model,
- input: request.input,
- instructions: request.instructions,
- tools: request.tools,
- responseMimeType: request.responseMimeType ?? "application/json",
- responseJsonSchema,
- thinkingLevel: request.thinkingLevel,
- ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
- signal: request.signal
- });
- try {
- for await (const event of call.events) {
- request.onEvent?.(event);
- if (event.type === "delta" && event.channel === "response") {
- rawText += event.text;
- }
- }
- } catch (streamError) {
- await call.result.catch(() => void 0);
- throw streamError;
- }
- const result = await call.result;
- rawText = rawText || result.text;
- const cleanedText = normalizeJsonText(rawText);
- const repairedText = escapeNewlinesInStrings(cleanedText);
- const payload = JSON.parse(repairedText);
- const normalized = typeof request.normalizeJson === "function" ? request.normalizeJson(payload) : payload;
- const parsed = request.schema.parse(normalized);
- return { value: parsed, rawText, result };
- } catch (error) {
- const handled = error instanceof Error ? error : new Error(String(error));
- failures.push({ attempt, rawText, error: handled });
- if (providerInfo.provider === "chatgpt" && openAiTextFormatForAttempt) {
- openAiTextFormatForAttempt = void 0;
- }
- if (attempt >= maxAttempts) {
- throw new LlmJsonCallError(`LLM JSON call failed after ${attempt} attempt(s)`, failures);
+ const call = startJsonStream(
+ {
+ ...request,
+ streamMode: "final"
+ },
+ "generateJson"
+ );
+ try {
+ for await (const event of call.events) {
+ if (event.type !== "json") {
+ request.onEvent?.(event);
  }
  }
+ } catch (streamError) {
+ await call.result.catch(() => void 0);
+ throw streamError;
  }
- throw new LlmJsonCallError("LLM JSON call failed", failures);
+ return await call.result;
  }
  var DEFAULT_TOOL_LOOP_MAX_STEPS = 8;
  function resolveToolLoopContents(input) {
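Note: streamJson and generateJson now route through startJsonStream, and the inner streamText attempts pass telemetry: false, so JSON retries surface as one llm.call.completed record with an attempts count instead of separate per-attempt calls. A usage sketch (model id illustrative; option names taken from the diff):

```ts
import { z } from "zod";
import { generateJson } from "@ljoukov/llm";

const Schema = z.object({ title: z.string(), tags: z.array(z.string()) });

const { value, result } = await generateJson({
  model: "gemini-2.5-flash", // illustrative model id
  input: "Return a JSON object with a title and tags for this release.",
  schema: Schema,
  maxAttempts: 2, // the default, per the diff
});
// value is Schema-typed; result still carries usage/costUsd/modelVersion.
console.log(value.title, result.costUsd);
```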
@@ -9321,7 +9549,10 @@ function streamToolLoop(request) {
  abort: () => abortController.abort()
  };
  }
- var IMAGE_GRADE_SCHEMA = import_zod3.z.enum(["pass", "fail"]);
+ var IMAGE_GRADE_VALUE_SCHEMA = import_zod3.z.enum(["pass", "fail"]);
+ var IMAGE_GRADE_SCHEMA = import_zod3.z.object({
+ grade: IMAGE_GRADE_VALUE_SCHEMA
+ });
  async function gradeGeneratedImage(params) {
  const parts = [
  {
@@ -9332,7 +9563,7 @@ async function gradeGeneratedImage(params) {
  "Image prompt to grade:",
  params.imagePrompt,
  "",
- 'Respond with the JSON string "pass" or "fail".'
+ 'Respond with JSON like {"grade":"pass"} or {"grade":"fail"}.'
  ].join("\n")
  },
  {
@@ -9341,12 +9572,13 @@ async function gradeGeneratedImage(params) {
  mimeType: params.image.mimeType ?? "image/png"
  }
  ];
- const { value } = await generateJson({
+ const { value, result } = await generateJson({
  model: params.model,
  input: [{ role: "user", content: parts }],
- schema: IMAGE_GRADE_SCHEMA
+ schema: IMAGE_GRADE_SCHEMA,
+ telemetry: false
  });
- return value;
+ return { grade: value.grade, result };
  }
  async function generateImages(request) {
  const maxAttempts = Math.max(1, Math.floor(request.maxAttempts ?? 4));
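Note: the image-grading schema changed from a bare z.enum to an object wrapper, likely because some providers' JSON-schema response modes require a top-level object, and gradeGeneratedImage now returns { grade, result } so callers can fold grading usage and cost into their totals. The new shape, reconstructed from the diff (these constants are internal to the package, not exported):

```ts
import { z } from "zod";

const IMAGE_GRADE_VALUE_SCHEMA = z.enum(["pass", "fail"]);
const IMAGE_GRADE_SCHEMA = z.object({ grade: IMAGE_GRADE_VALUE_SCHEMA });

IMAGE_GRADE_SCHEMA.parse({ grade: "pass" }); // accepted in 5.0.0
// IMAGE_GRADE_SCHEMA.parse("pass");         // the 4.x shape, now rejected
```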
@@ -9366,6 +9598,19 @@ async function generateImages(request) {
  if (!gradingPrompt) {
  throw new Error("imageGradingPrompt must be a non-empty string");
  }
+ const telemetry = createLlmTelemetryEmitter({
+ telemetry: request.telemetry,
+ operation: "generateImages",
+ provider: resolveProvider(request.model).provider,
+ model: request.model
+ });
+ const startedAtMs = Date.now();
+ telemetry.emit({
+ type: "llm.call.started",
+ imagePromptCount: promptList.length,
+ styleImageCount: request.styleImages?.length ?? 0,
+ maxAttempts
+ });
  const addText = (parts, text) => {
  const lastPart = parts[parts.length - 1];
  if (lastPart !== void 0 && lastPart.type === "text") {
@@ -9423,6 +9668,9 @@ async function generateImages(request) {
  const inputMessages = [{ role: "user", content: buildInitialPromptParts() }];
  const orderedEntries = [...promptEntries];
  const resolvedImages = /* @__PURE__ */ new Map();
+ let totalCostUsd = 0;
+ let totalUsage;
+ let attemptsUsed = 0;
  const removeResolvedEntries = (resolved) => {
  if (resolved.size === 0) {
  return;
@@ -9437,70 +9685,118 @@ async function generateImages(request) {
  }
  }
  };
- for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
- const result = await generateText({
- model: request.model,
- input: inputMessages,
- responseModalities: ["IMAGE", "TEXT"],
- imageAspectRatio: request.imageAspectRatio,
- imageSize: request.imageSize ?? "2K"
- });
- if (result.blocked || !result.content) {
- continue;
- }
- const images = extractImages(result.content);
- if (images.length > 0 && promptEntries.length > 0) {
- const assignedCount = Math.min(images.length, promptEntries.length);
- const pendingAssignments = promptEntries.slice(0, assignedCount);
- const assignedImages = images.slice(0, assignedCount);
- const gradeResults = await Promise.all(
- pendingAssignments.map(
- (entry, index) => gradeGeneratedImage({
- gradingPrompt,
- imagePrompt: entry.prompt,
- image: (() => {
- const image = assignedImages[index];
- if (!image) {
- throw new Error("Image generation returned fewer images than expected.");
+ let uploadMetrics = emptyFileUploadMetrics();
+ try {
+ await collectFileUploadMetrics(async () => {
+ try {
+ for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
+ attemptsUsed = attempt;
+ const result = await generateText({
+ model: request.model,
+ input: inputMessages,
+ responseModalities: ["IMAGE", "TEXT"],
+ imageAspectRatio: request.imageAspectRatio,
+ imageSize: request.imageSize ?? "2K",
+ telemetry: false
+ });
+ totalCostUsd += result.costUsd;
+ totalUsage = sumUsageTokens(totalUsage, result.usage);
+ if (result.blocked || !result.content) {
+ continue;
+ }
+ const images = extractImages(result.content);
+ if (images.length > 0 && promptEntries.length > 0) {
+ const assignedCount = Math.min(images.length, promptEntries.length);
+ const pendingAssignments = promptEntries.slice(0, assignedCount);
+ const assignedImages = images.slice(0, assignedCount);
+ const gradeResults = await Promise.all(
+ pendingAssignments.map(
+ (entry, index) => gradeGeneratedImage({
+ gradingPrompt,
+ imagePrompt: entry.prompt,
+ image: (() => {
+ const image = assignedImages[index];
+ if (!image) {
+ throw new Error("Image generation returned fewer images than expected.");
+ }
+ return image;
+ })(),
+ model: "gpt-5.2"
+ })
+ )
+ );
+ const passedEntries = /* @__PURE__ */ new Set();
+ for (let i = 0; i < gradeResults.length; i += 1) {
+ const gradeResult = gradeResults[i];
+ const entry = pendingAssignments[i];
+ const image = assignedImages[i];
+ if (!gradeResult || !entry || !image) {
+ continue;
  }
- return image;
- })(),
- model: "gpt-5.2"
- })
- )
- );
- const passedEntries = /* @__PURE__ */ new Set();
- for (let i = 0; i < gradeResults.length; i += 1) {
- const grade = gradeResults[i];
- const entry = pendingAssignments[i];
- const image = assignedImages[i];
- if (!grade || !entry || !image) {
- continue;
- }
- if (grade === "pass") {
- resolvedImages.set(entry.index, image);
- passedEntries.add(entry.index);
+ totalCostUsd += gradeResult.result.costUsd;
+ totalUsage = sumUsageTokens(totalUsage, gradeResult.result.usage);
+ if (gradeResult.grade === "pass") {
+ resolvedImages.set(entry.index, image);
+ passedEntries.add(entry.index);
+ }
+ }
+ removeResolvedEntries(passedEntries);
+ }
+ if (promptEntries.length === 0) {
+ break;
+ }
+ inputMessages.push({
+ role: "assistant",
+ content: result.content.parts
+ });
+ inputMessages.push({
+ role: "user",
+ content: buildContinuationPromptParts(promptEntries)
+ });
  }
+ } finally {
+ uploadMetrics = getCurrentFileUploadMetrics();
  }
- removeResolvedEntries(passedEntries);
- }
- if (promptEntries.length === 0) {
- break;
- }
- inputMessages.push({
- role: "assistant",
- content: result.content.parts
  });
- inputMessages.push({ role: "user", content: buildContinuationPromptParts(promptEntries) });
- }
- const orderedImages = [];
- for (const entry of orderedEntries) {
- const image = resolvedImages.get(entry.index);
- if (image) {
- orderedImages.push(image);
+ const orderedImages = [];
+ for (const entry of orderedEntries) {
+ const image = resolvedImages.get(entry.index);
+ if (image) {
+ orderedImages.push(image);
+ }
  }
+ const outputImages = orderedImages.slice(0, numImages);
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: true,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ usage: totalUsage,
+ costUsd: totalCostUsd,
+ imageCount: outputImages.length,
+ attempts: attemptsUsed,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs
+ });
+ return outputImages;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error(String(error));
+ telemetry.emit({
+ type: "llm.call.completed",
+ success: false,
+ durationMs: Math.max(0, Date.now() - startedAtMs),
+ usage: totalUsage,
+ costUsd: totalCostUsd,
+ attempts: attemptsUsed > 0 ? attemptsUsed : void 0,
+ uploadCount: uploadMetrics.count,
+ uploadBytes: uploadMetrics.totalBytes,
+ uploadLatencyMs: uploadMetrics.totalLatencyMs,
+ error: err.message
+ });
+ throw err;
+ } finally {
+ await telemetry.flush();
  }
- return orderedImages.slice(0, numImages);
  }
  async function generateImageInBatches(request) {
  const {
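Note: generateImages now reports a single aggregated llm.call.completed per request: the inner generateText and gradeGeneratedImage calls run with telemetry: false, and their usage and cost are folded in via sumUsageTokens. A sketch of observing that aggregate; the prompts option name below is hypothetical, since only imageGradingPrompt, styleImages, imageAspectRatio, imageSize, and maxAttempts appear in this diff:

```ts
import { configureTelemetry, generateImages } from "@ljoukov/llm";

configureTelemetry({
  sink: {
    emit(event: any) {
      if (event.type === "llm.call.completed" && event.operation === "generateImages") {
        // Summed across generation attempts and the internal gpt-5.2 grading calls.
        console.log(event.costUsd, event.usage?.totalTokens, event.attempts);
      }
    },
  },
});

const images = await generateImages({
  model: "gemini-3-pro-image",            // illustrative model id
  imagePrompts: ["a lighthouse at dawn"], // hypothetical option name
  imageGradingPrompt: "Fail images with visible text artifacts.",
});
console.log(images.length);
```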
@@ -12151,7 +12447,7 @@ function isNoEntError(error) {

  // src/agent.ts
  async function runAgentLoop(request) {
- const telemetry = createAgentTelemetrySession(request.telemetry);
+ const telemetry = createTelemetrySession(request.telemetry);
  const logging = createRootAgentLoggingSession(request);
  try {
  return await runWithAgentLoggingSession(logging, async () => {
@@ -12237,7 +12533,7 @@ async function runAgentLoopInternal(request, context) {
  logging: _logging,
  ...toolLoopRequest
  } = request;
- const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
+ const telemetrySession = context.telemetry ?? createTelemetrySession(telemetry);
  const loggingSession = context.logging;
  const runId = randomRunId();
  const startedAtMs = Date.now();
@@ -12300,15 +12596,15 @@ async function runAgentLoopInternal(request, context) {
  ].join(" ")
  );
  const sourceOnEvent = toolLoopRequestWithSteering.onEvent;
- const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
+ const includeStreamEvents = telemetrySession?.includeStreamEvents === true;
  const streamEventLogger = loggingSession ? createAgentStreamEventLogger({
  append: (line) => {
  loggingSession.logLine(`[agent:${runId}] ${line}`);
  }
  }) : void 0;
- const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
+ const wrappedOnEvent = sourceOnEvent || includeStreamEvents ? (event) => {
  sourceOnEvent?.(event);
- if (includeLlmStreamEvents) {
+ if (includeStreamEvents) {
  emitTelemetry({ type: "agent.run.stream", event });
  }
  streamEventLogger?.appendEvent(event);
@@ -12546,7 +12842,7 @@ function countToolCalls(result) {
  }
  return count;
  }
- function sumUsageValue(current, next) {
+ function sumUsageValue2(current, next) {
  if (typeof next !== "number" || !Number.isFinite(next)) {
  return current;
  }
@@ -12564,20 +12860,17 @@ function summarizeResultUsage(result) {
  continue;
  }
  summary = {
- promptTokens: sumUsageValue(summary?.promptTokens, usage.promptTokens),
- cachedTokens: sumUsageValue(summary?.cachedTokens, usage.cachedTokens),
- responseTokens: sumUsageValue(summary?.responseTokens, usage.responseTokens),
- responseImageTokens: sumUsageValue(summary?.responseImageTokens, usage.responseImageTokens),
- thinkingTokens: sumUsageValue(summary?.thinkingTokens, usage.thinkingTokens),
- totalTokens: sumUsageValue(summary?.totalTokens, usage.totalTokens),
- toolUsePromptTokens: sumUsageValue(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
+ promptTokens: sumUsageValue2(summary?.promptTokens, usage.promptTokens),
+ cachedTokens: sumUsageValue2(summary?.cachedTokens, usage.cachedTokens),
+ responseTokens: sumUsageValue2(summary?.responseTokens, usage.responseTokens),
+ responseImageTokens: sumUsageValue2(summary?.responseImageTokens, usage.responseImageTokens),
+ thinkingTokens: sumUsageValue2(summary?.thinkingTokens, usage.thinkingTokens),
+ totalTokens: sumUsageValue2(summary?.totalTokens, usage.totalTokens),
+ toolUsePromptTokens: sumUsageValue2(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
  };
  }
  return summary;
  }
- function isPromiseLike2(value) {
- return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
- }
  function resolveAgentLoggingSelection(value) {
  if (value === false) {
  return void 0;
@@ -12611,60 +12904,6 @@ function createRootAgentLoggingSession(request) {
  mirrorToConsole: selected.mirrorToConsole !== false
  });
  }
- function isAgentTelemetrySink(value) {
- return typeof value === "object" && value !== null && typeof value.emit === "function";
- }
- function resolveTelemetrySelection(telemetry) {
- if (!telemetry) {
- return void 0;
- }
- if (isAgentTelemetrySink(telemetry)) {
- return { sink: telemetry };
- }
- if (isAgentTelemetrySink(telemetry.sink)) {
- return telemetry;
- }
- throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
- }
- function createAgentTelemetrySession(telemetry) {
- const config = resolveTelemetrySelection(telemetry);
- if (!config) {
- return void 0;
- }
- const pending = /* @__PURE__ */ new Set();
- const trackPromise = (promise) => {
- pending.add(promise);
- promise.finally(() => {
- pending.delete(promise);
- });
- };
- const emit = (event) => {
- try {
- const output = config.sink.emit(event);
- if (isPromiseLike2(output)) {
- const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
- trackPromise(task);
- }
- } catch {
- }
- };
- const flush = async () => {
- while (pending.size > 0) {
- await Promise.allSettled([...pending]);
- }
- if (typeof config.sink.flush === "function") {
- try {
- await config.sink.flush();
- } catch {
- }
- }
- };
- return {
- includeLlmStreamEvents: config.includeLlmStreamEvents === true,
- emit,
- flush
- };
- }
  function createAgentTelemetryEmitter(params) {
  return (event) => {
  if (!params.session) {
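Note: the agent loop's bespoke telemetry session is gone. runAgentLoop now uses the shared createTelemetrySession from src/telemetry.ts, and the stream-events flag is renamed from includeLlmStreamEvents to includeStreamEvents. Two behavioral differences fall out of the removed code: the new resolveTelemetrySelection no longer wraps a bare sink in { sink } and no longer throws on an invalid config, so pass the object form explicitly. A minimal migration sketch:

```ts
const sink = { emit: (event: unknown) => console.log(event) };

// 4.x (flag name per the removed createAgentTelemetrySession above):
const telemetry4 = { sink, includeLlmStreamEvents: true };

// 5.0.0 (shared session; the same flag name now also gates the
// llm.call.stream events on individual LLM calls):
const telemetry5 = { sink, includeStreamEvents: true };
// await runAgentLoop({ ...request, telemetry: telemetry5 });
```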
@@ -13358,6 +13597,7 @@ async function runCandidateEvolution(options) {
  applyPatch,
  configureGemini,
  configureModelConcurrency,
+ configureTelemetry,
  convertGooglePartsToLlmParts,
  createApplyPatchTool,
  createCodexApplyPatchTool,
@@ -13406,6 +13646,7 @@ async function runCandidateEvolution(options) {
  parseJsonFromLlmText,
  refreshChatGptOauthToken,
  resetModelConcurrencyConfig,
+ resetTelemetry,
  resolveFilesystemToolProfile,
  resolveFireworksModelId,
  runAgentLoop,