@ljoukov/llm 3.0.4 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -51,6 +51,7 @@ __export(index_exports, {
51
51
  appendMarkdownSourcesSection: () => appendMarkdownSourcesSection,
52
52
  applyPatch: () => applyPatch,
53
53
  configureGemini: () => configureGemini,
54
+ configureModelConcurrency: () => configureModelConcurrency,
54
55
  convertGooglePartsToLlmParts: () => convertGooglePartsToLlmParts,
55
56
  createApplyPatchTool: () => createApplyPatchTool,
56
57
  createCodexApplyPatchTool: () => createCodexApplyPatchTool,
@@ -95,6 +96,7 @@ __export(index_exports, {
95
96
  loadLocalEnv: () => loadLocalEnv,
96
97
  parseJsonFromLlmText: () => parseJsonFromLlmText,
97
98
  refreshChatGptOauthToken: () => refreshChatGptOauthToken,
99
+ resetModelConcurrencyConfig: () => resetModelConcurrencyConfig,
98
100
  resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
99
101
  resolveFireworksModelId: () => resolveFireworksModelId,
100
102
  runAgentLoop: () => runAgentLoop,
@@ -1693,23 +1695,16 @@ function parseEventBlock(raw) {
1693
1695
  var MIN_MODEL_CONCURRENCY_CAP = 1;
1694
1696
  var MAX_MODEL_CONCURRENCY_CAP = 64;
1695
1697
  var DEFAULT_MODEL_CONCURRENCY_CAP = 3;
1696
- function parsePositiveInteger(raw) {
1697
- if (raw === void 0) {
1698
- return void 0;
1699
- }
1700
- const normalized = raw.trim();
1701
- if (!normalized) {
1702
- return void 0;
1703
- }
1704
- if (!/^-?\d+$/u.test(normalized)) {
1705
- return void 0;
1706
- }
1707
- const parsed = Number.parseInt(normalized, 10);
1708
- if (!Number.isFinite(parsed)) {
1709
- return void 0;
1710
- }
1711
- return parsed;
1712
- }
1698
+ var DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP = 12;
1699
+ var DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP = 4;
1700
+ var DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP = 2;
1701
+ var DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP = 6;
1702
+ var MODEL_CONCURRENCY_PROVIDERS = [
1703
+ "openai",
1704
+ "google",
1705
+ "fireworks"
1706
+ ];
1707
+ var configuredModelConcurrency = normalizeModelConcurrencyConfig({});
1713
1708
  function clampModelConcurrencyCap(value) {
1714
1709
  if (!Number.isFinite(value)) {
1715
1710
  return DEFAULT_MODEL_CONCURRENCY_CAP;
@@ -1723,30 +1718,94 @@ function clampModelConcurrencyCap(value) {
1723
1718
  }
1724
1719
  return rounded;
1725
1720
  }
1726
- function normalizeModelIdForEnv(modelId) {
1727
- return modelId.trim().replace(/[^A-Za-z0-9]+/gu, "_").replace(/^_+|_+$/gu, "").toUpperCase();
1721
+ function normalizeModelIdForConfig(modelId) {
1722
+ return modelId.trim().toLowerCase();
1728
1723
  }
1729
- function resolveModelConcurrencyCap(options) {
1730
- const env = options.env ?? process.env;
1731
- const providerPrefix = options.providerEnvPrefix;
1732
- const defaultCap = clampModelConcurrencyCap(options.defaultCap ?? DEFAULT_MODEL_CONCURRENCY_CAP);
1733
- const normalizedModelId = options.modelId ? normalizeModelIdForEnv(options.modelId) : "";
1734
- const candidateKeys = [
1735
- ...normalizedModelId ? [
1736
- `${providerPrefix}_MAX_PARALLEL_REQUESTS_MODEL_${normalizedModelId}`,
1737
- `LLM_MAX_PARALLEL_REQUESTS_MODEL_${normalizedModelId}`
1738
- ] : [],
1739
- `${providerPrefix}_MAX_PARALLEL_REQUESTS_PER_MODEL`,
1740
- "LLM_MAX_PARALLEL_REQUESTS_PER_MODEL"
1741
- ];
1742
- for (const key of candidateKeys) {
1743
- const parsed = parsePositiveInteger(env[key]);
1744
- if (parsed === void 0) {
1724
+ function normalizeCap(value) {
1725
+ if (value === void 0 || !Number.isFinite(value)) {
1726
+ return void 0;
1727
+ }
1728
+ return clampModelConcurrencyCap(value);
1729
+ }
1730
+ function normalizeModelCapMap(caps) {
1731
+ const normalized = /* @__PURE__ */ new Map();
1732
+ if (!caps) {
1733
+ return normalized;
1734
+ }
1735
+ for (const [modelId, cap] of Object.entries(caps)) {
1736
+ const modelKey = normalizeModelIdForConfig(modelId);
1737
+ if (!modelKey) {
1745
1738
  continue;
1746
1739
  }
1747
- return clampModelConcurrencyCap(parsed);
1740
+ const normalizedCap = normalizeCap(cap);
1741
+ if (normalizedCap === void 0) {
1742
+ continue;
1743
+ }
1744
+ normalized.set(modelKey, normalizedCap);
1745
+ }
1746
+ return normalized;
1747
+ }
1748
+ function normalizeModelConcurrencyConfig(config) {
1749
+ const providerCaps = {};
1750
+ const providerModelCaps = {
1751
+ openai: /* @__PURE__ */ new Map(),
1752
+ google: /* @__PURE__ */ new Map(),
1753
+ fireworks: /* @__PURE__ */ new Map()
1754
+ };
1755
+ for (const provider of MODEL_CONCURRENCY_PROVIDERS) {
1756
+ const providerCap = normalizeCap(config.providerCaps?.[provider]);
1757
+ if (providerCap !== void 0) {
1758
+ providerCaps[provider] = providerCap;
1759
+ }
1760
+ providerModelCaps[provider] = new Map(
1761
+ normalizeModelCapMap(config.providerModelCaps?.[provider])
1762
+ );
1748
1763
  }
1749
- return defaultCap;
1764
+ return {
1765
+ globalCap: normalizeCap(config.globalCap),
1766
+ providerCaps,
1767
+ modelCaps: normalizeModelCapMap(config.modelCaps),
1768
+ providerModelCaps
1769
+ };
1770
+ }
1771
+ function resolveDefaultProviderCap(provider, modelId) {
1772
+ if (provider === "openai") {
1773
+ return DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP;
1774
+ }
1775
+ if (provider === "google") {
1776
+ return modelId?.includes("preview") ? DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP : DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP;
1777
+ }
1778
+ return DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP;
1779
+ }
1780
+ function configureModelConcurrency(config = {}) {
1781
+ configuredModelConcurrency = normalizeModelConcurrencyConfig(config);
1782
+ }
1783
+ function resetModelConcurrencyConfig() {
1784
+ configuredModelConcurrency = normalizeModelConcurrencyConfig({});
1785
+ }
1786
+ function resolveModelConcurrencyCap(options) {
1787
+ const modelId = options.modelId ? normalizeModelIdForConfig(options.modelId) : void 0;
1788
+ const config = options.config ? normalizeModelConcurrencyConfig(options.config) : configuredModelConcurrency;
1789
+ const providerModelCap = modelId ? config.providerModelCaps[options.provider].get(modelId) : void 0;
1790
+ if (providerModelCap !== void 0) {
1791
+ return providerModelCap;
1792
+ }
1793
+ const modelCap = modelId ? config.modelCaps.get(modelId) : void 0;
1794
+ if (modelCap !== void 0) {
1795
+ return modelCap;
1796
+ }
1797
+ const providerCap = config.providerCaps[options.provider];
1798
+ if (providerCap !== void 0) {
1799
+ return providerCap;
1800
+ }
1801
+ if (config.globalCap !== void 0) {
1802
+ return config.globalCap;
1803
+ }
1804
+ const defaultCap = normalizeCap(options.defaultCap);
1805
+ if (defaultCap !== void 0) {
1806
+ return defaultCap;
1807
+ }
1808
+ return resolveDefaultProviderCap(options.provider, modelId);
1750
1809
  }
1751
1810
 
1752
1811
  // src/utils/scheduler.ts
@@ -1855,12 +1914,20 @@ function createCallScheduler(options = {}) {
1855
1914
  release?.();
1856
1915
  }
1857
1916
  }
1858
- async function attemptWithRetries(fn, attempt) {
1917
+ async function attemptWithRetries(fn, attempt, state) {
1859
1918
  try {
1919
+ const spacingStartedAtMs = Date.now();
1860
1920
  await applyStartSpacing();
1921
+ const callStartedAtMs = Date.now();
1922
+ state.schedulerDelayMs += Math.max(0, callStartedAtMs - spacingStartedAtMs);
1923
+ if (state.startedAtMs === void 0) {
1924
+ state.startedAtMs = callStartedAtMs;
1925
+ }
1926
+ state.attempts = Math.max(state.attempts, attempt);
1861
1927
  return await fn();
1862
1928
  } catch (error) {
1863
1929
  if (isOverloadError2(error)) {
1930
+ state.overloadCount += 1;
1864
1931
  consecutiveSuccesses = 0;
1865
1932
  currentParallelLimit = Math.max(1, Math.ceil(currentParallelLimit / 2));
1866
1933
  }
@@ -1877,9 +1944,10 @@ function createCallScheduler(options = {}) {
1877
1944
  }
1878
1945
  const normalizedDelay = Math.max(0, delay);
1879
1946
  if (normalizedDelay > 0) {
1947
+ state.retryDelayMs += normalizedDelay;
1880
1948
  await sleep(normalizedDelay);
1881
1949
  }
1882
- return attemptWithRetries(fn, attempt + 1);
1950
+ return attemptWithRetries(fn, attempt + 1, state);
1883
1951
  }
1884
1952
  }
1885
1953
  function drainQueue() {
@@ -1892,11 +1960,22 @@ function createCallScheduler(options = {}) {
1892
1960
  void task();
1893
1961
  }
1894
1962
  }
1895
- function run(fn) {
1963
+ function run(fn, runOptions = {}) {
1896
1964
  return new Promise((resolve, reject) => {
1965
+ const enqueuedAtMs = Date.now();
1897
1966
  const job = async () => {
1967
+ const dequeuedAtMs = Date.now();
1968
+ const state = {
1969
+ enqueuedAtMs,
1970
+ dequeuedAtMs,
1971
+ schedulerDelayMs: 0,
1972
+ retryDelayMs: 0,
1973
+ attempts: 0,
1974
+ overloadCount: 0
1975
+ };
1898
1976
  try {
1899
- const result = await attemptWithRetries(fn, 1);
1977
+ const result = await attemptWithRetries(fn, 1, state);
1978
+ state.completedAtMs = Date.now();
1900
1979
  consecutiveSuccesses += 1;
1901
1980
  if (currentParallelLimit < maxParallelRequests && consecutiveSuccesses >= increaseAfterConsecutiveSuccesses) {
1902
1981
  currentParallelLimit += 1;
@@ -1904,8 +1983,26 @@ function createCallScheduler(options = {}) {
1904
1983
  }
1905
1984
  resolve(result);
1906
1985
  } catch (error) {
1986
+ state.completedAtMs = Date.now();
1907
1987
  reject(toError(error));
1908
1988
  } finally {
1989
+ const startedAtMs = state.startedAtMs ?? state.dequeuedAtMs;
1990
+ const completedAtMs = state.completedAtMs ?? Date.now();
1991
+ const metrics = {
1992
+ enqueuedAtMs: state.enqueuedAtMs,
1993
+ dequeuedAtMs: state.dequeuedAtMs,
1994
+ startedAtMs,
1995
+ completedAtMs,
1996
+ queueWaitMs: Math.max(0, state.dequeuedAtMs - state.enqueuedAtMs),
1997
+ schedulerDelayMs: Math.max(0, state.schedulerDelayMs),
1998
+ retryDelayMs: Math.max(0, state.retryDelayMs),
1999
+ attempts: Math.max(1, state.attempts),
2000
+ overloadCount: Math.max(0, state.overloadCount)
2001
+ };
2002
+ try {
2003
+ runOptions.onSettled?.(metrics);
2004
+ } catch {
2005
+ }
1909
2006
  activeCount -= 1;
1910
2007
  queueMicrotask(drainQueue);
1911
2008
  }
@@ -2002,7 +2099,7 @@ function getSchedulerForModel(modelId) {
2002
2099
  }
2003
2100
  const created = createCallScheduler({
2004
2101
  maxParallelRequests: resolveModelConcurrencyCap({
2005
- providerEnvPrefix: "FIREWORKS",
2102
+ provider: "fireworks",
2006
2103
  modelId: normalizedModelId
2007
2104
  }),
2008
2105
  minIntervalBetweenStartMs: 200,
@@ -2011,8 +2108,8 @@ function getSchedulerForModel(modelId) {
2011
2108
  schedulerByModel.set(schedulerKey, created);
2012
2109
  return created;
2013
2110
  }
2014
- async function runFireworksCall(fn, modelId) {
2015
- return getSchedulerForModel(modelId).run(async () => fn(getFireworksClient()));
2111
+ async function runFireworksCall(fn, modelId, runOptions) {
2112
+ return getSchedulerForModel(modelId).run(async () => fn(getFireworksClient()), runOptions);
2016
2113
  }
2017
2114
 
2018
2115
  // src/fireworks/models.ts
@@ -2378,7 +2475,7 @@ function getSchedulerForModel2(modelId) {
2378
2475
  }
2379
2476
  const created = createCallScheduler({
2380
2477
  maxParallelRequests: resolveModelConcurrencyCap({
2381
- providerEnvPrefix: "GOOGLE",
2478
+ provider: "google",
2382
2479
  modelId: normalizedModelId
2383
2480
  }),
2384
2481
  minIntervalBetweenStartMs: 200,
@@ -2398,8 +2495,8 @@ function getSchedulerForModel2(modelId) {
2398
2495
  schedulerByModel2.set(schedulerKey, created);
2399
2496
  return created;
2400
2497
  }
2401
- async function runGeminiCall(fn, modelId) {
2402
- return getSchedulerForModel2(modelId).run(async () => fn(await getGeminiClient()));
2498
+ async function runGeminiCall(fn, modelId, runOptions) {
2499
+ return getSchedulerForModel2(modelId).run(async () => fn(await getGeminiClient()), runOptions);
2403
2500
  }
2404
2501
 
2405
2502
  // src/openai/client.ts
@@ -2571,7 +2668,7 @@ function getSchedulerForModel3(modelId) {
2571
2668
  }
2572
2669
  const created = createCallScheduler({
2573
2670
  maxParallelRequests: resolveModelConcurrencyCap({
2574
- providerEnvPrefix: "OPENAI",
2671
+ provider: "openai",
2575
2672
  modelId: normalizedModelId
2576
2673
  }),
2577
2674
  minIntervalBetweenStartMs: 200,
@@ -2580,8 +2677,8 @@ function getSchedulerForModel3(modelId) {
2580
2677
  schedulerByModel3.set(schedulerKey, created);
2581
2678
  return created;
2582
2679
  }
2583
- async function runOpenAiCall(fn, modelId) {
2584
- return getSchedulerForModel3(modelId).run(async () => fn(getOpenAiClient()));
2680
+ async function runOpenAiCall(fn, modelId, runOptions) {
2681
+ return getSchedulerForModel3(modelId).run(async () => fn(getOpenAiClient()), runOptions);
2585
2682
  }
2586
2683
 
2587
2684
  // src/openai/models.ts
@@ -3035,9 +3132,9 @@ function isRetryableChatGptTransportError(error) {
3035
3132
  return false;
3036
3133
  }
3037
3134
  const message = error.message.toLowerCase();
3038
- return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network");
3135
+ return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network") || message.includes("responses websocket");
3039
3136
  }
3040
- async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 2) {
3137
+ async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 3) {
3041
3138
  let attempt = 1;
3042
3139
  while (true) {
3043
3140
  try {
@@ -3942,77 +4039,153 @@ function buildToolErrorOutput(message, issues) {
3942
4039
  }
3943
4040
  return output;
3944
4041
  }
4042
+ var SUBAGENT_WAIT_TOOL_NAME = "wait";
4043
+ function toIsoTimestamp(ms) {
4044
+ return new Date(ms).toISOString();
4045
+ }
4046
+ function toToolResultDuration(result) {
4047
+ return typeof result.durationMs === "number" && Number.isFinite(result.durationMs) ? Math.max(0, result.durationMs) : 0;
4048
+ }
4049
+ function schedulerMetricsOrDefault(metrics) {
4050
+ if (!metrics) {
4051
+ return {
4052
+ queueWaitMs: 0,
4053
+ schedulerDelayMs: 0,
4054
+ providerRetryDelayMs: 0,
4055
+ providerAttempts: 1
4056
+ };
4057
+ }
4058
+ return {
4059
+ queueWaitMs: Math.max(0, metrics.queueWaitMs),
4060
+ schedulerDelayMs: Math.max(0, metrics.schedulerDelayMs),
4061
+ providerRetryDelayMs: Math.max(0, metrics.retryDelayMs),
4062
+ providerAttempts: Math.max(1, metrics.attempts),
4063
+ modelCallStartedAtMs: metrics.startedAtMs
4064
+ };
4065
+ }
4066
+ function buildStepTiming(params) {
4067
+ const scheduler = schedulerMetricsOrDefault(params.schedulerMetrics);
4068
+ const modelCallStartedAtMs = scheduler.modelCallStartedAtMs ?? params.stepStartedAtMs;
4069
+ const firstModelEventAtMs = params.firstModelEventAtMs;
4070
+ const effectiveFirstEventAtMs = firstModelEventAtMs !== void 0 ? Math.max(modelCallStartedAtMs, firstModelEventAtMs) : params.modelCompletedAtMs;
4071
+ const connectionSetupMs = Math.max(0, effectiveFirstEventAtMs - modelCallStartedAtMs);
4072
+ const activeGenerationMs = Math.max(0, params.modelCompletedAtMs - effectiveFirstEventAtMs);
4073
+ return {
4074
+ startedAt: toIsoTimestamp(params.stepStartedAtMs),
4075
+ completedAt: toIsoTimestamp(params.stepCompletedAtMs),
4076
+ totalMs: Math.max(0, params.stepCompletedAtMs - params.stepStartedAtMs),
4077
+ queueWaitMs: scheduler.queueWaitMs,
4078
+ connectionSetupMs,
4079
+ activeGenerationMs,
4080
+ toolExecutionMs: Math.max(0, params.toolExecutionMs),
4081
+ waitToolMs: Math.max(0, params.waitToolMs),
4082
+ schedulerDelayMs: scheduler.schedulerDelayMs,
4083
+ providerRetryDelayMs: scheduler.providerRetryDelayMs,
4084
+ providerAttempts: scheduler.providerAttempts
4085
+ };
4086
+ }
4087
+ function extractSpawnStartupMetrics(outputPayload) {
4088
+ if (!outputPayload || typeof outputPayload !== "object") {
4089
+ return void 0;
4090
+ }
4091
+ const outputRecord = outputPayload;
4092
+ const notification = typeof outputRecord.notification === "string" ? outputRecord.notification : "";
4093
+ if (notification !== "spawned") {
4094
+ return void 0;
4095
+ }
4096
+ const agent = outputRecord.agent;
4097
+ if (!agent || typeof agent !== "object") {
4098
+ return void 0;
4099
+ }
4100
+ const agentRecord = agent;
4101
+ const startupLatencyMs = agentRecord.spawn_startup_latency_ms;
4102
+ if (typeof startupLatencyMs !== "number" || !Number.isFinite(startupLatencyMs)) {
4103
+ return void 0;
4104
+ }
4105
+ return {
4106
+ spawnStartupLatencyMs: Math.max(0, startupLatencyMs)
4107
+ };
4108
+ }
3945
4109
  async function executeToolCall(params) {
3946
4110
  const { callKind, toolName, tool: tool2, rawInput, parseError } = params;
3947
- if (!tool2) {
3948
- const message = `Unknown tool: ${toolName}`;
4111
+ const startedAtMs = Date.now();
4112
+ const finalize = (base, outputPayload, metrics) => {
4113
+ const completedAtMs = Date.now();
3949
4114
  return {
3950
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3951
- outputPayload: buildToolErrorOutput(message)
4115
+ result: {
4116
+ ...base,
4117
+ startedAt: toIsoTimestamp(startedAtMs),
4118
+ completedAt: toIsoTimestamp(completedAtMs),
4119
+ durationMs: Math.max(0, completedAtMs - startedAtMs),
4120
+ ...metrics ? { metrics } : {}
4121
+ },
4122
+ outputPayload
3952
4123
  };
4124
+ };
4125
+ if (!tool2) {
4126
+ const message = `Unknown tool: ${toolName}`;
4127
+ const outputPayload = buildToolErrorOutput(message);
4128
+ return finalize(
4129
+ { toolName, input: rawInput, output: outputPayload, error: message },
4130
+ outputPayload
4131
+ );
3953
4132
  }
3954
4133
  if (callKind === "custom") {
3955
4134
  if (!isCustomTool(tool2)) {
3956
4135
  const message = `Tool ${toolName} was called as custom_tool_call but is declared as function.`;
3957
4136
  const outputPayload = buildToolErrorOutput(message);
3958
- return {
3959
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4137
+ return finalize(
4138
+ { toolName, input: rawInput, output: outputPayload, error: message },
3960
4139
  outputPayload
3961
- };
4140
+ );
3962
4141
  }
3963
4142
  const input = typeof rawInput === "string" ? rawInput : String(rawInput ?? "");
3964
4143
  try {
3965
4144
  const output = await tool2.execute(input);
3966
- return {
3967
- result: { toolName, input, output },
3968
- outputPayload: output
3969
- };
4145
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4146
+ return finalize({ toolName, input, output }, output, metrics);
3970
4147
  } catch (error) {
3971
4148
  const message = error instanceof Error ? error.message : String(error);
3972
4149
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
3973
- return {
3974
- result: { toolName, input, output: outputPayload, error: message },
3975
- outputPayload
3976
- };
4150
+ return finalize({ toolName, input, output: outputPayload, error: message }, outputPayload);
3977
4151
  }
3978
4152
  }
3979
4153
  if (isCustomTool(tool2)) {
3980
4154
  const message = `Tool ${toolName} was called as function_call but is declared as custom.`;
3981
4155
  const outputPayload = buildToolErrorOutput(message);
3982
- return {
3983
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4156
+ return finalize(
4157
+ { toolName, input: rawInput, output: outputPayload, error: message },
3984
4158
  outputPayload
3985
- };
4159
+ );
3986
4160
  }
3987
4161
  if (parseError) {
3988
4162
  const message = `Invalid JSON for tool ${toolName}: ${parseError}`;
3989
- return {
3990
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3991
- outputPayload: buildToolErrorOutput(message)
3992
- };
4163
+ const outputPayload = buildToolErrorOutput(message);
4164
+ return finalize(
4165
+ { toolName, input: rawInput, output: outputPayload, error: message },
4166
+ outputPayload
4167
+ );
3993
4168
  }
3994
4169
  const parsed = tool2.inputSchema.safeParse(rawInput);
3995
4170
  if (!parsed.success) {
3996
4171
  const message = `Invalid tool arguments for ${toolName}: ${formatZodIssues(parsed.error.issues)}`;
3997
4172
  const outputPayload = buildToolErrorOutput(message, parsed.error.issues);
3998
- return {
3999
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4173
+ return finalize(
4174
+ { toolName, input: rawInput, output: outputPayload, error: message },
4000
4175
  outputPayload
4001
- };
4176
+ );
4002
4177
  }
4003
4178
  try {
4004
4179
  const output = await tool2.execute(parsed.data);
4005
- return {
4006
- result: { toolName, input: parsed.data, output },
4007
- outputPayload: output
4008
- };
4180
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4181
+ return finalize({ toolName, input: parsed.data, output }, output, metrics);
4009
4182
  } catch (error) {
4010
4183
  const message = error instanceof Error ? error.message : String(error);
4011
4184
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
4012
- return {
4013
- result: { toolName, input: parsed.data, output: outputPayload, error: message },
4185
+ return finalize(
4186
+ { toolName, input: parsed.data, output: outputPayload, error: message },
4014
4187
  outputPayload
4015
- };
4188
+ );
4016
4189
  }
4017
4190
  }
4018
4191
  function buildToolLogId(turn, toolIndex) {
@@ -4877,6 +5050,9 @@ async function runToolLoop(request) {
4877
5050
  let input = toOpenAiInput(contents);
4878
5051
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
4879
5052
  const turn = stepIndex + 1;
5053
+ const stepStartedAtMs = Date.now();
5054
+ let firstModelEventAtMs;
5055
+ let schedulerMetrics;
4880
5056
  const abortController = new AbortController();
4881
5057
  if (request.signal) {
4882
5058
  if (request.signal.aborted) {
@@ -4895,45 +5071,59 @@ async function runToolLoop(request) {
4895
5071
  const emitEvent = (ev) => {
4896
5072
  onEvent?.(ev);
4897
5073
  };
4898
- const finalResponse = await runOpenAiCall(async (client) => {
4899
- const stream = client.responses.stream(
4900
- {
4901
- model: providerInfo.model,
4902
- input,
4903
- ...previousResponseId ? { previous_response_id: previousResponseId } : {},
4904
- ...openAiTools.length > 0 ? { tools: openAiTools } : {},
4905
- ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
4906
- reasoning,
4907
- text: textConfig,
4908
- include: ["reasoning.encrypted_content"]
4909
- },
4910
- { signal: abortController.signal }
4911
- );
4912
- for await (const event of stream) {
4913
- switch (event.type) {
4914
- case "response.output_text.delta":
4915
- emitEvent({
4916
- type: "delta",
4917
- channel: "response",
4918
- text: typeof event.delta === "string" ? event.delta : ""
4919
- });
4920
- break;
4921
- case "response.reasoning_summary_text.delta":
4922
- emitEvent({
4923
- type: "delta",
4924
- channel: "thought",
4925
- text: typeof event.delta === "string" ? event.delta : ""
4926
- });
4927
- break;
4928
- case "response.refusal.delta":
4929
- emitEvent({ type: "blocked" });
4930
- break;
4931
- default:
4932
- break;
5074
+ const markFirstModelEvent = () => {
5075
+ if (firstModelEventAtMs === void 0) {
5076
+ firstModelEventAtMs = Date.now();
5077
+ }
5078
+ };
5079
+ const finalResponse = await runOpenAiCall(
5080
+ async (client) => {
5081
+ const stream = client.responses.stream(
5082
+ {
5083
+ model: providerInfo.model,
5084
+ input,
5085
+ ...previousResponseId ? { previous_response_id: previousResponseId } : {},
5086
+ ...openAiTools.length > 0 ? { tools: openAiTools } : {},
5087
+ ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
5088
+ reasoning,
5089
+ text: textConfig,
5090
+ include: ["reasoning.encrypted_content"]
5091
+ },
5092
+ { signal: abortController.signal }
5093
+ );
5094
+ for await (const event of stream) {
5095
+ markFirstModelEvent();
5096
+ switch (event.type) {
5097
+ case "response.output_text.delta":
5098
+ emitEvent({
5099
+ type: "delta",
5100
+ channel: "response",
5101
+ text: typeof event.delta === "string" ? event.delta : ""
5102
+ });
5103
+ break;
5104
+ case "response.reasoning_summary_text.delta":
5105
+ emitEvent({
5106
+ type: "delta",
5107
+ channel: "thought",
5108
+ text: typeof event.delta === "string" ? event.delta : ""
5109
+ });
5110
+ break;
5111
+ case "response.refusal.delta":
5112
+ emitEvent({ type: "blocked" });
5113
+ break;
5114
+ default:
5115
+ break;
5116
+ }
5117
+ }
5118
+ return await stream.finalResponse();
5119
+ },
5120
+ providerInfo.model,
5121
+ {
5122
+ onSettled: (metrics) => {
5123
+ schedulerMetrics = metrics;
4933
5124
  }
4934
5125
  }
4935
- return await stream.finalResponse();
4936
- }, providerInfo.model);
5126
+ );
4937
5127
  modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
4938
5128
  emitEvent({ type: "model", modelVersion });
4939
5129
  if (finalResponse.error) {
@@ -4943,6 +5133,7 @@ async function runToolLoop(request) {
4943
5133
  usageTokens = extractOpenAiUsageTokens(finalResponse.usage);
4944
5134
  const responseText = extractOpenAiResponseParts(finalResponse).parts.filter((p) => p.type === "text" && p.thought !== true).map((p) => p.text).join("").trim();
4945
5135
  const reasoningSummary = extractOpenAiReasoningSummary(finalResponse).trim();
5136
+ const modelCompletedAtMs = Date.now();
4946
5137
  const stepCostUsd = estimateCallCostUsd({
4947
5138
  modelId: modelVersion,
4948
5139
  tokens: usageTokens,
@@ -4957,6 +5148,16 @@ async function runToolLoop(request) {
4957
5148
  if (responseToolCalls.length === 0) {
4958
5149
  finalText = responseText;
4959
5150
  finalThoughts = reasoningSummary;
5151
+ const stepCompletedAtMs2 = Date.now();
5152
+ const timing2 = buildStepTiming({
5153
+ stepStartedAtMs,
5154
+ stepCompletedAtMs: stepCompletedAtMs2,
5155
+ modelCompletedAtMs,
5156
+ firstModelEventAtMs,
5157
+ schedulerMetrics,
5158
+ toolExecutionMs: 0,
5159
+ waitToolMs: 0
5160
+ });
4960
5161
  steps.push({
4961
5162
  step: steps.length + 1,
4962
5163
  modelVersion,
@@ -4964,7 +5165,8 @@ async function runToolLoop(request) {
4964
5165
  thoughts: reasoningSummary || void 0,
4965
5166
  toolCalls: [],
4966
5167
  usage: usageTokens,
4967
- costUsd: stepCostUsd
5168
+ costUsd: stepCostUsd,
5169
+ timing: timing2
4968
5170
  });
4969
5171
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
4970
5172
  }
@@ -5009,8 +5211,15 @@ async function runToolLoop(request) {
5009
5211
  })
5010
5212
  );
5011
5213
  const toolOutputs = [];
5214
+ let toolExecutionMs = 0;
5215
+ let waitToolMs = 0;
5012
5216
  for (const { entry, result, outputPayload } of callResults) {
5013
5217
  stepToolCalls.push({ ...result, callId: entry.call.call_id });
5218
+ const callDurationMs = toToolResultDuration(result);
5219
+ toolExecutionMs += callDurationMs;
5220
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5221
+ waitToolMs += callDurationMs;
5222
+ }
5014
5223
  if (entry.call.kind === "custom") {
5015
5224
  toolOutputs.push({
5016
5225
  type: "custom_tool_call_output",
@@ -5025,6 +5234,16 @@ async function runToolLoop(request) {
5025
5234
  });
5026
5235
  }
5027
5236
  }
5237
+ const stepCompletedAtMs = Date.now();
5238
+ const timing = buildStepTiming({
5239
+ stepStartedAtMs,
5240
+ stepCompletedAtMs,
5241
+ modelCompletedAtMs,
5242
+ firstModelEventAtMs,
5243
+ schedulerMetrics,
5244
+ toolExecutionMs,
5245
+ waitToolMs
5246
+ });
5028
5247
  steps.push({
5029
5248
  step: steps.length + 1,
5030
5249
  modelVersion,
@@ -5032,7 +5251,8 @@ async function runToolLoop(request) {
5032
5251
  thoughts: reasoningSummary || void 0,
5033
5252
  toolCalls: stepToolCalls,
5034
5253
  usage: usageTokens,
5035
- costUsd: stepCostUsd
5254
+ costUsd: stepCostUsd,
5255
+ timing
5036
5256
  });
5037
5257
  previousResponseId = finalResponse.id;
5038
5258
  input = toolOutputs;
@@ -5053,6 +5273,13 @@ async function runToolLoop(request) {
5053
5273
  let input = [...toolLoopInput.input];
5054
5274
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5055
5275
  const turn = stepIndex + 1;
5276
+ const stepStartedAtMs = Date.now();
5277
+ let firstModelEventAtMs;
5278
+ const markFirstModelEvent = () => {
5279
+ if (firstModelEventAtMs === void 0) {
5280
+ firstModelEventAtMs = Date.now();
5281
+ }
5282
+ };
5056
5283
  const response = await collectChatGptCodexResponseWithRetry({
5057
5284
  sessionId: conversationId,
5058
5285
  request: {
@@ -5075,13 +5302,16 @@ async function runToolLoop(request) {
5075
5302
  signal: request.signal,
5076
5303
  onDelta: (delta) => {
5077
5304
  if (delta.thoughtDelta) {
5305
+ markFirstModelEvent();
5078
5306
  request.onEvent?.({ type: "delta", channel: "thought", text: delta.thoughtDelta });
5079
5307
  }
5080
5308
  if (delta.textDelta) {
5309
+ markFirstModelEvent();
5081
5310
  request.onEvent?.({ type: "delta", channel: "response", text: delta.textDelta });
5082
5311
  }
5083
5312
  }
5084
5313
  });
5314
+ const modelCompletedAtMs = Date.now();
5085
5315
  const modelVersion = response.model ? `chatgpt-${response.model}` : request.model;
5086
5316
  const usageTokens = extractChatGptUsageTokens(response.usage);
5087
5317
  const stepCostUsd = estimateCallCostUsd({
@@ -5096,6 +5326,15 @@ async function runToolLoop(request) {
5096
5326
  if (responseToolCalls.length === 0) {
5097
5327
  finalText = responseText;
5098
5328
  finalThoughts = reasoningSummaryText;
5329
+ const stepCompletedAtMs2 = Date.now();
5330
+ const timing2 = buildStepTiming({
5331
+ stepStartedAtMs,
5332
+ stepCompletedAtMs: stepCompletedAtMs2,
5333
+ modelCompletedAtMs,
5334
+ firstModelEventAtMs,
5335
+ toolExecutionMs: 0,
5336
+ waitToolMs: 0
5337
+ });
5099
5338
  steps.push({
5100
5339
  step: steps.length + 1,
5101
5340
  modelVersion,
@@ -5103,7 +5342,8 @@ async function runToolLoop(request) {
5103
5342
  thoughts: reasoningSummaryText || void 0,
5104
5343
  toolCalls: [],
5105
5344
  usage: usageTokens,
5106
- costUsd: stepCostUsd
5345
+ costUsd: stepCostUsd,
5346
+ timing: timing2
5107
5347
  });
5108
5348
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5109
5349
  }
@@ -5143,8 +5383,15 @@ async function runToolLoop(request) {
5143
5383
  );
5144
5384
  })
5145
5385
  );
5386
+ let toolExecutionMs = 0;
5387
+ let waitToolMs = 0;
5146
5388
  for (const { entry, result, outputPayload } of callResults) {
5147
5389
  toolCalls.push({ ...result, callId: entry.ids.callId });
5390
+ const callDurationMs = toToolResultDuration(result);
5391
+ toolExecutionMs += callDurationMs;
5392
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5393
+ waitToolMs += callDurationMs;
5394
+ }
5148
5395
  if (entry.call.kind === "custom") {
5149
5396
  toolOutputs.push({
5150
5397
  type: "custom_tool_call",
@@ -5175,6 +5422,15 @@ async function runToolLoop(request) {
5175
5422
  });
5176
5423
  }
5177
5424
  }
5425
+ const stepCompletedAtMs = Date.now();
5426
+ const timing = buildStepTiming({
5427
+ stepStartedAtMs,
5428
+ stepCompletedAtMs,
5429
+ modelCompletedAtMs,
5430
+ firstModelEventAtMs,
5431
+ toolExecutionMs,
5432
+ waitToolMs
5433
+ });
5178
5434
  steps.push({
5179
5435
  step: steps.length + 1,
5180
5436
  modelVersion,
@@ -5182,7 +5438,8 @@ async function runToolLoop(request) {
5182
5438
  thoughts: reasoningSummaryText || void 0,
5183
5439
  toolCalls,
5184
5440
  usage: usageTokens,
5185
- costUsd: stepCostUsd
5441
+ costUsd: stepCostUsd,
5442
+ timing
5186
5443
  });
5187
5444
  input = input.concat(toolOutputs);
5188
5445
  }
@@ -5198,18 +5455,29 @@ async function runToolLoop(request) {
5198
5455
  const messages = toFireworksMessages(contents);
5199
5456
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5200
5457
  const turn = stepIndex + 1;
5201
- const response = await runFireworksCall(async (client) => {
5202
- return await client.chat.completions.create(
5203
- {
5204
- model: providerInfo.model,
5205
- messages,
5206
- tools: fireworksTools,
5207
- tool_choice: "auto",
5208
- parallel_tool_calls: true
5209
- },
5210
- { signal: request.signal }
5211
- );
5212
- }, providerInfo.model);
5458
+ const stepStartedAtMs = Date.now();
5459
+ let schedulerMetrics;
5460
+ const response = await runFireworksCall(
5461
+ async (client) => {
5462
+ return await client.chat.completions.create(
5463
+ {
5464
+ model: providerInfo.model,
5465
+ messages,
5466
+ tools: fireworksTools,
5467
+ tool_choice: "auto",
5468
+ parallel_tool_calls: true
5469
+ },
5470
+ { signal: request.signal }
5471
+ );
5472
+ },
5473
+ providerInfo.model,
5474
+ {
5475
+ onSettled: (metrics) => {
5476
+ schedulerMetrics = metrics;
5477
+ }
5478
+ }
5479
+ );
5480
+ const modelCompletedAtMs = Date.now();
5213
5481
  const modelVersion = typeof response.model === "string" ? response.model : request.model;
5214
5482
  request.onEvent?.({ type: "model", modelVersion });
5215
5483
  const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
@@ -5240,6 +5508,15 @@ async function runToolLoop(request) {
5240
5508
  if (responseToolCalls.length === 0) {
5241
5509
  finalText = responseText;
5242
5510
  finalThoughts = "";
5511
+ const stepCompletedAtMs2 = Date.now();
5512
+ const timing2 = buildStepTiming({
5513
+ stepStartedAtMs,
5514
+ stepCompletedAtMs: stepCompletedAtMs2,
5515
+ modelCompletedAtMs,
5516
+ schedulerMetrics,
5517
+ toolExecutionMs: 0,
5518
+ waitToolMs: 0
5519
+ });
5243
5520
  steps.push({
5244
5521
  step: steps.length + 1,
5245
5522
  modelVersion,
@@ -5247,7 +5524,8 @@ async function runToolLoop(request) {
5247
5524
  thoughts: void 0,
5248
5525
  toolCalls: [],
5249
5526
  usage: usageTokens,
5250
- costUsd: stepCostUsd
5527
+ costUsd: stepCostUsd,
5528
+ timing: timing2
5251
5529
  });
5252
5530
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5253
5531
  }
@@ -5282,8 +5560,15 @@ async function runToolLoop(request) {
5282
5560
  );
5283
5561
  const assistantToolCalls = [];
5284
5562
  const toolMessages = [];
5563
+ let toolExecutionMs = 0;
5564
+ let waitToolMs = 0;
5285
5565
  for (const { entry, result, outputPayload } of callResults) {
5286
5566
  stepToolCalls.push({ ...result, callId: entry.call.id });
5567
+ const callDurationMs = toToolResultDuration(result);
5568
+ toolExecutionMs += callDurationMs;
5569
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5570
+ waitToolMs += callDurationMs;
5571
+ }
5287
5572
  assistantToolCalls.push({
5288
5573
  id: entry.call.id,
5289
5574
  type: "function",
@@ -5298,6 +5583,15 @@ async function runToolLoop(request) {
5298
5583
  content: mergeToolOutput(outputPayload)
5299
5584
  });
5300
5585
  }
5586
+ const stepCompletedAtMs = Date.now();
5587
+ const timing = buildStepTiming({
5588
+ stepStartedAtMs,
5589
+ stepCompletedAtMs,
5590
+ modelCompletedAtMs,
5591
+ schedulerMetrics,
5592
+ toolExecutionMs,
5593
+ waitToolMs
5594
+ });
5301
5595
  steps.push({
5302
5596
  step: steps.length + 1,
5303
5597
  modelVersion,
@@ -5305,7 +5599,8 @@ async function runToolLoop(request) {
5305
5599
  thoughts: void 0,
5306
5600
  toolCalls: stepToolCalls,
5307
5601
  usage: usageTokens,
5308
- costUsd: stepCostUsd
5602
+ costUsd: stepCostUsd,
5603
+ timing
5309
5604
  });
5310
5605
  messages.push({
5311
5606
  role: "assistant",
@@ -5321,6 +5616,14 @@ async function runToolLoop(request) {
5321
5616
  const geminiTools = geminiNativeTools ? geminiNativeTools.concat(geminiFunctionTools) : geminiFunctionTools;
5322
5617
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
5323
5618
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5619
+ const stepStartedAtMs = Date.now();
5620
+ let firstModelEventAtMs;
5621
+ let schedulerMetrics;
5622
+ const markFirstModelEvent = () => {
5623
+ if (firstModelEventAtMs === void 0) {
5624
+ firstModelEventAtMs = Date.now();
5625
+ }
5626
+ };
5324
5627
  const config = {
5325
5628
  maxOutputTokens: 32e3,
5326
5629
  tools: geminiTools,
@@ -5332,81 +5635,91 @@ async function runToolLoop(request) {
5332
5635
  thinkingConfig: resolveGeminiThinkingConfig(request.model)
5333
5636
  };
5334
5637
  const onEvent = request.onEvent;
5335
- const response = await runGeminiCall(async (client) => {
5336
- const stream = await client.models.generateContentStream({
5337
- model: request.model,
5338
- contents: geminiContents,
5339
- config
5340
- });
5341
- let responseText = "";
5342
- let thoughtsText = "";
5343
- const modelParts = [];
5344
- const functionCalls = [];
5345
- const seenFunctionCallIds = /* @__PURE__ */ new Set();
5346
- const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5347
- let latestUsageMetadata;
5348
- let resolvedModelVersion;
5349
- for await (const chunk of stream) {
5350
- if (chunk.modelVersion) {
5351
- resolvedModelVersion = chunk.modelVersion;
5352
- onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5353
- }
5354
- if (chunk.usageMetadata) {
5355
- latestUsageMetadata = chunk.usageMetadata;
5356
- }
5357
- const candidates = chunk.candidates;
5358
- if (!candidates || candidates.length === 0) {
5359
- continue;
5360
- }
5361
- const primary = candidates[0];
5362
- const parts = primary?.content?.parts;
5363
- if (!parts || parts.length === 0) {
5364
- continue;
5365
- }
5366
- for (const part of parts) {
5367
- modelParts.push(part);
5368
- const call = part.functionCall;
5369
- if (call) {
5370
- const id = typeof call.id === "string" ? call.id : "";
5371
- const shouldAdd = (() => {
5372
- if (id.length > 0) {
5373
- if (seenFunctionCallIds.has(id)) {
5638
+ const response = await runGeminiCall(
5639
+ async (client) => {
5640
+ const stream = await client.models.generateContentStream({
5641
+ model: request.model,
5642
+ contents: geminiContents,
5643
+ config
5644
+ });
5645
+ let responseText = "";
5646
+ let thoughtsText = "";
5647
+ const modelParts = [];
5648
+ const functionCalls = [];
5649
+ const seenFunctionCallIds = /* @__PURE__ */ new Set();
5650
+ const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5651
+ let latestUsageMetadata;
5652
+ let resolvedModelVersion;
5653
+ for await (const chunk of stream) {
5654
+ markFirstModelEvent();
5655
+ if (chunk.modelVersion) {
5656
+ resolvedModelVersion = chunk.modelVersion;
5657
+ onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5658
+ }
5659
+ if (chunk.usageMetadata) {
5660
+ latestUsageMetadata = chunk.usageMetadata;
5661
+ }
5662
+ const candidates = chunk.candidates;
5663
+ if (!candidates || candidates.length === 0) {
5664
+ continue;
5665
+ }
5666
+ const primary = candidates[0];
5667
+ const parts = primary?.content?.parts;
5668
+ if (!parts || parts.length === 0) {
5669
+ continue;
5670
+ }
5671
+ for (const part of parts) {
5672
+ modelParts.push(part);
5673
+ const call = part.functionCall;
5674
+ if (call) {
5675
+ const id = typeof call.id === "string" ? call.id : "";
5676
+ const shouldAdd = (() => {
5677
+ if (id.length > 0) {
5678
+ if (seenFunctionCallIds.has(id)) {
5679
+ return false;
5680
+ }
5681
+ seenFunctionCallIds.add(id);
5682
+ return true;
5683
+ }
5684
+ const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5685
+ if (seenFunctionCallKeys.has(key)) {
5374
5686
  return false;
5375
5687
  }
5376
- seenFunctionCallIds.add(id);
5688
+ seenFunctionCallKeys.add(key);
5377
5689
  return true;
5690
+ })();
5691
+ if (shouldAdd) {
5692
+ functionCalls.push(call);
5378
5693
  }
5379
- const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5380
- if (seenFunctionCallKeys.has(key)) {
5381
- return false;
5382
- }
5383
- seenFunctionCallKeys.add(key);
5384
- return true;
5385
- })();
5386
- if (shouldAdd) {
5387
- functionCalls.push(call);
5388
5694
  }
5389
- }
5390
- if (typeof part.text === "string" && part.text.length > 0) {
5391
- if (part.thought) {
5392
- thoughtsText += part.text;
5393
- onEvent?.({ type: "delta", channel: "thought", text: part.text });
5394
- } else {
5395
- responseText += part.text;
5396
- onEvent?.({ type: "delta", channel: "response", text: part.text });
5695
+ if (typeof part.text === "string" && part.text.length > 0) {
5696
+ if (part.thought) {
5697
+ thoughtsText += part.text;
5698
+ onEvent?.({ type: "delta", channel: "thought", text: part.text });
5699
+ } else {
5700
+ responseText += part.text;
5701
+ onEvent?.({ type: "delta", channel: "response", text: part.text });
5702
+ }
5397
5703
  }
5398
5704
  }
5399
5705
  }
5706
+ return {
5707
+ responseText,
5708
+ thoughtsText,
5709
+ functionCalls,
5710
+ modelParts,
5711
+ usageMetadata: latestUsageMetadata,
5712
+ modelVersion: resolvedModelVersion ?? request.model
5713
+ };
5714
+ },
5715
+ request.model,
5716
+ {
5717
+ onSettled: (metrics) => {
5718
+ schedulerMetrics = metrics;
5719
+ }
5400
5720
  }
5401
- return {
5402
- responseText,
5403
- thoughtsText,
5404
- functionCalls,
5405
- modelParts,
5406
- usageMetadata: latestUsageMetadata,
5407
- modelVersion: resolvedModelVersion ?? request.model
5408
- };
5409
- }, request.model);
5721
+ );
5722
+ const modelCompletedAtMs = Date.now();
5410
5723
  const usageTokens = extractGeminiUsageTokens(response.usageMetadata);
5411
5724
  const modelVersion = response.modelVersion ?? request.model;
5412
5725
  const stepCostUsd = estimateCallCostUsd({
@@ -5418,6 +5731,16 @@ async function runToolLoop(request) {
5418
5731
  if (response.functionCalls.length === 0) {
5419
5732
  finalText = response.responseText.trim();
5420
5733
  finalThoughts = response.thoughtsText.trim();
5734
+ const stepCompletedAtMs2 = Date.now();
5735
+ const timing2 = buildStepTiming({
5736
+ stepStartedAtMs,
5737
+ stepCompletedAtMs: stepCompletedAtMs2,
5738
+ modelCompletedAtMs,
5739
+ firstModelEventAtMs,
5740
+ schedulerMetrics,
5741
+ toolExecutionMs: 0,
5742
+ waitToolMs: 0
5743
+ });
5421
5744
  steps.push({
5422
5745
  step: steps.length + 1,
5423
5746
  modelVersion,
@@ -5425,7 +5748,8 @@ async function runToolLoop(request) {
5425
5748
  thoughts: finalThoughts || void 0,
5426
5749
  toolCalls: [],
5427
5750
  usage: usageTokens,
5428
- costUsd: stepCostUsd
5751
+ costUsd: stepCostUsd,
5752
+ timing: timing2
5429
5753
  });
5430
5754
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5431
5755
  }
@@ -5475,8 +5799,15 @@ async function runToolLoop(request) {
5475
5799
  );
5476
5800
  })
5477
5801
  );
5802
+ let toolExecutionMs = 0;
5803
+ let waitToolMs = 0;
5478
5804
  for (const { entry, result, outputPayload } of callResults) {
5479
5805
  toolCalls.push({ ...result, callId: entry.call.id });
5806
+ const callDurationMs = toToolResultDuration(result);
5807
+ toolExecutionMs += callDurationMs;
5808
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5809
+ waitToolMs += callDurationMs;
5810
+ }
5480
5811
  const responsePayload = isPlainRecord(outputPayload) ? outputPayload : { output: outputPayload };
5481
5812
  responseParts.push({
5482
5813
  functionResponse: {
@@ -5486,6 +5817,16 @@ async function runToolLoop(request) {
5486
5817
  }
5487
5818
  });
5488
5819
  }
5820
+ const stepCompletedAtMs = Date.now();
5821
+ const timing = buildStepTiming({
5822
+ stepStartedAtMs,
5823
+ stepCompletedAtMs,
5824
+ modelCompletedAtMs,
5825
+ firstModelEventAtMs,
5826
+ schedulerMetrics,
5827
+ toolExecutionMs,
5828
+ waitToolMs
5829
+ });
5489
5830
  steps.push({
5490
5831
  step: steps.length + 1,
5491
5832
  modelVersion,
@@ -5493,7 +5834,8 @@ async function runToolLoop(request) {
5493
5834
  thoughts: response.thoughtsText.trim() || void 0,
5494
5835
  toolCalls,
5495
5836
  usage: usageTokens,
5496
- costUsd: stepCostUsd
5837
+ costUsd: stepCostUsd,
5838
+ timing
5497
5839
  });
5498
5840
  geminiContents.push({ role: "user", parts: responseParts });
5499
5841
  }
@@ -5744,6 +6086,9 @@ function appendMarkdownSourcesSection(value, sources) {
5744
6086
  ${lines}`;
5745
6087
  }
5746
6088
 
6089
+ // src/agent.ts
6090
+ var import_node_crypto3 = require("crypto");
6091
+
5747
6092
  // src/agent/subagents.ts
5748
6093
  var import_node_crypto2 = require("crypto");
5749
6094
  var import_zod4 = require("zod");
@@ -6205,7 +6550,12 @@ function startRun(agent, options) {
6205
6550
  }
6206
6551
  const input = [...agent.history, { role: "user", content: nextInput }];
6207
6552
  const abortController = new AbortController();
6553
+ const runStartedAtMs = Date.now();
6208
6554
  agent.abortController = abortController;
6555
+ if (agent.firstRunStartedAtMs === void 0) {
6556
+ agent.firstRunStartedAtMs = runStartedAtMs;
6557
+ }
6558
+ agent.lastRunStartedAtMs = runStartedAtMs;
6209
6559
  agent.lastError = void 0;
6210
6560
  setLifecycle(
6211
6561
  agent,
@@ -6249,6 +6599,9 @@ function startRun(agent, options) {
6249
6599
  agent.lastError = message;
6250
6600
  setLifecycle(agent, "failed", "run_failed", `Subagent ${agent.id} failed: ${message}`);
6251
6601
  } finally {
6602
+ const runCompletedAtMs = Date.now();
6603
+ agent.lastRunCompletedAtMs = runCompletedAtMs;
6604
+ agent.lastRunDurationMs = Math.max(0, runCompletedAtMs - runStartedAtMs);
6252
6605
  agent.runningPromise = void 0;
6253
6606
  agent.abortController = void 0;
6254
6607
  }
@@ -6324,6 +6677,13 @@ function buildSnapshot(agent) {
6324
6677
  turns: agent.turns,
6325
6678
  created_at: new Date(agent.createdAtMs).toISOString(),
6326
6679
  updated_at: new Date(agent.updatedAtMs).toISOString(),
6680
+ ...agent.firstRunStartedAtMs ? {
6681
+ first_run_started_at: new Date(agent.firstRunStartedAtMs).toISOString(),
6682
+ spawn_startup_latency_ms: Math.max(0, agent.firstRunStartedAtMs - agent.createdAtMs)
6683
+ } : {},
6684
+ ...agent.lastRunStartedAtMs ? { last_run_started_at: new Date(agent.lastRunStartedAtMs).toISOString() } : {},
6685
+ ...agent.lastRunCompletedAtMs ? { last_run_completed_at: new Date(agent.lastRunCompletedAtMs).toISOString() } : {},
6686
+ ...typeof agent.lastRunDurationMs === "number" ? { last_run_duration_ms: Math.max(0, agent.lastRunDurationMs) } : {},
6327
6687
  ...agent.lastError ? { last_error: agent.lastError } : {},
6328
6688
  ...agent.lastResult ? {
6329
6689
  last_result: {
@@ -8153,7 +8513,12 @@ function isNoEntError(error) {
8153
8513
 
8154
8514
  // src/agent.ts
8155
8515
  async function runAgentLoop(request) {
8156
- return await runAgentLoopInternal(request, { depth: 0 });
8516
+ const telemetry = createAgentTelemetrySession(request.telemetry);
8517
+ try {
8518
+ return await runAgentLoopInternal(request, { depth: 0, telemetry });
8519
+ } finally {
8520
+ await telemetry?.flush();
8521
+ }
8157
8522
  }
8158
8523
  async function runAgentLoopInternal(request, context) {
8159
8524
  const {
@@ -8163,15 +8528,21 @@ async function runAgentLoopInternal(request, context) {
8163
8528
  subagentTool,
8164
8529
  subagent_tool,
8165
8530
  subagents,
8531
+ telemetry,
8166
8532
  ...toolLoopRequest
8167
8533
  } = request;
8534
+ const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
8535
+ const runId = randomRunId();
8536
+ const startedAtMs = Date.now();
8168
8537
  const filesystemSelection = filesystemTool ?? filesystem_tool;
8169
8538
  const subagentSelection = subagentTool ?? subagent_tool ?? subagents;
8170
8539
  const filesystemTools = resolveFilesystemTools(request.model, filesystemSelection);
8171
8540
  const resolvedSubagentConfig = resolveSubagentToolConfig(subagentSelection, context.depth);
8172
8541
  const subagentController = createSubagentController({
8542
+ runId,
8173
8543
  model: request.model,
8174
8544
  depth: context.depth,
8545
+ telemetry: telemetrySession,
8175
8546
  customTools: customTools ?? {},
8176
8547
  filesystemSelection,
8177
8548
  subagentSelection,
@@ -8192,12 +8563,54 @@ async function runAgentLoopInternal(request, context) {
8192
8563
  resolvedSubagentConfig,
8193
8564
  context.depth
8194
8565
  );
8566
+ const emitTelemetry = createAgentTelemetryEmitter({
8567
+ session: telemetrySession,
8568
+ runId,
8569
+ parentRunId: context.parentRunId,
8570
+ depth: context.depth,
8571
+ model: request.model
8572
+ });
8573
+ emitTelemetry({
8574
+ type: "agent.run.started",
8575
+ inputMode: typeof request.input === "string" ? "string" : "messages",
8576
+ customToolCount: Object.keys(customTools ?? {}).length,
8577
+ mergedToolCount: Object.keys(mergedTools).length,
8578
+ filesystemToolsEnabled: Object.keys(filesystemTools).length > 0,
8579
+ subagentToolsEnabled: resolvedSubagentConfig.enabled
8580
+ });
8581
+ const sourceOnEvent = toolLoopRequest.onEvent;
8582
+ const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
8583
+ const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
8584
+ sourceOnEvent?.(event);
8585
+ if (includeLlmStreamEvents) {
8586
+ emitTelemetry({ type: "agent.run.stream", event });
8587
+ }
8588
+ } : void 0;
8195
8589
  try {
8196
- return await runToolLoop({
8590
+ const result = await runToolLoop({
8197
8591
  ...toolLoopRequest,
8198
8592
  ...instructions ? { instructions } : {},
8593
+ ...wrappedOnEvent ? { onEvent: wrappedOnEvent } : {},
8199
8594
  tools: mergedTools
8200
8595
  });
8596
+ emitTelemetry({
8597
+ type: "agent.run.completed",
8598
+ success: true,
8599
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8600
+ stepCount: result.steps.length,
8601
+ toolCallCount: countToolCalls(result),
8602
+ totalCostUsd: result.totalCostUsd,
8603
+ usage: summarizeResultUsage(result)
8604
+ });
8605
+ return result;
8606
+ } catch (error) {
8607
+ emitTelemetry({
8608
+ type: "agent.run.completed",
8609
+ success: false,
8610
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8611
+ error: toErrorMessage2(error)
8612
+ });
8613
+ throw error;
8201
8614
  } finally {
8202
8615
  await subagentController?.closeAll();
8203
8616
  }
@@ -8260,7 +8673,11 @@ function createSubagentController(params) {
8260
8673
  openAiReasoningEffort: params.toolLoopRequest.openAiReasoningEffort,
8261
8674
  signal: subagentRequest.signal
8262
8675
  },
8263
- { depth: params.depth + 1 }
8676
+ {
8677
+ depth: params.depth + 1,
8678
+ parentRunId: params.runId,
8679
+ telemetry: params.telemetry
8680
+ }
8264
8681
  );
8265
8682
  }
8266
8683
  });
@@ -8311,6 +8728,129 @@ function trimToUndefined2(value) {
8311
8728
  const trimmed = value?.trim();
8312
8729
  return trimmed && trimmed.length > 0 ? trimmed : void 0;
8313
8730
  }
8731
/**
 * Creates an identifier for a single agent run.
 *
 * @returns {string} 8 random bytes rendered as a 16-character lowercase hex string.
 */
function randomRunId() {
  const { randomBytes } = import_node_crypto3;
  const entropy = randomBytes(8);
  return entropy.toString("hex");
}
8734
/**
 * Formats the current wall-clock time for telemetry events.
 *
 * @returns {string} The current instant as an ISO 8601 UTC string.
 */
function toIsoNow() {
  const now = new Date();
  return now.toISOString();
}
8737
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * Resolution order:
 *   1. a string is returned as-is;
 *   2. an `Error` instance with a non-empty `message` yields that message;
 *   3. any other object carrying a non-empty string `message` yields that
 *      message. This covers error-like values that fail `instanceof Error`,
 *      such as errors created in another realm or plain `{ message }`
 *      rejection payloads, which would otherwise be reported as unknown;
 *   4. everything else falls back to "Unknown error".
 *
 * @param {unknown} error - The value caught from a `try`/`catch` or rejection.
 * @returns {string} The best available description of the failure.
 */
function toErrorMessage2(error) {
  if (typeof error === "string") {
    return error;
  }
  if (error instanceof Error && error.message) {
    return error.message;
  }
  // Duck-type error-like objects so their message is not lost.
  if (typeof error === "object" && error !== null) {
    const { message } = error;
    if (typeof message === "string" && message.length > 0) {
      return message;
    }
  }
  return "Unknown error";
}
8746
/**
 * Totals the tool calls recorded across every step of a run result.
 *
 * @param {{ steps: Array<{ toolCalls: unknown[] }> }} result - Result whose steps each carry a `toolCalls` array.
 * @returns {number} Sum of `toolCalls.length` over all steps (0 when there are no steps).
 */
function countToolCalls(result) {
  return result.steps.reduce((total, step) => total + step.toolCalls.length, 0);
}
8753
/**
 * Adds one usage counter onto a running total, ignoring unusable values.
 *
 * A value is usable only when it is a finite number; negative values are
 * clamped to zero before being added.
 *
 * @param {number | undefined} current - Running total so far (may be unset).
 * @param {number | undefined} next - Counter from the step being folded in.
 * @returns {number | undefined} `current` unchanged when `next` is unusable;
 *   otherwise the clamped sum (or just the clamped `next` when `current` is unusable).
 */
function sumUsageValue(current, next) {
  const isUsable = (value) => typeof value === "number" && Number.isFinite(value);
  if (!isUsable(next)) {
    return current;
  }
  const addend = Math.max(0, next);
  return isUsable(current) ? Math.max(0, current) + addend : addend;
}
8763
/**
 * Aggregates per-step token usage into a single usage summary.
 *
 * Steps without a `usage` object are skipped. Each counter is folded with
 * `sumUsageValue`, so a counter stays `undefined` until some step reports it.
 *
 * @param {{ steps: Array<{ usage?: object }> }} result - Run result to summarize.
 * @returns {object | undefined} Summary with all seven usage keys, or
 *   `undefined` when no step carried usage data.
 */
function summarizeResultUsage(result) {
  // Key order mirrors the shape of the summary object that is emitted.
  const usageFields = [
    "promptTokens",
    "cachedTokens",
    "responseTokens",
    "responseImageTokens",
    "thinkingTokens",
    "totalTokens",
    "toolUsePromptTokens"
  ];
  let totals;
  for (const { usage } of result.steps) {
    if (!usage) {
      continue;
    }
    const previous = totals;
    totals = {};
    for (const field of usageFields) {
      totals[field] = sumUsageValue(previous?.[field], usage[field]);
    }
  }
  return totals;
}
8782
/**
 * Tests whether a value is a thenable.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True when `value` is a non-null object or function with a callable `then`.
 */
function isPromiseLike(value) {
  if (value === null) {
    return false;
  }
  const kind = typeof value;
  if (kind !== "object" && kind !== "function") {
    return false;
  }
  return typeof value.then === "function";
}
8785
/**
 * Tests whether a value can be used as a telemetry sink.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True when `value` is a non-null object exposing a callable `emit`.
 */
function isAgentTelemetrySink(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return typeof value.emit === "function";
}
8788
/**
 * Normalizes the `telemetry` option into a config object with a `sink`.
 *
 * Accepts either a bare sink (wrapped as `{ sink }`) or a config object whose
 * `sink` property is a sink (returned as-is).
 *
 * @param {unknown} telemetry - Telemetry option supplied by the caller.
 * @returns {object | undefined} Normalized config, or `undefined` when `telemetry` is falsy.
 * @throws {Error} When `telemetry` is truthy but neither a sink nor a config holding one.
 */
function resolveTelemetrySelection(telemetry) {
  if (telemetry) {
    if (isAgentTelemetrySink(telemetry)) {
      return { sink: telemetry };
    }
    if (isAgentTelemetrySink(telemetry.sink)) {
      return telemetry;
    }
    throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
  }
  return void 0;
}
8800
/**
 * Builds a best-effort telemetry session around a caller-supplied sink.
 *
 * Telemetry must never break an agent run, so exceptions thrown by the sink
 * and rejections of promises it returns are deliberately swallowed.
 *
 * @param {unknown} telemetry - Telemetry option (a sink, a config with `sink`, or falsy).
 * @returns {{ includeLlmStreamEvents: boolean, emit: Function, flush: Function } | undefined}
 *   The session, or `undefined` when no telemetry was configured.
 */
function createAgentTelemetrySession(telemetry) {
  const selection = resolveTelemetrySelection(telemetry);
  if (!selection) {
    return void 0;
  }

  // Asynchronous deliveries still in flight; drained by flush().
  const inFlight = /* @__PURE__ */ new Set();

  const emit = (event) => {
    try {
      const outcome = selection.sink.emit(event);
      if (!isPromiseLike(outcome)) {
        return;
      }
      // Normalize to a promise that never rejects so tracking stays safe.
      const delivery = Promise.resolve(outcome).then(() => void 0).catch(() => void 0);
      inFlight.add(delivery);
      delivery.finally(() => {
        inFlight.delete(delivery);
      });
    } catch {
    }
  };

  const flush = async () => {
    // Loop because awaiting may let new deliveries be queued meanwhile.
    while (inFlight.size > 0) {
      await Promise.allSettled(Array.from(inFlight));
    }
    if (typeof selection.sink.flush !== "function") {
      return;
    }
    try {
      await selection.sink.flush();
    } catch {
    }
  };

  return {
    includeLlmStreamEvents: selection.includeLlmStreamEvents === true,
    emit,
    flush
  };
}
8839
/**
 * Creates an emitter that stamps events with run metadata before forwarding
 * them to the telemetry session.
 *
 * The metadata (`timestamp`, `runId`, `depth`, `model`, and `parentRunId`
 * when set) overrides same-named fields on the incoming event.
 *
 * @param {{ session?: { emit: Function }, runId: string, parentRunId?: string, depth: number, model: string }} params
 *   Run context; when `session` is missing the emitter is a no-op.
 * @returns {(event: object) => void} Function that enriches and emits one event.
 */
function createAgentTelemetryEmitter(params) {
  return (event) => {
    const { session } = params;
    if (!session) {
      return;
    }
    const envelope = {
      ...event,
      timestamp: toIsoNow(),
      runId: params.runId
    };
    // Only runs spawned by another run carry a parent id.
    if (params.parentRunId) {
      envelope.parentRunId = params.parentRunId;
    }
    envelope.depth = params.depth;
    envelope.model = params.model;
    session.emit(envelope);
  };
}
8314
8854
  // Annotate the CommonJS export names for ESM import in node:
8315
8855
  0 && (module.exports = {
8316
8856
  CHATGPT_MODEL_IDS,
@@ -8334,6 +8874,7 @@ function trimToUndefined2(value) {
8334
8874
  appendMarkdownSourcesSection,
8335
8875
  applyPatch,
8336
8876
  configureGemini,
8877
+ configureModelConcurrency,
8337
8878
  convertGooglePartsToLlmParts,
8338
8879
  createApplyPatchTool,
8339
8880
  createCodexApplyPatchTool,
@@ -8378,6 +8919,7 @@ function trimToUndefined2(value) {
8378
8919
  loadLocalEnv,
8379
8920
  parseJsonFromLlmText,
8380
8921
  refreshChatGptOauthToken,
8922
+ resetModelConcurrencyConfig,
8381
8923
  resolveFilesystemToolProfile,
8382
8924
  resolveFireworksModelId,
8383
8925
  runAgentLoop,