@ljoukov/llm 3.0.4 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1586,23 +1586,16 @@ function parseEventBlock(raw) {
1586
1586
  var MIN_MODEL_CONCURRENCY_CAP = 1;
1587
1587
  var MAX_MODEL_CONCURRENCY_CAP = 64;
1588
1588
  var DEFAULT_MODEL_CONCURRENCY_CAP = 3;
1589
- function parsePositiveInteger(raw) {
1590
- if (raw === void 0) {
1591
- return void 0;
1592
- }
1593
- const normalized = raw.trim();
1594
- if (!normalized) {
1595
- return void 0;
1596
- }
1597
- if (!/^-?\d+$/u.test(normalized)) {
1598
- return void 0;
1599
- }
1600
- const parsed = Number.parseInt(normalized, 10);
1601
- if (!Number.isFinite(parsed)) {
1602
- return void 0;
1603
- }
1604
- return parsed;
1605
- }
1589
+ var DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP = 12;
1590
+ var DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP = 4;
1591
+ var DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP = 2;
1592
+ var DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP = 6;
1593
+ var MODEL_CONCURRENCY_PROVIDERS = [
1594
+ "openai",
1595
+ "google",
1596
+ "fireworks"
1597
+ ];
1598
+ var configuredModelConcurrency = normalizeModelConcurrencyConfig({});
1606
1599
  function clampModelConcurrencyCap(value) {
1607
1600
  if (!Number.isFinite(value)) {
1608
1601
  return DEFAULT_MODEL_CONCURRENCY_CAP;
@@ -1616,30 +1609,94 @@ function clampModelConcurrencyCap(value) {
1616
1609
  }
1617
1610
  return rounded;
1618
1611
  }
1619
- function normalizeModelIdForEnv(modelId) {
1620
- return modelId.trim().replace(/[^A-Za-z0-9]+/gu, "_").replace(/^_+|_+$/gu, "").toUpperCase();
1612
+ function normalizeModelIdForConfig(modelId) {
1613
+ return modelId.trim().toLowerCase();
1621
1614
  }
1622
- function resolveModelConcurrencyCap(options) {
1623
- const env = options.env ?? process.env;
1624
- const providerPrefix = options.providerEnvPrefix;
1625
- const defaultCap = clampModelConcurrencyCap(options.defaultCap ?? DEFAULT_MODEL_CONCURRENCY_CAP);
1626
- const normalizedModelId = options.modelId ? normalizeModelIdForEnv(options.modelId) : "";
1627
- const candidateKeys = [
1628
- ...normalizedModelId ? [
1629
- `${providerPrefix}_MAX_PARALLEL_REQUESTS_MODEL_${normalizedModelId}`,
1630
- `LLM_MAX_PARALLEL_REQUESTS_MODEL_${normalizedModelId}`
1631
- ] : [],
1632
- `${providerPrefix}_MAX_PARALLEL_REQUESTS_PER_MODEL`,
1633
- "LLM_MAX_PARALLEL_REQUESTS_PER_MODEL"
1634
- ];
1635
- for (const key of candidateKeys) {
1636
- const parsed = parsePositiveInteger(env[key]);
1637
- if (parsed === void 0) {
1615
+ function normalizeCap(value) {
1616
+ if (value === void 0 || !Number.isFinite(value)) {
1617
+ return void 0;
1618
+ }
1619
+ return clampModelConcurrencyCap(value);
1620
+ }
1621
+ function normalizeModelCapMap(caps) {
1622
+ const normalized = /* @__PURE__ */ new Map();
1623
+ if (!caps) {
1624
+ return normalized;
1625
+ }
1626
+ for (const [modelId, cap] of Object.entries(caps)) {
1627
+ const modelKey = normalizeModelIdForConfig(modelId);
1628
+ if (!modelKey) {
1638
1629
  continue;
1639
1630
  }
1640
- return clampModelConcurrencyCap(parsed);
1631
+ const normalizedCap = normalizeCap(cap);
1632
+ if (normalizedCap === void 0) {
1633
+ continue;
1634
+ }
1635
+ normalized.set(modelKey, normalizedCap);
1636
+ }
1637
+ return normalized;
1638
+ }
1639
+ function normalizeModelConcurrencyConfig(config) {
1640
+ const providerCaps = {};
1641
+ const providerModelCaps = {
1642
+ openai: /* @__PURE__ */ new Map(),
1643
+ google: /* @__PURE__ */ new Map(),
1644
+ fireworks: /* @__PURE__ */ new Map()
1645
+ };
1646
+ for (const provider of MODEL_CONCURRENCY_PROVIDERS) {
1647
+ const providerCap = normalizeCap(config.providerCaps?.[provider]);
1648
+ if (providerCap !== void 0) {
1649
+ providerCaps[provider] = providerCap;
1650
+ }
1651
+ providerModelCaps[provider] = new Map(
1652
+ normalizeModelCapMap(config.providerModelCaps?.[provider])
1653
+ );
1641
1654
  }
1642
- return defaultCap;
1655
+ return {
1656
+ globalCap: normalizeCap(config.globalCap),
1657
+ providerCaps,
1658
+ modelCaps: normalizeModelCapMap(config.modelCaps),
1659
+ providerModelCaps
1660
+ };
1661
+ }
1662
+ function resolveDefaultProviderCap(provider, modelId) {
1663
+ if (provider === "openai") {
1664
+ return DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP;
1665
+ }
1666
+ if (provider === "google") {
1667
+ return modelId?.includes("preview") ? DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP : DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP;
1668
+ }
1669
+ return DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP;
1670
+ }
1671
+ function configureModelConcurrency(config = {}) {
1672
+ configuredModelConcurrency = normalizeModelConcurrencyConfig(config);
1673
+ }
1674
+ function resetModelConcurrencyConfig() {
1675
+ configuredModelConcurrency = normalizeModelConcurrencyConfig({});
1676
+ }
1677
+ function resolveModelConcurrencyCap(options) {
1678
+ const modelId = options.modelId ? normalizeModelIdForConfig(options.modelId) : void 0;
1679
+ const config = options.config ? normalizeModelConcurrencyConfig(options.config) : configuredModelConcurrency;
1680
+ const providerModelCap = modelId ? config.providerModelCaps[options.provider].get(modelId) : void 0;
1681
+ if (providerModelCap !== void 0) {
1682
+ return providerModelCap;
1683
+ }
1684
+ const modelCap = modelId ? config.modelCaps.get(modelId) : void 0;
1685
+ if (modelCap !== void 0) {
1686
+ return modelCap;
1687
+ }
1688
+ const providerCap = config.providerCaps[options.provider];
1689
+ if (providerCap !== void 0) {
1690
+ return providerCap;
1691
+ }
1692
+ if (config.globalCap !== void 0) {
1693
+ return config.globalCap;
1694
+ }
1695
+ const defaultCap = normalizeCap(options.defaultCap);
1696
+ if (defaultCap !== void 0) {
1697
+ return defaultCap;
1698
+ }
1699
+ return resolveDefaultProviderCap(options.provider, modelId);
1643
1700
  }
1644
1701
 
1645
1702
  // src/utils/scheduler.ts
@@ -1748,12 +1805,20 @@ function createCallScheduler(options = {}) {
1748
1805
  release?.();
1749
1806
  }
1750
1807
  }
1751
- async function attemptWithRetries(fn, attempt) {
1808
+ async function attemptWithRetries(fn, attempt, state) {
1752
1809
  try {
1810
+ const spacingStartedAtMs = Date.now();
1753
1811
  await applyStartSpacing();
1812
+ const callStartedAtMs = Date.now();
1813
+ state.schedulerDelayMs += Math.max(0, callStartedAtMs - spacingStartedAtMs);
1814
+ if (state.startedAtMs === void 0) {
1815
+ state.startedAtMs = callStartedAtMs;
1816
+ }
1817
+ state.attempts = Math.max(state.attempts, attempt);
1754
1818
  return await fn();
1755
1819
  } catch (error) {
1756
1820
  if (isOverloadError2(error)) {
1821
+ state.overloadCount += 1;
1757
1822
  consecutiveSuccesses = 0;
1758
1823
  currentParallelLimit = Math.max(1, Math.ceil(currentParallelLimit / 2));
1759
1824
  }
@@ -1770,9 +1835,10 @@ function createCallScheduler(options = {}) {
1770
1835
  }
1771
1836
  const normalizedDelay = Math.max(0, delay);
1772
1837
  if (normalizedDelay > 0) {
1838
+ state.retryDelayMs += normalizedDelay;
1773
1839
  await sleep(normalizedDelay);
1774
1840
  }
1775
- return attemptWithRetries(fn, attempt + 1);
1841
+ return attemptWithRetries(fn, attempt + 1, state);
1776
1842
  }
1777
1843
  }
1778
1844
  function drainQueue() {
@@ -1785,11 +1851,22 @@ function createCallScheduler(options = {}) {
1785
1851
  void task();
1786
1852
  }
1787
1853
  }
1788
- function run(fn) {
1854
+ function run(fn, runOptions = {}) {
1789
1855
  return new Promise((resolve, reject) => {
1856
+ const enqueuedAtMs = Date.now();
1790
1857
  const job = async () => {
1858
+ const dequeuedAtMs = Date.now();
1859
+ const state = {
1860
+ enqueuedAtMs,
1861
+ dequeuedAtMs,
1862
+ schedulerDelayMs: 0,
1863
+ retryDelayMs: 0,
1864
+ attempts: 0,
1865
+ overloadCount: 0
1866
+ };
1791
1867
  try {
1792
- const result = await attemptWithRetries(fn, 1);
1868
+ const result = await attemptWithRetries(fn, 1, state);
1869
+ state.completedAtMs = Date.now();
1793
1870
  consecutiveSuccesses += 1;
1794
1871
  if (currentParallelLimit < maxParallelRequests && consecutiveSuccesses >= increaseAfterConsecutiveSuccesses) {
1795
1872
  currentParallelLimit += 1;
@@ -1797,8 +1874,26 @@ function createCallScheduler(options = {}) {
1797
1874
  }
1798
1875
  resolve(result);
1799
1876
  } catch (error) {
1877
+ state.completedAtMs = Date.now();
1800
1878
  reject(toError(error));
1801
1879
  } finally {
1880
+ const startedAtMs = state.startedAtMs ?? state.dequeuedAtMs;
1881
+ const completedAtMs = state.completedAtMs ?? Date.now();
1882
+ const metrics = {
1883
+ enqueuedAtMs: state.enqueuedAtMs,
1884
+ dequeuedAtMs: state.dequeuedAtMs,
1885
+ startedAtMs,
1886
+ completedAtMs,
1887
+ queueWaitMs: Math.max(0, state.dequeuedAtMs - state.enqueuedAtMs),
1888
+ schedulerDelayMs: Math.max(0, state.schedulerDelayMs),
1889
+ retryDelayMs: Math.max(0, state.retryDelayMs),
1890
+ attempts: Math.max(1, state.attempts),
1891
+ overloadCount: Math.max(0, state.overloadCount)
1892
+ };
1893
+ try {
1894
+ runOptions.onSettled?.(metrics);
1895
+ } catch {
1896
+ }
1802
1897
  activeCount -= 1;
1803
1898
  queueMicrotask(drainQueue);
1804
1899
  }
@@ -1895,7 +1990,7 @@ function getSchedulerForModel(modelId) {
1895
1990
  }
1896
1991
  const created = createCallScheduler({
1897
1992
  maxParallelRequests: resolveModelConcurrencyCap({
1898
- providerEnvPrefix: "FIREWORKS",
1993
+ provider: "fireworks",
1899
1994
  modelId: normalizedModelId
1900
1995
  }),
1901
1996
  minIntervalBetweenStartMs: 200,
@@ -1904,8 +1999,8 @@ function getSchedulerForModel(modelId) {
1904
1999
  schedulerByModel.set(schedulerKey, created);
1905
2000
  return created;
1906
2001
  }
1907
- async function runFireworksCall(fn, modelId) {
1908
- return getSchedulerForModel(modelId).run(async () => fn(getFireworksClient()));
2002
+ async function runFireworksCall(fn, modelId, runOptions) {
2003
+ return getSchedulerForModel(modelId).run(async () => fn(getFireworksClient()), runOptions);
1909
2004
  }
1910
2005
 
1911
2006
  // src/fireworks/models.ts
@@ -2271,7 +2366,7 @@ function getSchedulerForModel2(modelId) {
2271
2366
  }
2272
2367
  const created = createCallScheduler({
2273
2368
  maxParallelRequests: resolveModelConcurrencyCap({
2274
- providerEnvPrefix: "GOOGLE",
2369
+ provider: "google",
2275
2370
  modelId: normalizedModelId
2276
2371
  }),
2277
2372
  minIntervalBetweenStartMs: 200,
@@ -2291,8 +2386,8 @@ function getSchedulerForModel2(modelId) {
2291
2386
  schedulerByModel2.set(schedulerKey, created);
2292
2387
  return created;
2293
2388
  }
2294
- async function runGeminiCall(fn, modelId) {
2295
- return getSchedulerForModel2(modelId).run(async () => fn(await getGeminiClient()));
2389
+ async function runGeminiCall(fn, modelId, runOptions) {
2390
+ return getSchedulerForModel2(modelId).run(async () => fn(await getGeminiClient()), runOptions);
2296
2391
  }
2297
2392
 
2298
2393
  // src/openai/client.ts
@@ -2464,7 +2559,7 @@ function getSchedulerForModel3(modelId) {
2464
2559
  }
2465
2560
  const created = createCallScheduler({
2466
2561
  maxParallelRequests: resolveModelConcurrencyCap({
2467
- providerEnvPrefix: "OPENAI",
2562
+ provider: "openai",
2468
2563
  modelId: normalizedModelId
2469
2564
  }),
2470
2565
  minIntervalBetweenStartMs: 200,
@@ -2473,8 +2568,8 @@ function getSchedulerForModel3(modelId) {
2473
2568
  schedulerByModel3.set(schedulerKey, created);
2474
2569
  return created;
2475
2570
  }
2476
- async function runOpenAiCall(fn, modelId) {
2477
- return getSchedulerForModel3(modelId).run(async () => fn(getOpenAiClient()));
2571
+ async function runOpenAiCall(fn, modelId, runOptions) {
2572
+ return getSchedulerForModel3(modelId).run(async () => fn(getOpenAiClient()), runOptions);
2478
2573
  }
2479
2574
 
2480
2575
  // src/openai/models.ts
@@ -2928,9 +3023,9 @@ function isRetryableChatGptTransportError(error) {
2928
3023
  return false;
2929
3024
  }
2930
3025
  const message = error.message.toLowerCase();
2931
- return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network");
3026
+ return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network") || message.includes("responses websocket");
2932
3027
  }
2933
- async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 2) {
3028
+ async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 3) {
2934
3029
  let attempt = 1;
2935
3030
  while (true) {
2936
3031
  try {
@@ -3835,77 +3930,153 @@ function buildToolErrorOutput(message, issues) {
3835
3930
  }
3836
3931
  return output;
3837
3932
  }
3933
+ var SUBAGENT_WAIT_TOOL_NAME = "wait";
3934
+ function toIsoTimestamp(ms) {
3935
+ return new Date(ms).toISOString();
3936
+ }
3937
+ function toToolResultDuration(result) {
3938
+ return typeof result.durationMs === "number" && Number.isFinite(result.durationMs) ? Math.max(0, result.durationMs) : 0;
3939
+ }
3940
+ function schedulerMetricsOrDefault(metrics) {
3941
+ if (!metrics) {
3942
+ return {
3943
+ queueWaitMs: 0,
3944
+ schedulerDelayMs: 0,
3945
+ providerRetryDelayMs: 0,
3946
+ providerAttempts: 1
3947
+ };
3948
+ }
3949
+ return {
3950
+ queueWaitMs: Math.max(0, metrics.queueWaitMs),
3951
+ schedulerDelayMs: Math.max(0, metrics.schedulerDelayMs),
3952
+ providerRetryDelayMs: Math.max(0, metrics.retryDelayMs),
3953
+ providerAttempts: Math.max(1, metrics.attempts),
3954
+ modelCallStartedAtMs: metrics.startedAtMs
3955
+ };
3956
+ }
3957
+ function buildStepTiming(params) {
3958
+ const scheduler = schedulerMetricsOrDefault(params.schedulerMetrics);
3959
+ const modelCallStartedAtMs = scheduler.modelCallStartedAtMs ?? params.stepStartedAtMs;
3960
+ const firstModelEventAtMs = params.firstModelEventAtMs;
3961
+ const effectiveFirstEventAtMs = firstModelEventAtMs !== void 0 ? Math.max(modelCallStartedAtMs, firstModelEventAtMs) : params.modelCompletedAtMs;
3962
+ const connectionSetupMs = Math.max(0, effectiveFirstEventAtMs - modelCallStartedAtMs);
3963
+ const activeGenerationMs = Math.max(0, params.modelCompletedAtMs - effectiveFirstEventAtMs);
3964
+ return {
3965
+ startedAt: toIsoTimestamp(params.stepStartedAtMs),
3966
+ completedAt: toIsoTimestamp(params.stepCompletedAtMs),
3967
+ totalMs: Math.max(0, params.stepCompletedAtMs - params.stepStartedAtMs),
3968
+ queueWaitMs: scheduler.queueWaitMs,
3969
+ connectionSetupMs,
3970
+ activeGenerationMs,
3971
+ toolExecutionMs: Math.max(0, params.toolExecutionMs),
3972
+ waitToolMs: Math.max(0, params.waitToolMs),
3973
+ schedulerDelayMs: scheduler.schedulerDelayMs,
3974
+ providerRetryDelayMs: scheduler.providerRetryDelayMs,
3975
+ providerAttempts: scheduler.providerAttempts
3976
+ };
3977
+ }
3978
+ function extractSpawnStartupMetrics(outputPayload) {
3979
+ if (!outputPayload || typeof outputPayload !== "object") {
3980
+ return void 0;
3981
+ }
3982
+ const outputRecord = outputPayload;
3983
+ const notification = typeof outputRecord.notification === "string" ? outputRecord.notification : "";
3984
+ if (notification !== "spawned") {
3985
+ return void 0;
3986
+ }
3987
+ const agent = outputRecord.agent;
3988
+ if (!agent || typeof agent !== "object") {
3989
+ return void 0;
3990
+ }
3991
+ const agentRecord = agent;
3992
+ const startupLatencyMs = agentRecord.spawn_startup_latency_ms;
3993
+ if (typeof startupLatencyMs !== "number" || !Number.isFinite(startupLatencyMs)) {
3994
+ return void 0;
3995
+ }
3996
+ return {
3997
+ spawnStartupLatencyMs: Math.max(0, startupLatencyMs)
3998
+ };
3999
+ }
3838
4000
  async function executeToolCall(params) {
3839
4001
  const { callKind, toolName, tool: tool2, rawInput, parseError } = params;
3840
- if (!tool2) {
3841
- const message = `Unknown tool: ${toolName}`;
4002
+ const startedAtMs = Date.now();
4003
+ const finalize = (base, outputPayload, metrics) => {
4004
+ const completedAtMs = Date.now();
3842
4005
  return {
3843
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3844
- outputPayload: buildToolErrorOutput(message)
4006
+ result: {
4007
+ ...base,
4008
+ startedAt: toIsoTimestamp(startedAtMs),
4009
+ completedAt: toIsoTimestamp(completedAtMs),
4010
+ durationMs: Math.max(0, completedAtMs - startedAtMs),
4011
+ ...metrics ? { metrics } : {}
4012
+ },
4013
+ outputPayload
3845
4014
  };
4015
+ };
4016
+ if (!tool2) {
4017
+ const message = `Unknown tool: ${toolName}`;
4018
+ const outputPayload = buildToolErrorOutput(message);
4019
+ return finalize(
4020
+ { toolName, input: rawInput, output: outputPayload, error: message },
4021
+ outputPayload
4022
+ );
3846
4023
  }
3847
4024
  if (callKind === "custom") {
3848
4025
  if (!isCustomTool(tool2)) {
3849
4026
  const message = `Tool ${toolName} was called as custom_tool_call but is declared as function.`;
3850
4027
  const outputPayload = buildToolErrorOutput(message);
3851
- return {
3852
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4028
+ return finalize(
4029
+ { toolName, input: rawInput, output: outputPayload, error: message },
3853
4030
  outputPayload
3854
- };
4031
+ );
3855
4032
  }
3856
4033
  const input = typeof rawInput === "string" ? rawInput : String(rawInput ?? "");
3857
4034
  try {
3858
4035
  const output = await tool2.execute(input);
3859
- return {
3860
- result: { toolName, input, output },
3861
- outputPayload: output
3862
- };
4036
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4037
+ return finalize({ toolName, input, output }, output, metrics);
3863
4038
  } catch (error) {
3864
4039
  const message = error instanceof Error ? error.message : String(error);
3865
4040
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
3866
- return {
3867
- result: { toolName, input, output: outputPayload, error: message },
3868
- outputPayload
3869
- };
4041
+ return finalize({ toolName, input, output: outputPayload, error: message }, outputPayload);
3870
4042
  }
3871
4043
  }
3872
4044
  if (isCustomTool(tool2)) {
3873
4045
  const message = `Tool ${toolName} was called as function_call but is declared as custom.`;
3874
4046
  const outputPayload = buildToolErrorOutput(message);
3875
- return {
3876
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4047
+ return finalize(
4048
+ { toolName, input: rawInput, output: outputPayload, error: message },
3877
4049
  outputPayload
3878
- };
4050
+ );
3879
4051
  }
3880
4052
  if (parseError) {
3881
4053
  const message = `Invalid JSON for tool ${toolName}: ${parseError}`;
3882
- return {
3883
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3884
- outputPayload: buildToolErrorOutput(message)
3885
- };
4054
+ const outputPayload = buildToolErrorOutput(message);
4055
+ return finalize(
4056
+ { toolName, input: rawInput, output: outputPayload, error: message },
4057
+ outputPayload
4058
+ );
3886
4059
  }
3887
4060
  const parsed = tool2.inputSchema.safeParse(rawInput);
3888
4061
  if (!parsed.success) {
3889
4062
  const message = `Invalid tool arguments for ${toolName}: ${formatZodIssues(parsed.error.issues)}`;
3890
4063
  const outputPayload = buildToolErrorOutput(message, parsed.error.issues);
3891
- return {
3892
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4064
+ return finalize(
4065
+ { toolName, input: rawInput, output: outputPayload, error: message },
3893
4066
  outputPayload
3894
- };
4067
+ );
3895
4068
  }
3896
4069
  try {
3897
4070
  const output = await tool2.execute(parsed.data);
3898
- return {
3899
- result: { toolName, input: parsed.data, output },
3900
- outputPayload: output
3901
- };
4071
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4072
+ return finalize({ toolName, input: parsed.data, output }, output, metrics);
3902
4073
  } catch (error) {
3903
4074
  const message = error instanceof Error ? error.message : String(error);
3904
4075
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
3905
- return {
3906
- result: { toolName, input: parsed.data, output: outputPayload, error: message },
4076
+ return finalize(
4077
+ { toolName, input: parsed.data, output: outputPayload, error: message },
3907
4078
  outputPayload
3908
- };
4079
+ );
3909
4080
  }
3910
4081
  }
3911
4082
  function buildToolLogId(turn, toolIndex) {
@@ -4770,6 +4941,9 @@ async function runToolLoop(request) {
4770
4941
  let input = toOpenAiInput(contents);
4771
4942
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
4772
4943
  const turn = stepIndex + 1;
4944
+ const stepStartedAtMs = Date.now();
4945
+ let firstModelEventAtMs;
4946
+ let schedulerMetrics;
4773
4947
  const abortController = new AbortController();
4774
4948
  if (request.signal) {
4775
4949
  if (request.signal.aborted) {
@@ -4788,45 +4962,59 @@ async function runToolLoop(request) {
4788
4962
  const emitEvent = (ev) => {
4789
4963
  onEvent?.(ev);
4790
4964
  };
4791
- const finalResponse = await runOpenAiCall(async (client) => {
4792
- const stream = client.responses.stream(
4793
- {
4794
- model: providerInfo.model,
4795
- input,
4796
- ...previousResponseId ? { previous_response_id: previousResponseId } : {},
4797
- ...openAiTools.length > 0 ? { tools: openAiTools } : {},
4798
- ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
4799
- reasoning,
4800
- text: textConfig,
4801
- include: ["reasoning.encrypted_content"]
4802
- },
4803
- { signal: abortController.signal }
4804
- );
4805
- for await (const event of stream) {
4806
- switch (event.type) {
4807
- case "response.output_text.delta":
4808
- emitEvent({
4809
- type: "delta",
4810
- channel: "response",
4811
- text: typeof event.delta === "string" ? event.delta : ""
4812
- });
4813
- break;
4814
- case "response.reasoning_summary_text.delta":
4815
- emitEvent({
4816
- type: "delta",
4817
- channel: "thought",
4818
- text: typeof event.delta === "string" ? event.delta : ""
4819
- });
4820
- break;
4821
- case "response.refusal.delta":
4822
- emitEvent({ type: "blocked" });
4823
- break;
4824
- default:
4825
- break;
4965
+ const markFirstModelEvent = () => {
4966
+ if (firstModelEventAtMs === void 0) {
4967
+ firstModelEventAtMs = Date.now();
4968
+ }
4969
+ };
4970
+ const finalResponse = await runOpenAiCall(
4971
+ async (client) => {
4972
+ const stream = client.responses.stream(
4973
+ {
4974
+ model: providerInfo.model,
4975
+ input,
4976
+ ...previousResponseId ? { previous_response_id: previousResponseId } : {},
4977
+ ...openAiTools.length > 0 ? { tools: openAiTools } : {},
4978
+ ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
4979
+ reasoning,
4980
+ text: textConfig,
4981
+ include: ["reasoning.encrypted_content"]
4982
+ },
4983
+ { signal: abortController.signal }
4984
+ );
4985
+ for await (const event of stream) {
4986
+ markFirstModelEvent();
4987
+ switch (event.type) {
4988
+ case "response.output_text.delta":
4989
+ emitEvent({
4990
+ type: "delta",
4991
+ channel: "response",
4992
+ text: typeof event.delta === "string" ? event.delta : ""
4993
+ });
4994
+ break;
4995
+ case "response.reasoning_summary_text.delta":
4996
+ emitEvent({
4997
+ type: "delta",
4998
+ channel: "thought",
4999
+ text: typeof event.delta === "string" ? event.delta : ""
5000
+ });
5001
+ break;
5002
+ case "response.refusal.delta":
5003
+ emitEvent({ type: "blocked" });
5004
+ break;
5005
+ default:
5006
+ break;
5007
+ }
5008
+ }
5009
+ return await stream.finalResponse();
5010
+ },
5011
+ providerInfo.model,
5012
+ {
5013
+ onSettled: (metrics) => {
5014
+ schedulerMetrics = metrics;
4826
5015
  }
4827
5016
  }
4828
- return await stream.finalResponse();
4829
- }, providerInfo.model);
5017
+ );
4830
5018
  modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
4831
5019
  emitEvent({ type: "model", modelVersion });
4832
5020
  if (finalResponse.error) {
@@ -4836,6 +5024,7 @@ async function runToolLoop(request) {
4836
5024
  usageTokens = extractOpenAiUsageTokens(finalResponse.usage);
4837
5025
  const responseText = extractOpenAiResponseParts(finalResponse).parts.filter((p) => p.type === "text" && p.thought !== true).map((p) => p.text).join("").trim();
4838
5026
  const reasoningSummary = extractOpenAiReasoningSummary(finalResponse).trim();
5027
+ const modelCompletedAtMs = Date.now();
4839
5028
  const stepCostUsd = estimateCallCostUsd({
4840
5029
  modelId: modelVersion,
4841
5030
  tokens: usageTokens,
@@ -4850,6 +5039,16 @@ async function runToolLoop(request) {
4850
5039
  if (responseToolCalls.length === 0) {
4851
5040
  finalText = responseText;
4852
5041
  finalThoughts = reasoningSummary;
5042
+ const stepCompletedAtMs2 = Date.now();
5043
+ const timing2 = buildStepTiming({
5044
+ stepStartedAtMs,
5045
+ stepCompletedAtMs: stepCompletedAtMs2,
5046
+ modelCompletedAtMs,
5047
+ firstModelEventAtMs,
5048
+ schedulerMetrics,
5049
+ toolExecutionMs: 0,
5050
+ waitToolMs: 0
5051
+ });
4853
5052
  steps.push({
4854
5053
  step: steps.length + 1,
4855
5054
  modelVersion,
@@ -4857,7 +5056,8 @@ async function runToolLoop(request) {
4857
5056
  thoughts: reasoningSummary || void 0,
4858
5057
  toolCalls: [],
4859
5058
  usage: usageTokens,
4860
- costUsd: stepCostUsd
5059
+ costUsd: stepCostUsd,
5060
+ timing: timing2
4861
5061
  });
4862
5062
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
4863
5063
  }
@@ -4902,8 +5102,15 @@ async function runToolLoop(request) {
4902
5102
  })
4903
5103
  );
4904
5104
  const toolOutputs = [];
5105
+ let toolExecutionMs = 0;
5106
+ let waitToolMs = 0;
4905
5107
  for (const { entry, result, outputPayload } of callResults) {
4906
5108
  stepToolCalls.push({ ...result, callId: entry.call.call_id });
5109
+ const callDurationMs = toToolResultDuration(result);
5110
+ toolExecutionMs += callDurationMs;
5111
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5112
+ waitToolMs += callDurationMs;
5113
+ }
4907
5114
  if (entry.call.kind === "custom") {
4908
5115
  toolOutputs.push({
4909
5116
  type: "custom_tool_call_output",
@@ -4918,6 +5125,16 @@ async function runToolLoop(request) {
4918
5125
  });
4919
5126
  }
4920
5127
  }
5128
+ const stepCompletedAtMs = Date.now();
5129
+ const timing = buildStepTiming({
5130
+ stepStartedAtMs,
5131
+ stepCompletedAtMs,
5132
+ modelCompletedAtMs,
5133
+ firstModelEventAtMs,
5134
+ schedulerMetrics,
5135
+ toolExecutionMs,
5136
+ waitToolMs
5137
+ });
4921
5138
  steps.push({
4922
5139
  step: steps.length + 1,
4923
5140
  modelVersion,
@@ -4925,7 +5142,8 @@ async function runToolLoop(request) {
4925
5142
  thoughts: reasoningSummary || void 0,
4926
5143
  toolCalls: stepToolCalls,
4927
5144
  usage: usageTokens,
4928
- costUsd: stepCostUsd
5145
+ costUsd: stepCostUsd,
5146
+ timing
4929
5147
  });
4930
5148
  previousResponseId = finalResponse.id;
4931
5149
  input = toolOutputs;
@@ -4946,6 +5164,13 @@ async function runToolLoop(request) {
4946
5164
  let input = [...toolLoopInput.input];
4947
5165
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
4948
5166
  const turn = stepIndex + 1;
5167
+ const stepStartedAtMs = Date.now();
5168
+ let firstModelEventAtMs;
5169
+ const markFirstModelEvent = () => {
5170
+ if (firstModelEventAtMs === void 0) {
5171
+ firstModelEventAtMs = Date.now();
5172
+ }
5173
+ };
4949
5174
  const response = await collectChatGptCodexResponseWithRetry({
4950
5175
  sessionId: conversationId,
4951
5176
  request: {
@@ -4968,13 +5193,16 @@ async function runToolLoop(request) {
4968
5193
  signal: request.signal,
4969
5194
  onDelta: (delta) => {
4970
5195
  if (delta.thoughtDelta) {
5196
+ markFirstModelEvent();
4971
5197
  request.onEvent?.({ type: "delta", channel: "thought", text: delta.thoughtDelta });
4972
5198
  }
4973
5199
  if (delta.textDelta) {
5200
+ markFirstModelEvent();
4974
5201
  request.onEvent?.({ type: "delta", channel: "response", text: delta.textDelta });
4975
5202
  }
4976
5203
  }
4977
5204
  });
5205
+ const modelCompletedAtMs = Date.now();
4978
5206
  const modelVersion = response.model ? `chatgpt-${response.model}` : request.model;
4979
5207
  const usageTokens = extractChatGptUsageTokens(response.usage);
4980
5208
  const stepCostUsd = estimateCallCostUsd({
@@ -4989,6 +5217,15 @@ async function runToolLoop(request) {
4989
5217
  if (responseToolCalls.length === 0) {
4990
5218
  finalText = responseText;
4991
5219
  finalThoughts = reasoningSummaryText;
5220
+ const stepCompletedAtMs2 = Date.now();
5221
+ const timing2 = buildStepTiming({
5222
+ stepStartedAtMs,
5223
+ stepCompletedAtMs: stepCompletedAtMs2,
5224
+ modelCompletedAtMs,
5225
+ firstModelEventAtMs,
5226
+ toolExecutionMs: 0,
5227
+ waitToolMs: 0
5228
+ });
4992
5229
  steps.push({
4993
5230
  step: steps.length + 1,
4994
5231
  modelVersion,
@@ -4996,7 +5233,8 @@ async function runToolLoop(request) {
4996
5233
  thoughts: reasoningSummaryText || void 0,
4997
5234
  toolCalls: [],
4998
5235
  usage: usageTokens,
4999
- costUsd: stepCostUsd
5236
+ costUsd: stepCostUsd,
5237
+ timing: timing2
5000
5238
  });
5001
5239
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5002
5240
  }
@@ -5036,8 +5274,15 @@ async function runToolLoop(request) {
5036
5274
  );
5037
5275
  })
5038
5276
  );
5277
+ let toolExecutionMs = 0;
5278
+ let waitToolMs = 0;
5039
5279
  for (const { entry, result, outputPayload } of callResults) {
5040
5280
  toolCalls.push({ ...result, callId: entry.ids.callId });
5281
+ const callDurationMs = toToolResultDuration(result);
5282
+ toolExecutionMs += callDurationMs;
5283
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5284
+ waitToolMs += callDurationMs;
5285
+ }
5041
5286
  if (entry.call.kind === "custom") {
5042
5287
  toolOutputs.push({
5043
5288
  type: "custom_tool_call",
@@ -5068,6 +5313,15 @@ async function runToolLoop(request) {
5068
5313
  });
5069
5314
  }
5070
5315
  }
5316
+ const stepCompletedAtMs = Date.now();
5317
+ const timing = buildStepTiming({
5318
+ stepStartedAtMs,
5319
+ stepCompletedAtMs,
5320
+ modelCompletedAtMs,
5321
+ firstModelEventAtMs,
5322
+ toolExecutionMs,
5323
+ waitToolMs
5324
+ });
5071
5325
  steps.push({
5072
5326
  step: steps.length + 1,
5073
5327
  modelVersion,
@@ -5075,7 +5329,8 @@ async function runToolLoop(request) {
5075
5329
  thoughts: reasoningSummaryText || void 0,
5076
5330
  toolCalls,
5077
5331
  usage: usageTokens,
5078
- costUsd: stepCostUsd
5332
+ costUsd: stepCostUsd,
5333
+ timing
5079
5334
  });
5080
5335
  input = input.concat(toolOutputs);
5081
5336
  }
@@ -5091,18 +5346,29 @@ async function runToolLoop(request) {
5091
5346
  const messages = toFireworksMessages(contents);
5092
5347
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5093
5348
  const turn = stepIndex + 1;
5094
- const response = await runFireworksCall(async (client) => {
5095
- return await client.chat.completions.create(
5096
- {
5097
- model: providerInfo.model,
5098
- messages,
5099
- tools: fireworksTools,
5100
- tool_choice: "auto",
5101
- parallel_tool_calls: true
5102
- },
5103
- { signal: request.signal }
5104
- );
5105
- }, providerInfo.model);
5349
+ const stepStartedAtMs = Date.now();
5350
+ let schedulerMetrics;
5351
+ const response = await runFireworksCall(
5352
+ async (client) => {
5353
+ return await client.chat.completions.create(
5354
+ {
5355
+ model: providerInfo.model,
5356
+ messages,
5357
+ tools: fireworksTools,
5358
+ tool_choice: "auto",
5359
+ parallel_tool_calls: true
5360
+ },
5361
+ { signal: request.signal }
5362
+ );
5363
+ },
5364
+ providerInfo.model,
5365
+ {
5366
+ onSettled: (metrics) => {
5367
+ schedulerMetrics = metrics;
5368
+ }
5369
+ }
5370
+ );
5371
+ const modelCompletedAtMs = Date.now();
5106
5372
  const modelVersion = typeof response.model === "string" ? response.model : request.model;
5107
5373
  request.onEvent?.({ type: "model", modelVersion });
5108
5374
  const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
@@ -5133,6 +5399,15 @@ async function runToolLoop(request) {
5133
5399
  if (responseToolCalls.length === 0) {
5134
5400
  finalText = responseText;
5135
5401
  finalThoughts = "";
5402
+ const stepCompletedAtMs2 = Date.now();
5403
+ const timing2 = buildStepTiming({
5404
+ stepStartedAtMs,
5405
+ stepCompletedAtMs: stepCompletedAtMs2,
5406
+ modelCompletedAtMs,
5407
+ schedulerMetrics,
5408
+ toolExecutionMs: 0,
5409
+ waitToolMs: 0
5410
+ });
5136
5411
  steps.push({
5137
5412
  step: steps.length + 1,
5138
5413
  modelVersion,
@@ -5140,7 +5415,8 @@ async function runToolLoop(request) {
5140
5415
  thoughts: void 0,
5141
5416
  toolCalls: [],
5142
5417
  usage: usageTokens,
5143
- costUsd: stepCostUsd
5418
+ costUsd: stepCostUsd,
5419
+ timing: timing2
5144
5420
  });
5145
5421
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5146
5422
  }
@@ -5175,8 +5451,15 @@ async function runToolLoop(request) {
5175
5451
  );
5176
5452
  const assistantToolCalls = [];
5177
5453
  const toolMessages = [];
5454
+ let toolExecutionMs = 0;
5455
+ let waitToolMs = 0;
5178
5456
  for (const { entry, result, outputPayload } of callResults) {
5179
5457
  stepToolCalls.push({ ...result, callId: entry.call.id });
5458
+ const callDurationMs = toToolResultDuration(result);
5459
+ toolExecutionMs += callDurationMs;
5460
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5461
+ waitToolMs += callDurationMs;
5462
+ }
5180
5463
  assistantToolCalls.push({
5181
5464
  id: entry.call.id,
5182
5465
  type: "function",
@@ -5191,6 +5474,15 @@ async function runToolLoop(request) {
5191
5474
  content: mergeToolOutput(outputPayload)
5192
5475
  });
5193
5476
  }
5477
+ const stepCompletedAtMs = Date.now();
5478
+ const timing = buildStepTiming({
5479
+ stepStartedAtMs,
5480
+ stepCompletedAtMs,
5481
+ modelCompletedAtMs,
5482
+ schedulerMetrics,
5483
+ toolExecutionMs,
5484
+ waitToolMs
5485
+ });
5194
5486
  steps.push({
5195
5487
  step: steps.length + 1,
5196
5488
  modelVersion,
@@ -5198,7 +5490,8 @@ async function runToolLoop(request) {
5198
5490
  thoughts: void 0,
5199
5491
  toolCalls: stepToolCalls,
5200
5492
  usage: usageTokens,
5201
- costUsd: stepCostUsd
5493
+ costUsd: stepCostUsd,
5494
+ timing
5202
5495
  });
5203
5496
  messages.push({
5204
5497
  role: "assistant",
@@ -5214,6 +5507,14 @@ async function runToolLoop(request) {
5214
5507
  const geminiTools = geminiNativeTools ? geminiNativeTools.concat(geminiFunctionTools) : geminiFunctionTools;
5215
5508
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
5216
5509
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5510
+ const stepStartedAtMs = Date.now();
5511
+ let firstModelEventAtMs;
5512
+ let schedulerMetrics;
5513
+ const markFirstModelEvent = () => {
5514
+ if (firstModelEventAtMs === void 0) {
5515
+ firstModelEventAtMs = Date.now();
5516
+ }
5517
+ };
5217
5518
  const config = {
5218
5519
  maxOutputTokens: 32e3,
5219
5520
  tools: geminiTools,
@@ -5225,81 +5526,91 @@ async function runToolLoop(request) {
5225
5526
  thinkingConfig: resolveGeminiThinkingConfig(request.model)
5226
5527
  };
5227
5528
  const onEvent = request.onEvent;
5228
- const response = await runGeminiCall(async (client) => {
5229
- const stream = await client.models.generateContentStream({
5230
- model: request.model,
5231
- contents: geminiContents,
5232
- config
5233
- });
5234
- let responseText = "";
5235
- let thoughtsText = "";
5236
- const modelParts = [];
5237
- const functionCalls = [];
5238
- const seenFunctionCallIds = /* @__PURE__ */ new Set();
5239
- const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5240
- let latestUsageMetadata;
5241
- let resolvedModelVersion;
5242
- for await (const chunk of stream) {
5243
- if (chunk.modelVersion) {
5244
- resolvedModelVersion = chunk.modelVersion;
5245
- onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5246
- }
5247
- if (chunk.usageMetadata) {
5248
- latestUsageMetadata = chunk.usageMetadata;
5249
- }
5250
- const candidates = chunk.candidates;
5251
- if (!candidates || candidates.length === 0) {
5252
- continue;
5253
- }
5254
- const primary = candidates[0];
5255
- const parts = primary?.content?.parts;
5256
- if (!parts || parts.length === 0) {
5257
- continue;
5258
- }
5259
- for (const part of parts) {
5260
- modelParts.push(part);
5261
- const call = part.functionCall;
5262
- if (call) {
5263
- const id = typeof call.id === "string" ? call.id : "";
5264
- const shouldAdd = (() => {
5265
- if (id.length > 0) {
5266
- if (seenFunctionCallIds.has(id)) {
5529
+ const response = await runGeminiCall(
5530
+ async (client) => {
5531
+ const stream = await client.models.generateContentStream({
5532
+ model: request.model,
5533
+ contents: geminiContents,
5534
+ config
5535
+ });
5536
+ let responseText = "";
5537
+ let thoughtsText = "";
5538
+ const modelParts = [];
5539
+ const functionCalls = [];
5540
+ const seenFunctionCallIds = /* @__PURE__ */ new Set();
5541
+ const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5542
+ let latestUsageMetadata;
5543
+ let resolvedModelVersion;
5544
+ for await (const chunk of stream) {
5545
+ markFirstModelEvent();
5546
+ if (chunk.modelVersion) {
5547
+ resolvedModelVersion = chunk.modelVersion;
5548
+ onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5549
+ }
5550
+ if (chunk.usageMetadata) {
5551
+ latestUsageMetadata = chunk.usageMetadata;
5552
+ }
5553
+ const candidates = chunk.candidates;
5554
+ if (!candidates || candidates.length === 0) {
5555
+ continue;
5556
+ }
5557
+ const primary = candidates[0];
5558
+ const parts = primary?.content?.parts;
5559
+ if (!parts || parts.length === 0) {
5560
+ continue;
5561
+ }
5562
+ for (const part of parts) {
5563
+ modelParts.push(part);
5564
+ const call = part.functionCall;
5565
+ if (call) {
5566
+ const id = typeof call.id === "string" ? call.id : "";
5567
+ const shouldAdd = (() => {
5568
+ if (id.length > 0) {
5569
+ if (seenFunctionCallIds.has(id)) {
5570
+ return false;
5571
+ }
5572
+ seenFunctionCallIds.add(id);
5573
+ return true;
5574
+ }
5575
+ const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5576
+ if (seenFunctionCallKeys.has(key)) {
5267
5577
  return false;
5268
5578
  }
5269
- seenFunctionCallIds.add(id);
5579
+ seenFunctionCallKeys.add(key);
5270
5580
  return true;
5581
+ })();
5582
+ if (shouldAdd) {
5583
+ functionCalls.push(call);
5271
5584
  }
5272
- const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5273
- if (seenFunctionCallKeys.has(key)) {
5274
- return false;
5275
- }
5276
- seenFunctionCallKeys.add(key);
5277
- return true;
5278
- })();
5279
- if (shouldAdd) {
5280
- functionCalls.push(call);
5281
5585
  }
5282
- }
5283
- if (typeof part.text === "string" && part.text.length > 0) {
5284
- if (part.thought) {
5285
- thoughtsText += part.text;
5286
- onEvent?.({ type: "delta", channel: "thought", text: part.text });
5287
- } else {
5288
- responseText += part.text;
5289
- onEvent?.({ type: "delta", channel: "response", text: part.text });
5586
+ if (typeof part.text === "string" && part.text.length > 0) {
5587
+ if (part.thought) {
5588
+ thoughtsText += part.text;
5589
+ onEvent?.({ type: "delta", channel: "thought", text: part.text });
5590
+ } else {
5591
+ responseText += part.text;
5592
+ onEvent?.({ type: "delta", channel: "response", text: part.text });
5593
+ }
5290
5594
  }
5291
5595
  }
5292
5596
  }
5597
+ return {
5598
+ responseText,
5599
+ thoughtsText,
5600
+ functionCalls,
5601
+ modelParts,
5602
+ usageMetadata: latestUsageMetadata,
5603
+ modelVersion: resolvedModelVersion ?? request.model
5604
+ };
5605
+ },
5606
+ request.model,
5607
+ {
5608
+ onSettled: (metrics) => {
5609
+ schedulerMetrics = metrics;
5610
+ }
5293
5611
  }
5294
- return {
5295
- responseText,
5296
- thoughtsText,
5297
- functionCalls,
5298
- modelParts,
5299
- usageMetadata: latestUsageMetadata,
5300
- modelVersion: resolvedModelVersion ?? request.model
5301
- };
5302
- }, request.model);
5612
+ );
5613
+ const modelCompletedAtMs = Date.now();
5303
5614
  const usageTokens = extractGeminiUsageTokens(response.usageMetadata);
5304
5615
  const modelVersion = response.modelVersion ?? request.model;
5305
5616
  const stepCostUsd = estimateCallCostUsd({
@@ -5311,6 +5622,16 @@ async function runToolLoop(request) {
5311
5622
  if (response.functionCalls.length === 0) {
5312
5623
  finalText = response.responseText.trim();
5313
5624
  finalThoughts = response.thoughtsText.trim();
5625
+ const stepCompletedAtMs2 = Date.now();
5626
+ const timing2 = buildStepTiming({
5627
+ stepStartedAtMs,
5628
+ stepCompletedAtMs: stepCompletedAtMs2,
5629
+ modelCompletedAtMs,
5630
+ firstModelEventAtMs,
5631
+ schedulerMetrics,
5632
+ toolExecutionMs: 0,
5633
+ waitToolMs: 0
5634
+ });
5314
5635
  steps.push({
5315
5636
  step: steps.length + 1,
5316
5637
  modelVersion,
@@ -5318,7 +5639,8 @@ async function runToolLoop(request) {
5318
5639
  thoughts: finalThoughts || void 0,
5319
5640
  toolCalls: [],
5320
5641
  usage: usageTokens,
5321
- costUsd: stepCostUsd
5642
+ costUsd: stepCostUsd,
5643
+ timing: timing2
5322
5644
  });
5323
5645
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5324
5646
  }
@@ -5368,8 +5690,15 @@ async function runToolLoop(request) {
5368
5690
  );
5369
5691
  })
5370
5692
  );
5693
+ let toolExecutionMs = 0;
5694
+ let waitToolMs = 0;
5371
5695
  for (const { entry, result, outputPayload } of callResults) {
5372
5696
  toolCalls.push({ ...result, callId: entry.call.id });
5697
+ const callDurationMs = toToolResultDuration(result);
5698
+ toolExecutionMs += callDurationMs;
5699
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5700
+ waitToolMs += callDurationMs;
5701
+ }
5373
5702
  const responsePayload = isPlainRecord(outputPayload) ? outputPayload : { output: outputPayload };
5374
5703
  responseParts.push({
5375
5704
  functionResponse: {
@@ -5379,6 +5708,16 @@ async function runToolLoop(request) {
5379
5708
  }
5380
5709
  });
5381
5710
  }
5711
+ const stepCompletedAtMs = Date.now();
5712
+ const timing = buildStepTiming({
5713
+ stepStartedAtMs,
5714
+ stepCompletedAtMs,
5715
+ modelCompletedAtMs,
5716
+ firstModelEventAtMs,
5717
+ schedulerMetrics,
5718
+ toolExecutionMs,
5719
+ waitToolMs
5720
+ });
5382
5721
  steps.push({
5383
5722
  step: steps.length + 1,
5384
5723
  modelVersion,
@@ -5386,7 +5725,8 @@ async function runToolLoop(request) {
5386
5725
  thoughts: response.thoughtsText.trim() || void 0,
5387
5726
  toolCalls,
5388
5727
  usage: usageTokens,
5389
- costUsd: stepCostUsd
5728
+ costUsd: stepCostUsd,
5729
+ timing
5390
5730
  });
5391
5731
  geminiContents.push({ role: "user", parts: responseParts });
5392
5732
  }
@@ -5637,6 +5977,9 @@ function appendMarkdownSourcesSection(value, sources) {
5637
5977
  ${lines}`;
5638
5978
  }
5639
5979
 
5980
+ // src/agent.ts
5981
+ import { randomBytes as randomBytes3 } from "crypto";
5982
+
5640
5983
  // src/agent/subagents.ts
5641
5984
  import { randomBytes as randomBytes2 } from "crypto";
5642
5985
  import { z as z4 } from "zod";
@@ -6098,7 +6441,12 @@ function startRun(agent, options) {
6098
6441
  }
6099
6442
  const input = [...agent.history, { role: "user", content: nextInput }];
6100
6443
  const abortController = new AbortController();
6444
+ const runStartedAtMs = Date.now();
6101
6445
  agent.abortController = abortController;
6446
+ if (agent.firstRunStartedAtMs === void 0) {
6447
+ agent.firstRunStartedAtMs = runStartedAtMs;
6448
+ }
6449
+ agent.lastRunStartedAtMs = runStartedAtMs;
6102
6450
  agent.lastError = void 0;
6103
6451
  setLifecycle(
6104
6452
  agent,
@@ -6142,6 +6490,9 @@ function startRun(agent, options) {
6142
6490
  agent.lastError = message;
6143
6491
  setLifecycle(agent, "failed", "run_failed", `Subagent ${agent.id} failed: ${message}`);
6144
6492
  } finally {
6493
+ const runCompletedAtMs = Date.now();
6494
+ agent.lastRunCompletedAtMs = runCompletedAtMs;
6495
+ agent.lastRunDurationMs = Math.max(0, runCompletedAtMs - runStartedAtMs);
6145
6496
  agent.runningPromise = void 0;
6146
6497
  agent.abortController = void 0;
6147
6498
  }
@@ -6217,6 +6568,13 @@ function buildSnapshot(agent) {
6217
6568
  turns: agent.turns,
6218
6569
  created_at: new Date(agent.createdAtMs).toISOString(),
6219
6570
  updated_at: new Date(agent.updatedAtMs).toISOString(),
6571
+ ...agent.firstRunStartedAtMs ? {
6572
+ first_run_started_at: new Date(agent.firstRunStartedAtMs).toISOString(),
6573
+ spawn_startup_latency_ms: Math.max(0, agent.firstRunStartedAtMs - agent.createdAtMs)
6574
+ } : {},
6575
+ ...agent.lastRunStartedAtMs ? { last_run_started_at: new Date(agent.lastRunStartedAtMs).toISOString() } : {},
6576
+ ...agent.lastRunCompletedAtMs ? { last_run_completed_at: new Date(agent.lastRunCompletedAtMs).toISOString() } : {},
6577
+ ...typeof agent.lastRunDurationMs === "number" ? { last_run_duration_ms: Math.max(0, agent.lastRunDurationMs) } : {},
6220
6578
  ...agent.lastError ? { last_error: agent.lastError } : {},
6221
6579
  ...agent.lastResult ? {
6222
6580
  last_result: {
@@ -8046,7 +8404,12 @@ function isNoEntError(error) {
8046
8404
 
8047
8405
  // src/agent.ts
8048
8406
  async function runAgentLoop(request) {
8049
- return await runAgentLoopInternal(request, { depth: 0 });
8407
+ const telemetry = createAgentTelemetrySession(request.telemetry);
8408
+ try {
8409
+ return await runAgentLoopInternal(request, { depth: 0, telemetry });
8410
+ } finally {
8411
+ await telemetry?.flush();
8412
+ }
8050
8413
  }
8051
8414
  async function runAgentLoopInternal(request, context) {
8052
8415
  const {
@@ -8056,15 +8419,21 @@ async function runAgentLoopInternal(request, context) {
8056
8419
  subagentTool,
8057
8420
  subagent_tool,
8058
8421
  subagents,
8422
+ telemetry,
8059
8423
  ...toolLoopRequest
8060
8424
  } = request;
8425
+ const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
8426
+ const runId = randomRunId();
8427
+ const startedAtMs = Date.now();
8061
8428
  const filesystemSelection = filesystemTool ?? filesystem_tool;
8062
8429
  const subagentSelection = subagentTool ?? subagent_tool ?? subagents;
8063
8430
  const filesystemTools = resolveFilesystemTools(request.model, filesystemSelection);
8064
8431
  const resolvedSubagentConfig = resolveSubagentToolConfig(subagentSelection, context.depth);
8065
8432
  const subagentController = createSubagentController({
8433
+ runId,
8066
8434
  model: request.model,
8067
8435
  depth: context.depth,
8436
+ telemetry: telemetrySession,
8068
8437
  customTools: customTools ?? {},
8069
8438
  filesystemSelection,
8070
8439
  subagentSelection,
@@ -8085,12 +8454,54 @@ async function runAgentLoopInternal(request, context) {
8085
8454
  resolvedSubagentConfig,
8086
8455
  context.depth
8087
8456
  );
8457
+ const emitTelemetry = createAgentTelemetryEmitter({
8458
+ session: telemetrySession,
8459
+ runId,
8460
+ parentRunId: context.parentRunId,
8461
+ depth: context.depth,
8462
+ model: request.model
8463
+ });
8464
+ emitTelemetry({
8465
+ type: "agent.run.started",
8466
+ inputMode: typeof request.input === "string" ? "string" : "messages",
8467
+ customToolCount: Object.keys(customTools ?? {}).length,
8468
+ mergedToolCount: Object.keys(mergedTools).length,
8469
+ filesystemToolsEnabled: Object.keys(filesystemTools).length > 0,
8470
+ subagentToolsEnabled: resolvedSubagentConfig.enabled
8471
+ });
8472
+ const sourceOnEvent = toolLoopRequest.onEvent;
8473
+ const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
8474
+ const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
8475
+ sourceOnEvent?.(event);
8476
+ if (includeLlmStreamEvents) {
8477
+ emitTelemetry({ type: "agent.run.stream", event });
8478
+ }
8479
+ } : void 0;
8088
8480
  try {
8089
- return await runToolLoop({
8481
+ const result = await runToolLoop({
8090
8482
  ...toolLoopRequest,
8091
8483
  ...instructions ? { instructions } : {},
8484
+ ...wrappedOnEvent ? { onEvent: wrappedOnEvent } : {},
8092
8485
  tools: mergedTools
8093
8486
  });
8487
+ emitTelemetry({
8488
+ type: "agent.run.completed",
8489
+ success: true,
8490
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8491
+ stepCount: result.steps.length,
8492
+ toolCallCount: countToolCalls(result),
8493
+ totalCostUsd: result.totalCostUsd,
8494
+ usage: summarizeResultUsage(result)
8495
+ });
8496
+ return result;
8497
+ } catch (error) {
8498
+ emitTelemetry({
8499
+ type: "agent.run.completed",
8500
+ success: false,
8501
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8502
+ error: toErrorMessage2(error)
8503
+ });
8504
+ throw error;
8094
8505
  } finally {
8095
8506
  await subagentController?.closeAll();
8096
8507
  }
@@ -8153,7 +8564,11 @@ function createSubagentController(params) {
8153
8564
  openAiReasoningEffort: params.toolLoopRequest.openAiReasoningEffort,
8154
8565
  signal: subagentRequest.signal
8155
8566
  },
8156
- { depth: params.depth + 1 }
8567
+ {
8568
+ depth: params.depth + 1,
8569
+ parentRunId: params.runId,
8570
+ telemetry: params.telemetry
8571
+ }
8157
8572
  );
8158
8573
  }
8159
8574
  });
@@ -8204,6 +8619,129 @@ function trimToUndefined2(value) {
8204
8619
  const trimmed = value?.trim();
8205
8620
  return trimmed && trimmed.length > 0 ? trimmed : void 0;
8206
8621
  }
/**
 * Generates an identifier for a single agent run.
 *
 * @returns {string} The hex encoding of 8 random bytes (16 hex characters).
 */
function randomRunId() {
  const entropy = randomBytes3(8);
  return entropy.toString("hex");
}
8625
+ function toIsoNow() {
8626
+ return (/* @__PURE__ */ new Date()).toISOString();
8627
+ }
8628
+ function toErrorMessage2(error) {
8629
+ if (error instanceof Error && error.message) {
8630
+ return error.message;
8631
+ }
8632
+ if (typeof error === "string") {
8633
+ return error;
8634
+ }
8635
+ return "Unknown error";
8636
+ }
8637
+ function countToolCalls(result) {
8638
+ let count = 0;
8639
+ for (const step of result.steps) {
8640
+ count += step.toolCalls.length;
8641
+ }
8642
+ return count;
8643
+ }
8644
+ function sumUsageValue(current, next) {
8645
+ if (typeof next !== "number" || !Number.isFinite(next)) {
8646
+ return current;
8647
+ }
8648
+ const normalizedNext = Math.max(0, next);
8649
+ if (typeof current !== "number" || !Number.isFinite(current)) {
8650
+ return normalizedNext;
8651
+ }
8652
+ return Math.max(0, current) + normalizedNext;
8653
+ }
8654
+ function summarizeResultUsage(result) {
8655
+ let summary;
8656
+ for (const step of result.steps) {
8657
+ const usage = step.usage;
8658
+ if (!usage) {
8659
+ continue;
8660
+ }
8661
+ summary = {
8662
+ promptTokens: sumUsageValue(summary?.promptTokens, usage.promptTokens),
8663
+ cachedTokens: sumUsageValue(summary?.cachedTokens, usage.cachedTokens),
8664
+ responseTokens: sumUsageValue(summary?.responseTokens, usage.responseTokens),
8665
+ responseImageTokens: sumUsageValue(summary?.responseImageTokens, usage.responseImageTokens),
8666
+ thinkingTokens: sumUsageValue(summary?.thinkingTokens, usage.thinkingTokens),
8667
+ totalTokens: sumUsageValue(summary?.totalTokens, usage.totalTokens),
8668
+ toolUsePromptTokens: sumUsageValue(summary?.toolUsePromptTokens, usage.toolUsePromptTokens)
8669
+ };
8670
+ }
8671
+ return summary;
8672
+ }
8673
+ function isPromiseLike(value) {
8674
+ return (typeof value === "object" || typeof value === "function") && value !== null && typeof value.then === "function";
8675
+ }
8676
+ function isAgentTelemetrySink(value) {
8677
+ return typeof value === "object" && value !== null && typeof value.emit === "function";
8678
+ }
8679
+ function resolveTelemetrySelection(telemetry) {
8680
+ if (!telemetry) {
8681
+ return void 0;
8682
+ }
8683
+ if (isAgentTelemetrySink(telemetry)) {
8684
+ return { sink: telemetry };
8685
+ }
8686
+ if (isAgentTelemetrySink(telemetry.sink)) {
8687
+ return telemetry;
8688
+ }
8689
+ throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
8690
+ }
8691
+ function createAgentTelemetrySession(telemetry) {
8692
+ const config = resolveTelemetrySelection(telemetry);
8693
+ if (!config) {
8694
+ return void 0;
8695
+ }
8696
+ const pending = /* @__PURE__ */ new Set();
8697
+ const trackPromise = (promise) => {
8698
+ pending.add(promise);
8699
+ promise.finally(() => {
8700
+ pending.delete(promise);
8701
+ });
8702
+ };
8703
+ const emit = (event) => {
8704
+ try {
8705
+ const output = config.sink.emit(event);
8706
+ if (isPromiseLike(output)) {
8707
+ const task = Promise.resolve(output).then(() => void 0).catch(() => void 0);
8708
+ trackPromise(task);
8709
+ }
8710
+ } catch {
8711
+ }
8712
+ };
8713
+ const flush = async () => {
8714
+ while (pending.size > 0) {
8715
+ await Promise.allSettled([...pending]);
8716
+ }
8717
+ if (typeof config.sink.flush === "function") {
8718
+ try {
8719
+ await config.sink.flush();
8720
+ } catch {
8721
+ }
8722
+ }
8723
+ };
8724
+ return {
8725
+ includeLlmStreamEvents: config.includeLlmStreamEvents === true,
8726
+ emit,
8727
+ flush
8728
+ };
8729
+ }
8730
+ function createAgentTelemetryEmitter(params) {
8731
+ return (event) => {
8732
+ if (!params.session) {
8733
+ return;
8734
+ }
8735
+ params.session.emit({
8736
+ ...event,
8737
+ timestamp: toIsoNow(),
8738
+ runId: params.runId,
8739
+ ...params.parentRunId ? { parentRunId: params.parentRunId } : {},
8740
+ depth: params.depth,
8741
+ model: params.model
8742
+ });
8743
+ };
8744
+ }
8207
8745
  export {
8208
8746
  CHATGPT_MODEL_IDS,
8209
8747
  CODEX_APPLY_PATCH_FREEFORM_TOOL_DESCRIPTION,
@@ -8226,6 +8764,7 @@ export {
8226
8764
  appendMarkdownSourcesSection,
8227
8765
  applyPatch,
8228
8766
  configureGemini,
8767
+ configureModelConcurrency,
8229
8768
  convertGooglePartsToLlmParts,
8230
8769
  createApplyPatchTool,
8231
8770
  createCodexApplyPatchTool,
@@ -8270,6 +8809,7 @@ export {
8270
8809
  loadLocalEnv,
8271
8810
  parseJsonFromLlmText,
8272
8811
  refreshChatGptOauthToken,
8812
+ resetModelConcurrencyConfig,
8273
8813
  resolveFilesystemToolProfile,
8274
8814
  resolveFireworksModelId,
8275
8815
  runAgentLoop,