@ljoukov/llm 3.0.3 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -51,6 +51,7 @@ __export(index_exports, {
51
51
  appendMarkdownSourcesSection: () => appendMarkdownSourcesSection,
52
52
  applyPatch: () => applyPatch,
53
53
  configureGemini: () => configureGemini,
54
+ configureModelConcurrency: () => configureModelConcurrency,
54
55
  convertGooglePartsToLlmParts: () => convertGooglePartsToLlmParts,
55
56
  createApplyPatchTool: () => createApplyPatchTool,
56
57
  createCodexApplyPatchTool: () => createCodexApplyPatchTool,
@@ -95,6 +96,7 @@ __export(index_exports, {
95
96
  loadLocalEnv: () => loadLocalEnv,
96
97
  parseJsonFromLlmText: () => parseJsonFromLlmText,
97
98
  refreshChatGptOauthToken: () => refreshChatGptOauthToken,
99
+ resetModelConcurrencyConfig: () => resetModelConcurrencyConfig,
98
100
  resolveFilesystemToolProfile: () => resolveFilesystemToolProfile,
99
101
  resolveFireworksModelId: () => resolveFireworksModelId,
100
102
  runAgentLoop: () => runAgentLoop,
@@ -1689,6 +1691,123 @@ function parseEventBlock(raw) {
1689
1691
  }
1690
1692
  }
1691
1693
 
1694
// src/utils/modelConcurrency.ts
// Inclusive bounds that clampModelConcurrencyCap applies to every cap value.
var MIN_MODEL_CONCURRENCY_CAP = 1;
var MAX_MODEL_CONCURRENCY_CAP = 64;
// Returned by clampModelConcurrencyCap when it is handed a non-finite value.
var DEFAULT_MODEL_CONCURRENCY_CAP = 3;
// Built-in caps returned by resolveDefaultProviderCap when nothing is configured.
var DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP = 12;
var DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP = 4;
// Used instead of the Google default when the model id contains "preview".
var DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP = 2;
// Also the value resolveDefaultProviderCap returns for any provider other than "openai"/"google".
var DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP = 6;
// Providers iterated by normalizeModelConcurrencyConfig for provider-level settings.
var MODEL_CONCURRENCY_PROVIDERS = [
  "openai",
  "google",
  "fireworks"
];
// Active normalized configuration; reassigned by configureModelConcurrency and
// resetModelConcurrencyConfig. Starts out empty, so built-in defaults apply.
var configuredModelConcurrency = normalizeModelConcurrencyConfig({});
1708
/**
 * Converts a requested cap into an integer inside
 * [MIN_MODEL_CONCURRENCY_CAP, MAX_MODEL_CONCURRENCY_CAP].
 *
 * @param {number} value - Requested cap; fractions are floored.
 * @returns {number} The bounded integer cap, or DEFAULT_MODEL_CONCURRENCY_CAP
 *   when `value` is not a finite number.
 */
function clampModelConcurrencyCap(value) {
  if (!Number.isFinite(value)) {
    return DEFAULT_MODEL_CONCURRENCY_CAP;
  }
  const floored = Math.floor(value);
  const atLeastMin = Math.max(MIN_MODEL_CONCURRENCY_CAP, floored);
  return Math.min(MAX_MODEL_CONCURRENCY_CAP, atLeastMin);
}
1721
/**
 * Produces the lookup key used for model ids in concurrency configuration:
 * surrounding whitespace removed and all characters lower-cased.
 *
 * @param {string} modelId - Raw model id.
 * @returns {string} Trimmed, lower-cased id (may be empty).
 */
function normalizeModelIdForConfig(modelId) {
  const trimmed = modelId.trim();
  return trimmed.toLowerCase();
}
1724
/**
 * Normalizes an optional cap from user configuration.
 *
 * @param {number|undefined} value - Configured cap, if any.
 * @returns {number|undefined} The clamped cap, or `undefined` when the value
 *   is missing or not a finite number (meaning "not configured").
 */
function normalizeCap(value) {
  const isUsable = value !== void 0 && Number.isFinite(value);
  return isUsable ? clampModelConcurrencyCap(value) : void 0;
}
1730
/**
 * Builds a Map of normalized model id -> normalized cap from a plain object.
 * Entries whose id normalizes to an empty string, or whose cap is missing or
 * non-finite, are dropped. When two ids normalize to the same key, the later
 * entry wins.
 *
 * @param {Record<string, number>|null|undefined} caps - Raw per-model caps.
 * @returns {Map<string, number>} A new Map (empty when `caps` is falsy).
 */
function normalizeModelCapMap(caps) {
  const result = /* @__PURE__ */ new Map();
  if (!caps) {
    return result;
  }
  Object.entries(caps).forEach(([rawModelId, rawCap]) => {
    const key = normalizeModelIdForConfig(rawModelId);
    if (!key) {
      return;
    }
    const cap = normalizeCap(rawCap);
    if (cap !== void 0) {
      result.set(key, cap);
    }
  });
  return result;
}
1748
/**
 * Converts user-supplied concurrency configuration into its normalized form.
 *
 * @param {object|null|undefined} config - Raw configuration with optional
 *   `globalCap`, `providerCaps`, `modelCaps` and `providerModelCaps`.
 *   `null`/`undefined` is treated as an empty configuration.
 * @returns {{globalCap: (number|undefined), providerCaps: object,
 *   modelCaps: Map<string, number>, providerModelCaps: object}}
 *   `providerCaps` only contains providers with a usable cap;
 *   `providerModelCaps` always has one Map per entry of
 *   MODEL_CONCURRENCY_PROVIDERS.
 */
function normalizeModelConcurrencyConfig(config) {
  // The default parameter on configureModelConcurrency only covers
  // `undefined`, so an explicit `null` must be tolerated here.
  const source = config ?? {};
  const providerCaps = {};
  const providerModelCaps = {};
  for (const provider of MODEL_CONCURRENCY_PROVIDERS) {
    const providerCap = normalizeCap(source.providerCaps?.[provider]);
    if (providerCap !== void 0) {
      providerCaps[provider] = providerCap;
    }
    // normalizeModelCapMap already returns a fresh Map, so no copy is needed.
    providerModelCaps[provider] = normalizeModelCapMap(source.providerModelCaps?.[provider]);
  }
  return {
    globalCap: normalizeCap(source.globalCap),
    providerCaps,
    modelCaps: normalizeModelCapMap(source.modelCaps),
    providerModelCaps
  };
}
1771
/**
 * Returns the built-in concurrency cap used when nothing is configured.
 *
 * @param {string} provider - Provider name.
 * @param {string} [modelId] - Model id; only consulted for "google".
 * @returns {number} The OpenAI default for "openai"; for "google" the preview
 *   default when the id contains "preview", otherwise the Google default;
 *   the Fireworks default for every other provider value.
 */
function resolveDefaultProviderCap(provider, modelId) {
  switch (provider) {
    case "openai":
      return DEFAULT_OPENAI_MODEL_CONCURRENCY_CAP;
    case "google": {
      if (modelId?.includes("preview")) {
        return DEFAULT_GOOGLE_PREVIEW_MODEL_CONCURRENCY_CAP;
      }
      return DEFAULT_GOOGLE_MODEL_CONCURRENCY_CAP;
    }
    default:
      return DEFAULT_FIREWORKS_MODEL_CONCURRENCY_CAP;
  }
}
1780
/**
 * Replaces the module-wide concurrency configuration with the normalized form
 * of `config`. The previous configuration is discarded, not merged.
 *
 * @param {object} [config={}] - Raw concurrency configuration.
 * @returns {void}
 */
function configureModelConcurrency(config = {}) {
  const normalized = normalizeModelConcurrencyConfig(config);
  configuredModelConcurrency = normalized;
}
1783
/**
 * Restores the module-wide concurrency configuration to the normalized empty
 * configuration, so only built-in defaults apply.
 *
 * @returns {void}
 */
function resetModelConcurrencyConfig() {
  const emptyConfig = normalizeModelConcurrencyConfig({});
  configuredModelConcurrency = emptyConfig;
}
1786
/**
 * Resolves the concurrency cap for a provider/model pair.
 *
 * Precedence, first defined value wins:
 *   1. provider + model cap   2. model cap   3. provider cap
 *   4. global cap             5. `options.defaultCap`
 *   6. built-in provider default (resolveDefaultProviderCap)
 *
 * @param {object} options
 * @param {string} options.provider - Provider name. Names outside the known
 *   provider list simply have no configured caps and fall through to later
 *   precedence levels.
 * @param {string} [options.modelId] - Model id; normalized before lookup.
 * @param {object} [options.config] - Raw configuration to use instead of the
 *   module-wide one.
 * @param {number} [options.defaultCap] - Caller-supplied fallback cap.
 * @returns {number} The resolved cap.
 */
function resolveModelConcurrencyCap(options) {
  const modelId = options.modelId ? normalizeModelIdForConfig(options.modelId) : void 0;
  const config = options.config ? normalizeModelConcurrencyConfig(options.config) : configuredModelConcurrency;
  // Own-property lookup: an unknown provider yields `undefined` instead of
  // throwing on `.get`, and names such as "constructor" cannot resolve to
  // inherited Object.prototype members.
  const ownValue = (record, key) => Object.prototype.hasOwnProperty.call(record, key) ? record[key] : void 0;
  const candidates = [
    modelId ? ownValue(config.providerModelCaps, options.provider)?.get(modelId) : void 0,
    modelId ? config.modelCaps.get(modelId) : void 0,
    ownValue(config.providerCaps, options.provider),
    config.globalCap,
    normalizeCap(options.defaultCap)
  ];
  const configuredCap = candidates.find((candidate) => candidate !== void 0);
  return configuredCap ?? resolveDefaultProviderCap(options.provider, modelId);
}
1810
+
1692
1811
  // src/utils/scheduler.ts
1693
1812
  function sleep(ms) {
1694
1813
  return new Promise((resolve) => {
@@ -1704,13 +1823,72 @@ function toError(value) {
1704
1823
  }
1705
1824
  return new Error("Unknown error");
1706
1825
  }
1826
/**
 * Extracts an HTTP-like status code from an error-shaped value.
 *
 * Checks `status`, then `statusCode` (finite numbers, or strings that parse
 * as base-10 integers), then a numeric `code`. Non-finite numbers such as NaN
 * are skipped so that a later field can still supply the status.
 *
 * @param {unknown} error - Thrown value to inspect.
 * @returns {number|undefined} The status code, or `undefined` if none found.
 */
function getStatusCode(error) {
  if (!error || typeof error !== "object") {
    return void 0;
  }
  for (const candidate of [error.status, error.statusCode]) {
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      return candidate;
    }
    if (typeof candidate === "string") {
      const parsed = Number.parseInt(candidate, 10);
      if (Number.isFinite(parsed)) {
        return parsed;
      }
    }
  }
  if (typeof error.code === "number" && Number.isFinite(error.code)) {
    return error.code;
  }
  return void 0;
}
1848
/**
 * Produces lower-cased text describing an error, for substring matching.
 *
 * @param {unknown} error - Thrown value to inspect.
 * @returns {string} For Error instances, the message; for strings, the string
 *   itself; for other objects, the string-typed `code` and `message` joined by
 *   a space and trimmed; otherwise "". Always lower-cased.
 */
function getErrorText(error) {
  if (error instanceof Error) {
    return error.message.toLowerCase();
  }
  if (typeof error === "string") {
    return error.toLowerCase();
  }
  if (!error || typeof error !== "object") {
    return "";
  }
  const parts = [error.code, error.message].map((part) => typeof part === "string" ? part : "");
  return parts.join(" ").trim().toLowerCase();
}
1863
/**
 * Default classifier for "provider is overloaded / rate limiting" errors.
 *
 * @param {unknown} error - Thrown value to classify.
 * @returns {boolean} True when the status code is 429, 503 or 529, or when
 *   the error text contains one of the known rate-limit/overload phrases.
 */
function defaultIsOverloadError(error) {
  const overloadStatuses = [429, 503, 529];
  if (overloadStatuses.includes(getStatusCode(error))) {
    return true;
  }
  const text = getErrorText(error);
  const overloadPhrases = [
    "rate limit",
    "too many requests",
    "resource exhausted",
    "resource_exhausted",
    "overload"
  ];
  // Empty text matches none of the phrases, so this yields false for it.
  return overloadPhrases.some((phrase) => text.includes(phrase));
}
1707
1874
  function createCallScheduler(options = {}) {
1708
1875
  const maxParallelRequests = Math.max(1, Math.floor(options.maxParallelRequests ?? 3));
1876
+ const initialParallelRequests = Math.min(
1877
+ maxParallelRequests,
1878
+ Math.max(1, Math.floor(options.initialParallelRequests ?? Math.min(3, maxParallelRequests)))
1879
+ );
1880
+ const increaseAfterConsecutiveSuccesses = Math.max(
1881
+ 1,
1882
+ Math.floor(options.increaseAfterConsecutiveSuccesses ?? 8)
1883
+ );
1709
1884
  const minIntervalBetweenStartMs = Math.max(0, Math.floor(options.minIntervalBetweenStartMs ?? 0));
1710
1885
  const startJitterMs = Math.max(0, Math.floor(options.startJitterMs ?? 0));
1711
1886
  const retryPolicy = options.retry;
1887
+ const isOverloadError2 = options.isOverloadError ?? defaultIsOverloadError;
1712
1888
  let activeCount = 0;
1713
1889
  let lastStartTime = 0;
1890
+ let currentParallelLimit = initialParallelRequests;
1891
+ let consecutiveSuccesses = 0;
1714
1892
  let startSpacingChain = Promise.resolve();
1715
1893
  const queue = [];
1716
1894
  async function applyStartSpacing() {
@@ -1736,11 +1914,23 @@ function createCallScheduler(options = {}) {
1736
1914
  release?.();
1737
1915
  }
1738
1916
  }
1739
- async function attemptWithRetries(fn, attempt) {
1917
+ async function attemptWithRetries(fn, attempt, state) {
1740
1918
  try {
1919
+ const spacingStartedAtMs = Date.now();
1741
1920
  await applyStartSpacing();
1921
+ const callStartedAtMs = Date.now();
1922
+ state.schedulerDelayMs += Math.max(0, callStartedAtMs - spacingStartedAtMs);
1923
+ if (state.startedAtMs === void 0) {
1924
+ state.startedAtMs = callStartedAtMs;
1925
+ }
1926
+ state.attempts = Math.max(state.attempts, attempt);
1742
1927
  return await fn();
1743
1928
  } catch (error) {
1929
+ if (isOverloadError2(error)) {
1930
+ state.overloadCount += 1;
1931
+ consecutiveSuccesses = 0;
1932
+ currentParallelLimit = Math.max(1, Math.ceil(currentParallelLimit / 2));
1933
+ }
1744
1934
  const err = toError(error);
1745
1935
  if (!retryPolicy || attempt >= retryPolicy.maxAttempts) {
1746
1936
  throw err;
@@ -1754,13 +1944,14 @@ function createCallScheduler(options = {}) {
1754
1944
  }
1755
1945
  const normalizedDelay = Math.max(0, delay);
1756
1946
  if (normalizedDelay > 0) {
1947
+ state.retryDelayMs += normalizedDelay;
1757
1948
  await sleep(normalizedDelay);
1758
1949
  }
1759
- return attemptWithRetries(fn, attempt + 1);
1950
+ return attemptWithRetries(fn, attempt + 1, state);
1760
1951
  }
1761
1952
  }
1762
1953
  function drainQueue() {
1763
- while (activeCount < maxParallelRequests && queue.length > 0) {
1954
+ while (activeCount < currentParallelLimit && queue.length > 0) {
1764
1955
  const task = queue.shift();
1765
1956
  if (!task) {
1766
1957
  continue;
@@ -1769,15 +1960,49 @@ function createCallScheduler(options = {}) {
1769
1960
  void task();
1770
1961
  }
1771
1962
  }
1772
- function run(fn) {
1963
+ function run(fn, runOptions = {}) {
1773
1964
  return new Promise((resolve, reject) => {
1965
+ const enqueuedAtMs = Date.now();
1774
1966
  const job = async () => {
1967
+ const dequeuedAtMs = Date.now();
1968
+ const state = {
1969
+ enqueuedAtMs,
1970
+ dequeuedAtMs,
1971
+ schedulerDelayMs: 0,
1972
+ retryDelayMs: 0,
1973
+ attempts: 0,
1974
+ overloadCount: 0
1975
+ };
1775
1976
  try {
1776
- const result = await attemptWithRetries(fn, 1);
1977
+ const result = await attemptWithRetries(fn, 1, state);
1978
+ state.completedAtMs = Date.now();
1979
+ consecutiveSuccesses += 1;
1980
+ if (currentParallelLimit < maxParallelRequests && consecutiveSuccesses >= increaseAfterConsecutiveSuccesses) {
1981
+ currentParallelLimit += 1;
1982
+ consecutiveSuccesses = 0;
1983
+ }
1777
1984
  resolve(result);
1778
1985
  } catch (error) {
1986
+ state.completedAtMs = Date.now();
1779
1987
  reject(toError(error));
1780
1988
  } finally {
1989
+ const startedAtMs = state.startedAtMs ?? state.dequeuedAtMs;
1990
+ const completedAtMs = state.completedAtMs ?? Date.now();
1991
+ const metrics = {
1992
+ enqueuedAtMs: state.enqueuedAtMs,
1993
+ dequeuedAtMs: state.dequeuedAtMs,
1994
+ startedAtMs,
1995
+ completedAtMs,
1996
+ queueWaitMs: Math.max(0, state.dequeuedAtMs - state.enqueuedAtMs),
1997
+ schedulerDelayMs: Math.max(0, state.schedulerDelayMs),
1998
+ retryDelayMs: Math.max(0, state.retryDelayMs),
1999
+ attempts: Math.max(1, state.attempts),
2000
+ overloadCount: Math.max(0, state.overloadCount)
2001
+ };
2002
+ try {
2003
+ runOptions.onSettled?.(metrics);
2004
+ } catch {
2005
+ }
1781
2006
  activeCount -= 1;
1782
2007
  queueMicrotask(drainQueue);
1783
2008
  }
@@ -1863,13 +2088,28 @@ function getFireworksClient() {
1863
2088
  }
1864
2089
 
1865
2090
  // src/fireworks/calls.ts
1866
- var scheduler = createCallScheduler({
1867
- maxParallelRequests: 3,
1868
- minIntervalBetweenStartMs: 200,
1869
- startJitterMs: 200
1870
- });
1871
- async function runFireworksCall(fn) {
1872
- return scheduler.run(async () => fn(getFireworksClient()));
2091
// Cache key used when no (non-blank) model id is supplied.
var DEFAULT_SCHEDULER_KEY = "__default__";
// One call scheduler per trimmed Fireworks model id.
var schedulerByModel = /* @__PURE__ */ new Map();
/**
 * Returns the call scheduler for a Fireworks model, creating and caching it
 * on first use. The concurrency cap is resolved only at creation time, so a
 * cached scheduler keeps the cap it was created with.
 *
 * @param {string} [modelId] - Model id; trimmed (case preserved) to form the cache key.
 * @returns {object} The scheduler returned by createCallScheduler.
 */
function getSchedulerForModel(modelId) {
  const trimmedModelId = modelId?.trim();
  const cacheKey = trimmedModelId || DEFAULT_SCHEDULER_KEY;
  let scheduler = schedulerByModel.get(cacheKey);
  if (!scheduler) {
    const maxParallelRequests = resolveModelConcurrencyCap({
      provider: "fireworks",
      modelId: trimmedModelId
    });
    scheduler = createCallScheduler({
      maxParallelRequests,
      minIntervalBetweenStartMs: 200,
      startJitterMs: 200
    });
    schedulerByModel.set(cacheKey, scheduler);
  }
  return scheduler;
}
2111
/**
 * Runs `fn` with the Fireworks client through the scheduler for `modelId`.
 *
 * @param {(client: object) => any} fn - Work to perform with the client.
 * @param {string} [modelId] - Selects the per-model scheduler.
 * @param {object} [runOptions] - Passed through to the scheduler's `run`.
 * @returns {Promise<any>} Whatever the scheduled call resolves to.
 */
async function runFireworksCall(fn, modelId, runOptions) {
  const scheduler = getSchedulerForModel(modelId);
  const task = async () => fn(getFireworksClient());
  return scheduler.run(task, runOptions);
}
1874
2114
 
1875
2115
  // src/fireworks/models.ts
@@ -2205,6 +2445,18 @@ function shouldRetry(error) {
2205
2445
  }
2206
2446
  return false;
2207
2447
  }
2448
/**
 * Classifies Gemini errors that indicate rate limiting or provider overload.
 * Passed to the Gemini call scheduler as its `isOverloadError` option.
 *
 * @param {unknown} error - Thrown value to classify.
 * @returns {boolean} True when the status is 429, 503 or 529, the error
 *   reason is in RATE_LIMIT_REASONS, or the message contains a known
 *   rate-limit/overload phrase.
 */
function isOverloadError(error) {
  const status = getStatus(error);
  if (status === 429 || status === 503 || status === 529) {
    return true;
  }
  const reason = getErrorReason(error);
  if (reason && RATE_LIMIT_REASONS.has(reason)) {
    return true;
  }
  const message = getErrorMessage(error).toLowerCase();
  // Same phrase list as defaultIsOverloadError, including "overload", so an
  // overload message without a usable status code is still recognised.
  const overloadPhrases = [
    "rate limit",
    "too many requests",
    "resource exhausted",
    "resource_exhausted",
    "overload"
  ];
  return overloadPhrases.some((phrase) => message.includes(phrase));
}
2208
2460
  function retryDelayMs(attempt) {
2209
2461
  const baseRetryDelayMs = 500;
2210
2462
  const maxRetryDelayMs = 4e3;
@@ -2212,23 +2464,39 @@ function retryDelayMs(attempt) {
2212
2464
  const jitter = Math.floor(Math.random() * 200);
2213
2465
  return base + jitter;
2214
2466
  }
2215
- var scheduler2 = createCallScheduler({
2216
- maxParallelRequests: 3,
2217
- minIntervalBetweenStartMs: 200,
2218
- startJitterMs: 200,
2219
- retry: {
2220
- maxAttempts: 3,
2221
- getDelayMs: (attempt, error) => {
2222
- if (!shouldRetry(error)) {
2223
- return null;
2467
// Cache key used when no (non-blank) model id is supplied.
var DEFAULT_SCHEDULER_KEY2 = "__default__";
// One call scheduler per trimmed Gemini model id.
var schedulerByModel2 = /* @__PURE__ */ new Map();
/**
 * Returns the call scheduler for a Gemini model, creating and caching it on
 * first use. The concurrency cap is resolved only at creation time, so a
 * cached scheduler keeps the cap it was created with.
 *
 * @param {string} [modelId] - Model id; trimmed (case preserved) to form the cache key.
 * @returns {object} The scheduler returned by createCallScheduler.
 */
function getSchedulerForModel2(modelId) {
  const trimmedModelId = modelId?.trim();
  const cacheKey = trimmedModelId || DEFAULT_SCHEDULER_KEY2;
  const cached = schedulerByModel2.get(cacheKey);
  if (cached) {
    return cached;
  }
  const retry = {
    maxAttempts: 3,
    // A null delay is returned for errors shouldRetry rejects; otherwise the
    // server-hinted delay is preferred over the computed backoff.
    getDelayMs: (attempt, error) => {
      if (shouldRetry(error)) {
        return getRetryAfterMs(error) ?? retryDelayMs(attempt);
      }
      return null;
    }
  };
  const maxParallelRequests = resolveModelConcurrencyCap({
    provider: "google",
    modelId: trimmedModelId
  });
  const scheduler = createCallScheduler({
    maxParallelRequests,
    minIntervalBetweenStartMs: 200,
    startJitterMs: 200,
    isOverloadError,
    retry
  });
  schedulerByModel2.set(cacheKey, scheduler);
  return scheduler;
}
2498
/**
 * Runs `fn` with the Gemini client through the scheduler for `modelId`.
 * The client is obtained (awaited) inside the scheduled task.
 *
 * @param {(client: object) => any} fn - Work to perform with the client.
 * @param {string} [modelId] - Selects the per-model scheduler.
 * @param {object} [runOptions] - Passed through to the scheduler's `run`.
 * @returns {Promise<any>} Whatever the scheduled call resolves to.
 */
async function runGeminiCall(fn, modelId, runOptions) {
  const scheduler = getSchedulerForModel2(modelId);
  const task = async () => fn(await getGeminiClient());
  return scheduler.run(task, runOptions);
}
2233
2501
 
2234
2502
  // src/openai/client.ts
@@ -2389,13 +2657,28 @@ function getOpenAiClient() {
2389
2657
 
2390
2658
  // src/openai/calls.ts
2391
2659
  var DEFAULT_OPENAI_REASONING_EFFORT = "medium";
2392
- var scheduler3 = createCallScheduler({
2393
- maxParallelRequests: 3,
2394
- minIntervalBetweenStartMs: 200,
2395
- startJitterMs: 200
2396
- });
2397
- async function runOpenAiCall(fn) {
2398
- return scheduler3.run(async () => fn(getOpenAiClient()));
2660
// Cache key used when no (non-blank) model id is supplied.
var DEFAULT_SCHEDULER_KEY3 = "__default__";
// One call scheduler per trimmed OpenAI model id.
var schedulerByModel3 = /* @__PURE__ */ new Map();
/**
 * Returns the call scheduler for an OpenAI model, creating and caching it on
 * first use. The concurrency cap is resolved only at creation time, so a
 * cached scheduler keeps the cap it was created with.
 *
 * @param {string} [modelId] - Model id; trimmed (case preserved) to form the cache key.
 * @returns {object} The scheduler returned by createCallScheduler.
 */
function getSchedulerForModel3(modelId) {
  const trimmedModelId = modelId?.trim();
  const cacheKey = trimmedModelId || DEFAULT_SCHEDULER_KEY3;
  let scheduler = schedulerByModel3.get(cacheKey);
  if (!scheduler) {
    const maxParallelRequests = resolveModelConcurrencyCap({
      provider: "openai",
      modelId: trimmedModelId
    });
    scheduler = createCallScheduler({
      maxParallelRequests,
      minIntervalBetweenStartMs: 200,
      startJitterMs: 200
    });
    schedulerByModel3.set(cacheKey, scheduler);
  }
  return scheduler;
}
2680
/**
 * Runs `fn` with the OpenAI client through the scheduler for `modelId`.
 *
 * @param {(client: object) => any} fn - Work to perform with the client.
 * @param {string} [modelId] - Selects the per-model scheduler.
 * @param {object} [runOptions] - Passed through to the scheduler's `run`.
 * @returns {Promise<any>} Whatever the scheduled call resolves to.
 */
async function runOpenAiCall(fn, modelId, runOptions) {
  const scheduler = getSchedulerForModel3(modelId);
  const task = async () => fn(getOpenAiClient());
  return scheduler.run(task, runOptions);
}
2400
2683
 
2401
2684
  // src/openai/models.ts
@@ -2849,9 +3132,9 @@ function isRetryableChatGptTransportError(error) {
2849
3132
  return false;
2850
3133
  }
2851
3134
  const message = error.message.toLowerCase();
2852
- return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network");
3135
+ return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network") || message.includes("responses websocket");
2853
3136
  }
2854
- async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 2) {
3137
+ async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 3) {
2855
3138
  let attempt = 1;
2856
3139
  while (true) {
2857
3140
  try {
@@ -3756,77 +4039,153 @@ function buildToolErrorOutput(message, issues) {
3756
4039
  }
3757
4040
  return output;
3758
4041
  }
4042
// Tool name (compared against the lower-cased call name in runToolLoop) whose
// call durations are additionally accumulated into the step's waitToolMs.
var SUBAGENT_WAIT_TOOL_NAME = "wait";
4043
/**
 * Formats an epoch-millisecond timestamp as an ISO 8601 string.
 *
 * @param {number} ms - Milliseconds since the Unix epoch.
 * @returns {string} The `Date.prototype.toISOString` representation.
 * @throws {RangeError} When `ms` does not describe a valid date.
 */
function toIsoTimestamp(ms) {
  const date = new Date(ms);
  return date.toISOString();
}
4046
/**
 * Reads a tool result's duration defensively.
 *
 * @param {{durationMs?: unknown}} result - Tool call result.
 * @returns {number} `result.durationMs` floored at 0 when it is a finite
 *   number; 0 otherwise.
 */
function toToolResultDuration(result) {
  const { durationMs } = result;
  if (typeof durationMs !== "number" || !Number.isFinite(durationMs)) {
    return 0;
  }
  return Math.max(0, durationMs);
}
4049
/**
 * Maps scheduler metrics onto the field names used in step timing.
 *
 * @param {object} [metrics] - Metrics reported by a scheduler `onSettled`
 *   callback, if any.
 * @returns {object} With metrics: delays floored at 0, attempts floored at 1,
 *   and `modelCallStartedAtMs` copied from `startedAtMs`. Without metrics:
 *   zero delays and one attempt, with no `modelCallStartedAtMs` key.
 */
function schedulerMetricsOrDefault(metrics) {
  const nonNegative = (value) => Math.max(0, value);
  if (metrics) {
    return {
      queueWaitMs: nonNegative(metrics.queueWaitMs),
      schedulerDelayMs: nonNegative(metrics.schedulerDelayMs),
      providerRetryDelayMs: nonNegative(metrics.retryDelayMs),
      providerAttempts: Math.max(1, metrics.attempts),
      modelCallStartedAtMs: metrics.startedAtMs
    };
  }
  return {
    queueWaitMs: 0,
    schedulerDelayMs: 0,
    providerRetryDelayMs: 0,
    providerAttempts: 1
  };
}
4066
/**
 * Assembles the timing record for one tool-loop step.
 *
 * The model call is taken to start at the scheduler-reported start time, or
 * at the step start when no scheduler metrics exist. The first-event time is
 * never earlier than that start; when no model event was observed, the model
 * completion time is used, so all of the call counts as connection setup.
 *
 * @param {object} params
 * @param {number} params.stepStartedAtMs
 * @param {number} params.stepCompletedAtMs
 * @param {number} params.modelCompletedAtMs
 * @param {number} [params.firstModelEventAtMs]
 * @param {object} [params.schedulerMetrics]
 * @param {number} params.toolExecutionMs
 * @param {number} params.waitToolMs
 * @returns {object} Timing record; every duration is floored at 0.
 */
function buildStepTiming(params) {
  const { stepStartedAtMs, stepCompletedAtMs, modelCompletedAtMs, firstModelEventAtMs } = params;
  const {
    modelCallStartedAtMs: scheduledStartAtMs,
    queueWaitMs,
    schedulerDelayMs,
    providerRetryDelayMs,
    providerAttempts
  } = schedulerMetricsOrDefault(params.schedulerMetrics);
  const callStartedAtMs = scheduledStartAtMs ?? stepStartedAtMs;
  let firstEventAtMs = modelCompletedAtMs;
  if (firstModelEventAtMs !== void 0) {
    firstEventAtMs = Math.max(callStartedAtMs, firstModelEventAtMs);
  }
  return {
    startedAt: toIsoTimestamp(stepStartedAtMs),
    completedAt: toIsoTimestamp(stepCompletedAtMs),
    totalMs: Math.max(0, stepCompletedAtMs - stepStartedAtMs),
    queueWaitMs,
    connectionSetupMs: Math.max(0, firstEventAtMs - callStartedAtMs),
    activeGenerationMs: Math.max(0, modelCompletedAtMs - firstEventAtMs),
    toolExecutionMs: Math.max(0, params.toolExecutionMs),
    waitToolMs: Math.max(0, params.waitToolMs),
    schedulerDelayMs,
    providerRetryDelayMs,
    providerAttempts
  };
}
4087
/**
 * Pulls the spawn startup latency out of a tool output payload.
 *
 * @param {unknown} outputPayload - Tool output to inspect.
 * @returns {{spawnStartupLatencyMs: number}|undefined} The latency floored at
 *   0 when the payload is an object whose `notification` is "spawned" and
 *   whose `agent.spawn_startup_latency_ms` is a finite number; otherwise
 *   `undefined`.
 */
function extractSpawnStartupMetrics(outputPayload) {
  const isObjectLike = (value) => Boolean(value) && typeof value === "object";
  if (!isObjectLike(outputPayload)) {
    return void 0;
  }
  // A non-string notification can never strictly equal "spawned".
  if (outputPayload.notification !== "spawned") {
    return void 0;
  }
  const agent = outputPayload.agent;
  if (!isObjectLike(agent)) {
    return void 0;
  }
  const latencyMs = agent.spawn_startup_latency_ms;
  const hasLatency = typeof latencyMs === "number" && Number.isFinite(latencyMs);
  return hasLatency ? { spawnStartupLatencyMs: Math.max(0, latencyMs) } : void 0;
}
3759
4109
  async function executeToolCall(params) {
3760
4110
  const { callKind, toolName, tool: tool2, rawInput, parseError } = params;
3761
- if (!tool2) {
3762
- const message = `Unknown tool: ${toolName}`;
4111
+ const startedAtMs = Date.now();
4112
+ const finalize = (base, outputPayload, metrics) => {
4113
+ const completedAtMs = Date.now();
3763
4114
  return {
3764
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3765
- outputPayload: buildToolErrorOutput(message)
4115
+ result: {
4116
+ ...base,
4117
+ startedAt: toIsoTimestamp(startedAtMs),
4118
+ completedAt: toIsoTimestamp(completedAtMs),
4119
+ durationMs: Math.max(0, completedAtMs - startedAtMs),
4120
+ ...metrics ? { metrics } : {}
4121
+ },
4122
+ outputPayload
3766
4123
  };
4124
+ };
4125
+ if (!tool2) {
4126
+ const message = `Unknown tool: ${toolName}`;
4127
+ const outputPayload = buildToolErrorOutput(message);
4128
+ return finalize(
4129
+ { toolName, input: rawInput, output: outputPayload, error: message },
4130
+ outputPayload
4131
+ );
3767
4132
  }
3768
4133
  if (callKind === "custom") {
3769
4134
  if (!isCustomTool(tool2)) {
3770
4135
  const message = `Tool ${toolName} was called as custom_tool_call but is declared as function.`;
3771
4136
  const outputPayload = buildToolErrorOutput(message);
3772
- return {
3773
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4137
+ return finalize(
4138
+ { toolName, input: rawInput, output: outputPayload, error: message },
3774
4139
  outputPayload
3775
- };
4140
+ );
3776
4141
  }
3777
4142
  const input = typeof rawInput === "string" ? rawInput : String(rawInput ?? "");
3778
4143
  try {
3779
4144
  const output = await tool2.execute(input);
3780
- return {
3781
- result: { toolName, input, output },
3782
- outputPayload: output
3783
- };
4145
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4146
+ return finalize({ toolName, input, output }, output, metrics);
3784
4147
  } catch (error) {
3785
4148
  const message = error instanceof Error ? error.message : String(error);
3786
4149
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
3787
- return {
3788
- result: { toolName, input, output: outputPayload, error: message },
3789
- outputPayload
3790
- };
4150
+ return finalize({ toolName, input, output: outputPayload, error: message }, outputPayload);
3791
4151
  }
3792
4152
  }
3793
4153
  if (isCustomTool(tool2)) {
3794
4154
  const message = `Tool ${toolName} was called as function_call but is declared as custom.`;
3795
4155
  const outputPayload = buildToolErrorOutput(message);
3796
- return {
3797
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4156
+ return finalize(
4157
+ { toolName, input: rawInput, output: outputPayload, error: message },
3798
4158
  outputPayload
3799
- };
4159
+ );
3800
4160
  }
3801
4161
  if (parseError) {
3802
4162
  const message = `Invalid JSON for tool ${toolName}: ${parseError}`;
3803
- return {
3804
- result: { toolName, input: rawInput, output: { error: message }, error: message },
3805
- outputPayload: buildToolErrorOutput(message)
3806
- };
4163
+ const outputPayload = buildToolErrorOutput(message);
4164
+ return finalize(
4165
+ { toolName, input: rawInput, output: outputPayload, error: message },
4166
+ outputPayload
4167
+ );
3807
4168
  }
3808
4169
  const parsed = tool2.inputSchema.safeParse(rawInput);
3809
4170
  if (!parsed.success) {
3810
4171
  const message = `Invalid tool arguments for ${toolName}: ${formatZodIssues(parsed.error.issues)}`;
3811
4172
  const outputPayload = buildToolErrorOutput(message, parsed.error.issues);
3812
- return {
3813
- result: { toolName, input: rawInput, output: outputPayload, error: message },
4173
+ return finalize(
4174
+ { toolName, input: rawInput, output: outputPayload, error: message },
3814
4175
  outputPayload
3815
- };
4176
+ );
3816
4177
  }
3817
4178
  try {
3818
4179
  const output = await tool2.execute(parsed.data);
3819
- return {
3820
- result: { toolName, input: parsed.data, output },
3821
- outputPayload: output
3822
- };
4180
+ const metrics = toolName === "spawn_agent" ? extractSpawnStartupMetrics(output) : void 0;
4181
+ return finalize({ toolName, input: parsed.data, output }, output, metrics);
3823
4182
  } catch (error) {
3824
4183
  const message = error instanceof Error ? error.message : String(error);
3825
4184
  const outputPayload = buildToolErrorOutput(`Tool ${toolName} failed: ${message}`);
3826
- return {
3827
- result: { toolName, input: parsed.data, output: outputPayload, error: message },
4185
+ return finalize(
4186
+ { toolName, input: parsed.data, output: outputPayload, error: message },
3828
4187
  outputPayload
3829
- };
4188
+ );
3830
4189
  }
3831
4190
  }
3832
4191
  function buildToolLogId(turn, toolIndex) {
@@ -4157,7 +4516,7 @@ async function runTextCall(params) {
4157
4516
  }
4158
4517
  }
4159
4518
  }
4160
- });
4519
+ }, modelForProvider);
4161
4520
  } else if (provider === "chatgpt") {
4162
4521
  const chatGptInput = toChatGptInput(contents);
4163
4522
  const reasoningEffort = resolveOpenAiReasoningEffort(
@@ -4252,7 +4611,7 @@ async function runTextCall(params) {
4252
4611
  pushDelta("response", textOutput);
4253
4612
  }
4254
4613
  latestUsage = extractFireworksUsageTokens(response.usage);
4255
- });
4614
+ }, modelForProvider);
4256
4615
  } else {
4257
4616
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
4258
4617
  const config = {
@@ -4320,7 +4679,7 @@ async function runTextCall(params) {
4320
4679
  }
4321
4680
  }
4322
4681
  grounding = latestGrounding;
4323
- });
4682
+ }, modelForProvider);
4324
4683
  }
4325
4684
  const mergedParts = mergeConsecutiveTextParts(responseParts);
4326
4685
  const content = mergedParts.length > 0 ? { role: responseRole ?? "assistant", parts: mergedParts } : void 0;
@@ -4691,6 +5050,9 @@ async function runToolLoop(request) {
4691
5050
  let input = toOpenAiInput(contents);
4692
5051
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
4693
5052
  const turn = stepIndex + 1;
5053
+ const stepStartedAtMs = Date.now();
5054
+ let firstModelEventAtMs;
5055
+ let schedulerMetrics;
4694
5056
  const abortController = new AbortController();
4695
5057
  if (request.signal) {
4696
5058
  if (request.signal.aborted) {
@@ -4709,45 +5071,59 @@ async function runToolLoop(request) {
4709
5071
  const emitEvent = (ev) => {
4710
5072
  onEvent?.(ev);
4711
5073
  };
4712
- const finalResponse = await runOpenAiCall(async (client) => {
4713
- const stream = client.responses.stream(
4714
- {
4715
- model: providerInfo.model,
4716
- input,
4717
- ...previousResponseId ? { previous_response_id: previousResponseId } : {},
4718
- ...openAiTools.length > 0 ? { tools: openAiTools } : {},
4719
- ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
4720
- reasoning,
4721
- text: textConfig,
4722
- include: ["reasoning.encrypted_content"]
4723
- },
4724
- { signal: abortController.signal }
4725
- );
4726
- for await (const event of stream) {
4727
- switch (event.type) {
4728
- case "response.output_text.delta":
4729
- emitEvent({
4730
- type: "delta",
4731
- channel: "response",
4732
- text: typeof event.delta === "string" ? event.delta : ""
4733
- });
4734
- break;
4735
- case "response.reasoning_summary_text.delta":
4736
- emitEvent({
4737
- type: "delta",
4738
- channel: "thought",
4739
- text: typeof event.delta === "string" ? event.delta : ""
4740
- });
4741
- break;
4742
- case "response.refusal.delta":
4743
- emitEvent({ type: "blocked" });
4744
- break;
4745
- default:
4746
- break;
5074
+ const markFirstModelEvent = () => {
5075
+ if (firstModelEventAtMs === void 0) {
5076
+ firstModelEventAtMs = Date.now();
5077
+ }
5078
+ };
5079
+ const finalResponse = await runOpenAiCall(
5080
+ async (client) => {
5081
+ const stream = client.responses.stream(
5082
+ {
5083
+ model: providerInfo.model,
5084
+ input,
5085
+ ...previousResponseId ? { previous_response_id: previousResponseId } : {},
5086
+ ...openAiTools.length > 0 ? { tools: openAiTools } : {},
5087
+ ...openAiTools.length > 0 ? { parallel_tool_calls: true } : {},
5088
+ reasoning,
5089
+ text: textConfig,
5090
+ include: ["reasoning.encrypted_content"]
5091
+ },
5092
+ { signal: abortController.signal }
5093
+ );
5094
+ for await (const event of stream) {
5095
+ markFirstModelEvent();
5096
+ switch (event.type) {
5097
+ case "response.output_text.delta":
5098
+ emitEvent({
5099
+ type: "delta",
5100
+ channel: "response",
5101
+ text: typeof event.delta === "string" ? event.delta : ""
5102
+ });
5103
+ break;
5104
+ case "response.reasoning_summary_text.delta":
5105
+ emitEvent({
5106
+ type: "delta",
5107
+ channel: "thought",
5108
+ text: typeof event.delta === "string" ? event.delta : ""
5109
+ });
5110
+ break;
5111
+ case "response.refusal.delta":
5112
+ emitEvent({ type: "blocked" });
5113
+ break;
5114
+ default:
5115
+ break;
5116
+ }
5117
+ }
5118
+ return await stream.finalResponse();
5119
+ },
5120
+ providerInfo.model,
5121
+ {
5122
+ onSettled: (metrics) => {
5123
+ schedulerMetrics = metrics;
4747
5124
  }
4748
5125
  }
4749
- return await stream.finalResponse();
4750
- });
5126
+ );
4751
5127
  modelVersion = typeof finalResponse.model === "string" ? finalResponse.model : request.model;
4752
5128
  emitEvent({ type: "model", modelVersion });
4753
5129
  if (finalResponse.error) {
@@ -4757,6 +5133,7 @@ async function runToolLoop(request) {
4757
5133
  usageTokens = extractOpenAiUsageTokens(finalResponse.usage);
4758
5134
  const responseText = extractOpenAiResponseParts(finalResponse).parts.filter((p) => p.type === "text" && p.thought !== true).map((p) => p.text).join("").trim();
4759
5135
  const reasoningSummary = extractOpenAiReasoningSummary(finalResponse).trim();
5136
+ const modelCompletedAtMs = Date.now();
4760
5137
  const stepCostUsd = estimateCallCostUsd({
4761
5138
  modelId: modelVersion,
4762
5139
  tokens: usageTokens,
@@ -4771,6 +5148,16 @@ async function runToolLoop(request) {
4771
5148
  if (responseToolCalls.length === 0) {
4772
5149
  finalText = responseText;
4773
5150
  finalThoughts = reasoningSummary;
5151
+ const stepCompletedAtMs2 = Date.now();
5152
+ const timing2 = buildStepTiming({
5153
+ stepStartedAtMs,
5154
+ stepCompletedAtMs: stepCompletedAtMs2,
5155
+ modelCompletedAtMs,
5156
+ firstModelEventAtMs,
5157
+ schedulerMetrics,
5158
+ toolExecutionMs: 0,
5159
+ waitToolMs: 0
5160
+ });
4774
5161
  steps.push({
4775
5162
  step: steps.length + 1,
4776
5163
  modelVersion,
@@ -4778,7 +5165,8 @@ async function runToolLoop(request) {
4778
5165
  thoughts: reasoningSummary || void 0,
4779
5166
  toolCalls: [],
4780
5167
  usage: usageTokens,
4781
- costUsd: stepCostUsd
5168
+ costUsd: stepCostUsd,
5169
+ timing: timing2
4782
5170
  });
4783
5171
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
4784
5172
  }
@@ -4823,8 +5211,15 @@ async function runToolLoop(request) {
4823
5211
  })
4824
5212
  );
4825
5213
  const toolOutputs = [];
5214
+ let toolExecutionMs = 0;
5215
+ let waitToolMs = 0;
4826
5216
  for (const { entry, result, outputPayload } of callResults) {
4827
5217
  stepToolCalls.push({ ...result, callId: entry.call.call_id });
5218
+ const callDurationMs = toToolResultDuration(result);
5219
+ toolExecutionMs += callDurationMs;
5220
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5221
+ waitToolMs += callDurationMs;
5222
+ }
4828
5223
  if (entry.call.kind === "custom") {
4829
5224
  toolOutputs.push({
4830
5225
  type: "custom_tool_call_output",
@@ -4839,6 +5234,16 @@ async function runToolLoop(request) {
4839
5234
  });
4840
5235
  }
4841
5236
  }
5237
+ const stepCompletedAtMs = Date.now();
5238
+ const timing = buildStepTiming({
5239
+ stepStartedAtMs,
5240
+ stepCompletedAtMs,
5241
+ modelCompletedAtMs,
5242
+ firstModelEventAtMs,
5243
+ schedulerMetrics,
5244
+ toolExecutionMs,
5245
+ waitToolMs
5246
+ });
4842
5247
  steps.push({
4843
5248
  step: steps.length + 1,
4844
5249
  modelVersion,
@@ -4846,7 +5251,8 @@ async function runToolLoop(request) {
4846
5251
  thoughts: reasoningSummary || void 0,
4847
5252
  toolCalls: stepToolCalls,
4848
5253
  usage: usageTokens,
4849
- costUsd: stepCostUsd
5254
+ costUsd: stepCostUsd,
5255
+ timing
4850
5256
  });
4851
5257
  previousResponseId = finalResponse.id;
4852
5258
  input = toolOutputs;
@@ -4867,6 +5273,13 @@ async function runToolLoop(request) {
4867
5273
  let input = [...toolLoopInput.input];
4868
5274
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
4869
5275
  const turn = stepIndex + 1;
5276
+ const stepStartedAtMs = Date.now();
5277
+ let firstModelEventAtMs;
5278
+ const markFirstModelEvent = () => {
5279
+ if (firstModelEventAtMs === void 0) {
5280
+ firstModelEventAtMs = Date.now();
5281
+ }
5282
+ };
4870
5283
  const response = await collectChatGptCodexResponseWithRetry({
4871
5284
  sessionId: conversationId,
4872
5285
  request: {
@@ -4889,13 +5302,16 @@ async function runToolLoop(request) {
4889
5302
  signal: request.signal,
4890
5303
  onDelta: (delta) => {
4891
5304
  if (delta.thoughtDelta) {
5305
+ markFirstModelEvent();
4892
5306
  request.onEvent?.({ type: "delta", channel: "thought", text: delta.thoughtDelta });
4893
5307
  }
4894
5308
  if (delta.textDelta) {
5309
+ markFirstModelEvent();
4895
5310
  request.onEvent?.({ type: "delta", channel: "response", text: delta.textDelta });
4896
5311
  }
4897
5312
  }
4898
5313
  });
5314
+ const modelCompletedAtMs = Date.now();
4899
5315
  const modelVersion = response.model ? `chatgpt-${response.model}` : request.model;
4900
5316
  const usageTokens = extractChatGptUsageTokens(response.usage);
4901
5317
  const stepCostUsd = estimateCallCostUsd({
@@ -4910,6 +5326,15 @@ async function runToolLoop(request) {
4910
5326
  if (responseToolCalls.length === 0) {
4911
5327
  finalText = responseText;
4912
5328
  finalThoughts = reasoningSummaryText;
5329
+ const stepCompletedAtMs2 = Date.now();
5330
+ const timing2 = buildStepTiming({
5331
+ stepStartedAtMs,
5332
+ stepCompletedAtMs: stepCompletedAtMs2,
5333
+ modelCompletedAtMs,
5334
+ firstModelEventAtMs,
5335
+ toolExecutionMs: 0,
5336
+ waitToolMs: 0
5337
+ });
4913
5338
  steps.push({
4914
5339
  step: steps.length + 1,
4915
5340
  modelVersion,
@@ -4917,7 +5342,8 @@ async function runToolLoop(request) {
4917
5342
  thoughts: reasoningSummaryText || void 0,
4918
5343
  toolCalls: [],
4919
5344
  usage: usageTokens,
4920
- costUsd: stepCostUsd
5345
+ costUsd: stepCostUsd,
5346
+ timing: timing2
4921
5347
  });
4922
5348
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
4923
5349
  }
@@ -4957,8 +5383,15 @@ async function runToolLoop(request) {
4957
5383
  );
4958
5384
  })
4959
5385
  );
5386
+ let toolExecutionMs = 0;
5387
+ let waitToolMs = 0;
4960
5388
  for (const { entry, result, outputPayload } of callResults) {
4961
5389
  toolCalls.push({ ...result, callId: entry.ids.callId });
5390
+ const callDurationMs = toToolResultDuration(result);
5391
+ toolExecutionMs += callDurationMs;
5392
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5393
+ waitToolMs += callDurationMs;
5394
+ }
4962
5395
  if (entry.call.kind === "custom") {
4963
5396
  toolOutputs.push({
4964
5397
  type: "custom_tool_call",
@@ -4989,6 +5422,15 @@ async function runToolLoop(request) {
4989
5422
  });
4990
5423
  }
4991
5424
  }
5425
+ const stepCompletedAtMs = Date.now();
5426
+ const timing = buildStepTiming({
5427
+ stepStartedAtMs,
5428
+ stepCompletedAtMs,
5429
+ modelCompletedAtMs,
5430
+ firstModelEventAtMs,
5431
+ toolExecutionMs,
5432
+ waitToolMs
5433
+ });
4992
5434
  steps.push({
4993
5435
  step: steps.length + 1,
4994
5436
  modelVersion,
@@ -4996,7 +5438,8 @@ async function runToolLoop(request) {
4996
5438
  thoughts: reasoningSummaryText || void 0,
4997
5439
  toolCalls,
4998
5440
  usage: usageTokens,
4999
- costUsd: stepCostUsd
5441
+ costUsd: stepCostUsd,
5442
+ timing
5000
5443
  });
5001
5444
  input = input.concat(toolOutputs);
5002
5445
  }
@@ -5012,18 +5455,29 @@ async function runToolLoop(request) {
5012
5455
  const messages = toFireworksMessages(contents);
5013
5456
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5014
5457
  const turn = stepIndex + 1;
5015
- const response = await runFireworksCall(async (client) => {
5016
- return await client.chat.completions.create(
5017
- {
5018
- model: providerInfo.model,
5019
- messages,
5020
- tools: fireworksTools,
5021
- tool_choice: "auto",
5022
- parallel_tool_calls: true
5023
- },
5024
- { signal: request.signal }
5025
- );
5026
- });
5458
+ const stepStartedAtMs = Date.now();
5459
+ let schedulerMetrics;
5460
+ const response = await runFireworksCall(
5461
+ async (client) => {
5462
+ return await client.chat.completions.create(
5463
+ {
5464
+ model: providerInfo.model,
5465
+ messages,
5466
+ tools: fireworksTools,
5467
+ tool_choice: "auto",
5468
+ parallel_tool_calls: true
5469
+ },
5470
+ { signal: request.signal }
5471
+ );
5472
+ },
5473
+ providerInfo.model,
5474
+ {
5475
+ onSettled: (metrics) => {
5476
+ schedulerMetrics = metrics;
5477
+ }
5478
+ }
5479
+ );
5480
+ const modelCompletedAtMs = Date.now();
5027
5481
  const modelVersion = typeof response.model === "string" ? response.model : request.model;
5028
5482
  request.onEvent?.({ type: "model", modelVersion });
5029
5483
  const choice = Array.isArray(response.choices) ? response.choices[0] : void 0;
@@ -5054,6 +5508,15 @@ async function runToolLoop(request) {
5054
5508
  if (responseToolCalls.length === 0) {
5055
5509
  finalText = responseText;
5056
5510
  finalThoughts = "";
5511
+ const stepCompletedAtMs2 = Date.now();
5512
+ const timing2 = buildStepTiming({
5513
+ stepStartedAtMs,
5514
+ stepCompletedAtMs: stepCompletedAtMs2,
5515
+ modelCompletedAtMs,
5516
+ schedulerMetrics,
5517
+ toolExecutionMs: 0,
5518
+ waitToolMs: 0
5519
+ });
5057
5520
  steps.push({
5058
5521
  step: steps.length + 1,
5059
5522
  modelVersion,
@@ -5061,7 +5524,8 @@ async function runToolLoop(request) {
5061
5524
  thoughts: void 0,
5062
5525
  toolCalls: [],
5063
5526
  usage: usageTokens,
5064
- costUsd: stepCostUsd
5527
+ costUsd: stepCostUsd,
5528
+ timing: timing2
5065
5529
  });
5066
5530
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5067
5531
  }
@@ -5096,8 +5560,15 @@ async function runToolLoop(request) {
5096
5560
  );
5097
5561
  const assistantToolCalls = [];
5098
5562
  const toolMessages = [];
5563
+ let toolExecutionMs = 0;
5564
+ let waitToolMs = 0;
5099
5565
  for (const { entry, result, outputPayload } of callResults) {
5100
5566
  stepToolCalls.push({ ...result, callId: entry.call.id });
5567
+ const callDurationMs = toToolResultDuration(result);
5568
+ toolExecutionMs += callDurationMs;
5569
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5570
+ waitToolMs += callDurationMs;
5571
+ }
5101
5572
  assistantToolCalls.push({
5102
5573
  id: entry.call.id,
5103
5574
  type: "function",
@@ -5112,6 +5583,15 @@ async function runToolLoop(request) {
5112
5583
  content: mergeToolOutput(outputPayload)
5113
5584
  });
5114
5585
  }
5586
+ const stepCompletedAtMs = Date.now();
5587
+ const timing = buildStepTiming({
5588
+ stepStartedAtMs,
5589
+ stepCompletedAtMs,
5590
+ modelCompletedAtMs,
5591
+ schedulerMetrics,
5592
+ toolExecutionMs,
5593
+ waitToolMs
5594
+ });
5115
5595
  steps.push({
5116
5596
  step: steps.length + 1,
5117
5597
  modelVersion,
@@ -5119,7 +5599,8 @@ async function runToolLoop(request) {
5119
5599
  thoughts: void 0,
5120
5600
  toolCalls: stepToolCalls,
5121
5601
  usage: usageTokens,
5122
- costUsd: stepCostUsd
5602
+ costUsd: stepCostUsd,
5603
+ timing
5123
5604
  });
5124
5605
  messages.push({
5125
5606
  role: "assistant",
@@ -5135,6 +5616,14 @@ async function runToolLoop(request) {
5135
5616
  const geminiTools = geminiNativeTools ? geminiNativeTools.concat(geminiFunctionTools) : geminiFunctionTools;
5136
5617
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
5137
5618
  for (let stepIndex = 0; stepIndex < maxSteps; stepIndex += 1) {
5619
+ const stepStartedAtMs = Date.now();
5620
+ let firstModelEventAtMs;
5621
+ let schedulerMetrics;
5622
+ const markFirstModelEvent = () => {
5623
+ if (firstModelEventAtMs === void 0) {
5624
+ firstModelEventAtMs = Date.now();
5625
+ }
5626
+ };
5138
5627
  const config = {
5139
5628
  maxOutputTokens: 32e3,
5140
5629
  tools: geminiTools,
@@ -5146,81 +5635,91 @@ async function runToolLoop(request) {
5146
5635
  thinkingConfig: resolveGeminiThinkingConfig(request.model)
5147
5636
  };
5148
5637
  const onEvent = request.onEvent;
5149
- const response = await runGeminiCall(async (client) => {
5150
- const stream = await client.models.generateContentStream({
5151
- model: request.model,
5152
- contents: geminiContents,
5153
- config
5154
- });
5155
- let responseText = "";
5156
- let thoughtsText = "";
5157
- const modelParts = [];
5158
- const functionCalls = [];
5159
- const seenFunctionCallIds = /* @__PURE__ */ new Set();
5160
- const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5161
- let latestUsageMetadata;
5162
- let resolvedModelVersion;
5163
- for await (const chunk of stream) {
5164
- if (chunk.modelVersion) {
5165
- resolvedModelVersion = chunk.modelVersion;
5166
- onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5167
- }
5168
- if (chunk.usageMetadata) {
5169
- latestUsageMetadata = chunk.usageMetadata;
5170
- }
5171
- const candidates = chunk.candidates;
5172
- if (!candidates || candidates.length === 0) {
5173
- continue;
5174
- }
5175
- const primary = candidates[0];
5176
- const parts = primary?.content?.parts;
5177
- if (!parts || parts.length === 0) {
5178
- continue;
5179
- }
5180
- for (const part of parts) {
5181
- modelParts.push(part);
5182
- const call = part.functionCall;
5183
- if (call) {
5184
- const id = typeof call.id === "string" ? call.id : "";
5185
- const shouldAdd = (() => {
5186
- if (id.length > 0) {
5187
- if (seenFunctionCallIds.has(id)) {
5638
+ const response = await runGeminiCall(
5639
+ async (client) => {
5640
+ const stream = await client.models.generateContentStream({
5641
+ model: request.model,
5642
+ contents: geminiContents,
5643
+ config
5644
+ });
5645
+ let responseText = "";
5646
+ let thoughtsText = "";
5647
+ const modelParts = [];
5648
+ const functionCalls = [];
5649
+ const seenFunctionCallIds = /* @__PURE__ */ new Set();
5650
+ const seenFunctionCallKeys = /* @__PURE__ */ new Set();
5651
+ let latestUsageMetadata;
5652
+ let resolvedModelVersion;
5653
+ for await (const chunk of stream) {
5654
+ markFirstModelEvent();
5655
+ if (chunk.modelVersion) {
5656
+ resolvedModelVersion = chunk.modelVersion;
5657
+ onEvent?.({ type: "model", modelVersion: chunk.modelVersion });
5658
+ }
5659
+ if (chunk.usageMetadata) {
5660
+ latestUsageMetadata = chunk.usageMetadata;
5661
+ }
5662
+ const candidates = chunk.candidates;
5663
+ if (!candidates || candidates.length === 0) {
5664
+ continue;
5665
+ }
5666
+ const primary = candidates[0];
5667
+ const parts = primary?.content?.parts;
5668
+ if (!parts || parts.length === 0) {
5669
+ continue;
5670
+ }
5671
+ for (const part of parts) {
5672
+ modelParts.push(part);
5673
+ const call = part.functionCall;
5674
+ if (call) {
5675
+ const id = typeof call.id === "string" ? call.id : "";
5676
+ const shouldAdd = (() => {
5677
+ if (id.length > 0) {
5678
+ if (seenFunctionCallIds.has(id)) {
5679
+ return false;
5680
+ }
5681
+ seenFunctionCallIds.add(id);
5682
+ return true;
5683
+ }
5684
+ const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5685
+ if (seenFunctionCallKeys.has(key)) {
5188
5686
  return false;
5189
5687
  }
5190
- seenFunctionCallIds.add(id);
5688
+ seenFunctionCallKeys.add(key);
5191
5689
  return true;
5690
+ })();
5691
+ if (shouldAdd) {
5692
+ functionCalls.push(call);
5192
5693
  }
5193
- const key = JSON.stringify({ name: call.name ?? "", args: call.args ?? null });
5194
- if (seenFunctionCallKeys.has(key)) {
5195
- return false;
5196
- }
5197
- seenFunctionCallKeys.add(key);
5198
- return true;
5199
- })();
5200
- if (shouldAdd) {
5201
- functionCalls.push(call);
5202
5694
  }
5203
- }
5204
- if (typeof part.text === "string" && part.text.length > 0) {
5205
- if (part.thought) {
5206
- thoughtsText += part.text;
5207
- onEvent?.({ type: "delta", channel: "thought", text: part.text });
5208
- } else {
5209
- responseText += part.text;
5210
- onEvent?.({ type: "delta", channel: "response", text: part.text });
5695
+ if (typeof part.text === "string" && part.text.length > 0) {
5696
+ if (part.thought) {
5697
+ thoughtsText += part.text;
5698
+ onEvent?.({ type: "delta", channel: "thought", text: part.text });
5699
+ } else {
5700
+ responseText += part.text;
5701
+ onEvent?.({ type: "delta", channel: "response", text: part.text });
5702
+ }
5211
5703
  }
5212
5704
  }
5213
5705
  }
5706
+ return {
5707
+ responseText,
5708
+ thoughtsText,
5709
+ functionCalls,
5710
+ modelParts,
5711
+ usageMetadata: latestUsageMetadata,
5712
+ modelVersion: resolvedModelVersion ?? request.model
5713
+ };
5714
+ },
5715
+ request.model,
5716
+ {
5717
+ onSettled: (metrics) => {
5718
+ schedulerMetrics = metrics;
5719
+ }
5214
5720
  }
5215
- return {
5216
- responseText,
5217
- thoughtsText,
5218
- functionCalls,
5219
- modelParts,
5220
- usageMetadata: latestUsageMetadata,
5221
- modelVersion: resolvedModelVersion ?? request.model
5222
- };
5223
- });
5721
+ );
5722
+ const modelCompletedAtMs = Date.now();
5224
5723
  const usageTokens = extractGeminiUsageTokens(response.usageMetadata);
5225
5724
  const modelVersion = response.modelVersion ?? request.model;
5226
5725
  const stepCostUsd = estimateCallCostUsd({
@@ -5232,6 +5731,16 @@ async function runToolLoop(request) {
5232
5731
  if (response.functionCalls.length === 0) {
5233
5732
  finalText = response.responseText.trim();
5234
5733
  finalThoughts = response.thoughtsText.trim();
5734
+ const stepCompletedAtMs2 = Date.now();
5735
+ const timing2 = buildStepTiming({
5736
+ stepStartedAtMs,
5737
+ stepCompletedAtMs: stepCompletedAtMs2,
5738
+ modelCompletedAtMs,
5739
+ firstModelEventAtMs,
5740
+ schedulerMetrics,
5741
+ toolExecutionMs: 0,
5742
+ waitToolMs: 0
5743
+ });
5235
5744
  steps.push({
5236
5745
  step: steps.length + 1,
5237
5746
  modelVersion,
@@ -5239,7 +5748,8 @@ async function runToolLoop(request) {
5239
5748
  thoughts: finalThoughts || void 0,
5240
5749
  toolCalls: [],
5241
5750
  usage: usageTokens,
5242
- costUsd: stepCostUsd
5751
+ costUsd: stepCostUsd,
5752
+ timing: timing2
5243
5753
  });
5244
5754
  return { text: finalText, thoughts: finalThoughts, steps, totalCostUsd };
5245
5755
  }
@@ -5289,8 +5799,15 @@ async function runToolLoop(request) {
5289
5799
  );
5290
5800
  })
5291
5801
  );
5802
+ let toolExecutionMs = 0;
5803
+ let waitToolMs = 0;
5292
5804
  for (const { entry, result, outputPayload } of callResults) {
5293
5805
  toolCalls.push({ ...result, callId: entry.call.id });
5806
+ const callDurationMs = toToolResultDuration(result);
5807
+ toolExecutionMs += callDurationMs;
5808
+ if (entry.toolName.toLowerCase() === SUBAGENT_WAIT_TOOL_NAME) {
5809
+ waitToolMs += callDurationMs;
5810
+ }
5294
5811
  const responsePayload = isPlainRecord(outputPayload) ? outputPayload : { output: outputPayload };
5295
5812
  responseParts.push({
5296
5813
  functionResponse: {
@@ -5300,6 +5817,16 @@ async function runToolLoop(request) {
5300
5817
  }
5301
5818
  });
5302
5819
  }
5820
+ const stepCompletedAtMs = Date.now();
5821
+ const timing = buildStepTiming({
5822
+ stepStartedAtMs,
5823
+ stepCompletedAtMs,
5824
+ modelCompletedAtMs,
5825
+ firstModelEventAtMs,
5826
+ schedulerMetrics,
5827
+ toolExecutionMs,
5828
+ waitToolMs
5829
+ });
5303
5830
  steps.push({
5304
5831
  step: steps.length + 1,
5305
5832
  modelVersion,
@@ -5307,7 +5834,8 @@ async function runToolLoop(request) {
5307
5834
  thoughts: response.thoughtsText.trim() || void 0,
5308
5835
  toolCalls,
5309
5836
  usage: usageTokens,
5310
- costUsd: stepCostUsd
5837
+ costUsd: stepCostUsd,
5838
+ timing
5311
5839
  });
5312
5840
  geminiContents.push({ role: "user", parts: responseParts });
5313
5841
  }
@@ -5558,13 +6086,648 @@ function appendMarkdownSourcesSection(value, sources) {
5558
6086
  ${lines}`;
5559
6087
  }
5560
6088
 
6089
+ // src/agent.ts
6090
+ var import_node_crypto3 = require("crypto");
6091
+
6092
+ // src/agent/subagents.ts
6093
+ var import_node_crypto2 = require("crypto");
6094
+ var import_zod4 = require("zod");
6095
// Fallback values used by resolveSubagentToolConfig when a field is not supplied.
// Declared with `var`, like the other module-level bindings in this bundle.
var DEFAULT_SUBAGENT_MAX_AGENTS = 4;
var DEFAULT_SUBAGENT_MAX_DEPTH = 2;
var DEFAULT_SUBAGENT_WAIT_TIMEOUT_MS = 1500;
var DEFAULT_SUBAGENT_MAX_WAIT_TIMEOUT_MS = 90 * 1000;

// Upper bounds passed as the maximum when caller-supplied values are normalized.
var MAX_SUBAGENT_MAX_AGENTS = 64;
var MAX_SUBAGENT_MAX_DEPTH = 12;
var MAX_SUBAGENT_MAX_STEPS = 64;
var MAX_SUBAGENT_WAIT_TIMEOUT_MS = 10 * 60 * 1000;

// Tool names for operating on an existing subagent (spawn_agent is not listed here).
var SUBAGENT_CONTROL_TOOL_NAMES = [
  "send_input",
  "resume_agent",
  "wait",
  "close_agent"
];
6104
// One Codex-style input item. Every listed field is an optional string and
// unknown keys are kept (passthrough) rather than stripped.
var subagentInputItemSchema = import_zod4.z
  .object({
    text: import_zod4.z.string().optional(),
    image_url: import_zod4.z.string().optional(),
    name: import_zod4.z.string().optional(),
    path: import_zod4.z.string().optional(),
    type: import_zod4.z.string().optional()
  })
  .passthrough();

// Input for spawn_agent. At least one of prompt/message/items must resolve to
// a non-empty prompt, which is what the refinement enforces.
var spawnAgentInputSchema = import_zod4.z
  .object({
    prompt: import_zod4.z.string().optional().describe("Initial prompt for the subagent."),
    message: import_zod4.z.string().optional().describe("Codex-style alias for prompt."),
    items: import_zod4.z
      .array(subagentInputItemSchema)
      .optional()
      .describe("Optional Codex-style input items."),
    agent_type: import_zod4.z.string().optional().describe("Codex-style agent type hint."),
    instructions: import_zod4.z
      .string()
      .optional()
      .describe("Optional extra instructions for this subagent instance."),
    model: import_zod4.z
      .string()
      .optional()
      .describe("Optional model override. Must be one of this package's supported text model ids."),
    max_steps: import_zod4.z
      .number()
      .int()
      .min(1)
      .max(MAX_SUBAGENT_MAX_STEPS)
      .optional()
      .describe("Optional max step budget for each subagent run.")
  })
  .refine(
    ({ prompt, message, items }) => Boolean(resolvePromptValue(prompt, message, items)),
    { message: "Either prompt, message, or items must contain non-empty input." }
  );

// Input for send_input. Requires a target id (agent_id or its alias id) and
// some non-empty input (input, message or items).
var sendInputSchema = import_zod4.z
  .object({
    agent_id: import_zod4.z.string().optional().describe("Target subagent id."),
    id: import_zod4.z.string().optional().describe("Codex-style alias for agent_id."),
    input: import_zod4.z.string().optional().describe("New user input queued for the subagent."),
    message: import_zod4.z.string().optional().describe("Codex-style alias for input."),
    items: import_zod4.z
      .array(subagentInputItemSchema)
      .optional()
      .describe("Optional Codex-style input items."),
    interrupt: import_zod4.z
      .boolean()
      .optional()
      .describe("If true and currently running, aborts active run before queuing input.")
  })
  .refine(({ agent_id, id }) => Boolean(resolveAgentIdValue(agent_id, id)), {
    message: "agent_id (or id) is required."
  })
  .refine(({ input, message, items }) => Boolean(resolvePromptValue(input, message, items)), {
    message: "input (or message/items) is required."
  });

// Input for resume_agent: only a target id (agent_id or id) is required.
var resumeAgentSchema = import_zod4.z
  .object({
    agent_id: import_zod4.z.string().optional().describe("Target subagent id."),
    id: import_zod4.z.string().optional().describe("Codex-style alias for agent_id.")
  })
  .refine(({ agent_id, id }) => Boolean(resolveAgentIdValue(agent_id, id)), {
    message: "agent_id (or id) is required."
  });

// Input for wait: either a single target id or a non-empty ids array, plus an
// optional positive integer timeout.
var waitSchema = import_zod4.z
  .object({
    agent_id: import_zod4.z.string().optional().describe("Target subagent id."),
    id: import_zod4.z.string().optional().describe("Codex-style alias for agent_id."),
    ids: import_zod4.z
      .array(import_zod4.z.string().min(1))
      .optional()
      .describe("Codex-style list of agent ids."),
    timeout_ms: import_zod4.z
      .number()
      .int()
      .min(1)
      .optional()
      .describe("Optional wait timeout in milliseconds.")
  })
  .refine(
    (value) => {
      if (resolveAgentIdValue(value.agent_id, value.id)) {
        return true;
      }
      return Array.isArray(value.ids) && value.ids.length > 0;
    },
    { message: "agent_id/id or ids is required." }
  );

// Input for close_agent: only a target id (agent_id or id) is required.
var closeSchema = import_zod4.z
  .object({
    agent_id: import_zod4.z.string().optional().describe("Target subagent id."),
    id: import_zod4.z.string().optional().describe("Codex-style alias for agent_id.")
  })
  .refine(({ agent_id, id }) => Boolean(resolveAgentIdValue(agent_id, id)), {
    message: "agent_id (or id) is required."
  });
6157
/**
 * Turns the caller's subagent selection into a fully populated configuration.
 *
 * @param {boolean|object|undefined} selection - `undefined` or `false` yields a
 *   disabled configuration filled with defaults; `true` enables subagents with
 *   defaults; an object overrides individual fields.
 * @param {number} currentDepth - Depth of the agent requesting the tools; the
 *   result is only enabled while this is below the resolved `maxDepth`.
 * @returns {object} Resolved configuration. `instructions`, `model` and
 *   `maxSteps` are present only when they resolve to a truthy value.
 */
function resolveSubagentToolConfig(selection, currentDepth) {
  if (selection === undefined || selection === false) {
    return {
      enabled: false,
      maxAgents: DEFAULT_SUBAGENT_MAX_AGENTS,
      maxDepth: DEFAULT_SUBAGENT_MAX_DEPTH,
      defaultWaitTimeoutMs: DEFAULT_SUBAGENT_WAIT_TIMEOUT_MS,
      maxWaitTimeoutMs: DEFAULT_SUBAGENT_MAX_WAIT_TIMEOUT_MS,
      promptPattern: "codex",
      inheritTools: true,
      inheritFilesystemTool: true
    };
  }

  // `true` means "enable with defaults", i.e. no overrides at all.
  const overrides = selection === true ? {} : selection;

  const maxAgents = normalizeInteger(
    overrides.maxAgents,
    DEFAULT_SUBAGENT_MAX_AGENTS,
    1,
    MAX_SUBAGENT_MAX_AGENTS
  );
  const maxDepth = normalizeInteger(
    overrides.maxDepth,
    DEFAULT_SUBAGENT_MAX_DEPTH,
    1,
    MAX_SUBAGENT_MAX_DEPTH
  );
  const defaultWaitTimeoutMs = normalizeInteger(
    overrides.defaultWaitTimeoutMs,
    DEFAULT_SUBAGENT_WAIT_TIMEOUT_MS,
    1,
    MAX_SUBAGENT_WAIT_TIMEOUT_MS
  );
  // The resolved default wait is passed as the lower bound for the maximum wait.
  const maxWaitTimeoutMs = normalizeInteger(
    overrides.maxWaitTimeoutMs,
    DEFAULT_SUBAGENT_MAX_WAIT_TIMEOUT_MS,
    defaultWaitTimeoutMs,
    MAX_SUBAGENT_WAIT_TIMEOUT_MS
  );
  const promptPattern = overrides.promptPattern ?? "codex";
  const instructions = trimToUndefined(overrides.instructions);
  const maxSteps = normalizeOptionalInteger(overrides.maxSteps, 1, MAX_SUBAGENT_MAX_STEPS);

  const resolved = {
    enabled: overrides.enabled !== false && currentDepth < maxDepth,
    maxAgents,
    maxDepth,
    defaultWaitTimeoutMs,
    maxWaitTimeoutMs,
    promptPattern
  };
  // Optional fields are added only when truthy, keeping the original key order.
  if (instructions) {
    resolved.instructions = instructions;
  }
  if (overrides.model) {
    resolved.model = overrides.model;
  }
  if (maxSteps) {
    resolved.maxSteps = maxSteps;
  }
  resolved.inheritTools = overrides.inheritTools !== false;
  resolved.inheritFilesystemTool = overrides.inheritFilesystemTool !== false;
  return resolved;
}
6211
/**
 * Produces the instruction text that tells an orchestrating agent how to use
 * the subagent tools.
 *
 * @param {{maxAgents: *, maxDepth: *, currentDepth: *}} params - Values that
 *   are interpolated into the final "Limits" line.
 * @returns {string} Newline-separated instructions.
 */
function buildCodexSubagentOrchestratorInstructions(params) {
  const { maxAgents, maxDepth, currentDepth } = params;
  const limits = `Limits: max active subagents ${maxAgents}, max depth ${maxDepth}, current depth ${currentDepth}.`;

  let text = "Subagent orchestration tools are available: spawn_agent, send_input, resume_agent, wait, close_agent.";
  text += "\n" + "Use this control pattern:";
  text += "\n" + "1. spawn_agent with a focused prompt.";
  text += "\n" + "2. wait on that agent_id until it is no longer running.";
  text += "\n" + "3. For follow-up turns, send_input then resume_agent.";
  text += "\n" + "4. close_agent when delegation is complete.";
  text += "\n" + limits;
  return text;
}
6222
/**
 * Produces the instruction text given to a delegated subagent.
 *
 * @param {{depth: *, maxDepth: *}} params - Values interpolated into the first
 *   line as `depth/maxDepth`.
 * @returns {string} Three newline-separated instruction lines.
 */
function buildCodexSubagentWorkerInstructions(params) {
  const { depth, maxDepth } = params;
  const intro = `You are a delegated subagent at depth ${depth}/${maxDepth}.`;
  const focus = "Focus on the delegated task, use available tools when needed, and return concise actionable output.";
  const blocked = "If blocked, report the blocker explicitly.";
  return intro + "\n" + focus + "\n" + blocked;
}
6229
/**
 * Creates the subagent orchestration tools (spawn_agent, send_input,
 * resume_agent, wait, close_agent) together with a shutdown hook.
 *
 * @param {object} options - Reads `options.config` (enabled, maxAgents,
 *   maxDepth, model, maxSteps, defaultWaitTimeoutMs, maxWaitTimeoutMs),
 *   `options.parentDepth`, `options.parentModel` and the optional
 *   `options.buildChildInstructions(instructions, depth)` callback. The whole
 *   object is also forwarded to `startRun`.
 * @returns {{tools: object, closeAll: function(): Promise<void>}} When the
 *   config is disabled, `tools` is empty and `closeAll` does nothing.
 */
function createSubagentToolController(options) {
  if (!options.config.enabled) {
    const noop = async () => {
    };
    return { tools: {}, closeAll: noop };
  }

  // Every subagent spawned through this controller, keyed by its id.
  const agents = /* @__PURE__ */ new Map();

  // Resolves the target of a control-tool call from agent_id or its alias id.
  const lookupTarget = (input, missingIdMessage) => {
    const agentId = resolveAgentIdValue(input.agent_id, input.id);
    if (!agentId) {
      throw new Error(missingIdMessage);
    }
    return requireAgent(agents, agentId);
  };

  const handleSpawn = async (input) => {
    if (countActiveAgents(agents) >= options.config.maxAgents) {
      throw new Error(
        `Subagent limit reached (${options.config.maxAgents}). Close existing agents before spawning new ones.`
      );
    }
    const childDepth = options.parentDepth + 1;
    if (childDepth > options.config.maxDepth) {
      throw new Error(
        `Subagent depth limit reached (${options.config.maxDepth}). Cannot spawn at depth ${childDepth}.`
      );
    }

    // A per-call model override wins, but only if it is a supported model id.
    const requestedModel = input.model;
    if (requestedModel && !isLlmTextModelId(requestedModel)) {
      throw new Error(`Unsupported subagent model id: ${requestedModel}`);
    }
    const model = requestedModel ? requestedModel : options.config.model ?? options.parentModel;

    const suffix = (0, import_node_crypto2.randomBytes)(6).toString("hex");
    const id = `agent_${suffix}`;
    const createdAt = Date.now();
    const initialPrompt = resolvePromptValue(input.prompt, input.message, input.items);
    if (!initialPrompt) {
      throw new Error("spawn_agent requires prompt/message/items with non-empty text.");
    }

    const spawnedMessage = `Spawned subagent ${id}.`;
    // The record is assembled in stages so the optional keys land in the same
    // position as before (after `history`, ahead of `turns`).
    const agent = {
      id,
      depth: childDepth,
      model,
      status: "idle",
      createdAtMs: createdAt,
      updatedAtMs: createdAt,
      pendingInputs: [initialPrompt],
      history: []
    };
    if (options.buildChildInstructions) {
      agent.instructions = trimToUndefined(
        options.buildChildInstructions(input.instructions, childDepth)
      );
    } else if (input.instructions) {
      agent.instructions = trimToUndefined(input.instructions);
    }
    if (input.max_steps) {
      agent.maxSteps = input.max_steps;
    } else if (options.config.maxSteps) {
      agent.maxSteps = options.config.maxSteps;
    }
    Object.assign(agent, {
      turns: 0,
      notification: "spawned",
      notificationMessage: spawnedMessage,
      version: 1,
      waiters: /* @__PURE__ */ new Set()
    });

    agents.set(id, agent);
    startRun(agent, options);
    return buildToolResponse(agent, { notification: "spawned", message: spawnedMessage });
  };

  const handleSendInput = async (input) => {
    const agent = lookupTarget(input, "send_input requires agent_id or id.");
    const nextInput = resolvePromptValue(input.input, input.message, input.items);
    if (!nextInput) {
      throw new Error("send_input requires input/message/items with non-empty text.");
    }
    if (agent.status === "closed") {
      throw new Error(`Subagent ${agent.id} is closed.`);
    }

    const interrupting = Boolean(input.interrupt && agent.abortController);
    if (interrupting) {
      // Abort the active run and put the new input at the front of the queue.
      agent.abortController.abort("send_input_interrupt");
      agent.pendingInputs.unshift(nextInput);
    } else {
      agent.pendingInputs.push(nextInput);
    }
    setNotification(
      agent,
      "input_queued",
      interrupting
        ? `Interrupted ${agent.id} and queued new input.`
        : `Queued input for ${agent.id}.`
    );
    return buildToolResponse(agent);
  };

  const handleResume = async (input) => {
    const agent = lookupTarget(input, "resume_agent requires agent_id or id.");
    if (agent.status === "closed") {
      const closedMessage = `Subagent ${agent.id} is already closed.`;
      setNotification(agent, "already_closed", closedMessage);
      return buildToolResponse(agent, { notification: "already_closed", message: closedMessage });
    }

    switch (startRun(agent, options)) {
      case "started":
        return buildToolResponse(agent, {
          notification: "run_started",
          message: `Started subagent ${agent.id}.`
        });
      case "already_running":
        setNotification(agent, "already_running", `Subagent ${agent.id} is already running.`);
        break;
      default:
        setNotification(agent, "no_pending_input", `Subagent ${agent.id} has no queued input.`);
        break;
    }
    return buildToolResponse(agent);
  };

  const handleWait = async (input) => {
    const usesIdsArray = Array.isArray(input.ids) && input.ids.length > 0;
    const ids = resolveAgentIdList(input.agent_id, input.id, input.ids);
    if (ids.length === 0) {
      throw new Error("wait requires agent_id/id or ids.");
    }
    const timeoutMs = normalizeInteger(
      input.timeout_ms,
      options.config.defaultWaitTimeoutMs,
      1,
      options.config.maxWaitTimeoutMs
    );

    if (usesIdsArray) {
      // Multi-agent form: an empty status map is reported as a timeout.
      const status = await waitForAnyAgentStatus(agents, ids, timeoutMs);
      const nothingReported = Object.keys(status).length === 0;
      return { status, timed_out: nothingReported, timeout_ms: timeoutMs };
    }

    const agent = requireAgent(agents, ids[0]);
    let timedOut = false;
    if (agent.status === "running") {
      timedOut = !(await waitUntilNotRunning(agent, timeoutMs));
      if (timedOut) {
        setNotification(
          agent,
          "timeout",
          `Timed out after ${timeoutMs}ms while waiting for ${agent.id}.`
        );
      }
    }
    return buildToolResponse(agent, undefined, { timed_out: timedOut, timeout_ms: timeoutMs });
  };

  const handleClose = async (input) => {
    const agent = lookupTarget(input, "close_agent requires agent_id or id.");
    if (agent.status === "closed") {
      setNotification(agent, "already_closed", `Subagent ${agent.id} is already closed.`);
      return buildToolResponse(agent, undefined, { cancelled: false });
    }
    const closedMessage = `Closed ${agent.id}.`;
    const cancelled = closeSubagent(agent, closedMessage);
    return buildToolResponse(
      agent,
      { notification: "closed", message: closedMessage },
      { cancelled }
    );
  };

  const closeAll = async () => {
    const running = [];
    for (const [, agent] of agents) {
      if (agent.status !== "closed") {
        closeSubagent(agent, `Parent agent loop closed ${agent.id}.`);
      }
      if (agent.runningPromise) {
        running.push(agent.runningPromise);
      }
    }
    if (running.length === 0) {
      return;
    }
    // Wait for in-flight runs to settle, racing against sleep2(2000) so
    // shutdown does not block on them indefinitely.
    await Promise.race([Promise.allSettled(running), sleep2(2000)]);
  };

  const tools = {
    spawn_agent: tool({
      description: "Spawns a subagent asynchronously. Returns immediately with agent status and id.",
      inputSchema: spawnAgentInputSchema,
      execute: handleSpawn
    }),
    send_input: tool({
      description: "Queues new input for an existing subagent.",
      inputSchema: sendInputSchema,
      execute: handleSendInput
    }),
    resume_agent: tool({
      description: "Resumes a subagent run when queued input is available.",
      inputSchema: resumeAgentSchema,
      execute: handleResume
    }),
    wait: tool({
      description: "Waits for a running subagent to change state or until timeout. Returns current status.",
      inputSchema: waitSchema,
      execute: handleWait
    }),
    close_agent: tool({
      description: "Closes a subagent and aborts its current run if it is still running.",
      inputSchema: closeSchema,
      execute: handleClose
    })
  };

  return { tools, closeAll };
}
6428
/**
 * Looks up a subagent in the registry.
 *
 * @param {Map<string, object>} agents - Registry keyed by subagent id.
 * @param {string} id - Subagent id to look up.
 * @returns {object} The registered subagent record.
 * @throws {Error} When nothing is registered under `id`.
 */
function requireAgent(agents, id) {
  const found = agents.get(id);
  if (found) {
    return found;
  }
  throw new Error(`Unknown subagent id: ${id}`);
}
6435
/**
 * Picks the subagent id from the two accepted input fields.
 *
 * The trimmed `agentId` wins when it is non-empty; otherwise the trimmed
 * `idAlias` is used. Returns an empty string when neither yields a value.
 *
 * @param {string | undefined | null} agentId - Preferred id field.
 * @param {string | undefined | null} idAlias - Fallback id field.
 * @returns {string} The resolved id, or "" when none was supplied.
 */
function resolveAgentIdValue(agentId, idAlias) {
  return agentId?.trim() || (idAlias?.trim() ?? "");
}
6443
/**
 * Resolves the list of subagent ids targeted by a tool call.
 *
 * A non-empty `ids` array takes precedence: its entries are trimmed, blanks
 * are dropped, and duplicates are removed while keeping first-seen order.
 * Otherwise the single-id fields are resolved via `resolveAgentIdValue`.
 *
 * @param {string | undefined} agentId - Preferred single-id field.
 * @param {string | undefined} idAlias - Fallback single-id field.
 * @param {string[] | undefined} ids - Optional list of ids.
 * @returns {string[]} Zero or more ids.
 */
function resolveAgentIdList(agentId, idAlias, ids) {
  if (!Array.isArray(ids) || ids.length === 0) {
    const single = resolveAgentIdValue(agentId, idAlias);
    return single ? [single] : [];
  }
  const unique = new Set();
  for (const raw of ids) {
    const trimmed = raw.trim();
    if (trimmed) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
6450
/**
 * Resolves the prompt text for a subagent from the accepted input fields.
 *
 * Precedence: trimmed `prompt`, then trimmed `message`, then the text derived
 * from `items` by `resolveInputItemsText`. Returns "" when nothing is usable.
 *
 * @param {string | undefined} prompt - Primary prompt field.
 * @param {string | undefined} message - Alternate prompt field.
 * @param {object[] | undefined} items - Structured input items.
 * @returns {string} The resolved prompt text, possibly empty.
 */
function resolvePromptValue(prompt, message, items) {
  return prompt?.trim() || message?.trim() || (resolveInputItemsText(items) ?? "");
}
6462
/**
 * Flattens structured input items into newline-separated text.
 *
 * For each item, a non-blank string `text` contributes its trimmed value.
 * Otherwise the item contributes a space-joined summary of its trimmed
 * `type`, `name`, and `path` (or `image_url` when `path` is blank); items
 * with nothing to show are skipped.
 *
 * @param {object[] | undefined | null} items - Input items to summarize.
 * @returns {string | undefined} Joined lines, or undefined when `items` is
 *   empty/missing or no item produced any text.
 */
function resolveInputItemsText(items) {
  if (!items || items.length === 0) {
    return void 0;
  }
  // Non-string fields are treated the same as blank ones.
  const trimmedString = (value) => (typeof value === "string" ? value.trim() : "");
  const lines = [];
  for (const item of items) {
    const text = trimmedString(item.text);
    if (text.length > 0) {
      lines.push(text);
      continue;
    }
    const filePath = trimmedString(item.path);
    const imageUrl = trimmedString(item.image_url);
    const summary = [trimmedString(item.type), trimmedString(item.name), filePath || imageUrl]
      .filter(Boolean)
      .join(" ");
    if (summary) {
      lines.push(summary);
    }
  }
  return lines.length > 0 ? lines.join("\n") : void 0;
}
6486
/**
 * Counts the subagents whose status is anything other than "closed".
 *
 * @param {Map<string, object>} agents - Registry of subagent records.
 * @returns {number} Number of non-closed subagents.
 */
function countActiveAgents(agents) {
  return Array.from(agents.values()).filter((agent) => agent.status !== "closed").length;
}
6495
/**
 * Waits until at least one of the requested subagents is no longer running,
 * or until the timeout elapses.
 *
 * @param {Map<string, object>} agents - Registry of subagent records.
 * @param {string[]} ids - Ids of the subagents to watch.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<Record<string, object>>} Snapshots (from `buildSnapshot`)
 *   keyed by agent id for every watched agent that is not running; an empty
 *   object when the timeout elapsed with all of them still running.
 * @throws {Error} When any id is unknown (propagated from `requireAgent`).
 */
async function waitForAnyAgentStatus(agents, ids, timeoutMs) {
  const requested = ids.map((id) => requireAgent(agents, id));
  const deadline = Date.now() + timeoutMs;
  while (true) {
    const status = {};
    for (const agent of requested) {
      if (agent.status !== "running") {
        status[agent.id] = buildSnapshot(agent);
      }
    }
    if (Object.keys(status).length > 0) {
      return status;
    }
    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      return {};
    }
    // One shared waiter and one timer for the whole group. Whichever fires
    // first removes the waiter from every agent and clears the timer, so no
    // stale waiters or pending timers are left behind on the agents that did
    // not change.
    await new Promise((resolve) => {
      const wake = () => {
        clearTimeout(timer);
        for (const agent of requested) {
          agent.waiters.delete(wake);
        }
        resolve();
      };
      const timer = setTimeout(wake, remaining);
      for (const agent of requested) {
        agent.waiters.add(wake);
      }
    });
  }
}
6522
/**
 * Records a new notification on the agent and wakes anything waiting on it.
 *
 * Updates `notification`, `notificationMessage` and `updatedAtMs`, bumps
 * `version`, then calls `notifyWaiters`.
 *
 * @param {object} agent - Subagent record to mutate.
 * @param {string} notification - Notification code.
 * @param {string} message - Human-readable notification text.
 */
function setNotification(agent, notification, message) {
  Object.assign(agent, {
    notification,
    notificationMessage: message,
    updatedAtMs: Date.now()
  });
  agent.version += 1;
  notifyWaiters(agent);
}
6529
/**
 * Moves the agent to a new lifecycle status and publishes the matching
 * notification.
 *
 * The status is written first so that it is already in place when
 * `setNotification` runs.
 *
 * @param {object} agent - Subagent record to mutate.
 * @param {string} status - New lifecycle status.
 * @param {string} notification - Notification code to publish.
 * @param {string} message - Human-readable notification text.
 */
function setLifecycle(agent, status, notification, message) {
  Object.assign(agent, { status });
  setNotification(agent, notification, message);
}
6533
/**
 * Invokes and removes every waiter callback currently registered on the agent.
 *
 * The waiter set is copied and cleared before any callback runs, so callbacks
 * registered during notification are kept for the next round.
 *
 * @param {object} agent - Subagent record whose `waiters` set is drained.
 */
function notifyWaiters(agent) {
  const pending = Array.from(agent.waiters);
  if (pending.length === 0) {
    return;
  }
  agent.waiters.clear();
  pending.forEach((wake) => {
    wake();
  });
}
6543
/**
 * Starts the next queued run for a subagent, if one can be started.
 *
 * Takes the oldest entry from `agent.pendingInputs`, appends it to the agent
 * history as a user message, marks the agent "running", and invokes
 * `options.runSubagent` in the background. When the run settles the agent
 * becomes:
 *   - "idle" with `run_completed` on success (history, `lastResult` and
 *     `turns` are updated),
 *   - "idle" with `input_queued` when the run's abort signal was triggered,
 *   - "failed" with `run_failed` on any other error (`lastError` is set).
 * If the agent was closed in the meantime its state is left untouched.
 * Timing fields and `runningPromise`/`abortController` are always finalized.
 *
 * @param {object} agent - Subagent record to run.
 * @param {{ runSubagent: Function }} options - Provides the run callback.
 * @returns {"started" | "already_running" | "no_pending_input"} Outcome.
 */
function startRun(agent, options) {
  if (agent.runningPromise) {
    return "already_running";
  }
  const nextInput = agent.pendingInputs.shift();
  if (!nextInput) {
    return "no_pending_input";
  }
  const input = [...agent.history, { role: "user", content: nextInput }];
  const abortController = new AbortController();
  const runStartedAtMs = Date.now();
  agent.abortController = abortController;
  if (agent.firstRunStartedAtMs === void 0) {
    agent.firstRunStartedAtMs = runStartedAtMs;
  }
  agent.lastRunStartedAtMs = runStartedAtMs;
  agent.lastError = void 0;
  setLifecycle(
    agent,
    "running",
    "run_started",
    `Subagent ${agent.id} started run ${agent.turns + 1}.`
  );
  // Set by the run's `finally`. It is only observable below when the run
  // finished before the async body reached its first `await`, i.e. when
  // `runSubagent` threw synchronously.
  let runSettled = false;
  const runPromise = (async () => {
    try {
      const result = await options.runSubagent({
        agentId: agent.id,
        depth: agent.depth,
        model: agent.model,
        input,
        instructions: agent.instructions,
        maxSteps: agent.maxSteps,
        signal: abortController.signal
      });
      // A closed agent keeps its "closed" state; the late result is dropped.
      if (agent.status === "closed") {
        return;
      }
      agent.lastResult = result;
      agent.lastError = void 0;
      agent.turns += 1;
      agent.history = [...input, { role: "assistant", content: result.text }];
      setLifecycle(
        agent,
        "idle",
        "run_completed",
        `Subagent ${agent.id} completed run ${agent.turns}.`
      );
    } catch (error) {
      if (agent.status === "closed") {
        return;
      }
      // An aborted run is reported as an interruption, not as a failure.
      if (abortController.signal.aborted) {
        setLifecycle(agent, "idle", "input_queued", `Subagent ${agent.id} run interrupted.`);
        return;
      }
      const message = toErrorMessage(error);
      agent.lastError = message;
      setLifecycle(agent, "failed", "run_failed", `Subagent ${agent.id} failed: ${message}`);
    } finally {
      const runCompletedAtMs = Date.now();
      agent.lastRunCompletedAtMs = runCompletedAtMs;
      agent.lastRunDurationMs = Math.max(0, runCompletedAtMs - runStartedAtMs);
      agent.runningPromise = void 0;
      agent.abortController = void 0;
      runSettled = true;
    }
  })();
  // Do not publish an already-finished run as the running promise: nothing
  // would ever clear it again and the agent would look busy forever.
  if (!runSettled) {
    agent.runningPromise = runPromise;
  }
  return "started";
}
6612
/**
 * Closes a subagent: drops its queued inputs, aborts the in-flight run (if
 * an abort controller is attached) with reason "close_agent", and moves it
 * to the "closed" status.
 *
 * @param {object} agent - Subagent record to close.
 * @param {string} message - Notification text recorded with the closure.
 * @returns {boolean} True when a run was in flight at the time of the call.
 */
function closeSubagent(agent, message) {
  const hadActiveRun = Boolean(agent.runningPromise);
  agent.pendingInputs = [];
  agent.abortController?.abort("close_agent");
  setLifecycle(agent, "closed", "closed", message);
  return hadActiveRun;
}
6621
/**
 * Waits for the agent to leave the "running" status.
 *
 * Re-checks the status after every version change reported by
 * `waitForVersionChange`, because a version bump does not necessarily mean
 * the run has ended.
 *
 * @param {object} agent - Subagent record to watch.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<boolean>} True once the agent is not running; false when
 *   the deadline passed or a wait timed out first.
 */
async function waitUntilNotRunning(agent, timeoutMs) {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    if (agent.status !== "running") {
      return true;
    }
    const msLeft = deadline - Date.now();
    if (msLeft <= 0) {
      return false;
    }
    const versionChanged = await waitForVersionChange(agent, agent.version, msLeft);
    if (!versionChanged) {
      return false;
    }
  }
}
6636
/**
 * Waits until the agent's waiters are notified or the timeout expires.
 *
 * Resolves immediately with true when `agent.version` already differs from
 * `version`. Otherwise a waiter is registered in `agent.waiters`; whichever of
 * the waiter or the timer fires first removes the waiter and clears the timer.
 *
 * @param {object} agent - Subagent record exposing `version` and `waiters`.
 * @param {number} version - Version the caller last observed.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<boolean>} True on change/notification, false on timeout.
 */
async function waitForVersionChange(agent, version, timeoutMs) {
  if (agent.version !== version) {
    return true;
  }
  return new Promise((resolve) => {
    let timer;
    const settle = (changed) => {
      clearTimeout(timer);
      agent.waiters.delete(onChange);
      resolve(changed);
    };
    const onChange = () => {
      settle(true);
    };
    timer = setTimeout(() => {
      settle(false);
    }, timeoutMs);
    agent.waiters.add(onChange);
  });
}
6656
/**
 * Builds the payload returned by the subagent control tools.
 *
 * @param {object} agent - Subagent record to report on.
 * @param {{ notification?: string, message?: string } | undefined} override -
 *   Values that replace the agent's stored notification/message when present.
 * @param {object} [extra={}] - Additional fields spread last into the payload.
 * @returns {object} Payload containing the agent id, notification, message,
 *   status, the full snapshot, and `tool_availability` (empty for a closed
 *   agent, otherwise a copy of `SUBAGENT_CONTROL_TOOL_NAMES`).
 */
function buildToolResponse(agent, override, extra = {}) {
  const snapshot = buildSnapshot(agent);
  const isClosed = snapshot.status === "closed";
  return {
    agent_id: snapshot.agent_id,
    notification: override?.notification ?? agent.notification,
    message: override?.message ?? agent.notificationMessage,
    status: snapshot.status,
    agent: snapshot,
    tool_availability: isClosed ? [] : [...SUBAGENT_CONTROL_TOOL_NAMES],
    ...extra
  };
}
6670
/**
 * Produces a plain, serializable view of a subagent record.
 *
 * Always includes id, status, depth, model, queued-input count, turn count and
 * creation/update timestamps (ISO strings). Run timing, last error and a
 * summary of the last result are added only when the corresponding field on
 * the agent is set.
 *
 * @param {object} agent - Subagent record to snapshot.
 * @returns {object} Snapshot with snake_case keys.
 */
function buildSnapshot(agent) {
  const toIso = (ms) => new Date(ms).toISOString();
  const snapshot = {
    agent_id: agent.id,
    status: agent.status,
    depth: agent.depth,
    model: agent.model,
    pending_inputs: agent.pendingInputs.length,
    turns: agent.turns,
    created_at: toIso(agent.createdAtMs),
    updated_at: toIso(agent.updatedAtMs)
  };
  if (agent.firstRunStartedAtMs) {
    snapshot.first_run_started_at = toIso(agent.firstRunStartedAtMs);
    snapshot.spawn_startup_latency_ms = Math.max(0, agent.firstRunStartedAtMs - agent.createdAtMs);
  }
  if (agent.lastRunStartedAtMs) {
    snapshot.last_run_started_at = toIso(agent.lastRunStartedAtMs);
  }
  if (agent.lastRunCompletedAtMs) {
    snapshot.last_run_completed_at = toIso(agent.lastRunCompletedAtMs);
  }
  if (typeof agent.lastRunDurationMs === "number") {
    snapshot.last_run_duration_ms = Math.max(0, agent.lastRunDurationMs);
  }
  if (agent.lastError) {
    snapshot.last_error = agent.lastError;
  }
  if (agent.lastResult) {
    const { text, thoughts, steps, totalCostUsd } = agent.lastResult;
    snapshot.last_result = {
      text,
      thoughts,
      step_count: steps.length,
      total_cost_usd: totalCostUsd
    };
  }
  return snapshot;
}
6698
/**
 * Coerces a value to an integer within [min, max].
 *
 * A finite number is floored; anything else is replaced by `fallback` (used
 * as given, without flooring). The result is then clamped.
 *
 * @param {unknown} value - Candidate value.
 * @param {number} fallback - Value used when `value` is not a finite number.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} The clamped result.
 */
function normalizeInteger(value, fallback, min, max) {
  let candidate = fallback;
  if (Number.isFinite(value)) {
    candidate = Math.floor(value);
  }
  const capped = Math.min(max, candidate);
  return Math.max(min, capped);
}
6702
/**
 * Like `normalizeInteger`, but without a fallback: a value that is not a
 * finite number yields undefined instead.
 *
 * @param {unknown} value - Candidate value.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number | undefined} The floored, clamped value, or undefined.
 */
function normalizeOptionalInteger(value, min, max) {
  return Number.isFinite(value) ? Math.max(min, Math.min(max, Math.floor(value))) : void 0;
}
6708
/**
 * Trims a string and maps blank or missing input to undefined.
 *
 * @param {string | undefined | null} value - Text to trim.
 * @returns {string | undefined} The trimmed text, or undefined when empty.
 */
function trimToUndefined(value) {
  return value?.trim() || void 0;
}
6712
/**
 * Extracts a displayable message from a thrown value.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for Error instances, otherwise the value
 *   converted with `String`.
 */
function toErrorMessage(error) {
  return error instanceof Error ? error.message : String(error);
}
6718
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep2(ms) {
  return new Promise((done) => {
    setTimeout(() => {
      done();
    }, ms);
  });
}
6723
+
5561
6724
  // src/tools/filesystemTools.ts
5562
6725
  var import_node_path5 = __toESM(require("path"), 1);
5563
- var import_zod5 = require("zod");
6726
+ var import_zod6 = require("zod");
5564
6727
 
5565
6728
  // src/tools/applyPatch.ts
5566
6729
  var import_node_path4 = __toESM(require("path"), 1);
5567
- var import_zod4 = require("zod");
6730
+ var import_zod5 = require("zod");
5568
6731
 
5569
6732
  // src/tools/filesystem.ts
5570
6733
  var import_node_fs3 = require("fs");
@@ -5860,8 +7023,8 @@ var CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION = [
5860
7023
  "- You must prefix new lines with `+` even when creating a new file",
5861
7024
  "- File references can only be relative, NEVER ABSOLUTE."
5862
7025
  ].join("\n");
5863
- var applyPatchToolInputSchema = import_zod4.z.object({
5864
- input: import_zod4.z.string().min(1).describe(CODEX_APPLY_PATCH_INPUT_DESCRIPTION)
7026
+ var applyPatchToolInputSchema = import_zod5.z.object({
7027
+ input: import_zod5.z.string().min(1).describe(CODEX_APPLY_PATCH_INPUT_DESCRIPTION)
5865
7028
  });
5866
7029
  function createApplyPatchTool(options = {}) {
5867
7030
  return tool({
@@ -6269,100 +7432,100 @@ var MAX_GREP_LIMIT = 2e3;
6269
7432
  var DEFAULT_MAX_LINE_LENGTH = 500;
6270
7433
  var DEFAULT_GREP_MAX_SCANNED_FILES = 2e4;
6271
7434
  var DEFAULT_TAB_WIDTH = 4;
6272
- var codexReadFileInputSchema = import_zod5.z.object({
6273
- file_path: import_zod5.z.string().min(1).describe("Absolute path to the file"),
6274
- offset: import_zod5.z.number().int().min(1).optional().describe("The line number to start reading from. Must be 1 or greater."),
6275
- limit: import_zod5.z.number().int().min(1).optional().describe("The maximum number of lines to return."),
6276
- mode: import_zod5.z.enum(["slice", "indentation"]).optional().describe('Optional mode selector: "slice" (default) or "indentation".'),
6277
- indentation: import_zod5.z.object({
6278
- anchor_line: import_zod5.z.number().int().min(1).optional(),
6279
- max_levels: import_zod5.z.number().int().min(0).optional(),
6280
- include_siblings: import_zod5.z.boolean().optional(),
6281
- include_header: import_zod5.z.boolean().optional(),
6282
- max_lines: import_zod5.z.number().int().min(1).optional()
7435
+ var codexReadFileInputSchema = import_zod6.z.object({
7436
+ file_path: import_zod6.z.string().min(1).describe("Absolute path to the file"),
7437
+ offset: import_zod6.z.number().int().min(1).optional().describe("The line number to start reading from. Must be 1 or greater."),
7438
+ limit: import_zod6.z.number().int().min(1).optional().describe("The maximum number of lines to return."),
7439
+ mode: import_zod6.z.enum(["slice", "indentation"]).optional().describe('Optional mode selector: "slice" (default) or "indentation".'),
7440
+ indentation: import_zod6.z.object({
7441
+ anchor_line: import_zod6.z.number().int().min(1).optional(),
7442
+ max_levels: import_zod6.z.number().int().min(0).optional(),
7443
+ include_siblings: import_zod6.z.boolean().optional(),
7444
+ include_header: import_zod6.z.boolean().optional(),
7445
+ max_lines: import_zod6.z.number().int().min(1).optional()
6283
7446
  }).optional()
6284
7447
  });
6285
- var codexListDirInputSchema = import_zod5.z.object({
6286
- dir_path: import_zod5.z.string().min(1).describe("Absolute path to the directory to list."),
6287
- offset: import_zod5.z.number().int().min(1).optional().describe("The entry number to start listing from. Must be 1 or greater."),
6288
- limit: import_zod5.z.number().int().min(1).optional().describe("The maximum number of entries to return."),
6289
- depth: import_zod5.z.number().int().min(1).optional().describe("The maximum directory depth to traverse. Must be 1 or greater.")
7448
+ var codexListDirInputSchema = import_zod6.z.object({
7449
+ dir_path: import_zod6.z.string().min(1).describe("Absolute path to the directory to list."),
7450
+ offset: import_zod6.z.number().int().min(1).optional().describe("The entry number to start listing from. Must be 1 or greater."),
7451
+ limit: import_zod6.z.number().int().min(1).optional().describe("The maximum number of entries to return."),
7452
+ depth: import_zod6.z.number().int().min(1).optional().describe("The maximum directory depth to traverse. Must be 1 or greater.")
6290
7453
  });
6291
- var codexGrepFilesInputSchema = import_zod5.z.object({
6292
- pattern: import_zod5.z.string().min(1).describe("Regular expression pattern to search for."),
6293
- include: import_zod5.z.string().optional().describe('Optional glob limiting searched files (for example "*.rs").'),
6294
- path: import_zod5.z.string().optional().describe("Directory or file path to search. Defaults to cwd."),
6295
- limit: import_zod5.z.number().int().min(1).optional().describe("Maximum number of file paths to return (defaults to 100).")
7454
+ var codexGrepFilesInputSchema = import_zod6.z.object({
7455
+ pattern: import_zod6.z.string().min(1).describe("Regular expression pattern to search for."),
7456
+ include: import_zod6.z.string().optional().describe('Optional glob limiting searched files (for example "*.rs").'),
7457
+ path: import_zod6.z.string().optional().describe("Directory or file path to search. Defaults to cwd."),
7458
+ limit: import_zod6.z.number().int().min(1).optional().describe("Maximum number of file paths to return (defaults to 100).")
6296
7459
  });
6297
- var applyPatchInputSchema = import_zod5.z.object({
6298
- input: import_zod5.z.string().min(1).describe(CODEX_APPLY_PATCH_INPUT_DESCRIPTION)
7460
+ var applyPatchInputSchema = import_zod6.z.object({
7461
+ input: import_zod6.z.string().min(1).describe(CODEX_APPLY_PATCH_INPUT_DESCRIPTION)
6299
7462
  });
6300
- var geminiReadFileInputSchema = import_zod5.z.object({
6301
- file_path: import_zod5.z.string().min(1),
6302
- offset: import_zod5.z.number().int().min(0).nullish(),
6303
- limit: import_zod5.z.number().int().min(1).nullish()
7463
+ var geminiReadFileInputSchema = import_zod6.z.object({
7464
+ file_path: import_zod6.z.string().min(1),
7465
+ offset: import_zod6.z.number().int().min(0).nullish(),
7466
+ limit: import_zod6.z.number().int().min(1).nullish()
6304
7467
  });
6305
- var geminiReadFilesInputSchema = import_zod5.z.object({
6306
- paths: import_zod5.z.array(import_zod5.z.string().min(1)).min(1),
6307
- line_offset: import_zod5.z.number().int().min(0).nullish(),
6308
- line_limit: import_zod5.z.number().int().min(1).nullish(),
6309
- char_offset: import_zod5.z.number().int().min(0).nullish(),
6310
- char_limit: import_zod5.z.number().int().min(1).nullish(),
6311
- include_line_numbers: import_zod5.z.boolean().nullish()
7468
+ var geminiReadFilesInputSchema = import_zod6.z.object({
7469
+ paths: import_zod6.z.array(import_zod6.z.string().min(1)).min(1),
7470
+ line_offset: import_zod6.z.number().int().min(0).nullish(),
7471
+ line_limit: import_zod6.z.number().int().min(1).nullish(),
7472
+ char_offset: import_zod6.z.number().int().min(0).nullish(),
7473
+ char_limit: import_zod6.z.number().int().min(1).nullish(),
7474
+ include_line_numbers: import_zod6.z.boolean().nullish()
6312
7475
  }).superRefine((value, context) => {
6313
7476
  const hasLineWindow = value.line_offset !== void 0 || value.line_limit !== void 0;
6314
7477
  const hasCharWindow = value.char_offset !== void 0 || value.char_limit !== void 0;
6315
7478
  if (hasLineWindow && hasCharWindow) {
6316
7479
  context.addIssue({
6317
- code: import_zod5.z.ZodIssueCode.custom,
7480
+ code: import_zod6.z.ZodIssueCode.custom,
6318
7481
  message: "Use either line_* or char_* window arguments, not both."
6319
7482
  });
6320
7483
  }
6321
7484
  });
6322
- var geminiWriteFileInputSchema = import_zod5.z.object({
6323
- file_path: import_zod5.z.string().min(1),
6324
- content: import_zod5.z.string()
7485
+ var geminiWriteFileInputSchema = import_zod6.z.object({
7486
+ file_path: import_zod6.z.string().min(1),
7487
+ content: import_zod6.z.string()
6325
7488
  });
6326
- var geminiReplaceInputSchema = import_zod5.z.object({
6327
- file_path: import_zod5.z.string().min(1),
6328
- instruction: import_zod5.z.string().min(1),
6329
- old_string: import_zod5.z.string(),
6330
- new_string: import_zod5.z.string(),
6331
- expected_replacements: import_zod5.z.number().int().min(1).nullish()
7489
+ var geminiReplaceInputSchema = import_zod6.z.object({
7490
+ file_path: import_zod6.z.string().min(1),
7491
+ instruction: import_zod6.z.string().min(1),
7492
+ old_string: import_zod6.z.string(),
7493
+ new_string: import_zod6.z.string(),
7494
+ expected_replacements: import_zod6.z.number().int().min(1).nullish()
6332
7495
  });
6333
- var geminiListDirectoryInputSchema = import_zod5.z.object({
6334
- dir_path: import_zod5.z.string().min(1),
6335
- ignore: import_zod5.z.array(import_zod5.z.string()).nullish(),
6336
- file_filtering_options: import_zod5.z.object({
6337
- respect_git_ignore: import_zod5.z.boolean().nullish(),
6338
- respect_gemini_ignore: import_zod5.z.boolean().nullish()
7496
+ var geminiListDirectoryInputSchema = import_zod6.z.object({
7497
+ dir_path: import_zod6.z.string().min(1),
7498
+ ignore: import_zod6.z.array(import_zod6.z.string()).nullish(),
7499
+ file_filtering_options: import_zod6.z.object({
7500
+ respect_git_ignore: import_zod6.z.boolean().nullish(),
7501
+ respect_gemini_ignore: import_zod6.z.boolean().nullish()
6339
7502
  }).nullish()
6340
7503
  });
6341
- var geminiRgSearchInputSchema = import_zod5.z.object({
6342
- pattern: import_zod5.z.string().min(1),
6343
- path: import_zod5.z.string().nullish(),
6344
- glob: import_zod5.z.string().nullish(),
6345
- case_sensitive: import_zod5.z.boolean().nullish(),
6346
- exclude_pattern: import_zod5.z.string().nullish(),
6347
- names_only: import_zod5.z.boolean().nullish(),
6348
- max_matches_per_file: import_zod5.z.number().int().min(1).nullish(),
6349
- max_results: import_zod5.z.number().int().min(1).nullish()
7504
+ var geminiRgSearchInputSchema = import_zod6.z.object({
7505
+ pattern: import_zod6.z.string().min(1),
7506
+ path: import_zod6.z.string().nullish(),
7507
+ glob: import_zod6.z.string().nullish(),
7508
+ case_sensitive: import_zod6.z.boolean().nullish(),
7509
+ exclude_pattern: import_zod6.z.string().nullish(),
7510
+ names_only: import_zod6.z.boolean().nullish(),
7511
+ max_matches_per_file: import_zod6.z.number().int().min(1).nullish(),
7512
+ max_results: import_zod6.z.number().int().min(1).nullish()
6350
7513
  });
6351
- var geminiGrepSearchInputSchema = import_zod5.z.object({
6352
- pattern: import_zod5.z.string().min(1),
6353
- dir_path: import_zod5.z.string().nullish(),
6354
- include: import_zod5.z.string().nullish(),
6355
- exclude_pattern: import_zod5.z.string().nullish(),
6356
- names_only: import_zod5.z.boolean().nullish(),
6357
- max_matches_per_file: import_zod5.z.number().int().min(1).nullish(),
6358
- total_max_matches: import_zod5.z.number().int().min(1).nullish()
7514
+ var geminiGrepSearchInputSchema = import_zod6.z.object({
7515
+ pattern: import_zod6.z.string().min(1),
7516
+ dir_path: import_zod6.z.string().nullish(),
7517
+ include: import_zod6.z.string().nullish(),
7518
+ exclude_pattern: import_zod6.z.string().nullish(),
7519
+ names_only: import_zod6.z.boolean().nullish(),
7520
+ max_matches_per_file: import_zod6.z.number().int().min(1).nullish(),
7521
+ total_max_matches: import_zod6.z.number().int().min(1).nullish()
6359
7522
  });
6360
- var geminiGlobInputSchema = import_zod5.z.object({
6361
- pattern: import_zod5.z.string().min(1),
6362
- dir_path: import_zod5.z.string().nullish(),
6363
- case_sensitive: import_zod5.z.boolean().nullish(),
6364
- respect_git_ignore: import_zod5.z.boolean().nullish(),
6365
- respect_gemini_ignore: import_zod5.z.boolean().nullish()
7523
+ var geminiGlobInputSchema = import_zod6.z.object({
7524
+ pattern: import_zod6.z.string().min(1),
7525
+ dir_path: import_zod6.z.string().nullish(),
7526
+ case_sensitive: import_zod6.z.boolean().nullish(),
7527
+ respect_git_ignore: import_zod6.z.boolean().nullish(),
7528
+ respect_gemini_ignore: import_zod6.z.boolean().nullish()
6366
7529
  });
6367
7530
  function resolveFilesystemToolProfile(model, profile = "auto") {
6368
7531
  if (profile !== "auto") {
@@ -7350,19 +8513,107 @@ function isNoEntError(error) {
7350
8513
 
7351
8514
  // src/agent.ts
7352
8515
  async function runAgentLoop(request) {
7353
- const { tools: customTools, filesystemTool, filesystem_tool, ...toolLoopRequest } = request;
8516
+ const telemetry = createAgentTelemetrySession(request.telemetry);
8517
+ try {
8518
+ return await runAgentLoopInternal(request, { depth: 0, telemetry });
8519
+ } finally {
8520
+ await telemetry?.flush();
8521
+ }
8522
+ }
8523
+ async function runAgentLoopInternal(request, context) {
8524
+ const {
8525
+ tools: customTools,
8526
+ filesystemTool,
8527
+ filesystem_tool,
8528
+ subagentTool,
8529
+ subagent_tool,
8530
+ subagents,
8531
+ telemetry,
8532
+ ...toolLoopRequest
8533
+ } = request;
8534
+ const telemetrySession = context.telemetry ?? createAgentTelemetrySession(telemetry);
8535
+ const runId = randomRunId();
8536
+ const startedAtMs = Date.now();
7354
8537
  const filesystemSelection = filesystemTool ?? filesystem_tool;
8538
+ const subagentSelection = subagentTool ?? subagent_tool ?? subagents;
7355
8539
  const filesystemTools = resolveFilesystemTools(request.model, filesystemSelection);
7356
- const mergedTools = mergeToolSets(filesystemTools, customTools ?? {});
8540
+ const resolvedSubagentConfig = resolveSubagentToolConfig(subagentSelection, context.depth);
8541
+ const subagentController = createSubagentController({
8542
+ runId,
8543
+ model: request.model,
8544
+ depth: context.depth,
8545
+ telemetry: telemetrySession,
8546
+ customTools: customTools ?? {},
8547
+ filesystemSelection,
8548
+ subagentSelection,
8549
+ toolLoopRequest,
8550
+ resolvedSubagentConfig
8551
+ });
8552
+ const mergedTools = mergeToolSets(
8553
+ mergeToolSets(filesystemTools, subagentController?.tools ?? {}),
8554
+ customTools ?? {}
8555
+ );
7357
8556
  if (Object.keys(mergedTools).length === 0) {
7358
8557
  throw new Error(
7359
- "runAgentLoop requires at least one tool. Provide `tools` or enable `filesystemTool`."
8558
+ "runAgentLoop requires at least one tool. Provide `tools`, enable `filesystemTool`, or enable `subagentTool`."
7360
8559
  );
7361
8560
  }
7362
- return runToolLoop({
7363
- ...toolLoopRequest,
7364
- tools: mergedTools
8561
+ const instructions = buildLoopInstructions(
8562
+ toolLoopRequest.instructions,
8563
+ resolvedSubagentConfig,
8564
+ context.depth
8565
+ );
8566
+ const emitTelemetry = createAgentTelemetryEmitter({
8567
+ session: telemetrySession,
8568
+ runId,
8569
+ parentRunId: context.parentRunId,
8570
+ depth: context.depth,
8571
+ model: request.model
8572
+ });
8573
+ emitTelemetry({
8574
+ type: "agent.run.started",
8575
+ inputMode: typeof request.input === "string" ? "string" : "messages",
8576
+ customToolCount: Object.keys(customTools ?? {}).length,
8577
+ mergedToolCount: Object.keys(mergedTools).length,
8578
+ filesystemToolsEnabled: Object.keys(filesystemTools).length > 0,
8579
+ subagentToolsEnabled: resolvedSubagentConfig.enabled
7365
8580
  });
8581
+ const sourceOnEvent = toolLoopRequest.onEvent;
8582
+ const includeLlmStreamEvents = telemetrySession?.includeLlmStreamEvents === true;
8583
+ const wrappedOnEvent = sourceOnEvent || includeLlmStreamEvents ? (event) => {
8584
+ sourceOnEvent?.(event);
8585
+ if (includeLlmStreamEvents) {
8586
+ emitTelemetry({ type: "agent.run.stream", event });
8587
+ }
8588
+ } : void 0;
8589
+ try {
8590
+ const result = await runToolLoop({
8591
+ ...toolLoopRequest,
8592
+ ...instructions ? { instructions } : {},
8593
+ ...wrappedOnEvent ? { onEvent: wrappedOnEvent } : {},
8594
+ tools: mergedTools
8595
+ });
8596
+ emitTelemetry({
8597
+ type: "agent.run.completed",
8598
+ success: true,
8599
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8600
+ stepCount: result.steps.length,
8601
+ toolCallCount: countToolCalls(result),
8602
+ totalCostUsd: result.totalCostUsd,
8603
+ usage: summarizeResultUsage(result)
8604
+ });
8605
+ return result;
8606
+ } catch (error) {
8607
+ emitTelemetry({
8608
+ type: "agent.run.completed",
8609
+ success: false,
8610
+ durationMs: Math.max(0, Date.now() - startedAtMs),
8611
+ error: toErrorMessage2(error)
8612
+ });
8613
+ throw error;
8614
+ } finally {
8615
+ await subagentController?.closeAll();
8616
+ }
7366
8617
  }
7367
8618
  function resolveFilesystemTools(model, selection) {
7368
8619
  if (selection === void 0 || selection === false) {
@@ -7390,13 +8641,216 @@ function mergeToolSets(base, extra) {
7390
8641
  for (const [toolName, toolSpec] of Object.entries(extra)) {
7391
8642
  if (Object.hasOwn(merged, toolName)) {
7392
8643
  throw new Error(
7393
- `Duplicate tool name "${toolName}" in runAgentLoop. Rename the custom tool or disable that filesystem tool.`
8644
+ `Duplicate tool name "${toolName}" in runAgentLoop. Rename one of the conflicting tools or disable an overlapping built-in tool.`
7394
8645
  );
7395
8646
  }
7396
8647
  merged[toolName] = toolSpec;
7397
8648
  }
7398
8649
  return merged;
7399
8650
  }
8651
/**
 * Builds the subagent tool controller for an agent run, or returns `null`
 * when subagents are disabled in the resolved configuration.
 *
 * @param {object} params - Run-level settings. Reads `resolvedSubagentConfig`,
 *   `depth`, `model`, `customTools`, `filesystemSelection`, `subagentSelection`,
 *   `toolLoopRequest`, `runId` and `telemetry`.
 * @returns {object|null} Whatever `createSubagentToolController` returns, or
 *   `null` when `resolvedSubagentConfig.enabled` is falsy.
 */
function createSubagentController(params) {
  const subagentsDisabled = !params.resolvedSubagentConfig.enabled;
  if (subagentsDisabled) {
    return null;
  }

  // Runs one spawned child as a nested agent loop, one level deeper than the
  // parent and linked to the parent's run id and telemetry selection.
  const runSubagent = async (subagentRequest) => {
    // Custom tools and filesystem tools are only handed down when the
    // subagent config opts into inheritance.
    const inheritedTools = params.resolvedSubagentConfig.inheritTools
      ? params.customTools
      : {};
    const inheritedFilesystemTool = params.resolvedSubagentConfig.inheritFilesystemTool
      ? params.filesystemSelection
      : false;

    const childRequest = {
      model: subagentRequest.model,
      input: subagentRequest.input,
      instructions: subagentRequest.instructions,
      tools: inheritedTools,
      filesystemTool: inheritedFilesystemTool,
      subagentTool: params.subagentSelection,
      modelTools: params.toolLoopRequest.modelTools,
      maxSteps: subagentRequest.maxSteps,
      openAiReasoningEffort: params.toolLoopRequest.openAiReasoningEffort,
      signal: subagentRequest.signal
    };
    const childContext = {
      depth: params.depth + 1,
      parentRunId: params.runId,
      telemetry: params.telemetry
    };
    return await runAgentLoopInternal(childRequest, childContext);
  };

  return createSubagentToolController({
    config: params.resolvedSubagentConfig,
    parentDepth: params.depth,
    // A model set on the subagent config wins over the parent's model.
    parentModel: params.resolvedSubagentConfig.model ?? params.model,
    buildChildInstructions: (spawnInstructions, childDepth) =>
      buildChildInstructions(spawnInstructions, params.resolvedSubagentConfig, childDepth),
    runSubagent
  });
}
8685
/**
 * Assembles the instruction text for an agent loop.
 *
 * With subagents disabled, the trimmed base instructions are returned as-is.
 * Otherwise the result joins, in order and separated by blank lines: the
 * trimmed base instructions (if any), the codex orchestrator block (only when
 * `config.promptPattern === "codex"`), and `config.instructions` (if truthy).
 *
 * @param {string|undefined} baseInstructions - Caller-supplied instructions.
 * @param {object} config - Resolved subagent configuration.
 * @param {number} depth - Depth forwarded to the orchestrator block as `currentDepth`.
 * @returns {string|undefined} The combined instructions, or `undefined` when empty.
 */
function buildLoopInstructions(baseInstructions, config, depth) {
  if (!config.enabled) {
    return trimToUndefined2(baseInstructions);
  }

  const base = trimToUndefined2(baseInstructions);
  const sections = [
    ...(base ? [base] : []),
    ...(config.promptPattern === "codex"
      ? [
          buildCodexSubagentOrchestratorInstructions({
            currentDepth: depth,
            maxDepth: config.maxDepth,
            maxAgents: config.maxAgents
          })
        ]
      : []),
    ...(config.instructions ? [config.instructions] : [])
  ];

  return sections.length === 0 ? void 0 : sections.join("\n\n");
}
8708
/**
 * Assembles the instruction text handed to a spawned subagent.
 *
 * Joins, in order and separated by blank lines: the codex worker block (only
 * when `config.promptPattern === "codex"`), `config.instructions` (if truthy),
 * and the trimmed per-spawn instructions (if non-empty).
 *
 * @param {string|undefined} spawnInstructions - Instructions supplied for this spawn.
 * @param {object} config - Resolved subagent configuration.
 * @param {number} childDepth - Depth forwarded to the worker block as `depth`.
 * @returns {string|undefined} The combined instructions, or `undefined` when empty.
 */
function buildChildInstructions(spawnInstructions, config, childDepth) {
  // Wraps a section in a list so falsy sections spread to nothing.
  const optional = (text) => (text ? [text] : []);

  const sections = [
    ...(config.promptPattern === "codex"
      ? [
          buildCodexSubagentWorkerInstructions({
            depth: childDepth,
            maxDepth: config.maxDepth
          })
        ]
      : []),
    ...optional(config.instructions),
    ...optional(trimToUndefined2(spawnInstructions))
  ];

  return sections.length === 0 ? void 0 : sections.join("\n\n");
}
8727
/**
 * Trims a string and collapses empty results to `undefined`.
 *
 * @param {string|null|undefined} value - Text to normalize.
 * @returns {string|undefined} The trimmed text, or `undefined` when the input
 *   is nullish or trims down to nothing.
 */
function trimToUndefined2(value) {
  const text = value?.trim();
  if (!text || !(text.length > 0)) {
    return void 0;
  }
  return text;
}
8731
/**
 * Generates a random identifier for an agent run.
 *
 * @returns {string} 8 random bytes rendered as a 16-character lowercase hex string.
 */
function randomRunId() {
  const RUN_ID_BYTE_LENGTH = 8;
  const { randomBytes } = import_node_crypto3;
  const entropy = randomBytes(RUN_ID_BYTE_LENGTH);
  return entropy.toString("hex");
}
8734
/**
 * Returns the current time as an ISO 8601 string.
 *
 * @returns {string} `Date#toISOString()` output for the moment of the call.
 */
function toIsoNow() {
  const now = /* @__PURE__ */ new Date();
  return now.toISOString();
}
8737
/**
 * Extracts a human-readable message from an arbitrary thrown value.
 *
 * Resolution order:
 *  1. `Error` instances with a truthy `message` yield that message.
 *  2. Strings are returned unchanged.
 *  3. Any other non-null object carrying a non-empty string `message`
 *     (error-like objects that are not `Error` instances, e.g. plain objects
 *     thrown by client code or errors created in another realm) yields that
 *     message instead of being reported as unknown.
 *  4. Everything else yields "Unknown error".
 *
 * @param {unknown} error - The value that was thrown or rejected with.
 * @returns {string} A message describing the failure.
 */
function toErrorMessage2(error) {
  if (error instanceof Error && error.message) {
    return error.message;
  }
  if (typeof error === "string") {
    return error;
  }
  // Duck-typed fallback: `instanceof Error` misses error-like values that
  // still carry a usable message.
  if (typeof error === "object" && error !== null) {
    const { message } = error;
    if (typeof message === "string" && message.length > 0) {
      return message;
    }
  }
  return "Unknown error";
}
8746
/**
 * Counts the tool calls made across every step of a tool-loop result.
 *
 * @param {{ steps: Iterable<{ toolCalls: { length: number } }> }} result -
 *   Result whose steps each expose a `toolCalls` collection.
 * @returns {number} Sum of `toolCalls.length` over all steps.
 */
function countToolCalls(result) {
  let total = 0;
  for (const { toolCalls } of result.steps) {
    total += toolCalls.length;
  }
  return total;
}
8753
/**
 * Adds one usage counter onto a running total, ignoring unusable inputs.
 *
 * A value is usable only when it is a finite number; negative values are
 * clamped to zero before being added.
 *
 * @param {number|undefined} current - Running total so far (may be unset).
 * @param {number|undefined} next - Counter to add.
 * @returns {number|undefined} `current` unchanged when `next` is unusable;
 *   otherwise the clamped `next`, plus the clamped `current` when that is usable.
 */
function sumUsageValue(current, next) {
  const isUsable = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);

  if (!isUsable(next)) {
    return current;
  }
  const addend = Math.max(0, next);
  return isUsable(current) ? Math.max(0, current) + addend : addend;
}
8763
/**
 * Aggregates per-step token usage into a single summary object.
 *
 * Steps without a `usage` value are skipped. Each field is accumulated with
 * `sumUsageValue`, so a field stays `undefined` until some step reports a
 * usable number for it.
 *
 * @param {{ steps: Iterable<{ usage?: object }> }} result - Tool-loop result.
 * @returns {object|undefined} Totals keyed by usage field, or `undefined`
 *   when no step carried usage data.
 */
function summarizeResultUsage(result) {
  // Listed in the order the summary's keys are created.
  const usageFields = [
    "promptTokens",
    "cachedTokens",
    "responseTokens",
    "responseImageTokens",
    "thinkingTokens",
    "totalTokens",
    "toolUsePromptTokens"
  ];

  let totals;
  for (const { usage } of result.steps) {
    if (!usage) {
      continue;
    }
    const previous = totals;
    totals = Object.fromEntries(
      usageFields.map((field) => [field, sumUsageValue(previous?.[field], usage[field])])
    );
  }
  return totals;
}
8782
/**
 * Tells whether a value is a thenable.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for a non-null object or function whose `then`
 *   property is a function.
 */
function isPromiseLike(value) {
  const kind = typeof value;
  if (kind !== "object" && kind !== "function") {
    return false;
  }
  if (value === null) {
    return false;
  }
  return typeof value.then === "function";
}
8785
/**
 * Tells whether a value can act as a telemetry sink.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` for a non-null object whose `emit` property is a
 *   function. Functions themselves do not qualify.
 */
function isAgentTelemetrySink(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return typeof value.emit === "function";
}
8788
/**
 * Normalizes the `telemetry` option into a config object that has a `sink`.
 *
 * @param {object|undefined} telemetry - Either a sink (an object with
 *   `emit(event)`), a config object holding such a sink under `sink`, or a
 *   falsy value for "no telemetry".
 * @returns {object|undefined} `undefined` for falsy input; `{ sink }` when a
 *   bare sink was passed; otherwise the given config object itself.
 * @throws {Error} When the value is truthy but neither a sink nor a config
 *   holding one.
 */
function resolveTelemetrySelection(telemetry) {
  if (!telemetry) {
    return void 0;
  }

  const isBareSink = isAgentTelemetrySink(telemetry);
  if (isBareSink) {
    return { sink: telemetry };
  }

  if (!isAgentTelemetrySink(telemetry.sink)) {
    throw new Error("Invalid runAgentLoop telemetry config: expected a sink with emit(event).");
  }
  return telemetry;
}
8800
/**
 * Creates a best-effort telemetry session around a user-supplied sink.
 *
 * The session never lets sink failures escape: synchronous throws from
 * `sink.emit`, rejections of promises it returns, and errors from
 * `sink.flush` are all swallowed on purpose.
 *
 * @param {object|undefined} telemetry - Telemetry option, normalized through
 *   `resolveTelemetrySelection`.
 * @returns {{ includeLlmStreamEvents: boolean, emit: Function, flush: Function }|undefined}
 *   The session, or `undefined` when no telemetry was configured.
 */
function createAgentTelemetrySession(telemetry) {
  const config = resolveTelemetrySelection(telemetry);
  if (!config) {
    return void 0;
  }

  // In-flight asynchronous deliveries; each removes itself once settled.
  const inFlight = /* @__PURE__ */ new Set();
  const ignore = () => void 0;

  function track(task) {
    inFlight.add(task);
    task.finally(() => {
      inFlight.delete(task);
    });
  }

  // Hands one event to the sink; asynchronous deliveries are tracked so that
  // flush() can wait for them.
  function emit(event) {
    try {
      const outcome = config.sink.emit(event);
      if (!isPromiseLike(outcome)) {
        return;
      }
      // Neutralize both fulfillment value and rejection so the tracked task
      // always settles quietly.
      track(Promise.resolve(outcome).then(ignore).catch(ignore));
    } catch {
    }
  }

  // Waits for every tracked delivery, then flushes the sink if it supports it.
  async function flush() {
    // Loop because new deliveries may be registered while we are waiting.
    while (inFlight.size > 0) {
      await Promise.allSettled(Array.from(inFlight));
    }
    if (typeof config.sink.flush !== "function") {
      return;
    }
    try {
      await config.sink.flush();
    } catch {
    }
  }

  return {
    includeLlmStreamEvents: config.includeLlmStreamEvents === true,
    emit,
    flush
  };
}
8839
/**
 * Creates an emitter that stamps run metadata onto telemetry events.
 *
 * Every emitted event is a copy of the input with `timestamp`, `runId`,
 * `depth` and `model` set (overriding same-named fields on the input), plus
 * `parentRunId` when one is present. The emitter does nothing while
 * `params.session` is falsy.
 *
 * @param {object} params - Reads `session`, `runId`, `parentRunId`, `depth`, `model`.
 * @returns {(event: object) => void} The stamping emitter.
 */
function createAgentTelemetryEmitter(params) {
  return (event) => {
    const activeSession = params.session;
    if (!activeSession) {
      return;
    }
    const stamped = {
      ...event,
      timestamp: toIsoNow(),
      runId: params.runId,
      // Only root-less runs omit the parent link entirely.
      ...(params.parentRunId ? { parentRunId: params.parentRunId } : {}),
      depth: params.depth,
      model: params.model
    };
    activeSession.emit(stamped);
  };
}
7400
8854
  // Annotate the CommonJS export names for ESM import in node:
7401
8855
  0 && (module.exports = {
7402
8856
  CHATGPT_MODEL_IDS,
@@ -7420,6 +8874,7 @@ function mergeToolSets(base, extra) {
7420
8874
  appendMarkdownSourcesSection,
7421
8875
  applyPatch,
7422
8876
  configureGemini,
8877
+ configureModelConcurrency,
7423
8878
  convertGooglePartsToLlmParts,
7424
8879
  createApplyPatchTool,
7425
8880
  createCodexApplyPatchTool,
@@ -7464,6 +8919,7 @@ function mergeToolSets(base, extra) {
7464
8919
  loadLocalEnv,
7465
8920
  parseJsonFromLlmText,
7466
8921
  refreshChatGptOauthToken,
8922
+ resetModelConcurrencyConfig,
7467
8923
  resolveFilesystemToolProfile,
7468
8924
  resolveFireworksModelId,
7469
8925
  runAgentLoop,