@runtypelabs/cli 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8469,8 +8469,12 @@ function setStoredToolPayloadField(payloads, toolId, field, value) {
8469
8469
  const next = { ...payloads.get(toolId) ?? {} };
8470
8470
  if (value === void 0) {
8471
8471
  delete next[field];
8472
+ } else if (field === "parameters") {
8473
+ next.parameters = value;
8474
+ } else if (field === "result") {
8475
+ next.result = value;
8472
8476
  } else {
8473
- next[field] = value;
8477
+ next.streamedInput = value;
8474
8478
  }
8475
8479
  if (next.parameters === void 0 && next.result === void 0 && next.streamedInput === void 0) {
8476
8480
  payloads.delete(toolId);
@@ -12272,7 +12276,7 @@ import { theme as theme24 } from "@runtypelabs/ink-components";
12272
12276
  import { jsx as jsx25, jsxs as jsxs21 } from "react/jsx-runtime";
12273
12277
  var MENU_ITEMS = [
12274
12278
  { key: "c", label: "Copy session JSON" },
12275
- { key: "o", label: "Open session JSON in editor" },
12279
+ { key: "e", label: "Open session JSON in editor" },
12276
12280
  { key: "f", label: "Open marathon folder in file manager" },
12277
12281
  { key: "d", label: "Open agent in Runtype dashboard" }
12278
12282
  ];
@@ -12294,7 +12298,7 @@ function SessionActionMenu({
12294
12298
  onCopySession();
12295
12299
  return;
12296
12300
  }
12297
- if (input === "o" && hasStateFile) {
12301
+ if (input === "e" && hasStateFile) {
12298
12302
  onOpenStateFile();
12299
12303
  return;
12300
12304
  }
@@ -12320,7 +12324,7 @@ function SessionActionMenu({
12320
12324
  children: [
12321
12325
  /* @__PURE__ */ jsx25(Text24, { bold: true, color: theme24.accent, children: "Session" }),
12322
12326
  /* @__PURE__ */ jsx25(Box22, { flexDirection: "column", marginTop: 1, children: MENU_ITEMS.map((item) => {
12323
- const dimmed = item.key === "o" && !hasStateFile || item.key === "f" && !hasStateFile || item.key === "d" && !hasDashboard;
12327
+ const dimmed = item.key === "e" && !hasStateFile || item.key === "f" && !hasStateFile || item.key === "d" && !hasDashboard;
12324
12328
  return /* @__PURE__ */ jsxs21(Text24, { children: [
12325
12329
  /* @__PURE__ */ jsx25(Text24, { color: dimmed ? theme24.textSubtle : theme24.accentActive, children: ` ${item.key} ` }),
12326
12330
  /* @__PURE__ */ jsx25(Text24, { color: dimmed ? theme24.textSubtle : theme24.textMuted, children: item.label })
@@ -14797,6 +14801,8 @@ var NETWORK_ERROR_PATTERNS = [
14797
14801
  "econnrefused",
14798
14802
  "econnaborted",
14799
14803
  "etimedout",
14804
+ "timeout",
14805
+ "request timeout",
14800
14806
  "enetunreach",
14801
14807
  "enetdown",
14802
14808
  "ehostunreach",
@@ -14816,12 +14822,78 @@ var NETWORK_ERROR_PATTERNS = [
14816
14822
  "unable to connect",
14817
14823
  "err_network"
14818
14824
  ];
14825
// Lower-case substrings that describeNetworkError treats as "the local network
// appears to be offline" (matched against the lower-cased error signals).
var LOCAL_NETWORK_PATTERNS = [
  "enetunreach",
  "enetdown",
  "enotfound",
  "network error",
  "network request failed",
  "networkerror",
  "err_network"
];

// Lower-case substrings that describeNetworkError treats as "the server is not
// responding". Checked only when no LOCAL_NETWORK_PATTERNS entry matched.
var SERVER_UNREACHABLE_PATTERNS = [
  "econnrefused",
  "econnreset",
  "connection refused",
  "connection reset",
  "ehostunreach"
];
14841
/**
 * Flatten an error (and everything it wraps) into a list of text signals.
 *
 * For object errors the string `message` and string `code` are collected, then
 * the `cause` chain is followed, then any `errors` array (as carried by
 * AggregateError) is walked so that per-attempt failures contribute their own
 * messages and codes.
 *
 * @param {unknown} error - Thrown value of any type.
 * @param {Set<unknown>} [seen] - Objects already visited; guards against cycles.
 * @returns {string[]} Signals in discovery order (may contain empty strings
 *   and duplicates; callers are expected to trim/dedupe).
 */
function collectErrorSignals(error, seen = /* @__PURE__ */ new Set()) {
  if (error == null || seen.has(error)) return [];
  if (typeof error === "string") return [error];
  if (typeof error !== "object") return [String(error)];
  seen.add(error);
  const parts = [];
  if ("message" in error && typeof error.message === "string") {
    parts.push(error.message);
  }
  if ("code" in error && typeof error.code === "string") {
    parts.push(error.code);
  }
  if ("cause" in error) {
    parts.push(...collectErrorSignals(error.cause, seen));
  }
  // AggregateError-style containers keep the individual failures in `errors`.
  if ("errors" in error && Array.isArray(error.errors)) {
    for (const nested of error.errors) {
      parts.push(...collectErrorSignals(nested, seen));
    }
  }
  return parts;
}
14858
/**
 * Derive the text used to classify and describe a network failure.
 *
 * @param {unknown} error - Thrown value.
 * @returns {{ searchText: string, detailMessage: string }} `searchText` is the
 *   lower-cased, space-joined set of unique non-empty signals; `detailMessage`
 *   is the first signal other than "fetch failed", or the error's own message
 *   (stringified for non-Error values) when no such signal exists.
 */
function getNetworkErrorContext(error) {
  const rawSignals = collectErrorSignals(error);
  const fallbackMessage = error instanceof Error ? error.message : String(error);

  // A Set keeps first-seen order while dropping duplicates.
  const distinct = /* @__PURE__ */ new Set();
  for (const raw of rawSignals) {
    const cleaned = raw.trim();
    if (cleaned) distinct.add(cleaned);
  }
  const uniqueSignals = [...distinct];

  const informative = uniqueSignals.find((signal) => signal.toLowerCase() !== "fetch failed");
  return {
    searchText: uniqueSignals.join(" ").toLowerCase(),
    detailMessage: informative ?? fallbackMessage
  };
}
14869
/**
 * Turn a network failure into three user-facing lines: a headline, a hint,
 * and a `Details:` line carrying the most informative raw signal.
 *
 * Classification order: local-network patterns, then server-unreachable
 * patterns, then timeouts, then a generic fallback.
 *
 * @param {unknown} error - Thrown value.
 * @returns {string[]} Exactly three lines.
 */
function describeNetworkError(error) {
  const { searchText, detailMessage } = getNetworkErrorContext(error);
  const mentionsAny = (patterns) => patterns.some((p) => searchText.includes(p));

  let headline;
  let hint;
  if (mentionsAny(LOCAL_NETWORK_PATTERNS)) {
    headline = "Could not reach the Runtype API \u2014 your network appears to be offline.";
    hint = "Check your internet connection and try again.";
  } else if (mentionsAny(SERVER_UNREACHABLE_PATTERNS)) {
    headline = "Could not reach the Runtype API \u2014 the server is not responding.";
    hint = "The service may be temporarily unavailable. Try again in a few minutes.";
  } else if (searchText.includes("etimedout") || searchText.includes("timeout")) {
    headline = "Could not reach the Runtype API \u2014 the request timed out.";
    hint = "This could be a network issue or the server may be under heavy load.";
  } else {
    headline = "Could not reach the Runtype API \u2014 a network error occurred.";
    hint = "Check your internet connection or try again in a few minutes.";
  }
  return [headline, hint, `Details: ${detailMessage}`];
}
14819
14891
  function isTransientNetworkError(error) {
14820
14892
  if (error instanceof RuntypeApiError) return false;
14821
- const message = error instanceof Error ? error.message.toLowerCase() : String(error).toLowerCase();
14822
- if (error instanceof TypeError && message.includes("fetch")) return true;
14893
+ const { searchText } = getNetworkErrorContext(error);
14894
+ if (error instanceof TypeError && searchText.includes("fetch")) return true;
14823
14895
  if (error instanceof DOMException && error.name === "AbortError") return true;
14824
- return NETWORK_ERROR_PATTERNS.some((pattern) => message.includes(pattern));
14896
+ return NETWORK_ERROR_PATTERNS.some((pattern) => searchText.includes(pattern));
14825
14897
  }
14826
14898
  async function retryOnNetworkError(fn, opts = {}) {
14827
14899
  const maxRetries = opts.maxRetries ?? 3;
@@ -14898,9 +14970,8 @@ function describeMarathonApiError(error) {
14898
14970
  if (!(error instanceof Error)) {
14899
14971
  return ["Task failed: Unknown error"];
14900
14972
  }
14901
- if (!(error instanceof RuntypeApiError) || error.statusCode !== 429) {
14902
- const message = error instanceof Error ? error.message : "Unknown error";
14903
- return [`Task failed: ${message}`];
14973
+ if (isTransientNetworkError(error)) {
14974
+ return describeNetworkError(error);
14904
14975
  }
14905
14976
  return [`Task failed: ${error.message}`];
14906
14977
  }
@@ -15311,7 +15382,9 @@ function extractRunTaskResumeState(state) {
15311
15382
  ...sanitized.bestCandidateNeedsVerification ? { bestCandidateNeedsVerification: sanitized.bestCandidateNeedsVerification } : {},
15312
15383
  ...sanitized.bestCandidateVerified ? { bestCandidateVerified: sanitized.bestCandidateVerified } : {},
15313
15384
  ...sanitized.verificationRequired !== void 0 ? { verificationRequired: sanitized.verificationRequired } : {},
15314
- ...sanitized.lastVerificationPassed ? { lastVerificationPassed: sanitized.lastVerificationPassed } : {}
15385
+ ...sanitized.lastVerificationPassed ? { lastVerificationPassed: sanitized.lastVerificationPassed } : {},
15386
+ ...sanitized.isCreationTask !== void 0 ? { isCreationTask: sanitized.isCreationTask } : {},
15387
+ ...sanitized.outputRoot ? { outputRoot: sanitized.outputRoot } : {}
15315
15388
  };
15316
15389
  }
15317
15390
  function findStateFile(name, stateDir) {
@@ -15476,6 +15549,29 @@ var IGNORED_REPO_DIRS = /* @__PURE__ */ new Set([
15476
15549
  "dist",
15477
15550
  "node_modules"
15478
15551
  ]);
15552
/**
 * Build a test that is true when any "/"-separated segment of the path equals
 * `dirName` — i.e. the directory itself (at any depth) or anything inside it.
 */
function matchesSensitiveDir(dirName) {
  return (n) => n.split("/").includes(dirName);
}

// Rules for paths that tools must neither list nor read/write. Every `test`
// receives a forward-slash path (see isSensitivePath).
var SENSITIVE_PATH_PATTERNS = [
  { name: ".env", test: (n) => n === ".env" || n.endsWith("/.env") },
  // NOTE(review): deliberately left broad — matches any file name containing
  // ".env" followed by anything (".env.local", "prod.env", but also
  // "app.environment.ts"). Confirm whether that over-blocking is intended.
  { name: ".env.*", test: (n) => /\.env\.?[^/]*$/.test(n) },
  { name: "private keys", test: (n) => /(^|\/)(id_rsa|id_ed25519|id_ecdsa)(\.pub)?$/.test(n) },
  { name: "known_hosts", test: (n) => n.endsWith("known_hosts") },
  { name: "authorized_keys", test: (n) => n.endsWith("authorized_keys") },
  { name: "cert/key extensions", test: (n) => /\.(pem|key|p12|pfx)$/i.test(n) },
  { name: "npm/pypi config", test: (n) => /(^|\/)(\.npmrc|\.pypirc|\.netrc)$/.test(n) },
  { name: "docker config", test: (n) => /\.docker\/config\.json$/i.test(n) },
  { name: "credentials", test: (n) => /(^|\/)(credentials\.json|secrets\.json)$/i.test(n) },
  { name: "service account", test: (n) => /service-account.*\.json$/i.test(n) || /firebase-admin.*\.json$/i.test(n) },
  // Directory rules match by segment so that a nested directory entry itself
  // ("packages/app/.git") is covered, not only the root one or its contents.
  { name: ".ssh", test: matchesSensitiveDir(".ssh") },
  { name: ".aws", test: matchesSensitiveDir(".aws") },
  { name: ".gnupg", test: matchesSensitiveDir(".gnupg") },
  { name: ".terraform", test: matchesSensitiveDir(".terraform") },
  { name: ".git", test: matchesSensitiveDir(".git") },
  { name: ".runtype", test: matchesSensitiveDir(".runtype") }
];

/**
 * Report whether a path matches any SENSITIVE_PATH_PATTERNS rule.
 *
 * @param {string} normalizedPath - Path to check; backslashes are converted to
 *   forward slashes and surrounding whitespace is trimmed before matching.
 * @returns {boolean} True when the path is sensitive; false for empty or
 *   non-string input.
 */
function isSensitivePath(normalizedPath) {
  if (typeof normalizedPath !== "string") return false;
  const n = normalizedPath.replace(/\\/g, "/").trim();
  if (!n) return false;
  return SENSITIVE_PATH_PATTERNS.some(({ test }) => test(n));
}
15479
15575
  var DEFAULT_DISCOVERY_MAX_RESULTS = 50;
15480
15576
  var MAX_FILE_BYTES_TO_SCAN = 1024 * 1024;
15481
15577
  var LOW_SIGNAL_FILE_NAMES = /* @__PURE__ */ new Set([
@@ -15564,12 +15660,15 @@ function scoreSearchPath(relativePath) {
15564
15660
  return score;
15565
15661
  }
15566
15662
  function shouldIgnoreRepoEntry(entryPath) {
15567
- const normalized = normalizeToolPath(entryPath);
15663
+ const normalized = normalizeToolPath(entryPath).replace(/\\/g, "/");
15568
15664
  if (normalized === ".") return false;
15665
+ if (isSensitivePath(normalized)) return true;
15569
15666
  return normalized.split(path8.sep).some((segment) => IGNORED_REPO_DIRS.has(segment));
15570
15667
  }
15571
15668
  function safeReadTextFile(filePath) {
15572
15669
  try {
15670
+ const normalized = normalizeToolPath(filePath).replace(/\\/g, "/");
15671
+ if (isSensitivePath(normalized)) return null;
15573
15672
  const stat = fs8.statSync(filePath);
15574
15673
  if (!stat.isFile() || stat.size > MAX_FILE_BYTES_TO_SCAN) return null;
15575
15674
  const buffer = fs8.readFileSync(filePath);
@@ -15700,9 +15799,10 @@ function resolveToolPath(toolPath, options = {}) {
15700
15799
  return { ok: false, error: `Path does not exist: ${requestedPath}` };
15701
15800
  }
15702
15801
  const workspaceRoot = fs9.realpathSync.native(process.cwd());
15802
+ const extraRoots = (options.allowedRoots || []).map((rootPath) => canonicalizeAllowedRoot(rootPath));
15703
15803
  const allowedRoots = [
15704
- workspaceRoot,
15705
- ...(options.allowedRoots || []).map((rootPath) => canonicalizeAllowedRoot(rootPath))
15804
+ ...extraRoots,
15805
+ workspaceRoot
15706
15806
  ];
15707
15807
  const matchedRoot = allowedRoots.find(
15708
15808
  (rootPath) => isPathWithinRoot(resolved.canonicalPath, rootPath)
@@ -15721,6 +15821,13 @@ function resolveToolPath(toolPath, options = {}) {
15721
15821
  error: `Access denied: ${requestedPath} is inside restricted workspace state (${blockedSegment})`
15722
15822
  };
15723
15823
  }
15824
+ const relativeFromWorkspace = path9.relative(workspaceRoot, resolved.canonicalPath).replace(/\\/g, "/");
15825
+ if (isSensitivePath(relativeFromWorkspace)) {
15826
+ return {
15827
+ ok: false,
15828
+ error: `Access denied: ${requestedPath} is a sensitive path and cannot be read or written`
15829
+ };
15830
+ }
15724
15831
  }
15725
15832
  if (resolved.exists) {
15726
15833
  const stat = fs9.statSync(resolved.canonicalPath);
@@ -15741,8 +15848,17 @@ function resolveToolPath(toolPath, options = {}) {
15741
15848
  }
15742
15849
  return { ok: true, resolvedPath: resolved.canonicalPath };
15743
15850
  }
15851
/**
 * Compute the state directory for a marathon task.
 *
 * @param {string} taskName - Task name; converted with stateSafeName3.
 * @param {string} [stateDir] - Base directory; when falsy the result of
 *   getMarathonStateDir() is used instead.
 * @returns {string} Base directory joined with the converted task name.
 */
function getTaskStateRoot(taskName, stateDir) {
  const baseDir = stateDir || getMarathonStateDir();
  const taskDirName = stateSafeName3(taskName);
  return path9.join(baseDir, taskDirName);
}
15744
15854
  function createDefaultLocalTools(context) {
15745
- const allowedReadRoots = context?.taskName ? [getOffloadedOutputDir(context.taskName, context.stateDir)] : [];
15855
+ const taskStateRoot = context?.taskName ? getTaskStateRoot(context.taskName, context.stateDir) : void 0;
15856
+ const planDir = context?.taskName ? path9.resolve(`.runtype/marathons/${stateSafeName3(context.taskName)}`) : void 0;
15857
+ const allowedReadRoots = context?.taskName ? [
15858
+ getOffloadedOutputDir(context.taskName, context.stateDir),
15859
+ ...taskStateRoot ? [taskStateRoot] : [],
15860
+ ...planDir ? [planDir] : []
15861
+ ] : [];
15746
15862
  return {
15747
15863
  read_file: {
15748
15864
  description: "Read the contents of a file at the given path",
@@ -15944,6 +16060,8 @@ function createDefaultLocalTools(context) {
15944
16060
  };
15945
16061
  }
15946
16062
  function createCheckpointedWriteFileTool(taskName, stateDir) {
16063
+ const taskStateRoot = getTaskStateRoot(taskName, stateDir);
16064
+ const planDir = path9.resolve(`.runtype/marathons/${stateSafeName3(taskName)}`);
15947
16065
  return {
15948
16066
  description: "Write content to a file, creating directories as needed and checkpointing original repo files",
15949
16067
  parametersSchema: {
@@ -15956,7 +16074,8 @@ function createCheckpointedWriteFileTool(taskName, stateDir) {
15956
16074
  },
15957
16075
  execute: async (args) => {
15958
16076
  const resolvedPath = resolveToolPath(String(args.path || ""), {
15959
- allowMissing: true
16077
+ allowMissing: true,
16078
+ allowedRoots: [taskStateRoot, planDir]
15960
16079
  });
15961
16080
  if (!resolvedPath.ok) return `Error: ${resolvedPath.error}`;
15962
16081
  const content = String(args.content || "");
@@ -16047,6 +16166,7 @@ function createRunCheckTool() {
16047
16166
  if (!isSafeVerificationCommand(command)) {
16048
16167
  return JSON.stringify({
16049
16168
  success: false,
16169
+ blocked: true,
16050
16170
  command,
16051
16171
  error: "Blocked unsafe verification command. Use a single non-destructive lint/test/typecheck/build command."
16052
16172
  });
@@ -16085,6 +16205,57 @@ function createRunCheckTool() {
16085
16205
  }
16086
16206
  };
16087
16207
  }
16208
/**
 * Build the `search_session_history` local tool.
 *
 * The tool posts the query to `/session-context/search` for the given task
 * and renders the returned results as plain text.
 *
 * @param {{ post: (path: string, body: object) => Promise<any> }} client -
 *   API client; only `post` is used.
 * @param {string} taskName - Marathon task whose sessions are searched.
 * @returns {{ description: string, parametersSchema: object,
 *   execute: (args: object) => Promise<string> }} Tool definition. `execute`
 *   always resolves to a string and reports request failures in that string.
 */
function createSearchSessionHistoryTool(client, taskName) {
  return {
    description: "Search across all prior marathon sessions for specific information, decisions, findings, or tool outputs. Use this when you need to recall something from earlier sessions that may have been compacted away. Returns ranked results with content snippets from matching sessions.",
    parametersSchema: {
      type: "object",
      properties: {
        query: {
          type: "string",
          description: 'What to search for (e.g. "authentication flow decisions", "test failures in auth module")'
        },
        limit: {
          type: "number",
          description: "Maximum number of results to return (default 5, max 20)"
        },
        types: {
          type: "array",
          items: { type: "string", enum: ["response", "reasoning", "tool_output"] },
          description: "Filter by content type (default: all types)"
        }
      },
      required: ["query"]
    },
    execute: async (args) => {
      const query = String(args.query || "").trim();
      if (!query) return "Error: query is required";
      // Send a whole number in [1, 20]; anything unusable falls back to 5.
      const requestedLimit = Math.trunc(Number(args.limit)) || 5;
      const limit = Math.max(1, Math.min(20, requestedLimit));
      const types = Array.isArray(args.types) ? args.types : void 0;
      try {
        const response = await client.post("/session-context/search", {
          query,
          taskName,
          limit,
          ...types ? { types } : {}
        });
        const results = Array.isArray(response?.results) ? response.results : [];
        if (!response?.success || results.length === 0) {
          return "No matching session context found for your query.";
        }
        const formatted = results.map((r, i) => {
          // A result without a numeric score must not abort the whole listing.
          const score = typeof r.score === "number" && Number.isFinite(r.score) ? r.score.toFixed(3) : "n/a";
          const header = `[Result ${i + 1}] Session ${r.sessionIndex} | ${r.type}${r.toolName ? ` (${r.toolName})` : ""} | Score: ${score}`;
          return `${header}\n${r.content}`;
        });
        // Fall back to the number of returned results when no count is given.
        const total = typeof response.count === "number" ? response.count : results.length;
        return `Found ${total} matching results:\n\n${formatted.join("\n\n---\n\n")}`;
      } catch (error) {
        return `Session search unavailable: ${error instanceof Error ? error.message : String(error)}`;
      }
    }
  };
}
16088
16259
  function buildLocalTools(client, sandboxProvider, options, context) {
16089
16260
  const enabledTools = {};
16090
16261
  if (!options.noLocalTools) {
@@ -16100,6 +16271,9 @@ function buildLocalTools(client, sandboxProvider, options, context) {
16100
16271
  context.stateDir
16101
16272
  );
16102
16273
  enabledTools.run_check = createRunCheckTool();
16274
+ if (options.sessionSearch === true) {
16275
+ enabledTools.search_session_history = createSearchSessionHistoryTool(client, context.taskName);
16276
+ }
16103
16277
  }
16104
16278
  }
16105
16279
  if (sandboxProvider) {
@@ -16111,6 +16285,62 @@ function buildLocalTools(client, sandboxProvider, options, context) {
16111
16285
  return Object.keys(enabledTools).length > 0 ? enabledTools : void 0;
16112
16286
  }
16113
16287
 
16288
+ // src/marathon/session-chunker.ts
16289
// Default upper bound, in characters, for a single chunk.
var DEFAULT_MAX_CHUNK_CHARS = 2e3;
// Text shorter than this is not turned into a chunk of its own.
var MIN_CONTENT_LENGTH = 50;

/**
 * Split one session snapshot into typed text chunks.
 *
 * The snapshot's `content` becomes "response" chunks, `reasoning` becomes
 * "reasoning" chunks, and each tool's `result` (JSON-stringified when it is
 * not already a string) becomes "tool_output" chunks tagged with the tool
 * name. Pieces shorter than MIN_CONTENT_LENGTH are skipped.
 *
 * @param {{ content?: string, reasoning?: string,
 *   tools?: Array<{ name?: string, result?: unknown }> }} snapshot
 * @param {number} [maxChunkChars] - Target maximum chunk length.
 * @returns {Array<{ content: string, type: string, toolName?: string }>}
 */
function extractSessionChunks(snapshot, maxChunkChars = DEFAULT_MAX_CHUNK_CHARS) {
  const chunks = [];
  if (snapshot.content && snapshot.content.length >= MIN_CONTENT_LENGTH) {
    chunks.push(...chunkText(snapshot.content, "response", maxChunkChars));
  }
  if (snapshot.reasoning && snapshot.reasoning.length >= MIN_CONTENT_LENGTH) {
    chunks.push(...chunkText(snapshot.reasoning, "reasoning", maxChunkChars));
  }
  // A snapshot without a tools list simply contributes no tool output.
  for (const tool of snapshot.tools ?? []) {
    const result = typeof tool.result === "string" ? tool.result : JSON.stringify(tool.result ?? "");
    if (result.length >= MIN_CONTENT_LENGTH) {
      chunks.push(...chunkText(result, "tool_output", maxChunkChars, tool.name));
    }
  }
  return chunks;
}

/**
 * Break text into chunks of roughly `maxChars` characters.
 *
 * Text that already fits is returned as a single chunk. Otherwise paragraphs
 * (separated by blank lines) are packed together; a paragraph longer than
 * `maxChars` is packed sentence by sentence, and a single sentence longer
 * than `maxChars` is cut into fixed-size slices so that unpunctuated text
 * (for example serialized JSON) cannot produce an arbitrarily large chunk.
 * A trailing remainder shorter than MIN_CONTENT_LENGTH is dropped.
 *
 * @param {string} text
 * @param {string} type - Chunk type label copied onto every chunk.
 * @param {number} maxChars
 * @param {string} [toolName] - Added to every chunk when truthy.
 * @returns {Array<{ content: string, type: string, toolName?: string }>}
 */
function chunkText(text, type, maxChars, toolName) {
  const makeChunk = (content) => ({ content, type, ...toolName ? { toolName } : {} });
  if (text.length <= maxChars) {
    return [makeChunk(text)];
  }
  const chunks = [];
  let current = "";
  const flush = () => {
    chunks.push(makeChunk(current.trim()));
    current = "";
  };
  for (const paragraph of text.split(/\n\n+/)) {
    if (paragraph.length > maxChars) {
      if (current.length >= MIN_CONTENT_LENGTH) {
        flush();
      }
      const sentences = paragraph.match(/[^.!?]+[.!?]+\s*|[^.!?]+$/g) || [paragraph];
      for (const sentence of sentences) {
        if (current.length + sentence.length > maxChars && current.length > 0) {
          flush();
        }
        if (maxChars > 0 && sentence.length > maxChars) {
          // `current` is empty here: a non-empty buffer was flushed just above.
          let offset = 0;
          while (sentence.length - offset > maxChars) {
            const slice = sentence.slice(offset, offset + maxChars).trim();
            if (slice) chunks.push(makeChunk(slice));
            offset += maxChars;
          }
          current = sentence.slice(offset);
          continue;
        }
        current += sentence;
      }
      continue;
    }
    if (current.length + paragraph.length + 2 > maxChars && current.length >= MIN_CONTENT_LENGTH) {
      flush();
    }
    current += (current ? "\n\n" : "") + paragraph;
  }
  if (current.length >= MIN_CONTENT_LENGTH) {
    flush();
  }
  return chunks;
}
16343
+
16114
16344
  // src/marathon/loop-detector.ts
16115
16345
  var DEFAULT_MAX_HISTORY = 30;
16116
16346
  var DEFAULT_MIN_PATTERN_LENGTH = 2;
@@ -16462,12 +16692,46 @@ function resolveModelForPhase(phase, cliOverrides, milestoneModels) {
16462
16692
  }
16463
16693
  return cliOverrides.defaultModel;
16464
16694
  }
16695
/**
 * Build the agent `errorHandling` config for a workflow phase.
 *
 * Phase-specific fallback models take precedence over the CLI fallback model.
 * Either way the fallback chain starts with one retry after a 5000 ms delay.
 *
 * @param {string | undefined} phase - Current phase (milestone) name.
 * @param {string | undefined} cliFallbackModel - Fallback model from the CLI;
 *   surrounding whitespace is ignored and a blank value counts as not set.
 * @param {Record<string, Array<{ model: string, temperature?: number,
 *   maxTokens?: number }>> | undefined} milestoneFallbackModels - Per-phase
 *   fallback models.
 * @returns {{ onError: "fallback", fallbacks: object[] } | undefined}
 *   The config, or undefined when no fallback is configured.
 */
function resolveErrorHandlingForPhase(phase, cliFallbackModel, milestoneFallbackModels) {
  const retryStep = { type: "retry", delay: 5e3 };
  const phaseFallbacks = phase ? milestoneFallbackModels?.[phase] : void 0;
  if (phaseFallbacks?.length) {
    return {
      onError: "fallback",
      fallbacks: [
        retryStep,
        ...phaseFallbacks.map((fb) => ({
          type: "model",
          model: fb.model,
          ...fb.temperature !== void 0 ? { temperature: fb.temperature } : {},
          ...fb.maxTokens !== void 0 ? { maxTokens: fb.maxTokens } : {}
        }))
      ]
    };
  }
  // Trim so that a blank value does not configure a fallback to an empty
  // model name.
  const normalizedCliFallback = typeof cliFallbackModel === "string" ? cliFallbackModel.trim() : cliFallbackModel;
  if (normalizedCliFallback) {
    return {
      onError: "fallback",
      fallbacks: [
        retryStep,
        { type: "model", model: normalizedCliFallback }
      ]
    };
  }
  return void 0;
}
16465
16722
 
16466
16723
  // src/marathon/playbook-loader.ts
16467
16724
  import * as fs12 from "fs";
16468
16725
  import * as path12 from "path";
16469
16726
  import * as os4 from "os";
16727
+ import micromatch from "micromatch";
16470
16728
  import { parse as parseYaml } from "yaml";
16729
+ var DISCOVERY_TOOLS = /* @__PURE__ */ new Set([
16730
+ "search_repo",
16731
+ "glob_files",
16732
+ "tree_directory",
16733
+ "list_directory"
16734
+ ]);
16471
16735
  var PLAYBOOKS_DIR = ".runtype/marathons/playbooks";
16472
16736
  function getCandidatePaths(nameOrPath, cwd) {
16473
16737
  const home = os4.homedir();
@@ -16542,7 +16806,54 @@ function buildIsComplete(criteria) {
16542
16806
  return () => false;
16543
16807
  }
16544
16808
  }
16809
/**
 * Compile a playbook policy into a tool-call interceptor.
 *
 * @param {object} policy - May set `blockedTools`, `blockDiscoveryTools`,
 *   `allowedReadGlobs`, `allowedWriteGlobs` and `requirePlanBeforeWrite`.
 * @returns {((toolName: string, args: object, ctx: object) => string | undefined) | undefined}
 *   Undefined when the policy sets none of those rules. Otherwise a function
 *   that returns a "Blocked by playbook policy" message when the call is
 *   refused and undefined when it is allowed. `ctx` must provide
 *   `normalizePath`, `state` and `trace` for calls that carry a path.
 */
function buildPolicyIntercept(policy) {
  const hasAnyRule = Boolean(
    policy.blockedTools?.length || policy.blockDiscoveryTools || policy.allowedReadGlobs?.length || policy.allowedWriteGlobs?.length || policy.requirePlanBeforeWrite
  );
  if (!hasAnyRule) {
    return void 0;
  }

  const blockedSet = /* @__PURE__ */ new Set();
  for (const rawName of policy.blockedTools ?? []) {
    const toolId = rawName.trim();
    if (toolId) blockedSet.add(toolId);
  }
  const readGlobs = policy.allowedReadGlobs ?? [];
  const writeGlobs = policy.allowedWriteGlobs ?? [];

  return (toolName, args, ctx) => {
    if (blockedSet.has(toolName)) {
      return `Blocked by playbook policy: ${toolName} is not allowed for this task.`;
    }
    if (policy.blockDiscoveryTools && DISCOVERY_TOOLS.has(toolName)) {
      return `Blocked by playbook policy: discovery tools are disabled for this task.`;
    }

    // Path rules only apply to calls that carry a non-blank string path.
    const carriesPath = typeof args.path === "string" && args.path.trim();
    if (!carriesPath) return void 0;
    const pathArg = ctx.normalizePath(String(args.path));
    if (!pathArg) return void 0;

    const resolvePlanPath = () => ctx.state.planPath ? ctx.normalizePath(ctx.state.planPath) : void 0;

    if (toolName === "read_file") {
      if (readGlobs.length > 0 && !micromatch.some(pathArg, readGlobs, { dot: true })) {
        return `Blocked by playbook policy: ${toolName} path "${pathArg}" is outside allowed read globs: ${readGlobs.join(", ")}`;
      }
      return void 0;
    }

    const isWrite = toolName === "write_file" || toolName === "restore_file_checkpoint";
    if (!isWrite) return void 0;

    if (writeGlobs.length > 0) {
      // The plan file is always writable, whatever the write globs say.
      const planPath = resolvePlanPath();
      const targetsPlan = Boolean(planPath) && pathArg === planPath;
      if (!targetsPlan && !micromatch.some(pathArg, writeGlobs, { dot: true })) {
        return `Blocked by playbook policy: ${toolName} path "${pathArg}" is outside allowed write globs: ${writeGlobs.join(", ")}`;
      }
    }

    if (policy.requirePlanBeforeWrite && !ctx.state.planWritten && !ctx.trace.planWritten) {
      const planPath = resolvePlanPath();
      if (!planPath || pathArg !== planPath) {
        return `Blocked by playbook policy: write the plan before creating other files.`;
      }
    }
    return void 0;
  };
}
16545
16855
  function convertToWorkflow(config2) {
16856
+ const policyIntercept = config2.policy ? buildPolicyIntercept(config2.policy) : void 0;
16546
16857
  const phases = config2.milestones.map((milestone) => ({
16547
16858
  name: milestone.name,
16548
16859
  description: milestone.description,
@@ -16558,6 +16869,7 @@ ${instructions}`;
16558
16869
  return milestone.toolGuidance ?? [];
16559
16870
  },
16560
16871
  isComplete: buildIsComplete(milestone.completionCriteria),
16872
+ interceptToolCall: policyIntercept,
16561
16873
  // Default to rejecting TASK_COMPLETE unless the playbook explicitly allows it.
16562
16874
  // The SDK accepts completion by default when canAcceptCompletion is undefined,
16563
16875
  // which would let the model end the marathon prematurely in early phases.
@@ -16568,23 +16880,37 @@ ${instructions}`;
16568
16880
  phases
16569
16881
  };
16570
16882
  }
16883
/**
 * Normalize a playbook fallback-model entry to object form.
 *
 * @param {string | { model: string, temperature?: number, maxTokens?: number }} input
 *   A bare model name or an object entry.
 * @returns {{ model: string, temperature?: number, maxTokens?: number }}
 *   Object with `model` and only those optional fields that were defined.
 */
function normalizeFallbackModel(input) {
  if (typeof input === "string") {
    return { model: input };
  }
  const { model, temperature, maxTokens } = input;
  const normalized = { model };
  if (temperature !== void 0) {
    normalized.temperature = temperature;
  }
  if (maxTokens !== void 0) {
    normalized.maxTokens = maxTokens;
  }
  return normalized;
}
16571
16891
  function loadPlaybook(nameOrPath, cwd) {
16572
16892
  const baseCwd = cwd || process.cwd();
16573
16893
  const candidates = getCandidatePaths(nameOrPath, baseCwd);
16574
16894
  for (const candidate of candidates) {
16575
- if (!fs12.existsSync(candidate)) continue;
16895
+ if (!fs12.existsSync(candidate) || fs12.statSync(candidate).isDirectory()) continue;
16576
16896
  const config2 = parsePlaybookFile(candidate);
16577
16897
  validatePlaybook(config2, candidate);
16578
16898
  const milestoneModels = {};
16899
+ const milestoneFallbackModels = {};
16579
16900
  for (const m of config2.milestones) {
16580
16901
  if (m.model) milestoneModels[m.name] = m.model;
16902
+ if (m.fallbackModels?.length) {
16903
+ milestoneFallbackModels[m.name] = m.fallbackModels.map(normalizeFallbackModel);
16904
+ }
16581
16905
  }
16582
16906
  return {
16583
16907
  workflow: convertToWorkflow(config2),
16584
16908
  milestones: config2.milestones.map((m) => m.name),
16585
16909
  milestoneModels: Object.keys(milestoneModels).length > 0 ? milestoneModels : void 0,
16910
+ milestoneFallbackModels: Object.keys(milestoneFallbackModels).length > 0 ? milestoneFallbackModels : void 0,
16586
16911
  verification: config2.verification,
16587
- rules: config2.rules
16912
+ rules: config2.rules,
16913
+ policy: config2.policy
16588
16914
  };
16589
16915
  }
16590
16916
  throw new Error(
@@ -16749,13 +17075,22 @@ function normalizeMarathonAgentArgument(agent) {
16749
17075
  function buildMarathonAutoCreatedAgentBootstrap(agentName, options = {}) {
16750
17076
  const normalizedModel = options.model?.trim();
16751
17077
  const normalizedToolIds = [...new Set((options.toolIds || []).map((toolId) => toolId.trim()).filter(Boolean))];
16752
- const config2 = normalizedModel || normalizedToolIds.length > 0 ? {
17078
+ const normalizedFallbackModel = options.fallbackModel?.trim();
17079
+ const errorHandling = normalizedFallbackModel ? {
17080
+ onError: "fallback",
17081
+ fallbacks: [
17082
+ { type: "retry", delay: 5e3 },
17083
+ { type: "model", model: normalizedFallbackModel }
17084
+ ]
17085
+ } : void 0;
17086
+ const config2 = normalizedModel || normalizedToolIds.length > 0 || errorHandling ? {
16753
17087
  ...normalizedModel ? { model: normalizedModel } : {},
16754
17088
  ...normalizedToolIds.length > 0 ? {
16755
17089
  tools: {
16756
17090
  toolIds: normalizedToolIds
16757
17091
  }
16758
- } : {}
17092
+ } : {},
17093
+ ...errorHandling ? { errorHandling } : {}
16759
17094
  } : void 0;
16760
17095
  return {
16761
17096
  description: `Powering a marathon for ${agentName}`,
@@ -16936,6 +17271,9 @@ async function taskAction(agent, options) {
16936
17271
  console.log(chalk16.green(`Created agent: ${agentId}`));
16937
17272
  }
16938
17273
  } catch (createErr) {
17274
+ if (isTransientNetworkError(createErr)) {
17275
+ await failBeforeMain(formatMarathonApiError(createErr));
17276
+ }
16939
17277
  const errMsg = createErr instanceof Error ? createErr.message : String(createErr);
16940
17278
  await failBeforeMain([
16941
17279
  chalk16.red(`Failed to create agent "${normalizedAgent}"`),
@@ -16944,6 +17282,9 @@ async function taskAction(agent, options) {
16944
17282
  }
16945
17283
  }
16946
17284
  } catch (error) {
17285
+ if (isTransientNetworkError(error)) {
17286
+ await failBeforeMain(formatMarathonApiError(error));
17287
+ }
16947
17288
  const errMsg = error instanceof Error ? error.message : String(error);
16948
17289
  await failBeforeMain([
16949
17290
  chalk16.red("Failed to list agents"),
@@ -17109,11 +17450,17 @@ async function taskAction(agent, options) {
17109
17450
  let playbookWorkflow;
17110
17451
  let playbookMilestones;
17111
17452
  let playbookMilestoneModels;
17453
+ let playbookMilestoneFallbackModels;
17454
+ let playbookPolicy;
17112
17455
  if (options.playbook) {
17113
17456
  const result = loadPlaybook(options.playbook);
17114
17457
  playbookWorkflow = result.workflow;
17115
17458
  playbookMilestones = result.milestones;
17116
17459
  playbookMilestoneModels = result.milestoneModels;
17460
+ playbookMilestoneFallbackModels = result.milestoneFallbackModels;
17461
+ playbookPolicy = result.policy;
17462
+ } else {
17463
+ playbookPolicy = void 0;
17117
17464
  }
17118
17465
  if (useStartupShell && !options.model?.trim()) {
17119
17466
  if (playbookMilestoneModels && Object.keys(playbookMilestoneModels).length > 0 && startupShellRef.current) {
@@ -17214,7 +17561,8 @@ ${rulesContext}`;
17214
17561
  if (autoCreatedAgent) {
17215
17562
  const bootstrapPayload = buildMarathonAutoCreatedAgentBootstrap(normalizedAgent, {
17216
17563
  model: options.model || agentConfigModel || defaultConfiguredModel,
17217
- toolIds: resolvedToolIds
17564
+ toolIds: resolvedToolIds,
17565
+ fallbackModel: options.fallbackModel
17218
17566
  });
17219
17567
  try {
17220
17568
  await client.agents.update(agentId, bootstrapPayload);
@@ -17230,6 +17578,16 @@ ${rulesContext}`;
17230
17578
  );
17231
17579
  }
17232
17580
  }
17581
+ } else if (options.fallbackModel || playbookMilestoneFallbackModels) {
17582
+ const initialErrorHandling = resolveErrorHandlingForPhase(
17583
+ currentPhase,
17584
+ options.fallbackModel,
17585
+ playbookMilestoneFallbackModels
17586
+ );
17587
+ if (initialErrorHandling) {
17588
+ await client.agents.update(agentId, { config: { errorHandling: initialErrorHandling } }).catch(() => {
17589
+ });
17590
+ }
17233
17591
  }
17234
17592
  let localTools = buildLocalTools(client, parsedSandbox, options, {
17235
17593
  taskName,
@@ -17532,7 +17890,13 @@ Saving state... done. Session saved to ${filePath}`);
17532
17890
  model: event.model || effectiveModelForContext
17533
17891
  });
17534
17892
  },
17535
- ...resumeState ? { resumeState } : {},
17893
+ ...resumeState || playbookPolicy ? {
17894
+ resumeState: {
17895
+ ...resumeState ?? {},
17896
+ ...playbookPolicy?.outputRoot ? { outputRoot: playbookPolicy.outputRoot } : {},
17897
+ ...playbookPolicy?.requireVerification !== void 0 ? { verificationRequired: playbookPolicy.requireVerification } : {}
17898
+ }
17899
+ } : {},
17536
17900
  toolContextMode: options.toolContext || "hot-tail",
17537
17901
  toolWindow: options.toolWindow === "session" || !options.toolWindow ? "session" : parseInt(options.toolWindow, 10) || 10,
17538
17902
  onSession: async (state) => {
@@ -17575,6 +17939,21 @@ Saving state... done. Session saved to ${filePath}`);
17575
17939
  resumeState = extractRunTaskResumeState(adjustedState);
17576
17940
  lastSessionMessages = state.messages ?? [];
17577
17941
  saveState(filePath, adjustedState, { stripSnapshotEvents: !!eventLogWriter });
17942
+ if (options.sessionSearch === true) {
17943
+ const latestSnapshot = persistedSessionSnapshots[persistedSessionSnapshots.length - 1];
17944
+ if (latestSnapshot) {
17945
+ const chunks = extractSessionChunks(latestSnapshot);
17946
+ if (chunks.length > 0) {
17947
+ const sessionIdx = currentSessionOffset + state.sessionCount - 1;
17948
+ client.post("/session-context/index", {
17949
+ taskName,
17950
+ sessionIndex: sessionIdx,
17951
+ chunks
17952
+ }).catch(() => {
17953
+ });
17954
+ }
17955
+ }
17956
+ }
17578
17957
  if (resumeState?.workflowPhase) {
17579
17958
  const displayMilestone = detectedVariant === "external" && resumeState.workflowPhase === "research" && adjustedState.planWritten ? "report" : resumeState.workflowPhase;
17580
17959
  streamRef.current?.updateMilestone(displayMilestone);
@@ -17594,6 +17973,17 @@ Saving state... done. Session saved to ${filePath}`);
17594
17973
  options.model = newPhaseModel;
17595
17974
  modelChangedOnPhaseTransition = true;
17596
17975
  }
17976
+ if (options.fallbackModel || playbookMilestoneFallbackModels) {
17977
+ const newErrorHandling = resolveErrorHandlingForPhase(
17978
+ resumeState.workflowPhase,
17979
+ options.fallbackModel,
17980
+ playbookMilestoneFallbackModels
17981
+ );
17982
+ client.agents.update(agentId, {
17983
+ config: { errorHandling: newErrorHandling ?? null }
17984
+ }).catch(() => {
17985
+ });
17986
+ }
17597
17987
  }
17598
17988
  if (state.recentActionKeys && state.recentActionKeys.length > 0) {
17599
17989
  for (const key of state.recentActionKeys) {
@@ -17939,7 +18329,7 @@ ${details}`);
17939
18329
  }
17940
18330
  return resolved;
17941
18331
  }
17942
- function detectDeployWorkflow(_message, sandboxProvider, resumeState) {
18332
+ function detectDeployWorkflow(_message, _sandboxProvider, resumeState) {
17943
18333
  if (resumeState?.workflowVariant === "game") return gameWorkflow;
17944
18334
  if (resumeState?.workflowPhase === "design" || resumeState?.workflowPhase === "build" || resumeState?.workflowPhase === "verify") {
17945
18335
  return gameWorkflow;
@@ -17970,7 +18360,7 @@ function resolveSandboxWorkflowSelection(message, sandboxProvider, resumeState)
17970
18360
  };
17971
18361
  }
17972
18362
  function applyTaskOptions(cmd) {
17973
- return cmd.argument("<agent>", "Agent ID or name").option("-g, --goal <text>", "Goal message for the agent").option("--max-sessions <n>", "Maximum sessions", "50").option("--max-cost <n>", "Budget in USD").option("--model <modelId>", "Model ID to use (overrides agent config)").option("--name <name>", "Task name (used for state file, defaults to agent name)").option("--session <name>", "Resume a specific session by name").option("--state-dir <path>", "Directory for state files (default: ~/.runtype/projects/<hash>/marathons/)").option("--resume [message]", "Resume from existing local state, optionally with a new message").option("--fresh", "Start a new run and ignore any existing local state for this task").option("--compact", "Force compact-summary resume mode instead of replaying full history").option("--compact-strategy <strategy>", "Compaction strategy: auto (default), provider_native, or summary_fallback").option("--compact-threshold <value>", "Auto-compact when estimated context crosses this threshold (default: 80% fallback, 90% native; accepts percent like 90% or absolute token count like 120000)").option("--compact-instructions <text>", "Extra instructions for what a compact summary must preserve").option("--no-auto-compact", "Disable automatic context-aware history compaction").option("--track", "Sync progress to a Runtype record (visible in dashboard)").option("--debug", "Show debug output from each session").option("--json", "Output final result as JSON").option("--sandbox <provider>", "Enable sandbox code execution tool (cloudflare-worker, quickjs, or daytona)").option("--no-local-tools", "Disable built-in local tool execution (read_file, write_file, list_directory)").option("-t, --tools <tools...>", "Enable built-in tools (e.g., exa, firecrawl, dalle, openai_web_search, anthropic_web_search)").option("--plain-text", "Disable markdown rendering in output").option("--no-reasoning", "Disable model reasoning/thinking (enabled by default for supported 
models)").option("--no-checkpoint", "Run all iterations without checkpoint pauses (fully autonomous)").option("--checkpoint-timeout <seconds>", "Auto-continue timeout in seconds (default: 10)", "10").option("--planning-model <modelId>", "Model to use during research/planning phases").option("--execution-model <modelId>", "Model to use during execution phase").option("--playbook <name>", "Load a playbook from .runtype/marathons/playbooks/").option("--offload-threshold <chars>", 'Offload tool outputs larger than this to files (default: 100000; use "off" or "0" to disable guardrails)').option("--tool-context <mode>", "Tool result storage: hot-tail (default), observation-mask, or full-inline").option("--tool-window <window>", 'Compaction window: "session" (default) or a number for last-N tool results (e.g. 10)').option("--runner-char <char>", "Custom runner emoji (default: \u{1F3C3})").option("--finish-char <char>", "Custom finish line emoji (default: \u{1F3C1})").option("--no-runner", "Hide the runner emoji from the header border").option("--no-finish", "Hide the finish line emoji from the header border").action(taskAction);
18363
+ return cmd.argument("<agent>", "Agent ID or name").option("-g, --goal <text>", "Goal message for the agent").option("--max-sessions <n>", "Maximum sessions", "50").option("--max-cost <n>", "Budget in USD").option("--model <modelId>", "Model ID to use (overrides agent config)").option("--name <name>", "Task name (used for state file, defaults to agent name)").option("--session <name>", "Resume a specific session by name").option("--state-dir <path>", "Directory for state files (default: ~/.runtype/projects/<hash>/marathons/)").option("--resume [message]", "Resume from existing local state, optionally with a new message").option("--fresh", "Start a new run and ignore any existing local state for this task").option("--compact", "Force compact-summary resume mode instead of replaying full history").option("--compact-strategy <strategy>", "Compaction strategy: auto (default), provider_native, or summary_fallback").option("--compact-threshold <value>", "Auto-compact when estimated context crosses this threshold (default: 80% fallback, 90% native; accepts percent like 90% or absolute token count like 120000)").option("--compact-instructions <text>", "Extra instructions for what a compact summary must preserve").option("--no-auto-compact", "Disable automatic context-aware history compaction").option("--track", "Sync progress to a Runtype record (visible in dashboard)").option("--debug", "Show debug output from each session").option("--json", "Output final result as JSON").option("--sandbox <provider>", "Enable sandbox code execution tool (cloudflare-worker, quickjs, or daytona)").option("--no-local-tools", "Disable built-in local tool execution (read_file, write_file, list_directory)").option("--session-search", "Enable session context indexing and search_session_history tool").option("-t, --tools <tools...>", "Enable built-in tools (e.g., exa, firecrawl, dalle, openai_web_search, anthropic_web_search)").option("--plain-text", "Disable markdown rendering in 
output").option("--no-reasoning", "Disable model reasoning/thinking (enabled by default for supported models)").option("--no-checkpoint", "Run all iterations without checkpoint pauses (fully autonomous)").option("--checkpoint-timeout <seconds>", "Auto-continue timeout in seconds (default: 10)", "10").option("--planning-model <modelId>", "Model to use during research/planning phases").option("--execution-model <modelId>", "Model to use during execution phase").option("--fallback-model <modelId>", "Model to fall back to when primary model fails").option("--playbook <name>", "Load a playbook from .runtype/marathons/playbooks/").option("--offload-threshold <chars>", 'Offload tool outputs larger than this to files (default: 100000; use "off" or "0" to disable guardrails)').option("--tool-context <mode>", "Tool result storage: hot-tail (default), observation-mask, or full-inline").option("--tool-window <window>", 'Compaction window: "session" (default) or a number for last-N tool results (e.g. 10)').option("--runner-char <char>", "Custom runner emoji (default: \u{1F3C3})").option("--finish-char <char>", "Custom finish line emoji (default: \u{1F3C1})").option("--no-runner", "Hide the runner emoji from the header border").option("--no-finish", "Hide the finish line emoji from the header border").action(taskAction);
17974
18364
  }
17975
18365
  var taskCommand = applyTaskOptions(
17976
18366
  new Command10("task").description("Run a multi-session agent task")