@bastani/atomic 0.8.31-alpha.2 → 0.8.31-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/CHANGELOG.md +16 -3
  2. package/dist/builtin/cursor/CHANGELOG.md +1 -1
  3. package/dist/builtin/cursor/package.json +2 -2
  4. package/dist/builtin/intercom/package.json +1 -1
  5. package/dist/builtin/mcp/CHANGELOG.md +5 -0
  6. package/dist/builtin/mcp/direct-tools.ts +4 -2
  7. package/dist/builtin/mcp/package.json +1 -1
  8. package/dist/builtin/mcp/proxy-modes.ts +4 -2
  9. package/dist/builtin/mcp/utils.ts +25 -0
  10. package/dist/builtin/subagents/package.json +1 -1
  11. package/dist/builtin/web-access/package.json +1 -1
  12. package/dist/builtin/workflows/CHANGELOG.md +5 -0
  13. package/dist/builtin/workflows/builtin/ralph.ts +1 -0
  14. package/dist/builtin/workflows/package.json +1 -1
  15. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +114 -4
  16. package/dist/core/agent-session.d.ts +25 -0
  17. package/dist/core/agent-session.d.ts.map +1 -1
  18. package/dist/core/agent-session.js +135 -11
  19. package/dist/core/agent-session.js.map +1 -1
  20. package/dist/core/auth-guidance.d.ts +12 -0
  21. package/dist/core/auth-guidance.d.ts.map +1 -1
  22. package/dist/core/auth-guidance.js +24 -0
  23. package/dist/core/auth-guidance.js.map +1 -1
  24. package/dist/core/auth-storage.d.ts +42 -0
  25. package/dist/core/auth-storage.d.ts.map +1 -1
  26. package/dist/core/auth-storage.js +71 -10
  27. package/dist/core/auth-storage.js.map +1 -1
  28. package/dist/core/context-window.d.ts +15 -0
  29. package/dist/core/context-window.d.ts.map +1 -1
  30. package/dist/core/context-window.js +11 -0
  31. package/dist/core/context-window.js.map +1 -1
  32. package/dist/core/copilot-gemini-payload-sanitizer.d.ts +72 -0
  33. package/dist/core/copilot-gemini-payload-sanitizer.d.ts.map +1 -0
  34. package/dist/core/copilot-gemini-payload-sanitizer.js +296 -0
  35. package/dist/core/copilot-gemini-payload-sanitizer.js.map +1 -0
  36. package/dist/core/copilot-gemini-reasoning.d.ts +118 -0
  37. package/dist/core/copilot-gemini-reasoning.d.ts.map +1 -0
  38. package/dist/core/copilot-gemini-reasoning.js +260 -0
  39. package/dist/core/copilot-gemini-reasoning.js.map +1 -0
  40. package/dist/core/copilot-gemini-tool-arguments.d.ts +42 -0
  41. package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -0
  42. package/dist/core/copilot-gemini-tool-arguments.js +179 -0
  43. package/dist/core/copilot-gemini-tool-arguments.js.map +1 -0
  44. package/dist/core/copilot-model-catalog.d.ts +26 -11
  45. package/dist/core/copilot-model-catalog.d.ts.map +1 -1
  46. package/dist/core/copilot-model-catalog.js +34 -9
  47. package/dist/core/copilot-model-catalog.js.map +1 -1
  48. package/dist/core/flattened-tool-arguments.d.ts +41 -0
  49. package/dist/core/flattened-tool-arguments.d.ts.map +1 -0
  50. package/dist/core/flattened-tool-arguments.js +136 -0
  51. package/dist/core/flattened-tool-arguments.js.map +1 -0
  52. package/dist/core/http-dispatcher.d.ts.map +1 -1
  53. package/dist/core/http-dispatcher.js +5 -0
  54. package/dist/core/http-dispatcher.js.map +1 -1
  55. package/dist/core/model-registry.d.ts.map +1 -1
  56. package/dist/core/model-registry.js +6 -4
  57. package/dist/core/model-registry.js.map +1 -1
  58. package/dist/core/sdk.d.ts.map +1 -1
  59. package/dist/core/sdk.js +38 -8
  60. package/dist/core/sdk.js.map +1 -1
  61. package/dist/index.d.ts +2 -1
  62. package/dist/index.d.ts.map +1 -1
  63. package/dist/index.js +2 -1
  64. package/dist/index.js.map +1 -1
  65. package/docs/providers.md +4 -3
  66. package/docs/workflows.md +2 -0
  67. package/package.json +2 -2
@@ -20,10 +20,10 @@ import { stripFrontmatter } from "../utils/frontmatter.js";
20
20
  import { resolvePath } from "../utils/paths.js";
21
21
  import { sleep } from "../utils/sleep.js";
22
22
  import { ATOMIC_GUIDE_COMMAND_NAME, ATOMIC_GUIDE_HELP_CHOICES, atomicGuideModeForChoice, getAtomicGuideMessage, isAtomicGuideHelpChoice, normalizeAtomicGuideMode, } from "./atomic-guide-command.js";
23
- import { formatNoApiKeyFoundMessage, formatNoModelSelectedMessage, formatUnresolvedModelMessage, } from "./auth-guidance.js";
23
+ import { formatAuthStorageLoadFailedMessage, formatNoApiKeyFoundMessage, formatNoModelSelectedMessage, formatUnresolvedModelMessage, } from "./auth-guidance.js";
24
24
  import { executeBashWithOperations } from "./bash-executor.js";
25
25
  import { calculateContextTokens, collectEntriesForBranchSummary, contextCompact as runContextCompact, estimateContextTokens, generateBranchSummary, prepareContextCompaction, shouldCompact, validateContextDeletionRequest, } from "./compaction/index.js";
26
- import { getModelDefaultContextWindow, getSupportedContextWindows, selectContextWindow } from "./context-window.js";
26
+ import { getEffectiveInputBudget, getModelDefaultContextWindow, getSupportedContextWindows, selectContextWindow } from "./context-window.js";
27
27
  import { formatCopilotProviderError, parseCopilotPromptLimitError } from "./copilot-errors.js";
28
28
  import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
29
29
  import { exportSessionToHtml } from "./export-html/index.js";
@@ -39,6 +39,8 @@ import { evaluateBashCommandPolicy, formatBashCommandPolicyRejection, } from "./
39
39
  import { createAllToolDefinitions, defaultToolNames } from "./tools/index.js";
40
40
  import { redirectOversizedToolResult } from "./tools/oversized-tool-result.js";
41
41
  import { createToolDefinitionFromAgentTool } from "./tools/tool-definition-wrapper.js";
42
+ import { isCopilotGeminiModel } from "./copilot-gemini-payload-sanitizer.js";
43
+ import { normalizeToolArgumentsForModel } from "./copilot-gemini-tool-arguments.js";
42
44
  function deepFreeze(value) {
43
45
  if (value && typeof value === "object") {
44
46
  Object.freeze(value);
@@ -388,6 +390,9 @@ export class AgentSession {
388
390
  else if (event.message.role === "user" ||
389
391
  event.message.role === "assistant" ||
390
392
  event.message.role === "toolResult") {
393
+ if (event.message.role === "assistant") {
394
+ this._normalizePersistedGeminiToolArgs(event.message);
395
+ }
391
396
  // Regular LLM message - persist as SessionMessageEntry
392
397
  this.sessionManager.appendMessage(event.message);
393
398
  }
@@ -396,12 +401,17 @@ export class AgentSession {
396
401
  if (event.message.role === "assistant") {
397
402
  this._lastAssistantMessage = event.message;
398
403
  const assistantMsg = event.message;
399
- if (assistantMsg.stopReason !== "error") {
404
+ // Treat degenerate empty completions (no content, zero output tokens) as
405
+ // failures alongside stopReason === "error". Otherwise an empty turn that
406
+ // stops with reason "stop" would reset the retry counter on every attempt,
407
+ // causing unbounded retries instead of honoring maxRetries.
408
+ const assistantFailed = assistantMsg.stopReason === "error" || this._isEmptyCompletion(assistantMsg);
409
+ if (!assistantFailed) {
400
410
  this._overflowRecoveryAttempted = false;
401
411
  }
402
412
  // Reset retry counter immediately on successful assistant response
403
413
  // This prevents accumulation across multiple LLM calls within a turn
404
- if (assistantMsg.stopReason !== "error" && this._retryAttempt > 0) {
414
+ if (!assistantFailed && this._retryAttempt > 0) {
405
415
  this._emit({
406
416
  type: "auto_retry_end",
407
417
  success: true,
@@ -415,8 +425,16 @@ export class AgentSession {
415
425
  if (event.type === "agent_end" && this._lastAssistantMessage) {
416
426
  const msg = this._lastAssistantMessage;
417
427
  this._lastAssistantMessage = undefined;
418
- // Check for retryable errors first (overloaded, rate limit, server errors)
419
- if (this._isRetryableError(msg)) {
428
+ // Check for retryable errors first (overloaded, rate limit, server errors,
429
+ // transient provider finish_reason errors, or degenerate empty completions)
430
+ const retryableError = this._isRetryableError(msg);
431
+ const emptyCompletion = !retryableError && this._isEmptyCompletion(msg);
432
+ if (retryableError || emptyCompletion) {
433
+ if (emptyCompletion && !msg.errorMessage) {
434
+ // Surface a clear reason in the retry banner; empty completions carry no
435
+ // provider error message of their own.
436
+ msg.errorMessage = "Provider returned an empty completion";
437
+ }
420
438
  const didRetry = await this._handleRetryableError(msg);
421
439
  if (didRetry)
422
440
  return; // Retry was initiated, don't proceed to compaction
@@ -879,6 +897,16 @@ export class AgentSession {
879
897
  throw new Error(formatUnresolvedModelMessage(this.model));
880
898
  }
881
899
  if (!this._modelRegistry.hasConfiguredAuth(this.model)) {
900
+ // A failed credential-store load (for example auth.json briefly locked
901
+ // by a concurrent process, or invalid JSON) leaves an empty in-memory
902
+ // credential set. That would otherwise be misreported here as
903
+ // "No API key found" even though the credentials exist on disk. Surface
904
+ // the real load failure instead so configured providers are not falsely
905
+ // reported as unauthenticated (issue #1431).
906
+ const authLoadError = this._modelRegistry.authStorage.getLoadError();
907
+ if (authLoadError) {
908
+ throw new Error(formatAuthStorageLoadFailedMessage(this.model.provider, authLoadError), { cause: authLoadError });
909
+ }
882
910
  const isOAuth = this._modelRegistry.isUsingOAuth(this.model);
883
911
  if (isOAuth) {
884
912
  throw new Error(`Authentication failed for "${this.model.provider}". ` +
@@ -2009,7 +2037,11 @@ export class AgentSession {
2009
2037
  else {
2010
2038
  contextTokens = calculateContextTokens(assistantMessage.usage);
2011
2039
  }
2012
- if (shouldCompact(contextTokens, contextWindow, settings)) {
2040
+ // Compact against the effective input budget (the hard prompt cap for providers like Copilot
2041
+ // that advertise a larger total window) so we compact before overrunning the server-side limit
2042
+ // rather than relying on reactive overflow recovery near the cap.
2043
+ const compactionBudget = this.model ? getEffectiveInputBudget(this.model) : contextWindow;
2044
+ if (shouldCompact(contextTokens, compactionBudget, settings)) {
2013
2045
  await this._runAutoCompaction("threshold", false);
2014
2046
  }
2015
2047
  }
@@ -2017,7 +2049,11 @@ export class AgentSession {
2017
2049
  if (!this.model || this.model.provider !== "github-copilot" || !assistantMessage.errorMessage)
2018
2050
  return false;
2019
2051
  const promptLimitError = parseCopilotPromptLimitError(assistantMessage.errorMessage);
2020
- return promptLimitError !== undefined && this.model.contextWindow > promptLimitError.limitTokens;
2052
+ // Compare against the effective input budget (the model's real prompt cap), not the displayed
2053
+ // total window. A rejection at the prompt cap is a normal overflow we should compact-and-retry;
2054
+ // only a rejection *below* the cap (e.g. a missing long-context entitlement dropping the account
2055
+ // to a lower server tier) keeps the friendly error visible instead of silently compacting down.
2056
+ return promptLimitError !== undefined && getEffectiveInputBudget(this.model) > promptLimitError.limitTokens;
2021
2057
  }
2022
2058
  /**
2023
2059
  * Internal: remove the trailing overflow error from retry context if it is still present.
@@ -2386,7 +2422,23 @@ export class AgentSession {
2386
2422
  for (const tool of wrappedExtensionTools) {
2387
2423
  toolRegistry.set(tool.name, tool);
2388
2424
  }
2389
- this._toolRegistry = toolRegistry;
2425
+ // GitHub Copilot Gemini serializes array/object tool-call arguments as
2426
+ // flattened `name[index]` keys (confirmed on the raw CAPI wire). Reconstruct
2427
+ // them into proper arrays/objects before per-tool preparation and schema
2428
+ // validation, so tool calls (notably structured_output) don't fail and loop.
2429
+ // Gated to Copilot Gemini at call time via this.model; a no-op otherwise.
2430
+ // `prepareArguments` is a plain function field (no `this` binding), and the
2431
+ // `{ ...tool }` spread assumes AgentTools are plain objects — matching the
2432
+ // existing tool-definition-wrapper pattern; a class-instance tool would lose
2433
+ // prototype members here.
2434
+ this._toolRegistry = new Map(Array.from(toolRegistry, ([name, tool]) => {
2435
+ const basePrepareArguments = tool.prepareArguments;
2436
+ const prepareArguments = (args) => {
2437
+ const normalized = normalizeToolArgumentsForModel(args, this.model, tool.parameters);
2438
+ return basePrepareArguments ? basePrepareArguments(normalized) : normalized;
2439
+ };
2440
+ return [name, { ...tool, prepareArguments }];
2441
+ }));
2390
2442
  const nextActiveToolNames = (options?.activeToolNames ? [...options.activeToolNames] : [...previousActiveToolNames]).filter((name) => isExposedTool(name));
2391
2443
  if (allowedToolNames) {
2392
2444
  for (const toolName of this._toolRegistry.keys()) {
@@ -2484,8 +2536,80 @@ export class AgentSession {
2484
2536
  if (isContextOverflow(message, contextWindow))
2485
2537
  return false;
2486
2538
  const err = message.errorMessage;
2487
- // Match: overloaded_error, provider returned error, rate limit, 429, 500, 502, 503, 504, service unavailable, network/connection errors (including connection lost), WebSocket transport closes/errors, fetch failed, premature stream endings, HTTP/2 closed before response, terminated, retry delay exceeded
2488
- return /overloaded|provider.?returned.?error|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|network.?error|connection.?error|connection.?refused|connection.?lost|websocket.?closed|websocket.?error|other side closed|fetch failed|upstream.?connect|reset before headers|socket hang up|ended without|stream ended before message_stop|http2 request did not get a response|timed? out|timeout|terminated|retry delay/i.test(err);
2539
+ // A genuine `content_filter` stop is a deliberate safety block: retrying it
2540
+ // re-issues the same blocked request up to maxRetries times for no benefit.
2541
+ // GitHub Copilot Gemini is the exception — CAPI maps spurious Gemini blocks
2542
+ // (RECITATION/safety on MALFORMED_FUNCTION_CALL etc.) to `content_filter`, so
2543
+ // only treat `content_filter` as retryable for those models.
2544
+ if (isCopilotGeminiModel({ provider: message.provider, api: message.api, id: message.model }) &&
2545
+ /finish.?reason:?\s*content.?filter/i.test(err)) {
2546
+ return true;
2547
+ }
2548
+ // Match: overloaded_error, provider returned error, rate limit, 429, 500, 502, 503, 504, service unavailable, network/connection errors (including connection lost), WebSocket transport closes/errors, fetch failed, premature stream endings, HTTP/2 closed before response, terminated, retry delay exceeded, and a bare/transient provider finish_reason "error" (e.g. github-copilot Gemini's CAPI mapping of MALFORMED_FUNCTION_CALL/OTHER/UNEXPECTED_TOOL_CALL). These are provider-agnostic transient failures.
2549
+ return /overloaded|provider.?returned.?error|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server.?error|internal.?error|network.?error|connection.?error|connection.?refused|connection.?lost|websocket.?closed|websocket.?error|other side closed|fetch failed|upstream.?connect|reset before headers|socket hang up|ended without|stream ended before message_stop|http2 request did not get a response|timed? out|timeout|terminated|retry delay|finish.?reason:?\s*error/i.test(err);
2550
+ }
2551
+ /**
2552
+ * For GitHub Copilot Gemini, reconstruct flattened tool-call arguments
2553
+ * (for example `edits[0].newText`) into the nested arrays/objects Gemini
2554
+ * produced before the assistant message is persisted, so saved transcripts
2555
+ * never carry the flattened CAPI wire shape and replays loaded from disk match
2556
+ * the structure Gemini signed. In-place, gated to Copilot Gemini, and a no-op
2557
+ * for well-formed arguments or any other provider/model. The outbound replay
2558
+ * normalizer still heals already-persisted (legacy) sessions on the wire.
2559
+ */
2560
+ _normalizePersistedGeminiToolArgs(message) {
2561
+ const model = this.model;
2562
+ if (!model || !isCopilotGeminiModel(model))
2563
+ return;
2564
+ for (const block of message.content) {
2565
+ if (block.type !== "toolCall")
2566
+ continue;
2567
+ const tool = this._toolRegistry.get(block.name);
2568
+ const normalized = normalizeToolArgumentsForModel(block.arguments, model, tool?.parameters);
2569
+ if (normalized !== block.arguments && normalized !== null && typeof normalized === "object") {
2570
+ block.arguments = normalized;
2571
+ }
2572
+ }
2573
+ }
2574
+ /**
2575
+ * Detect a degenerate empty completion: the provider ended the stream with no
2576
+ * usable content and zero output tokens. Seen with github-copilot Gemini models
2577
+ * that emit finish_reason "stop" (or a tool-use stop) with an empty content array
2578
+ * and 0 output tokens, leaving the turn dead instead of producing the next step.
2579
+ *
2580
+ * These are treated as retryable so the harness re-issues the request rather than
2581
+ * silently stopping mid-task. Guarded tightly (no text, no tool call, no thinking,
2582
+ * and output === 0) so legitimate non-empty turns are never matched.
2583
+ *
2584
+ * Intentionally provider-agnostic (not gated to Copilot Gemini): a degenerate
2585
+ * empty turn is a transient failure for any provider. It is bounded by
2586
+ * `maxRetries` and falls through to normal handling on exhaustion.
2587
+ */
2588
+ _isEmptyCompletion(message) {
2589
+ // Only "completed" stop reasons can be deceptively empty. Real errors are handled
2590
+ // by _isRetryableError; aborted/length turns are intentional outcomes.
2591
+ if (message.stopReason !== "stop" && message.stopReason !== "toolUse")
2592
+ return false;
2593
+ const content = message.content;
2594
+ if (Array.isArray(content)) {
2595
+ const hasContent = content.some((part) => {
2596
+ if (part.type === "text")
2597
+ return part.text.trim().length > 0;
2598
+ if (part.type === "toolCall")
2599
+ return true;
2600
+ if (part.type === "thinking")
2601
+ return part.redacted === true || part.thinking.trim().length > 0;
2602
+ return true; // unknown part types count as content
2603
+ });
2604
+ if (hasContent)
2605
+ return false;
2606
+ }
2607
+ // A turn that produced output tokens but no surfaced content is not "empty"
2608
+ // (e.g. reasoning-only responses); leave those alone. Note: a provider that
2609
+ // fails to report `usage` (output defaults to 0) would make every
2610
+ // content-less turn match here; the dual requirement (empty content AND zero
2611
+ // output) keeps that false-positive risk low in practice.
2612
+ return (message.usage?.output ?? 0) === 0;
2489
2613
  }
2490
2614
  /**
2491
2615
  * Handle retryable errors with exponential backoff.