@gajae-code/coding-agent 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/dist/types/cli/web-search-cli.d.ts +12 -0
  3. package/dist/types/commands/rlm.d.ts +10 -0
  4. package/dist/types/commands/web-search.d.ts +54 -0
  5. package/dist/types/config/keybindings.d.ts +10 -0
  6. package/dist/types/config/model-profiles.d.ts +2 -1
  7. package/dist/types/config/model-registry.d.ts +3 -0
  8. package/dist/types/config/models-config-schema.d.ts +3 -0
  9. package/dist/types/config/settings-schema.d.ts +61 -3
  10. package/dist/types/edit/notebook.d.ts +3 -0
  11. package/dist/types/eval/py/executor.d.ts +3 -0
  12. package/dist/types/eval/py/kernel.d.ts +3 -1
  13. package/dist/types/eval/py/runtime.d.ts +9 -1
  14. package/dist/types/exec/bash-executor.d.ts +4 -0
  15. package/dist/types/extensibility/custom-tools/types.d.ts +2 -0
  16. package/dist/types/extensibility/custom-tools/wrapper.d.ts +1 -0
  17. package/dist/types/extensibility/extensions/types.d.ts +2 -0
  18. package/dist/types/extensibility/extensions/wrapper.d.ts +1 -0
  19. package/dist/types/gjc-runtime/launch-tmux.d.ts +6 -0
  20. package/dist/types/gjc-runtime/session-state-sidecar.d.ts +14 -0
  21. package/dist/types/gjc-runtime/tmux-common.d.ts +6 -0
  22. package/dist/types/gjc-runtime/tmux-gc.d.ts +3 -3
  23. package/dist/types/gjc-runtime/tmux-sessions.d.ts +4 -0
  24. package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +18 -0
  25. package/dist/types/goals/state.d.ts +1 -1
  26. package/dist/types/goals/tools/goal-tool.d.ts +2 -0
  27. package/dist/types/main.d.ts +11 -0
  28. package/dist/types/modes/components/custom-editor.d.ts +4 -2
  29. package/dist/types/modes/components/custom-model-preset-wizard.d.ts +12 -0
  30. package/dist/types/modes/components/model-selector.d.ts +5 -2
  31. package/dist/types/modes/components/status-line.d.ts +4 -1
  32. package/dist/types/modes/controllers/input-controller.d.ts +3 -0
  33. package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
  34. package/dist/types/modes/print-mode.d.ts +6 -0
  35. package/dist/types/modes/rpc/rpc-client.d.ts +21 -0
  36. package/dist/types/modes/rpc/rpc-socket-security.d.ts +7 -0
  37. package/dist/types/modes/rpc/rpc-types.d.ts +13 -0
  38. package/dist/types/modes/shared/agent-wire/command-dispatch.d.ts +2 -0
  39. package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +1 -0
  40. package/dist/types/rlm/artifacts.d.ts +9 -0
  41. package/dist/types/rlm/complete-research-tool.d.ts +35 -0
  42. package/dist/types/rlm/data-context.d.ts +6 -0
  43. package/dist/types/rlm/index.d.ts +35 -0
  44. package/dist/types/rlm/notebook.d.ts +12 -0
  45. package/dist/types/rlm/preset.d.ts +23 -0
  46. package/dist/types/rlm/python-tool.d.ts +16 -0
  47. package/dist/types/rlm/report.d.ts +14 -0
  48. package/dist/types/rlm/types.d.ts +37 -0
  49. package/dist/types/sdk.d.ts +7 -0
  50. package/dist/types/session/agent-session.d.ts +21 -0
  51. package/dist/types/tools/bash-allowed-prefixes.d.ts +6 -1
  52. package/dist/types/tools/browser/attach.d.ts +19 -3
  53. package/dist/types/tools/browser/registry.d.ts +15 -0
  54. package/dist/types/tools/browser/render.d.ts +3 -0
  55. package/dist/types/tools/browser.d.ts +18 -1
  56. package/dist/types/tools/computer/render.d.ts +17 -0
  57. package/dist/types/tools/computer.d.ts +465 -0
  58. package/dist/types/tools/index.d.ts +24 -1
  59. package/dist/types/tools/job.d.ts +13 -0
  60. package/dist/types/tools/tool-timeouts.d.ts +5 -0
  61. package/dist/types/web/search/index.d.ts +32 -2
  62. package/dist/types/web/search/providers/base.d.ts +22 -0
  63. package/dist/types/web/search/providers/xai.d.ts +64 -0
  64. package/dist/types/web/search/types.d.ts +11 -3
  65. package/package.json +7 -7
  66. package/src/cli/web-search-cli.ts +123 -8
  67. package/src/cli.ts +2 -0
  68. package/src/commands/rlm.ts +19 -0
  69. package/src/commands/web-search.ts +66 -0
  70. package/src/config/keybindings.ts +11 -0
  71. package/src/config/model-profiles.ts +11 -3
  72. package/src/config/model-registry.ts +55 -1
  73. package/src/config/models-config-schema.ts +1 -0
  74. package/src/config/settings-schema.ts +67 -1
  75. package/src/edit/notebook.ts +6 -2
  76. package/src/eval/py/executor.ts +8 -1
  77. package/src/eval/py/kernel.ts +9 -4
  78. package/src/eval/py/runtime.ts +153 -32
  79. package/src/exec/bash-executor.ts +10 -4
  80. package/src/extensibility/custom-tools/types.ts +2 -0
  81. package/src/extensibility/custom-tools/wrapper.ts +2 -0
  82. package/src/extensibility/extensions/types.ts +2 -0
  83. package/src/extensibility/extensions/wrapper.ts +1 -0
  84. package/src/gjc-runtime/launch-tmux.ts +129 -1
  85. package/src/gjc-runtime/session-state-sidecar.ts +61 -1
  86. package/src/gjc-runtime/tmux-common.ts +26 -2
  87. package/src/gjc-runtime/tmux-gc.ts +40 -27
  88. package/src/gjc-runtime/tmux-sessions.ts +13 -1
  89. package/src/gjc-runtime/ultragoal-runtime.ts +340 -18
  90. package/src/goals/runtime.ts +4 -3
  91. package/src/goals/state.ts +1 -1
  92. package/src/goals/tools/goal-tool.ts +16 -3
  93. package/src/internal-urls/docs-index.generated.ts +13 -9
  94. package/src/main.ts +28 -3
  95. package/src/modes/components/custom-editor.ts +13 -4
  96. package/src/modes/components/custom-model-preset-wizard.ts +293 -0
  97. package/src/modes/components/hook-selector.ts +1 -1
  98. package/src/modes/components/model-selector.ts +72 -29
  99. package/src/modes/components/skill-message.ts +62 -8
  100. package/src/modes/components/status-line.ts +13 -1
  101. package/src/modes/controllers/input-controller.ts +60 -11
  102. package/src/modes/controllers/selector-controller.ts +39 -0
  103. package/src/modes/interactive-mode.ts +1 -1
  104. package/src/modes/print-mode.ts +14 -4
  105. package/src/modes/rpc/rpc-client.ts +250 -80
  106. package/src/modes/rpc/rpc-mode.ts +6 -12
  107. package/src/modes/rpc/rpc-socket-security.ts +103 -0
  108. package/src/modes/rpc/rpc-types.ts +10 -0
  109. package/src/modes/shared/agent-wire/command-dispatch.ts +7 -0
  110. package/src/modes/shared/agent-wire/command-validation.ts +1 -0
  111. package/src/modes/shared/agent-wire/scopes.ts +1 -0
  112. package/src/modes/shared/agent-wire/unattended-session.ts +9 -0
  113. package/src/modes/utils/hotkeys-markdown.ts +4 -2
  114. package/src/modes/utils/ui-helpers.ts +2 -2
  115. package/src/prompts/goals/goal-continuation.md +1 -0
  116. package/src/prompts/goals/goal-mode-active.md +1 -0
  117. package/src/prompts/system/rlm-report-command.md +1 -0
  118. package/src/prompts/system/rlm-research.md +23 -0
  119. package/src/prompts/tools/bash.md +23 -2
  120. package/src/prompts/tools/browser.md +7 -3
  121. package/src/prompts/tools/computer.md +74 -0
  122. package/src/prompts/tools/goal.md +3 -0
  123. package/src/prompts/tools/job.md +9 -1
  124. package/src/prompts/tools/web-search.md +7 -0
  125. package/src/rlm/artifacts.ts +60 -0
  126. package/src/rlm/complete-research-tool.ts +163 -0
  127. package/src/rlm/data-context.ts +26 -0
  128. package/src/rlm/index.ts +339 -0
  129. package/src/rlm/notebook.ts +108 -0
  130. package/src/rlm/preset.ts +76 -0
  131. package/src/rlm/python-tool.ts +68 -0
  132. package/src/rlm/report.ts +70 -0
  133. package/src/rlm/types.ts +40 -0
  134. package/src/sdk.ts +12 -0
  135. package/src/session/agent-session.ts +48 -3
  136. package/src/slash-commands/builtin-registry.ts +17 -0
  137. package/src/tools/bash-allowed-prefixes.ts +84 -1
  138. package/src/tools/bash.ts +80 -13
  139. package/src/tools/browser/attach.ts +103 -3
  140. package/src/tools/browser/registry.ts +176 -2
  141. package/src/tools/browser/render.ts +9 -1
  142. package/src/tools/browser.ts +33 -0
  143. package/src/tools/computer/render.ts +78 -0
  144. package/src/tools/computer.ts +640 -0
  145. package/src/tools/index.ts +41 -1
  146. package/src/tools/job.ts +88 -5
  147. package/src/tools/json-tree.ts +42 -29
  148. package/src/tools/renderers.ts +2 -0
  149. package/src/tools/tool-timeouts.ts +1 -0
  150. package/src/web/search/index.ts +27 -2
  151. package/src/web/search/provider.ts +16 -1
  152. package/src/web/search/providers/base.ts +22 -0
  153. package/src/web/search/providers/xai.ts +511 -0
  154. package/src/web/search/render.ts +7 -0
  155. package/src/web/search/types.ts +11 -1
@@ -801,13 +801,114 @@ function evidenceKindMatches(kind: string, words: string[]): boolean {
801
801
 
802
802
  type SurfaceFamily = "web" | "cli" | "native" | "api-package" | "algorithm-math" | "unknown";
803
803
 
804
/** How a single path changed; "unknown" is used when the status code is not recognized. */
type UltragoalChangeStatus = "added" | "modified" | "deleted" | "renamed" | "copied" | "unknown";

/** Role a changed path plays in the computer-use feature; "other" means it matched no known location. */
type UltragoalChangeCategory =
  | "code"
  | "generated-binding"
  | "tool"
  | "settings-registry"
  | "prompt-doc-behavior"
  | "docs-static"
  | "other";

/** One changed path inside a change set. */
interface UltragoalChangeSetPath extends JsonObject {
  /** Path after the change. */
  path: string;
  status: UltragoalChangeStatus;
  /** Previous path; populated by the parsers for renamed/copied entries. */
  oldPath?: string;
  /** Optional pre-computed category for `path`. */
  category?: UltragoalChangeCategory;
}

/** A set of changed paths derived from git or from a review source rather than from executor-declared data. */
interface UltragoalChangeSet extends JsonObject {
  /** Where the change set was derived from. */
  source: "checkpoint-git" | "review-pr" | "review-branch" | "review-worktree" | "review-spec";
  baseRef?: string;
  headRef?: string;
  mergeBase?: string;
  paths: UltragoalChangeSetPath[];
  /** Raw diff/stat text kept alongside the parsed paths. */
  rawDiffStat?: string;
  /** Literal marker: only trusted change sets are representable by this type. */
  trusted: true;
}
828
+
829
/**
 * Adversarial case ids that must all be present when the computer red-team suite is required.
 * Declared `as const` so the ids keep their literal types.
 */
const MANDATORY_COMPUTER_CASE_IDS = [
  "kill-switch-bypass",
  "suspended-enforcement",
  "permission-revoked",
  "display-stale",
  "out-of-bounds-drift",
  "runaway-loop-halt",
  "blast-radius",
] as const;
838
+
839
/**
 * Normalizes a repository-relative path so it can be compared against the
 * forward-slash path constants used by the change categorizer.
 *
 * - Every run of one or more backslashes becomes a single "/". The previous
 *   implementation searched for the two-character string `\\`, so ordinary
 *   Windows separators (`a\b`) were left untouched; both `a\b` and an
 *   over-escaped `a\\b` now normalize to `a/b`.
 * - A leading "./" is removed (checked after separator conversion, so `.\a` is
 *   handled as well).
 *
 * @param value Path as reported by git or declared by the executor.
 * @returns The normalized path; the empty string is returned unchanged.
 */
function normalizeRepoPath(value: string): string {
  return value.replace(/\\+/g, "/").replace(/^\.\//, "");
}
842
+
843
/**
 * Classifies a changed path by the role it plays in the computer-use feature.
 *
 * The path is normalized first and then compared against a fixed set of exact
 * file names and directory prefixes. The rules are mutually exclusive, so the
 * order in which they are checked does not affect the result.
 *
 * @param value Changed path (any separator style accepted by normalizeRepoPath).
 * @returns The matching category, or "other" when no rule applies.
 */
function categorizeComputerChangePath(value: string): UltragoalChangeCategory {
  const repoPath = normalizeRepoPath(value);

  // Exact-file rules.
  switch (repoPath) {
    case "packages/coding-agent/src/tools/computer.ts":
      return "tool";
    case "packages/coding-agent/src/tools/index.ts":
    case "packages/coding-agent/src/tools/renderers.ts":
    case "packages/coding-agent/src/config/settings-schema.ts":
      return "settings-registry";
    case "packages/coding-agent/src/prompts/tools/computer.md":
    case "packages/coding-agent/src/defaults/gjc/skills/ultragoal/SKILL.md":
    case "packages/coding-agent/src/prompts/agents/executor.md":
      return "prompt-doc-behavior";
    case "docs/tools/computer.md":
    case "docs/computer-use/README.md":
      return "docs-static";
  }

  // Directory-prefix and pattern rules.
  if (repoPath.startsWith("crates/pi-natives/src/computer/")) return "code";
  if (/^packages\/natives\/native\/index\.(?:d\.ts|js)$/.test(repoPath)) return "generated-binding";
  if (repoPath.startsWith("packages/coding-agent/src/tools/computer/")) return "tool";
  return "other";
}
867
+
868
/**
 * Reports whether a change-set row touches the computer-use feature, judged by
 * either its current path or (when present) its previous path.
 */
function isComputerChangePath(row: UltragoalChangeSetPath): boolean {
  if (categorizeComputerChangePath(row.path) !== "other") return true;
  if (!row.oldPath) return false;
  return categorizeComputerChangePath(row.oldPath) !== "other";
}
874
+
875
/**
 * True only when the change set is non-empty and every row is a "docs-static"
 * change, both for its current path and, if it has one, its previous path.
 * A row's own `category` is preferred over re-categorizing `path`.
 */
function isDocsOnlyStaticComputerChangeSet(changeSet: UltragoalChangeSet | undefined): boolean {
  if (!changeSet || changeSet.paths.length === 0) return false;
  for (const row of changeSet.paths) {
    const currentCategory = row.category ?? categorizeComputerChangePath(row.path);
    if (currentCategory !== "docs-static") return false;
    // A rename/copy out of a non-docs location is not a docs-only change.
    if (row.oldPath && categorizeComputerChangePath(row.oldPath) !== "docs-static") return false;
  }
  return true;
}
883
+
884
/**
 * Decides from a trusted change set alone whether the computer red-team suite
 * is required: the set must be trusted, must not be docs-only, and must
 * contain at least one computer-related path.
 */
function trustedChangeSetRequiresComputerSuite(changeSet: UltragoalChangeSet | undefined): boolean {
  if (changeSet?.trusted && !isDocsOnlyStaticComputerChangeSet(changeSet)) {
    return changeSet.paths.some(row => isComputerChangePath(row));
  }
  return false;
}
889
+
890
/**
 * Determines whether the mandatory computer red-team suite applies.
 *
 * The trusted change set is consulted first; if it does not require the suite,
 * the executor-declared `changedPaths` are checked and any string entry that
 * categorizes as something other than "other" triggers the requirement.
 * Non-array `changedPaths` and non-string entries are ignored.
 */
function requiresComputerRedTeamSuite(executorQa: JsonObject, changeSet: UltragoalChangeSet | undefined): boolean {
  if (trustedChangeSetRequiresComputerSuite(changeSet)) return true;
  if (!Array.isArray(executorQa.changedPaths)) return false;
  for (const entry of executorQa.changedPaths) {
    if (typeof entry === "string" && categorizeComputerChangePath(entry) !== "other") return true;
  }
  return false;
}
895
+
896
/**
 * Canonicalizes an adversarial case id: applies the surface-token
 * normalization, then turns each run of whitespace into a single "-".
 */
function normalizeAdversarialCaseId(value: string): string {
  const token = normalizeSurfaceToken(value);
  return token.replace(/\s+/g, "-");
}
899
+
804
900
  export function normalizeSurfaceToken(value: string): string {
805
901
  return value.toLowerCase().replaceAll("_", "-").trim();
806
902
  }
807
903
 
808
904
  export function surfaceFamily(value: string): SurfaceFamily {
809
905
  const normalized = normalizeSurfaceToken(value);
810
- if (["native", "desktop", "tui"].some(word => normalized.includes(word))) return "native";
906
+ if (
907
+ ["computer", "computer-use", "desktop-input", "native-input", "native", "desktop", "tui"].some(word =>
908
+ normalized.includes(word),
909
+ )
910
+ )
911
+ return "native";
811
912
  if (["gui", "web", "browser", "ui", "visual"].some(word => normalized.includes(word))) return "web";
812
913
  if (["cli", "terminal", "command"].some(word => normalized.includes(word))) return "cli";
813
914
  if (["api", "package", "library", "sdk"].some(word => normalized.includes(word))) return "api-package";
@@ -1840,12 +1941,92 @@ function validateAdversarialCases(
1840
1941
  return idMap;
1841
1942
  }
1842
1943
 
1944
/**
 * Enforces the mandatory computer red-team cases.
 *
 * For every id in MANDATORY_COMPUTER_CASE_IDS the case must:
 *  1. exist in `adversarialCases`,
 *  2. not carry the not-applicable status,
 *  3. be referenced from some `contractCoverage[*].adversarialCaseRefs`, and
 *  4. reference at least one artifact that passes both the proof and the
 *     structural validation for a live native surface.
 *
 * @throws Error prefixed with a `COMPUTER_REDTEAM_*` code describing the first
 *   violated rule, or an "unknown id" error for a dangling artifact reference.
 */
async function validateMandatoryComputerAdversarialCases(
  cwd: string,
  contractCoverage: JsonObject[],
  adversarialCases: Map<string, JsonObject>,
  artifactRefs: Map<string, JsonObject>,
): Promise<void> {
  // Collect every case id referenced from contract coverage, in normalized form.
  const linkedCaseIds = new Set<string>();
  contractCoverage.forEach((coverageRow, index) => {
    const refs = optionalStringLinks(coverageRow, "adversarialCaseRefs", `executorQa.contractCoverage[${index}]`);
    for (const ref of refs ?? []) linkedCaseIds.add(normalizeAdversarialCaseId(ref));
  });

  for (const caseId of MANDATORY_COMPUTER_CASE_IDS) {
    const caseRow = adversarialCases.get(caseId);
    if (!caseRow) {
      throw new Error(
        `COMPUTER_REDTEAM_CASE_MISSING: qualityGate executorQa.adversarialCases must include ${caseId}`,
      );
    }
    const caseStatus = optionalStatusField(caseRow, `executorQa.adversarialCases.${caseId}`);
    if (caseStatus === NOT_APPLICABLE_STATUS) {
      throw new Error(
        `COMPUTER_REDTEAM_CASE_NOT_APPLICABLE: mandatory computer adversarial case ${caseId} must not be not_applicable`,
      );
    }
    if (!linkedCaseIds.has(caseId)) {
      throw new Error(
        `COMPUTER_REDTEAM_CASE_UNLINKED: mandatory computer adversarial case ${caseId} must be linked from contractCoverage.adversarialCaseRefs`,
      );
    }

    const artifactIds = requireStringLinks(
      caseRow.artifactRefs,
      `executorQa.adversarialCases.${caseId}.artifactRefs`,
    );
    let hasValidLiveNativeProof = false;
    let sawInlineOnly = false;
    let sawReceiptOnly = false;
    let sawMetadataOnly = false;

    for (const artifactId of artifactIds) {
      const artifact = artifactRefs.get(artifactId);
      if (!artifact) {
        throw new Error(
          `qualityGate executorQa.adversarialCases.${caseId}.artifactRefs references unknown id ${artifactId}`,
        );
      }
      const fieldName = `executorQa.artifactRefs.${artifactId}`;

      // Remember *why* a path-less artifact is insufficient so the most
      // specific error code can be reported if no artifact qualifies.
      if (!nonEmptyString(artifact.path)) {
        const hasInline = artifact.inlineEvidence !== undefined;
        const hasReceipt = artifact.verifiedReceipt !== undefined || artifact.receipt !== undefined;
        if (hasInline) sawInlineOnly = true;
        if (hasReceipt) sawReceiptOnly = true;
        if (!hasInline && !hasReceipt) sawMetadataOnly = true;
      }

      try {
        await validateArtifactProof(cwd, artifact, fieldName, { surfaceFamily: "native", live: true });
        const structurallyValid = await validateStructuralArtifact(cwd, artifact, fieldName, {
          surfaceFamily: "native",
          live: true,
        });
        if (structurallyValid) hasValidLiveNativeProof = true;
      } catch {
        // Preserve the explicit computer red-team error taxonomy below.
      }
    }

    if (hasValidLiveNativeProof) continue;

    // Report the most specific reason, in fixed priority order.
    let failure = `COMPUTER_REDTEAM_ARTIFACT_MISSING: mandatory computer adversarial case ${caseId} requires at least one valid live structural native proof artifact`;
    if (sawInlineOnly) {
      failure = `COMPUTER_REDTEAM_INLINE_ONLY: mandatory computer adversarial case ${caseId} requires live structural native proof`;
    } else if (sawReceiptOnly) {
      failure = `COMPUTER_REDTEAM_RECEIPT_ONLY: mandatory computer adversarial case ${caseId} requires live structural native proof`;
    } else if (sawMetadataOnly) {
      failure = `COMPUTER_REDTEAM_ARTIFACT_METADATA_ONLY: mandatory computer adversarial case ${caseId} requires durable live structural native proof`;
    }
    throw new Error(failure);
  }
}
2023
+
1843
2024
  function validateContractCoverage(
1844
2025
  executorQa: JsonObject,
1845
2026
  surfaceEvidence: Map<string, JsonObject>,
1846
2027
  adversarialCases: Map<string, JsonObject>,
1847
2028
  artifactRefs: Map<string, JsonObject>,
1848
- ): void {
2029
+ ): JsonObject[] {
1849
2030
  const rows = requireObjectArray(executorQa.contractCoverage, "executorQa.contractCoverage");
1850
2031
  buildRowIdMap(rows, "executorQa.contractCoverage");
1851
2032
  let hasSuccessfulContractCoverage = false;
@@ -1896,32 +2077,47 @@ function validateContractCoverage(
1896
2077
  "qualityGate executorQa.contractCoverage must include at least one row with status covered, passed, or verified",
1897
2078
  );
1898
2079
  }
2080
+ return rows;
1899
2081
  }
1900
2082
 
1901
2083
  async function validateExecutorQaRedTeamEvidenceInternal(
1902
2084
  cwd: string,
1903
2085
  executorQa: JsonObject,
1904
- _options: { mode?: "checkpoint" | "review" } = {},
2086
+ options: { mode?: "checkpoint" | "review"; changeSet?: UltragoalChangeSet } = {},
1905
2087
  ): Promise<void> {
1906
2088
  const artifactRefs = await validateArtifactRefs(cwd, executorQa);
1907
2089
  const surfaceEvidence = await validateSurfaceEvidence(cwd, executorQa, artifactRefs);
1908
2090
  const adversarialCases = validateAdversarialCases(executorQa, artifactRefs);
1909
- validateContractCoverage(executorQa, surfaceEvidence, adversarialCases, artifactRefs);
2091
+ const contractCoverage = validateContractCoverage(executorQa, surfaceEvidence, adversarialCases, artifactRefs);
2092
+ if (requiresComputerRedTeamSuite(executorQa, options.changeSet)) {
2093
+ await validateMandatoryComputerAdversarialCases(cwd, contractCoverage, adversarialCases, artifactRefs);
2094
+ }
1910
2095
  }
1911
2096
 
1912
- async function validateExecutorQaRedTeamEvidence(cwd: string, executorQa: JsonObject): Promise<void> {
1913
- await validateExecutorQaRedTeamEvidenceInternal(cwd, executorQa, { mode: "checkpoint" });
2097
+ async function validateExecutorQaRedTeamEvidence(
2098
+ cwd: string,
2099
+ executorQa: JsonObject,
2100
+ options: { changeSet?: UltragoalChangeSet } = {},
2101
+ ): Promise<void> {
2102
+ await validateExecutorQaRedTeamEvidenceInternal(cwd, executorQa, {
2103
+ mode: "checkpoint",
2104
+ changeSet: options.changeSet,
2105
+ });
1914
2106
  }
1915
2107
 
1916
2108
  export async function validateExecutorQaRedTeamEvidenceForReview(
1917
2109
  cwd: string,
1918
2110
  executorQa: Record<string, unknown>,
1919
- options: { mode?: "review" } = {},
2111
+ options: { mode?: "review"; changeSet?: UltragoalChangeSet } = {},
1920
2112
  ): Promise<void> {
1921
2113
  await validateExecutorQaRedTeamEvidenceInternal(cwd, executorQa as JsonObject, options);
1922
2114
  }
1923
2115
 
1924
- async function validateCompletionQualityGate(cwd: string, gate: JsonObject): Promise<void> {
2116
+ async function validateCompletionQualityGate(
2117
+ cwd: string,
2118
+ gate: JsonObject,
2119
+ options: { changeSet?: UltragoalChangeSet } = {},
2120
+ ): Promise<void> {
1925
2121
  const codeReview = qualityGateObject(gate.codeReview);
1926
2122
  if (codeReview) {
1927
2123
  throw new Error(
@@ -1966,7 +2162,7 @@ async function validateCompletionQualityGate(cwd: string, gate: JsonObject): Pro
1966
2162
  }
1967
2163
  requireNonEmptyString(executorQa.evidence, "executorQa.evidence");
1968
2164
  requireEmptyBlockers(executorQa.blockers, "executorQa.blockers");
1969
- await validateExecutorQaRedTeamEvidence(cwd, executorQa);
2165
+ await validateExecutorQaRedTeamEvidence(cwd, executorQa, { changeSet: options.changeSet });
1970
2166
  if (iteration.status !== PASSED_STATUS || iteration.fullRerun !== true) {
1971
2167
  throw new Error("qualityGate iteration must be passed with fullRerun true");
1972
2168
  }
@@ -1977,7 +2173,11 @@ async function validateCompletionQualityGate(cwd: string, gate: JsonObject): Pro
1977
2173
  requireEmptyBlockers(iteration.blockers, "iteration.blockers");
1978
2174
  }
1979
2175
 
1980
- async function readRequiredCompletionQualityGate(cwd: string, value: string | undefined): Promise<unknown> {
2176
+ async function readRequiredCompletionQualityGate(
2177
+ cwd: string,
2178
+ value: string | undefined,
2179
+ options: { changeSet?: UltragoalChangeSet } = {},
2180
+ ): Promise<unknown> {
1981
2181
  if (!value?.trim()) {
1982
2182
  throw new Error(
1983
2183
  "complete checkpoints require --quality-gate-json with architectReview, executorQa, and iteration evidence",
@@ -1986,7 +2186,7 @@ async function readRequiredCompletionQualityGate(cwd: string, value: string | un
1986
2186
  const gate = await readStructuredValue(cwd, value);
1987
2187
  const gateObject = qualityGateObject(gate);
1988
2188
  if (!gateObject) throw new Error("qualityGate must be a JSON object");
1989
- await validateCompletionQualityGate(cwd, gateObject);
2189
+ await validateCompletionQualityGate(cwd, gateObject, { changeSet: options.changeSet });
1990
2190
  return gate;
1991
2191
  }
1992
2192
 
@@ -2089,9 +2289,10 @@ export async function checkpointUltragoalGoal(input: {
2089
2289
  // instead of silently dropping it.
2090
2290
  return plan;
2091
2291
  }
2292
+ const changeSet = input.status === "complete" ? await computeCheckpointChangeSet(input.cwd) : undefined;
2092
2293
  const qualityGateJson =
2093
2294
  input.status === "complete"
2094
- ? await readRequiredCompletionQualityGate(input.cwd, input.qualityGateJson)
2295
+ ? await readRequiredCompletionQualityGate(input.cwd, input.qualityGateJson, { changeSet })
2095
2296
  : input.qualityGateJson
2096
2297
  ? await readStructuredValue(input.cwd, input.qualityGateJson)
2097
2298
  : undefined;
@@ -2690,20 +2891,140 @@ async function resolveGitBase(cwd: string, branch?: string): Promise<string> {
2690
2891
  }
2691
2892
  const mergeBase = await spawnText(["git", "merge-base", "HEAD", "origin/main"], { cwd, timeoutMs: 3000 });
2692
2893
  if (mergeBase.ok && mergeBase.stdout.trim()) return mergeBase.stdout.trim();
2693
- return "HEAD";
2894
+ return "HEAD~1";
2895
+ }
2896
+
2897
/**
 * Parses `git diff --name-status` style output into change-set rows.
 *
 * Each non-blank line is `<status>\t<path>` or, for renames/copies,
 * `<status>\t<old path>\t<new path>`. Fields are split on TAB when the line
 * contains one, so paths that contain spaces stay intact; lines without any
 * TAB fall back to whitespace splitting, which keeps space-separated input
 * working as before.
 *
 * The first letter of the status code selects the status (A/M/D/R/C);
 * anything else is recorded as "unknown". Lines without a usable path are
 * skipped.
 *
 * @param output Raw command output; may be empty.
 * @returns One row per parsed line, with normalized paths and a category
 *   computed from the new path.
 */
function parseGitNameStatus(output: string): UltragoalChangeSetPath[] {
  const rows: UltragoalChangeSetPath[] = [];
  for (const line of output.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    // git delimits name-status fields with TABs; splitting on arbitrary
    // whitespace would cut a path such as "docs/my file.md" in two.
    const parts = trimmed.includes("\t") ? trimmed.split("\t") : trimmed.split(/\s+/);
    const statusCode = parts[0] ?? "";
    let status: UltragoalChangeStatus = "unknown";
    if (statusCode.startsWith("A")) status = "added";
    else if (statusCode.startsWith("M")) status = "modified";
    else if (statusCode.startsWith("D")) status = "deleted";
    else if (statusCode.startsWith("R")) status = "renamed";
    else if (statusCode.startsWith("C")) status = "copied";
    // Renames and copies list the old path first and the new path second.
    const hasOldPath = status === "renamed" || status === "copied";
    const pathValue = hasOldPath ? parts[2] : parts[1];
    if (!pathValue) continue;
    const oldPath = hasOldPath ? parts[1] : undefined;
    rows.push({
      path: normalizeRepoPath(pathValue),
      oldPath: oldPath ? normalizeRepoPath(oldPath) : undefined,
      status,
      category: categorizeComputerChangePath(pathValue),
    });
  }
  return rows;
}
2922
+
2923
/**
 * Flattens several row lists into one, de-duplicating on the
 * (oldPath, path) pair. When the same pair appears more than once the later
 * row wins, while the position of its first occurrence is kept.
 */
function mergeChangeSetPaths(groups: UltragoalChangeSetPath[][]): UltragoalChangeSetPath[] {
  const deduped = new Map<string, UltragoalChangeSetPath>();
  for (const group of groups) {
    for (const row of group) {
      // NUL cannot occur in a path, so it is a safe separator for the key.
      const key = `${row.oldPath ?? ""}\u0000${row.path}`;
      deduped.set(key, row);
    }
  }
  return Array.from(deduped.values());
}
2928
+
2929
/**
 * Builds a trusted change set for a checkpoint from the local git state.
 *
 * Combines the committed changes between the resolved base and HEAD with the
 * unstaged and staged changes of the working tree.
 *
 * @param cwd Directory the git commands run in.
 * @returns The change set, or `undefined` when `cwd` is not inside a work
 *   tree, has no `.git` entry, or all three name-status commands failed.
 */
async function computeCheckpointChangeSet(cwd: string): Promise<UltragoalChangeSet | undefined> {
  const insideWorkTree = await spawnText(["git", "rev-parse", "--is-inside-work-tree"], { cwd, timeoutMs: 3000 });
  if (!insideWorkTree.ok || insideWorkTree.stdout.trim() !== "true") return undefined;

  // NOTE(review): Bun documents BunFile.exists() as returning false for
  // directories. If that holds here, this guard only passes when `.git` is a
  // file (linked worktree / submodule) — confirm that is the intended scope.
  const hasGitEntry = await Bun.file(path.join(cwd, ".git")).exists();
  if (!hasGitEntry) return undefined;

  const baseRef = await resolveGitBase(cwd);
  const mergeBaseResult = await spawnText(["git", "merge-base", "HEAD", baseRef], { cwd, timeoutMs: 3000 });
  const committedRange = `${baseRef}...HEAD`;
  const [committed, unstaged, staged, stat] = await Promise.all([
    spawnText(["git", "diff", "--name-status", committedRange], { cwd, timeoutMs: 5000 }),
    spawnText(["git", "diff", "--name-status"], { cwd, timeoutMs: 5000 }),
    spawnText(["git", "diff", "--cached", "--name-status"], { cwd, timeoutMs: 5000 }),
    spawnText(["git", "diff", "--stat", committedRange], { cwd, timeoutMs: 5000 }),
  ]);
  // Give up only when none of the three path listings could be produced.
  if (![committed, unstaged, staged].some(result => result.ok)) return undefined;

  const mergeBase = mergeBaseResult.ok ? mergeBaseResult.stdout.trim() : "";
  const paths = mergeChangeSetPaths([
    parseGitNameStatus(committed.stdout),
    parseGitNameStatus(unstaged.stdout),
    parseGitNameStatus(staged.stdout),
  ]);
  return {
    source: "checkpoint-git",
    baseRef,
    mergeBase: mergeBase || undefined,
    headRef: "HEAD",
    paths,
    rawDiffStat: stat.stdout,
    trusted: true,
  };
}
2957
+
2958
/**
 * Extracts changed paths from the `diff --git a/<old> b/<new>` header lines of
 * a unified diff. All other lines are ignored.
 *
 * A header whose old and new paths are equal yields a "modified" row; one
 * whose paths differ yields a "renamed" row carrying `oldPath`.
 */
function parseUnifiedDiffPaths(diff: string): UltragoalChangeSetPath[] {
  const headerPattern = /^diff --git a\/(.+?) b\/(.+)$/;
  return diff.split("\n").flatMap((line): UltragoalChangeSetPath[] => {
    if (!line.startsWith("diff --git ")) return [];
    const header = headerPattern.exec(line);
    const rawOld = header?.[1];
    const rawNew = header?.[2];
    // Both groups are mandatory in the pattern, so they are set on any match.
    if (rawOld === undefined || rawNew === undefined) return [];
    const previousPath = normalizeRepoPath(rawOld);
    const currentPath = normalizeRepoPath(rawNew);
    const unchangedName = previousPath === currentPath;
    return [
      {
        path: currentPath,
        oldPath: unchangedName ? undefined : previousPath,
        status: unchangedName ? "modified" : "renamed",
        category: categorizeComputerChangePath(currentPath),
      },
    ];
  });
}
2975
+
2976
/**
 * Converts a resolved review source into a trusted change set, dispatching on
 * `source.kind`:
 *
 * - "spec": an empty change set.
 * - "pr": paths parsed from `source.diff` when it is a string; otherwise the
 *   nested `source.local` source is converted instead (if it is an object).
 * - "worktree": paths parsed from `nameStatus`, falling back to `status`.
 * - "branch" / "pr-fallback": paths parsed from `nameStatus`, with `base` as
 *   the base ref.
 *
 * @returns The change set, or `undefined` for any other kind.
 */
function changeSetFromReviewSource(source: JsonObject): UltragoalChangeSet | undefined {
  const kind = nonEmptyString(source.kind);
  switch (kind) {
    case "spec":
      return { source: "review-spec", paths: [], trusted: true };
    case "pr": {
      if (typeof source.diff === "string") {
        return {
          source: "review-pr",
          paths: parseUnifiedDiffPaths(source.diff),
          rawDiffStat: source.diff,
          trusted: true,
        };
      }
      // No inline diff: fall back to the locally resolved source, if any.
      const local = qualityGateObject(source.local);
      return local ? changeSetFromReviewSource(local) : undefined;
    }
    case "worktree":
      return {
        source: "review-worktree",
        paths: parseGitNameStatus(String(source.nameStatus ?? source.status ?? "")),
        rawDiffStat: String(source.diffStat ?? ""),
        trusted: true,
      };
    case "branch":
    case "pr-fallback":
      return {
        source: "review-branch",
        baseRef: nonEmptyString(source.base) ?? undefined,
        headRef: "HEAD",
        paths: parseGitNameStatus(String(source.nameStatus ?? "")),
        rawDiffStat: String(source.diffStat ?? ""),
        trusted: true,
      };
    default:
      return undefined;
  }
}
2695
3006
 
2696
3007
  async function localDiffSource(cwd: string, sourceKind: string, branch?: string): Promise<JsonObject> {
2697
3008
  if (sourceKind === "worktree") {
2698
- const [status, diff] = await Promise.all([
3009
+ const [status, diff, unstaged, staged] = await Promise.all([
2699
3010
  spawnText(["git", "status", "--short"], { cwd, timeoutMs: 5000 }),
2700
3011
  spawnText(["git", "diff", "--stat"], { cwd, timeoutMs: 5000 }),
3012
+ spawnText(["git", "diff", "--name-status"], { cwd, timeoutMs: 5000 }),
3013
+ spawnText(["git", "diff", "--cached", "--name-status"], { cwd, timeoutMs: 5000 }),
2701
3014
  ]);
2702
- return { kind: "worktree", status: status.stdout, diffStat: diff.stdout };
3015
+ return {
3016
+ kind: "worktree",
3017
+ status: status.stdout,
3018
+ diffStat: diff.stdout,
3019
+ nameStatus: `${unstaged.stdout}\n${staged.stdout}`,
3020
+ };
2703
3021
  }
2704
3022
  const base = await resolveGitBase(cwd, branch);
2705
- const diff = await spawnText(["git", "diff", "--stat", `${base}...HEAD`], { cwd, timeoutMs: 5000 });
2706
- return { kind: sourceKind, base, branch, diffStat: diff.stdout };
3023
+ const [diff, nameStatus] = await Promise.all([
3024
+ spawnText(["git", "diff", "--stat", `${base}...HEAD`], { cwd, timeoutMs: 5000 }),
3025
+ spawnText(["git", "diff", "--name-status", `${base}...HEAD`], { cwd, timeoutMs: 5000 }),
3026
+ ]);
3027
+ return { kind: sourceKind, base, branch, diffStat: diff.stdout, nameStatus: nameStatus.stdout };
2707
3028
  }
2708
3029
 
2709
3030
  async function resolveReviewSource(
@@ -2817,13 +3138,14 @@ export async function runUltragoalReview(cwd: string, args: readonly string[]):
2817
3138
  const mode = parseReviewMode(flagValue(args, "--mode"));
2818
3139
  const specPath = flagValue(args, "--spec");
2819
3140
  const { contractStrength, source } = await resolveReviewSource(cwd, args, specPath);
3141
+ const changeSet = changeSetFromReviewSource(source);
2820
3142
  const executorQa = await readOptionalExecutorQa(
2821
3143
  cwd,
2822
3144
  flagValue(args, "--executor-qa-json") ?? flagValue(args, "--executor-qa"),
2823
3145
  );
2824
3146
  const findings: UltragoalReviewFinding[] = [];
2825
3147
  try {
2826
- await validateExecutorQaRedTeamEvidenceForReview(cwd, executorQa, { mode: "review" });
3148
+ await validateExecutorQaRedTeamEvidenceForReview(cwd, executorQa, { mode: "review", changeSet });
2827
3149
  } catch (error) {
2828
3150
  findings.push(findingFromError(error));
2829
3151
  }
@@ -350,12 +350,13 @@ export class GoalRuntime {
350
350
  await this.#flushUsageLocked();
351
351
  const state = this.#getStateClone();
352
352
  if (!state?.goal) return undefined;
353
+ if (state.goal.status !== "active") {
354
+ throw new Error(`cannot pause a goal that is not active (current status: ${state.goal.status})`);
355
+ }
353
356
  state.enabled = false;
354
357
  state.mode = "active";
355
358
  state.reason = undefined;
356
- if (state.goal.status === "active") {
357
- state.goal.status = "paused";
358
- }
359
+ state.goal.status = "paused";
359
360
  state.goal.updatedAt = this.#now();
360
361
  this.#clearActiveAccounting();
361
362
  await this.#commitState(state, { persist: "goal_paused" });
@@ -19,7 +19,7 @@ export interface GoalModeState {
19
19
  goal: Goal;
20
20
  }
21
21
  export interface GoalToolDetails {
22
- op: "create" | "get" | "complete" | "resume" | "drop";
22
+ op: "create" | "get" | "complete" | "resume" | "drop" | "pause";
23
23
  goal?: Goal | null;
24
24
  }
25
25
 
@@ -17,9 +17,9 @@ import type { Goal, GoalStatus, GoalToolDetails } from "../state";
17
17
 
18
18
  const goalSchema = z.object({
19
19
  op: z
20
- .enum(["create", "get", "complete", "resume", "drop"])
20
+ .enum(["create", "get", "complete", "resume", "drop", "pause"])
21
21
  .describe(
22
- "op: get | create | complete | drop | resume — drop clears the active goal without exiting goal mode (tool stays callable for the next create)",
22
+ "op: get | create | complete | drop | resume | pause — drop clears the active goal without exiting goal mode (tool stays callable for the next create); pause parks an active goal whose remaining work is blocked on human input so the autonomous continuation loop stops until resume",
23
23
  ),
24
24
  objective: z.string().describe("goal objective").optional(),
25
25
  });
@@ -66,8 +66,15 @@ function buildGoalToolResult(op: GoalToolDetails["op"], response: GoalToolRespon
66
66
  };
67
67
  }
68
68
 
69
/**
 * Rejects a goal operation that the session does not permit.
 *
 * When the session exposes no `goalToolAllowedOps`, every operation is allowed.
 *
 * @throws ToolError listing the permitted operations when `op` is not among them.
 */
function assertGoalOperationAllowed(session: ToolSession, op: GoalToolInput["op"]): void {
  const permitted = session.goalToolAllowedOps;
  if (permitted && !permitted.includes(op)) {
    throw new ToolError(`Goal mode in this session only allows goal operations: ${permitted.join(", ")}.`);
  }
}
74
+
69
75
  async function executeGoalOperation(session: ToolSession, params: GoalToolInput): Promise<GoalToolResponse> {
70
76
  rejectUnsupportedGoalArgs(params as Record<string, unknown>);
77
+ assertGoalOperationAllowed(session, params.op);
71
78
  if (params.op === "get") {
72
79
  const state = session.getGoalModeState?.();
73
80
  return buildGoalToolResponse(state?.goal ?? null);
@@ -86,6 +93,10 @@ async function executeGoalOperation(session: ToolSession, params: GoalToolInput)
86
93
  const resumed = await runtime.resumeGoal();
87
94
  return buildGoalToolResponse(resumed.goal);
88
95
  }
96
+ if (params.op === "pause") {
97
+ const paused = await runtime.pauseGoal();
98
+ return buildGoalToolResponse(paused?.goal ?? null);
99
+ }
89
100
  if (params.op === "drop") {
90
101
  const dropped = await runtime.dropGoal();
91
102
  return buildGoalToolResponse(dropped ?? null);
@@ -121,7 +132,7 @@ export class GoalTool implements AgentTool<typeof goalSchema, GoalToolDetails> {
121
132
  _context?: AgentToolContext,
122
133
  ): Promise<AgentToolResult<GoalToolDetails>> {
123
134
  const response = await executeGoalOperation(this.#session, params);
124
- return buildGoalToolResult(params.op, response);
135
+ return buildGoalToolResult(params.op as GoalToolDetails["op"], response);
125
136
  }
126
137
  }
127
138
 
@@ -135,6 +146,8 @@ function describeOp(op: string | undefined): string {
135
146
  return "check";
136
147
  case "resume":
137
148
  return "resume";
149
+ case "pause":
150
+ return "pause";
138
151
  case "drop":
139
152
  return "drop";
140
153
  default: