@gajae-code/coding-agent 0.5.4 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/dist/types/cli/web-search-cli.d.ts +12 -0
- package/dist/types/commands/rlm.d.ts +10 -0
- package/dist/types/commands/web-search.d.ts +54 -0
- package/dist/types/config/keybindings.d.ts +10 -0
- package/dist/types/config/model-profiles.d.ts +2 -1
- package/dist/types/config/model-registry.d.ts +3 -0
- package/dist/types/config/models-config-schema.d.ts +3 -0
- package/dist/types/config/settings-schema.d.ts +61 -3
- package/dist/types/edit/notebook.d.ts +3 -0
- package/dist/types/eval/py/executor.d.ts +3 -0
- package/dist/types/eval/py/kernel.d.ts +3 -1
- package/dist/types/eval/py/runtime.d.ts +9 -1
- package/dist/types/exec/bash-executor.d.ts +4 -0
- package/dist/types/extensibility/custom-tools/types.d.ts +2 -0
- package/dist/types/extensibility/custom-tools/wrapper.d.ts +1 -0
- package/dist/types/extensibility/extensions/types.d.ts +2 -0
- package/dist/types/extensibility/extensions/wrapper.d.ts +1 -0
- package/dist/types/gjc-runtime/launch-tmux.d.ts +6 -0
- package/dist/types/gjc-runtime/session-state-sidecar.d.ts +14 -0
- package/dist/types/gjc-runtime/tmux-common.d.ts +6 -0
- package/dist/types/gjc-runtime/tmux-gc.d.ts +3 -3
- package/dist/types/gjc-runtime/tmux-sessions.d.ts +4 -0
- package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +18 -0
- package/dist/types/goals/state.d.ts +1 -1
- package/dist/types/goals/tools/goal-tool.d.ts +2 -0
- package/dist/types/main.d.ts +11 -0
- package/dist/types/modes/components/custom-editor.d.ts +4 -2
- package/dist/types/modes/components/custom-model-preset-wizard.d.ts +12 -0
- package/dist/types/modes/components/model-selector.d.ts +5 -2
- package/dist/types/modes/components/status-line.d.ts +4 -1
- package/dist/types/modes/controllers/input-controller.d.ts +3 -0
- package/dist/types/modes/controllers/selector-controller.d.ts +1 -0
- package/dist/types/modes/print-mode.d.ts +6 -0
- package/dist/types/modes/rpc/rpc-client.d.ts +21 -0
- package/dist/types/modes/rpc/rpc-socket-security.d.ts +7 -0
- package/dist/types/modes/rpc/rpc-types.d.ts +13 -0
- package/dist/types/modes/shared/agent-wire/command-dispatch.d.ts +2 -0
- package/dist/types/modes/shared/agent-wire/unattended-session.d.ts +1 -0
- package/dist/types/rlm/artifacts.d.ts +9 -0
- package/dist/types/rlm/complete-research-tool.d.ts +35 -0
- package/dist/types/rlm/data-context.d.ts +6 -0
- package/dist/types/rlm/index.d.ts +35 -0
- package/dist/types/rlm/notebook.d.ts +12 -0
- package/dist/types/rlm/preset.d.ts +23 -0
- package/dist/types/rlm/python-tool.d.ts +16 -0
- package/dist/types/rlm/report.d.ts +14 -0
- package/dist/types/rlm/types.d.ts +37 -0
- package/dist/types/sdk.d.ts +7 -0
- package/dist/types/session/agent-session.d.ts +21 -0
- package/dist/types/tools/bash-allowed-prefixes.d.ts +6 -1
- package/dist/types/tools/browser/attach.d.ts +19 -3
- package/dist/types/tools/browser/registry.d.ts +15 -0
- package/dist/types/tools/browser/render.d.ts +3 -0
- package/dist/types/tools/browser.d.ts +18 -1
- package/dist/types/tools/computer/render.d.ts +17 -0
- package/dist/types/tools/computer.d.ts +465 -0
- package/dist/types/tools/index.d.ts +24 -1
- package/dist/types/tools/job.d.ts +13 -0
- package/dist/types/tools/tool-timeouts.d.ts +5 -0
- package/dist/types/web/search/index.d.ts +32 -2
- package/dist/types/web/search/providers/base.d.ts +22 -0
- package/dist/types/web/search/providers/xai.d.ts +64 -0
- package/dist/types/web/search/types.d.ts +11 -3
- package/package.json +7 -7
- package/src/cli/web-search-cli.ts +123 -8
- package/src/cli.ts +2 -0
- package/src/commands/rlm.ts +19 -0
- package/src/commands/web-search.ts +66 -0
- package/src/config/keybindings.ts +11 -0
- package/src/config/model-profiles.ts +11 -3
- package/src/config/model-registry.ts +55 -1
- package/src/config/models-config-schema.ts +1 -0
- package/src/config/settings-schema.ts +67 -1
- package/src/edit/notebook.ts +6 -2
- package/src/eval/py/executor.ts +8 -1
- package/src/eval/py/kernel.ts +9 -4
- package/src/eval/py/runtime.ts +153 -32
- package/src/exec/bash-executor.ts +10 -4
- package/src/extensibility/custom-tools/types.ts +2 -0
- package/src/extensibility/custom-tools/wrapper.ts +2 -0
- package/src/extensibility/extensions/types.ts +2 -0
- package/src/extensibility/extensions/wrapper.ts +1 -0
- package/src/gjc-runtime/launch-tmux.ts +129 -1
- package/src/gjc-runtime/session-state-sidecar.ts +61 -1
- package/src/gjc-runtime/tmux-common.ts +26 -2
- package/src/gjc-runtime/tmux-gc.ts +40 -27
- package/src/gjc-runtime/tmux-sessions.ts +13 -1
- package/src/gjc-runtime/ultragoal-runtime.ts +340 -18
- package/src/goals/runtime.ts +4 -3
- package/src/goals/state.ts +1 -1
- package/src/goals/tools/goal-tool.ts +16 -3
- package/src/internal-urls/docs-index.generated.ts +13 -9
- package/src/main.ts +28 -3
- package/src/modes/components/custom-editor.ts +13 -4
- package/src/modes/components/custom-model-preset-wizard.ts +293 -0
- package/src/modes/components/hook-selector.ts +1 -1
- package/src/modes/components/model-selector.ts +72 -29
- package/src/modes/components/skill-message.ts +62 -8
- package/src/modes/components/status-line.ts +13 -1
- package/src/modes/controllers/input-controller.ts +60 -11
- package/src/modes/controllers/selector-controller.ts +39 -0
- package/src/modes/interactive-mode.ts +1 -1
- package/src/modes/print-mode.ts +14 -4
- package/src/modes/rpc/rpc-client.ts +250 -80
- package/src/modes/rpc/rpc-mode.ts +6 -12
- package/src/modes/rpc/rpc-socket-security.ts +103 -0
- package/src/modes/rpc/rpc-types.ts +10 -0
- package/src/modes/shared/agent-wire/command-dispatch.ts +7 -0
- package/src/modes/shared/agent-wire/command-validation.ts +1 -0
- package/src/modes/shared/agent-wire/scopes.ts +1 -0
- package/src/modes/shared/agent-wire/unattended-session.ts +9 -0
- package/src/modes/utils/hotkeys-markdown.ts +4 -2
- package/src/modes/utils/ui-helpers.ts +2 -2
- package/src/prompts/goals/goal-continuation.md +1 -0
- package/src/prompts/goals/goal-mode-active.md +1 -0
- package/src/prompts/system/rlm-report-command.md +1 -0
- package/src/prompts/system/rlm-research.md +23 -0
- package/src/prompts/tools/bash.md +23 -2
- package/src/prompts/tools/browser.md +7 -3
- package/src/prompts/tools/computer.md +74 -0
- package/src/prompts/tools/goal.md +3 -0
- package/src/prompts/tools/job.md +9 -1
- package/src/prompts/tools/web-search.md +7 -0
- package/src/rlm/artifacts.ts +60 -0
- package/src/rlm/complete-research-tool.ts +163 -0
- package/src/rlm/data-context.ts +26 -0
- package/src/rlm/index.ts +339 -0
- package/src/rlm/notebook.ts +108 -0
- package/src/rlm/preset.ts +76 -0
- package/src/rlm/python-tool.ts +68 -0
- package/src/rlm/report.ts +70 -0
- package/src/rlm/types.ts +40 -0
- package/src/sdk.ts +12 -0
- package/src/session/agent-session.ts +48 -3
- package/src/slash-commands/builtin-registry.ts +17 -0
- package/src/tools/bash-allowed-prefixes.ts +84 -1
- package/src/tools/bash.ts +80 -13
- package/src/tools/browser/attach.ts +103 -3
- package/src/tools/browser/registry.ts +176 -2
- package/src/tools/browser/render.ts +9 -1
- package/src/tools/browser.ts +33 -0
- package/src/tools/computer/render.ts +78 -0
- package/src/tools/computer.ts +640 -0
- package/src/tools/index.ts +41 -1
- package/src/tools/job.ts +88 -5
- package/src/tools/json-tree.ts +42 -29
- package/src/tools/renderers.ts +2 -0
- package/src/tools/tool-timeouts.ts +1 -0
- package/src/web/search/index.ts +27 -2
- package/src/web/search/provider.ts +16 -1
- package/src/web/search/providers/base.ts +22 -0
- package/src/web/search/providers/xai.ts +511 -0
- package/src/web/search/render.ts +7 -0
- package/src/web/search/types.ts +11 -1
|
@@ -801,13 +801,114 @@ function evidenceKindMatches(kind: string, words: string[]): boolean {
|
|
|
801
801
|
|
|
802
802
|
/** Coarse surface classification returned by `surfaceFamily`. */
type SurfaceFamily = "web" | "cli" | "native" | "api-package" | "algorithm-math" | "unknown";
|
|
803
803
|
|
|
804
|
+
/** Kind of change recorded for one path in a change set; `"unknown"` is the fallback when no status code is recognised. */
type UltragoalChangeStatus = "added" | "modified" | "deleted" | "renamed" | "copied" | "unknown";
|
|
805
|
+
/**
 * Bucket assigned to a changed path by `categorizeComputerChangePath`.
 * `"other"` is the catch-all for paths that match none of the known locations.
 */
type UltragoalChangeCategory =
	| "code"
	| "generated-binding"
	| "tool"
	| "settings-registry"
	| "prompt-doc-behavior"
	| "docs-static"
	| "other";
|
|
813
|
+
/** One changed path inside an `UltragoalChangeSet`. */
interface UltragoalChangeSetPath extends JsonObject {
	/** Repository-relative path of the file after the change. */
	path: string;
	/** How the path changed. */
	status: UltragoalChangeStatus;
	/** Previous path; populated for renames and copies. */
	oldPath?: string;
	/** Pre-computed category of `path`; consumers fall back to categorising `path` when absent. */
	category?: UltragoalChangeCategory;
}
|
|
819
|
+
/**
 * Set of changed paths derived by the runtime itself (git or a review source),
 * as opposed to paths merely declared inside executor QA evidence.
 */
interface UltragoalChangeSet extends JsonObject {
	/** Where the path list was derived from. */
	source: "checkpoint-git" | "review-pr" | "review-branch" | "review-worktree" | "review-spec";
	/** Ref the change was diffed against, when known. */
	baseRef?: string;
	/** Ref the change ends at, when known. */
	headRef?: string;
	/** Merge base between the head and `baseRef`, when it could be resolved. */
	mergeBase?: string;
	/** Changed paths. */
	paths: UltragoalChangeSetPath[];
	/** Raw text retained alongside the parsed paths (diff stat output, or the full diff for PR sources). */
	rawDiffStat?: string;
	/** Always the literal `true`; a change set of this type is by construction trusted. */
	trusted: true;
}
|
|
828
|
+
|
|
829
|
+
/**
 * Adversarial case ids that must all be present, applicable, linked, and backed
 * by live native proof whenever the computer red-team suite is required.
 */
const MANDATORY_COMPUTER_CASE_IDS = [
	"kill-switch-bypass",
	"suspended-enforcement",
	"permission-revoked",
	"display-stale",
	"out-of-bounds-drift",
	"runaway-loop-halt",
	"blast-radius",
] as const;
|
|
838
|
+
|
|
839
|
+
/**
 * Normalises a repository path for prefix/equality comparisons.
 *
 * Every run of backslashes becomes a single forward slash, so both native
 * Windows separators (`a\b`) and escaped ones (`a\\b`) map to `a/b`, and a
 * single leading `./` is stripped.
 *
 * @param value Path as reported by git or declared in evidence.
 * @returns The path using `/` separators without a leading `./`.
 */
function normalizeRepoPath(value: string): string {
	// The previous literal only matched two consecutive backslashes, leaving
	// ordinary single-backslash separators untouched.
	return value.replace(/\\+/g, "/").replace(/^\.\//, "");
}
|
|
842
|
+
|
|
843
|
+
/**
 * Maps a changed path onto the computer-use change category it belongs to.
 *
 * The path is normalised first; anything that matches none of the known
 * locations is reported as `"other"`.
 *
 * @param value Changed path (any separator style accepted by `normalizeRepoPath`).
 * @returns The category for the path.
 */
function categorizeComputerChangePath(value: string): UltragoalChangeCategory {
	const repoPath = normalizeRepoPath(value);

	const settingsRegistryFiles = [
		"packages/coding-agent/src/tools/index.ts",
		"packages/coding-agent/src/tools/renderers.ts",
		"packages/coding-agent/src/config/settings-schema.ts",
	];
	const behaviorPromptFiles = [
		"packages/coding-agent/src/prompts/tools/computer.md",
		"packages/coding-agent/src/defaults/gjc/skills/ultragoal/SKILL.md",
		"packages/coding-agent/src/prompts/agents/executor.md",
	];
	const staticDocFiles = ["docs/tools/computer.md", "docs/computer-use/README.md"];

	if (repoPath.startsWith("crates/pi-natives/src/computer/")) {
		return "code";
	}
	if (/^packages\/natives\/native\/index\.(?:d\.ts|js)$/.test(repoPath)) {
		return "generated-binding";
	}
	const isToolEntry = repoPath === "packages/coding-agent/src/tools/computer.ts";
	const isToolSubtree = repoPath.startsWith("packages/coding-agent/src/tools/computer/");
	if (isToolEntry || isToolSubtree) {
		return "tool";
	}
	if (settingsRegistryFiles.includes(repoPath)) {
		return "settings-registry";
	}
	if (behaviorPromptFiles.includes(repoPath)) {
		return "prompt-doc-behavior";
	}
	if (staticDocFiles.includes(repoPath)) {
		return "docs-static";
	}
	return "other";
}
|
|
867
|
+
|
|
868
|
+
/**
 * Tells whether a change-set row touches a computer-use path, considering both
 * its current path and, when present, its previous path.
 */
function isComputerChangePath(row: UltragoalChangeSetPath): boolean {
	const candidates = row.oldPath ? [row.path, row.oldPath] : [row.path];
	return candidates.some(candidate => categorizeComputerChangePath(candidate) !== "other");
}
|
|
874
|
+
|
|
875
|
+
/**
 * Tells whether a change set consists exclusively of static documentation
 * paths. A missing or empty change set is never considered docs-only.
 *
 * A row's own `category` is used when set; the previous path of a rename/copy
 * must be static documentation as well.
 */
function isDocsOnlyStaticComputerChangeSet(changeSet: UltragoalChangeSet | undefined): boolean {
	if (changeSet === undefined) return false;
	if (changeSet.paths.length === 0) return false;
	for (const row of changeSet.paths) {
		const current = row.category ?? categorizeComputerChangePath(row.path);
		if (current !== "docs-static") return false;
		if (row.oldPath && categorizeComputerChangePath(row.oldPath) !== "docs-static") return false;
	}
	return true;
}
|
|
883
|
+
|
|
884
|
+
/**
 * Decides from a trusted change set alone whether the computer red-team suite
 * is required: it must be trusted, not docs-only, and touch at least one
 * computer-use path.
 */
function trustedChangeSetRequiresComputerSuite(changeSet: UltragoalChangeSet | undefined): boolean {
	if (changeSet === undefined || !changeSet.trusted) {
		return false;
	}
	if (isDocsOnlyStaticComputerChangeSet(changeSet)) {
		return false;
	}
	return changeSet.paths.some(row => isComputerChangePath(row));
}
|
|
889
|
+
|
|
890
|
+
/**
 * Decides whether the mandatory computer red-team suite applies.
 *
 * The trusted change set is consulted first; otherwise any string entry of
 * `executorQa.changedPaths` that categorises as a computer-use path is enough
 * to require the suite.
 */
function requiresComputerRedTeamSuite(executorQa: JsonObject, changeSet: UltragoalChangeSet | undefined): boolean {
	if (trustedChangeSetRequiresComputerSuite(changeSet)) return true;

	const declared = executorQa.changedPaths;
	if (!Array.isArray(declared)) return false;

	for (const entry of declared) {
		if (typeof entry === "string" && categorizeComputerChangePath(entry) !== "other") return true;
	}
	return false;
}
|
|
895
|
+
|
|
896
|
+
/** Canonical form of an adversarial case id: surface-token normalisation, then whitespace runs turned into `-`. */
function normalizeAdversarialCaseId(value: string): string {
	const token = normalizeSurfaceToken(value);
	return token.split(/\s+/).join("-");
}
|
|
899
|
+
|
|
804
900
|
/**
 * Canonicalises a surface token: lower-cased, underscores replaced by hyphens,
 * surrounding whitespace removed.
 */
export function normalizeSurfaceToken(value: string): string {
	const lowered = value.toLowerCase();
	const hyphenated = lowered.split("_").join("-");
	return hyphenated.trim();
}
|
|
807
903
|
|
|
808
904
|
export function surfaceFamily(value: string): SurfaceFamily {
|
|
809
905
|
const normalized = normalizeSurfaceToken(value);
|
|
810
|
-
if (
|
|
906
|
+
if (
|
|
907
|
+
["computer", "computer-use", "desktop-input", "native-input", "native", "desktop", "tui"].some(word =>
|
|
908
|
+
normalized.includes(word),
|
|
909
|
+
)
|
|
910
|
+
)
|
|
911
|
+
return "native";
|
|
811
912
|
if (["gui", "web", "browser", "ui", "visual"].some(word => normalized.includes(word))) return "web";
|
|
812
913
|
if (["cli", "terminal", "command"].some(word => normalized.includes(word))) return "cli";
|
|
813
914
|
if (["api", "package", "library", "sdk"].some(word => normalized.includes(word))) return "api-package";
|
|
@@ -1840,12 +1941,92 @@ function validateAdversarialCases(
|
|
|
1840
1941
|
return idMap;
|
|
1841
1942
|
}
|
|
1842
1943
|
|
|
1944
|
+
/**
 * Enforces the mandatory computer red-team suite.
 *
 * For every id in `MANDATORY_COMPUTER_CASE_IDS` the matching adversarial case
 * must exist, must not be marked not-applicable, must be referenced from some
 * `contractCoverage[].adversarialCaseRefs`, and must reference at least one
 * artifact that passes both live native proof validation and the structural
 * artifact check.
 *
 * @param cwd Working directory handed to the artifact validators.
 * @param contractCoverage Contract coverage rows whose case links are collected.
 * @param adversarialCases Adversarial case rows keyed by id.
 * @param artifactRefs Artifact rows keyed by id.
 * @throws Error prefixed with `COMPUTER_REDTEAM_CASE_MISSING`,
 *   `COMPUTER_REDTEAM_CASE_NOT_APPLICABLE`, `COMPUTER_REDTEAM_CASE_UNLINKED`,
 *   `COMPUTER_REDTEAM_INLINE_ONLY`, `COMPUTER_REDTEAM_RECEIPT_ONLY`,
 *   `COMPUTER_REDTEAM_ARTIFACT_METADATA_ONLY` or `COMPUTER_REDTEAM_ARTIFACT_MISSING`,
 *   or an unprefixed error when a case references an unknown artifact id.
 */
async function validateMandatoryComputerAdversarialCases(
	cwd: string,
	contractCoverage: JsonObject[],
	adversarialCases: Map<string, JsonObject>,
	artifactRefs: Map<string, JsonObject>,
): Promise<void> {
	// Gather every case id linked from contract coverage, in normalised form.
	const linkedCaseIds = new Set<string>();
	contractCoverage.forEach((coverageRow, index) => {
		const refs = optionalStringLinks(coverageRow, "adversarialCaseRefs", `executorQa.contractCoverage[${index}]`);
		for (const ref of refs ?? []) {
			linkedCaseIds.add(normalizeAdversarialCaseId(ref));
		}
	});

	for (const caseId of MANDATORY_COMPUTER_CASE_IDS) {
		const caseRow = adversarialCases.get(caseId);
		if (!caseRow) {
			throw new Error(
				`COMPUTER_REDTEAM_CASE_MISSING: qualityGate executorQa.adversarialCases must include ${caseId}`,
			);
		}
		const caseStatus = optionalStatusField(caseRow, `executorQa.adversarialCases.${caseId}`);
		if (caseStatus === NOT_APPLICABLE_STATUS) {
			throw new Error(
				`COMPUTER_REDTEAM_CASE_NOT_APPLICABLE: mandatory computer adversarial case ${caseId} must not be not_applicable`,
			);
		}
		if (!linkedCaseIds.has(caseId)) {
			throw new Error(
				`COMPUTER_REDTEAM_CASE_UNLINKED: mandatory computer adversarial case ${caseId} must be linked from contractCoverage.adversarialCaseRefs`,
			);
		}

		const artifactIds = requireStringLinks(caseRow.artifactRefs, `executorQa.adversarialCases.${caseId}.artifactRefs`);
		let provenLive = false;
		let inlineOnlySeen = false;
		let receiptOnlySeen = false;
		let metadataOnlySeen = false;

		for (const artifactId of artifactIds) {
			const artifact = artifactRefs.get(artifactId);
			if (!artifact) {
				throw new Error(
					`qualityGate executorQa.adversarialCases.${caseId}.artifactRefs references unknown id ${artifactId}`,
				);
			}
			const fieldName = `executorQa.artifactRefs.${artifactId}`;

			// Classify path-less artifacts so the most specific failure can be reported later.
			if (!nonEmptyString(artifact.path)) {
				const carriesInline = artifact.inlineEvidence !== undefined;
				const carriesReceipt = artifact.verifiedReceipt !== undefined || artifact.receipt !== undefined;
				if (carriesInline) inlineOnlySeen = true;
				if (carriesReceipt) receiptOnlySeen = true;
				if (!carriesInline && !carriesReceipt) metadataOnlySeen = true;
			}

			try {
				await validateArtifactProof(cwd, artifact, fieldName, { surfaceFamily: "native", live: true });
				const structural = await validateStructuralArtifact(cwd, artifact, fieldName, {
					surfaceFamily: "native",
					live: true,
				});
				if (structural) provenLive = true;
			} catch {
				// Preserve the explicit computer red-team error taxonomy below.
			}
		}

		if (provenLive) continue;

		// Report in fixed precedence: inline-only, receipt-only, metadata-only, then missing.
		if (inlineOnlySeen) {
			throw new Error(
				`COMPUTER_REDTEAM_INLINE_ONLY: mandatory computer adversarial case ${caseId} requires live structural native proof`,
			);
		}
		if (receiptOnlySeen) {
			throw new Error(
				`COMPUTER_REDTEAM_RECEIPT_ONLY: mandatory computer adversarial case ${caseId} requires live structural native proof`,
			);
		}
		if (metadataOnlySeen) {
			throw new Error(
				`COMPUTER_REDTEAM_ARTIFACT_METADATA_ONLY: mandatory computer adversarial case ${caseId} requires durable live structural native proof`,
			);
		}
		throw new Error(
			`COMPUTER_REDTEAM_ARTIFACT_MISSING: mandatory computer adversarial case ${caseId} requires at least one valid live structural native proof artifact`,
		);
	}
}
|
|
2023
|
+
|
|
1843
2024
|
function validateContractCoverage(
|
|
1844
2025
|
executorQa: JsonObject,
|
|
1845
2026
|
surfaceEvidence: Map<string, JsonObject>,
|
|
1846
2027
|
adversarialCases: Map<string, JsonObject>,
|
|
1847
2028
|
artifactRefs: Map<string, JsonObject>,
|
|
1848
|
-
):
|
|
2029
|
+
): JsonObject[] {
|
|
1849
2030
|
const rows = requireObjectArray(executorQa.contractCoverage, "executorQa.contractCoverage");
|
|
1850
2031
|
buildRowIdMap(rows, "executorQa.contractCoverage");
|
|
1851
2032
|
let hasSuccessfulContractCoverage = false;
|
|
@@ -1896,32 +2077,47 @@ function validateContractCoverage(
|
|
|
1896
2077
|
"qualityGate executorQa.contractCoverage must include at least one row with status covered, passed, or verified",
|
|
1897
2078
|
);
|
|
1898
2079
|
}
|
|
2080
|
+
return rows;
|
|
1899
2081
|
}
|
|
1900
2082
|
|
|
1901
2083
|
async function validateExecutorQaRedTeamEvidenceInternal(
|
|
1902
2084
|
cwd: string,
|
|
1903
2085
|
executorQa: JsonObject,
|
|
1904
|
-
|
|
2086
|
+
options: { mode?: "checkpoint" | "review"; changeSet?: UltragoalChangeSet } = {},
|
|
1905
2087
|
): Promise<void> {
|
|
1906
2088
|
const artifactRefs = await validateArtifactRefs(cwd, executorQa);
|
|
1907
2089
|
const surfaceEvidence = await validateSurfaceEvidence(cwd, executorQa, artifactRefs);
|
|
1908
2090
|
const adversarialCases = validateAdversarialCases(executorQa, artifactRefs);
|
|
1909
|
-
validateContractCoverage(executorQa, surfaceEvidence, adversarialCases, artifactRefs);
|
|
2091
|
+
const contractCoverage = validateContractCoverage(executorQa, surfaceEvidence, adversarialCases, artifactRefs);
|
|
2092
|
+
if (requiresComputerRedTeamSuite(executorQa, options.changeSet)) {
|
|
2093
|
+
await validateMandatoryComputerAdversarialCases(cwd, contractCoverage, adversarialCases, artifactRefs);
|
|
2094
|
+
}
|
|
1910
2095
|
}
|
|
1911
2096
|
|
|
1912
|
-
async function validateExecutorQaRedTeamEvidence(
|
|
1913
|
-
|
|
2097
|
+
async function validateExecutorQaRedTeamEvidence(
|
|
2098
|
+
cwd: string,
|
|
2099
|
+
executorQa: JsonObject,
|
|
2100
|
+
options: { changeSet?: UltragoalChangeSet } = {},
|
|
2101
|
+
): Promise<void> {
|
|
2102
|
+
await validateExecutorQaRedTeamEvidenceInternal(cwd, executorQa, {
|
|
2103
|
+
mode: "checkpoint",
|
|
2104
|
+
changeSet: options.changeSet,
|
|
2105
|
+
});
|
|
1914
2106
|
}
|
|
1915
2107
|
|
|
1916
2108
|
export async function validateExecutorQaRedTeamEvidenceForReview(
|
|
1917
2109
|
cwd: string,
|
|
1918
2110
|
executorQa: Record<string, unknown>,
|
|
1919
|
-
options: { mode?: "review" } = {},
|
|
2111
|
+
options: { mode?: "review"; changeSet?: UltragoalChangeSet } = {},
|
|
1920
2112
|
): Promise<void> {
|
|
1921
2113
|
await validateExecutorQaRedTeamEvidenceInternal(cwd, executorQa as JsonObject, options);
|
|
1922
2114
|
}
|
|
1923
2115
|
|
|
1924
|
-
async function validateCompletionQualityGate(
|
|
2116
|
+
async function validateCompletionQualityGate(
|
|
2117
|
+
cwd: string,
|
|
2118
|
+
gate: JsonObject,
|
|
2119
|
+
options: { changeSet?: UltragoalChangeSet } = {},
|
|
2120
|
+
): Promise<void> {
|
|
1925
2121
|
const codeReview = qualityGateObject(gate.codeReview);
|
|
1926
2122
|
if (codeReview) {
|
|
1927
2123
|
throw new Error(
|
|
@@ -1966,7 +2162,7 @@ async function validateCompletionQualityGate(cwd: string, gate: JsonObject): Pro
|
|
|
1966
2162
|
}
|
|
1967
2163
|
requireNonEmptyString(executorQa.evidence, "executorQa.evidence");
|
|
1968
2164
|
requireEmptyBlockers(executorQa.blockers, "executorQa.blockers");
|
|
1969
|
-
await validateExecutorQaRedTeamEvidence(cwd, executorQa);
|
|
2165
|
+
await validateExecutorQaRedTeamEvidence(cwd, executorQa, { changeSet: options.changeSet });
|
|
1970
2166
|
if (iteration.status !== PASSED_STATUS || iteration.fullRerun !== true) {
|
|
1971
2167
|
throw new Error("qualityGate iteration must be passed with fullRerun true");
|
|
1972
2168
|
}
|
|
@@ -1977,7 +2173,11 @@ async function validateCompletionQualityGate(cwd: string, gate: JsonObject): Pro
|
|
|
1977
2173
|
requireEmptyBlockers(iteration.blockers, "iteration.blockers");
|
|
1978
2174
|
}
|
|
1979
2175
|
|
|
1980
|
-
async function readRequiredCompletionQualityGate(
|
|
2176
|
+
async function readRequiredCompletionQualityGate(
|
|
2177
|
+
cwd: string,
|
|
2178
|
+
value: string | undefined,
|
|
2179
|
+
options: { changeSet?: UltragoalChangeSet } = {},
|
|
2180
|
+
): Promise<unknown> {
|
|
1981
2181
|
if (!value?.trim()) {
|
|
1982
2182
|
throw new Error(
|
|
1983
2183
|
"complete checkpoints require --quality-gate-json with architectReview, executorQa, and iteration evidence",
|
|
@@ -1986,7 +2186,7 @@ async function readRequiredCompletionQualityGate(cwd: string, value: string | un
|
|
|
1986
2186
|
const gate = await readStructuredValue(cwd, value);
|
|
1987
2187
|
const gateObject = qualityGateObject(gate);
|
|
1988
2188
|
if (!gateObject) throw new Error("qualityGate must be a JSON object");
|
|
1989
|
-
await validateCompletionQualityGate(cwd, gateObject);
|
|
2189
|
+
await validateCompletionQualityGate(cwd, gateObject, { changeSet: options.changeSet });
|
|
1990
2190
|
return gate;
|
|
1991
2191
|
}
|
|
1992
2192
|
|
|
@@ -2089,9 +2289,10 @@ export async function checkpointUltragoalGoal(input: {
|
|
|
2089
2289
|
// instead of silently dropping it.
|
|
2090
2290
|
return plan;
|
|
2091
2291
|
}
|
|
2292
|
+
const changeSet = input.status === "complete" ? await computeCheckpointChangeSet(input.cwd) : undefined;
|
|
2092
2293
|
const qualityGateJson =
|
|
2093
2294
|
input.status === "complete"
|
|
2094
|
-
? await readRequiredCompletionQualityGate(input.cwd, input.qualityGateJson)
|
|
2295
|
+
? await readRequiredCompletionQualityGate(input.cwd, input.qualityGateJson, { changeSet })
|
|
2095
2296
|
: input.qualityGateJson
|
|
2096
2297
|
? await readStructuredValue(input.cwd, input.qualityGateJson)
|
|
2097
2298
|
: undefined;
|
|
@@ -2690,20 +2891,140 @@ async function resolveGitBase(cwd: string, branch?: string): Promise<string> {
|
|
|
2690
2891
|
}
|
|
2691
2892
|
const mergeBase = await spawnText(["git", "merge-base", "HEAD", "origin/main"], { cwd, timeoutMs: 3000 });
|
|
2692
2893
|
if (mergeBase.ok && mergeBase.stdout.trim()) return mergeBase.stdout.trim();
|
|
2693
|
-
return "HEAD";
|
|
2894
|
+
return "HEAD~1";
|
|
2895
|
+
}
|
|
2896
|
+
|
|
2897
|
+
/**
 * Parses name-status style text into change-set rows.
 *
 * Two record shapes are accepted per line:
 * - tab-delimited records (`M<TAB>path`, `R100<TAB>old<TAB>new`), which are
 *   split on tabs so that paths containing spaces stay intact;
 * - whitespace-delimited records without tabs (for example short status
 *   text), which are split on whitespace runs as a fallback.
 *
 * Blank lines and lines without a path are skipped; a trailing carriage
 * return is ignored. The first letter of the status code selects the status,
 * anything unrecognised becomes `"unknown"`.
 *
 * @param output Raw multi-line text.
 * @returns One row per parsed line, with normalised paths and the category of the new path.
 */
function parseGitNameStatus(output: string): UltragoalChangeSetPath[] {
	const rows: UltragoalChangeSetPath[] = [];
	for (const rawLine of output.split("\n")) {
		const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
		if (!line.trim()) continue;

		// Prefer tab boundaries when present: splitting on any whitespace would
		// cut a path such as "docs/my file.md" into several fields.
		const parts = line.includes("\t") ? line.trimStart().split("\t") : line.trim().split(/\s+/);
		const statusCode = (parts[0] ?? "").trim();

		let status: UltragoalChangeStatus = "unknown";
		if (statusCode.startsWith("A")) status = "added";
		else if (statusCode.startsWith("M")) status = "modified";
		else if (statusCode.startsWith("D")) status = "deleted";
		else if (statusCode.startsWith("R")) status = "renamed";
		else if (statusCode.startsWith("C")) status = "copied";

		// Renames and copies carry "<old> <new>"; everything else carries a single path.
		const hasTwoPaths = status === "renamed" || status === "copied";
		const pathValue = hasTwoPaths ? parts[2] : parts[1];
		if (!pathValue) continue;
		const oldPath = hasTwoPaths ? parts[1] : undefined;

		rows.push({
			path: normalizeRepoPath(pathValue),
			oldPath: oldPath ? normalizeRepoPath(oldPath) : undefined,
			status,
			category: categorizeComputerChangePath(pathValue),
		});
	}
	return rows;
}
|
|
2922
|
+
|
|
2923
|
+
/**
 * Flattens several row groups into one list, de-duplicated by the
 * (oldPath, path) pair. When the same pair occurs more than once the last
 * occurrence wins while keeping the position of the first.
 */
function mergeChangeSetPaths(groups: UltragoalChangeSetPath[][]): UltragoalChangeSetPath[] {
	const merged = new Map<string, UltragoalChangeSetPath>();
	for (const group of groups) {
		for (const entry of group) {
			merged.set(`${entry.oldPath ?? ""}\u0000${entry.path}`, entry);
		}
	}
	return Array.from(merged.values());
}
|
|
2928
|
+
|
|
2929
|
+
/**
 * Builds a trusted change set for a checkpoint from git: paths committed since
 * the resolved base, plus unstaged and staged working-tree changes.
 *
 * @param cwd Directory git is run in.
 * @returns The change set, or `undefined` when `cwd` is not inside a work
 *   tree, when no `.git` entry is found directly under `cwd`, or when all
 *   three name-status commands fail.
 */
async function computeCheckpointChangeSet(cwd: string): Promise<UltragoalChangeSet | undefined> {
	const gitOptions = { cwd, timeoutMs: 5000 };

	const insideWorkTree = await spawnText(["git", "rev-parse", "--is-inside-work-tree"], { cwd, timeoutMs: 3000 });
	if (!insideWorkTree.ok || insideWorkTree.stdout.trim() !== "true") {
		return undefined;
	}
	// NOTE(review): confirm that this file-existence probe reports true when
	// `.git` is a regular directory and not only when it is a file.
	const gitEntryExists = await Bun.file(path.join(cwd, ".git")).exists();
	if (!gitEntryExists) {
		return undefined;
	}

	const baseRef = await resolveGitBase(cwd);
	const mergeBase = await spawnText(["git", "merge-base", "HEAD", baseRef], { cwd, timeoutMs: 3000 });
	const committedRange = `${baseRef}...HEAD`;
	const [committed, unstaged, staged, stat] = await Promise.all([
		spawnText(["git", "diff", "--name-status", committedRange], gitOptions),
		spawnText(["git", "diff", "--name-status"], gitOptions),
		spawnText(["git", "diff", "--cached", "--name-status"], gitOptions),
		spawnText(["git", "diff", "--stat", committedRange], gitOptions),
	]);
	// Give up only when none of the three path listings could be produced.
	const anyListingOk = committed.ok || unstaged.ok || staged.ok;
	if (!anyListingOk) {
		return undefined;
	}

	const resolvedMergeBase = mergeBase.ok && mergeBase.stdout.trim() ? mergeBase.stdout.trim() : undefined;
	const paths = mergeChangeSetPaths([
		parseGitNameStatus(committed.stdout),
		parseGitNameStatus(unstaged.stdout),
		parseGitNameStatus(staged.stdout),
	]);
	return {
		source: "checkpoint-git",
		baseRef,
		mergeBase: resolvedMergeBase,
		headRef: "HEAD",
		paths,
		rawDiffStat: stat.stdout,
		trusted: true,
	};
}
|
|
2957
|
+
|
|
2958
|
+
/**
 * Extracts changed paths from the `diff --git a/<old> b/<new>` headers of a
 * unified diff.
 *
 * A header whose two paths are identical is recognised by its symmetric shape
 * (`a/<p> b/<p>`), so an unchanged path that itself contains " b/" is not
 * misread as a rename. Other headers fall back to splitting at the first
 * " b/". A trailing carriage return on the header is ignored so CRLF diffs
 * still yield paths.
 *
 * @param diff Unified diff text.
 * @returns One row per header: `"modified"` when both paths match, `"renamed"` otherwise.
 */
function parseUnifiedDiffPaths(diff: string): UltragoalChangeSetPath[] {
	const headerPrefix = "diff --git ";
	const paths: UltragoalChangeSetPath[] = [];
	for (const rawLine of diff.split("\n")) {
		if (!rawLine.startsWith(headerPrefix)) continue;
		const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
		const rest = line.slice(headerPrefix.length);

		let rawOld: string | undefined;
		let rawNew: string | undefined;

		// "a/" + p + " b/" + p has length 2 * p.length + 5.
		const sameLength = (rest.length - 5) / 2;
		if (
			Number.isInteger(sameLength) &&
			sameLength > 0 &&
			rest.startsWith("a/") &&
			rest.slice(2 + sameLength, 5 + sameLength) === " b/" &&
			rest.slice(2, 2 + sameLength) === rest.slice(5 + sameLength)
		) {
			rawOld = rest.slice(2, 2 + sameLength);
			rawNew = rawOld;
		} else {
			const match = /^a\/(.+?) b\/(.+)$/.exec(rest);
			if (!match) continue;
			rawOld = match[1];
			rawNew = match[2];
		}
		if (rawOld === undefined || rawNew === undefined) continue;

		const oldPath = normalizeRepoPath(rawOld);
		const newPath = normalizeRepoPath(rawNew);
		const unchangedPath = oldPath === newPath;
		paths.push({
			path: newPath,
			oldPath: unchangedPath ? undefined : oldPath,
			status: unchangedPath ? "modified" : "renamed",
			category: categorizeComputerChangePath(newPath),
		});
	}
	return paths;
}
|
|
2975
|
+
|
|
2976
|
+
/**
 * Derives a trusted change set from a resolved review source.
 *
 * - `spec`: an empty change set.
 * - `pr` with a string `diff`: paths parsed from the unified diff headers.
 * - `pr` with an object `local`: whatever the nested local source yields.
 * - `worktree`: paths parsed from `nameStatus`, falling back to `status`.
 * - `branch` / `pr-fallback`: paths parsed from `nameStatus`.
 *
 * @param source Review source object.
 * @returns The change set, or `undefined` for any other source shape.
 */
function changeSetFromReviewSource(source: JsonObject): UltragoalChangeSet | undefined {
	const kind = nonEmptyString(source.kind);

	if (kind === "spec") {
		return { source: "review-spec", paths: [], trusted: true };
	}

	if (kind === "pr" && typeof source.diff === "string") {
		const diffText = source.diff;
		return {
			source: "review-pr",
			paths: parseUnifiedDiffPaths(diffText),
			rawDiffStat: diffText,
			trusted: true,
		};
	}

	const local = qualityGateObject(source.local);
	if (kind === "pr" && local) {
		return changeSetFromReviewSource(local);
	}

	if (kind === "worktree") {
		const listing = String(source.nameStatus ?? source.status ?? "");
		return {
			source: "review-worktree",
			paths: parseGitNameStatus(listing),
			rawDiffStat: String(source.diffStat ?? ""),
			trusted: true,
		};
	}

	if (kind === "branch" || kind === "pr-fallback") {
		const listing = String(source.nameStatus ?? "");
		return {
			source: "review-branch",
			baseRef: nonEmptyString(source.base) ?? undefined,
			headRef: "HEAD",
			paths: parseGitNameStatus(listing),
			rawDiffStat: String(source.diffStat ?? ""),
			trusted: true,
		};
	}

	return undefined;
}
|
|
2695
3006
|
|
|
2696
3007
|
async function localDiffSource(cwd: string, sourceKind: string, branch?: string): Promise<JsonObject> {
|
|
2697
3008
|
if (sourceKind === "worktree") {
|
|
2698
|
-
const [status, diff] = await Promise.all([
|
|
3009
|
+
const [status, diff, unstaged, staged] = await Promise.all([
|
|
2699
3010
|
spawnText(["git", "status", "--short"], { cwd, timeoutMs: 5000 }),
|
|
2700
3011
|
spawnText(["git", "diff", "--stat"], { cwd, timeoutMs: 5000 }),
|
|
3012
|
+
spawnText(["git", "diff", "--name-status"], { cwd, timeoutMs: 5000 }),
|
|
3013
|
+
spawnText(["git", "diff", "--cached", "--name-status"], { cwd, timeoutMs: 5000 }),
|
|
2701
3014
|
]);
|
|
2702
|
-
return {
|
|
3015
|
+
return {
|
|
3016
|
+
kind: "worktree",
|
|
3017
|
+
status: status.stdout,
|
|
3018
|
+
diffStat: diff.stdout,
|
|
3019
|
+
nameStatus: `${unstaged.stdout}\n${staged.stdout}`,
|
|
3020
|
+
};
|
|
2703
3021
|
}
|
|
2704
3022
|
const base = await resolveGitBase(cwd, branch);
|
|
2705
|
-
const diff = await
|
|
2706
|
-
|
|
3023
|
+
const [diff, nameStatus] = await Promise.all([
|
|
3024
|
+
spawnText(["git", "diff", "--stat", `${base}...HEAD`], { cwd, timeoutMs: 5000 }),
|
|
3025
|
+
spawnText(["git", "diff", "--name-status", `${base}...HEAD`], { cwd, timeoutMs: 5000 }),
|
|
3026
|
+
]);
|
|
3027
|
+
return { kind: sourceKind, base, branch, diffStat: diff.stdout, nameStatus: nameStatus.stdout };
|
|
2707
3028
|
}
|
|
2708
3029
|
|
|
2709
3030
|
async function resolveReviewSource(
|
|
@@ -2817,13 +3138,14 @@ export async function runUltragoalReview(cwd: string, args: readonly string[]):
|
|
|
2817
3138
|
const mode = parseReviewMode(flagValue(args, "--mode"));
|
|
2818
3139
|
const specPath = flagValue(args, "--spec");
|
|
2819
3140
|
const { contractStrength, source } = await resolveReviewSource(cwd, args, specPath);
|
|
3141
|
+
const changeSet = changeSetFromReviewSource(source);
|
|
2820
3142
|
const executorQa = await readOptionalExecutorQa(
|
|
2821
3143
|
cwd,
|
|
2822
3144
|
flagValue(args, "--executor-qa-json") ?? flagValue(args, "--executor-qa"),
|
|
2823
3145
|
);
|
|
2824
3146
|
const findings: UltragoalReviewFinding[] = [];
|
|
2825
3147
|
try {
|
|
2826
|
-
await validateExecutorQaRedTeamEvidenceForReview(cwd, executorQa, { mode: "review" });
|
|
3148
|
+
await validateExecutorQaRedTeamEvidenceForReview(cwd, executorQa, { mode: "review", changeSet });
|
|
2827
3149
|
} catch (error) {
|
|
2828
3150
|
findings.push(findingFromError(error));
|
|
2829
3151
|
}
|
package/src/goals/runtime.ts
CHANGED
|
@@ -350,12 +350,13 @@ export class GoalRuntime {
|
|
|
350
350
|
await this.#flushUsageLocked();
|
|
351
351
|
const state = this.#getStateClone();
|
|
352
352
|
if (!state?.goal) return undefined;
|
|
353
|
+
if (state.goal.status !== "active") {
|
|
354
|
+
throw new Error(`cannot pause a goal that is not active (current status: ${state.goal.status})`);
|
|
355
|
+
}
|
|
353
356
|
state.enabled = false;
|
|
354
357
|
state.mode = "active";
|
|
355
358
|
state.reason = undefined;
|
|
356
|
-
|
|
357
|
-
state.goal.status = "paused";
|
|
358
|
-
}
|
|
359
|
+
state.goal.status = "paused";
|
|
359
360
|
state.goal.updatedAt = this.#now();
|
|
360
361
|
this.#clearActiveAccounting();
|
|
361
362
|
await this.#commitState(state, { persist: "goal_paused" });
|
package/src/goals/state.ts
CHANGED
|
@@ -17,9 +17,9 @@ import type { Goal, GoalStatus, GoalToolDetails } from "../state";
|
|
|
17
17
|
|
|
18
18
|
const goalSchema = z.object({
|
|
19
19
|
op: z
|
|
20
|
-
.enum(["create", "get", "complete", "resume", "drop"])
|
|
20
|
+
.enum(["create", "get", "complete", "resume", "drop", "pause"])
|
|
21
21
|
.describe(
|
|
22
|
-
"op: get | create | complete | drop | resume — drop clears the active goal without exiting goal mode (tool stays callable for the next create)",
|
|
22
|
+
"op: get | create | complete | drop | resume | pause — drop clears the active goal without exiting goal mode (tool stays callable for the next create); pause parks an active goal whose remaining work is blocked on human input so the autonomous continuation loop stops until resume",
|
|
23
23
|
),
|
|
24
24
|
objective: z.string().describe("goal objective").optional(),
|
|
25
25
|
});
|
|
@@ -66,8 +66,15 @@ function buildGoalToolResult(op: GoalToolDetails["op"], response: GoalToolRespon
|
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
function assertGoalOperationAllowed(session: ToolSession, op: GoalToolInput["op"]): void {
|
|
70
|
+
const allowedOps = session.goalToolAllowedOps;
|
|
71
|
+
if (!allowedOps || allowedOps.includes(op)) return;
|
|
72
|
+
throw new ToolError(`Goal mode in this session only allows goal operations: ${allowedOps.join(", ")}.`);
|
|
73
|
+
}
|
|
74
|
+
|
|
69
75
|
async function executeGoalOperation(session: ToolSession, params: GoalToolInput): Promise<GoalToolResponse> {
|
|
70
76
|
rejectUnsupportedGoalArgs(params as Record<string, unknown>);
|
|
77
|
+
assertGoalOperationAllowed(session, params.op);
|
|
71
78
|
if (params.op === "get") {
|
|
72
79
|
const state = session.getGoalModeState?.();
|
|
73
80
|
return buildGoalToolResponse(state?.goal ?? null);
|
|
@@ -86,6 +93,10 @@ async function executeGoalOperation(session: ToolSession, params: GoalToolInput)
|
|
|
86
93
|
const resumed = await runtime.resumeGoal();
|
|
87
94
|
return buildGoalToolResponse(resumed.goal);
|
|
88
95
|
}
|
|
96
|
+
if (params.op === "pause") {
|
|
97
|
+
const paused = await runtime.pauseGoal();
|
|
98
|
+
return buildGoalToolResponse(paused?.goal ?? null);
|
|
99
|
+
}
|
|
89
100
|
if (params.op === "drop") {
|
|
90
101
|
const dropped = await runtime.dropGoal();
|
|
91
102
|
return buildGoalToolResponse(dropped ?? null);
|
|
@@ -121,7 +132,7 @@ export class GoalTool implements AgentTool<typeof goalSchema, GoalToolDetails> {
|
|
|
121
132
|
_context?: AgentToolContext,
|
|
122
133
|
): Promise<AgentToolResult<GoalToolDetails>> {
|
|
123
134
|
const response = await executeGoalOperation(this.#session, params);
|
|
124
|
-
return buildGoalToolResult(params.op, response);
|
|
135
|
+
return buildGoalToolResult(params.op as GoalToolDetails["op"], response);
|
|
125
136
|
}
|
|
126
137
|
}
|
|
127
138
|
|
|
@@ -135,6 +146,8 @@ function describeOp(op: string | undefined): string {
|
|
|
135
146
|
return "check";
|
|
136
147
|
case "resume":
|
|
137
148
|
return "resume";
|
|
149
|
+
case "pause":
|
|
150
|
+
return "pause";
|
|
138
151
|
case "drop":
|
|
139
152
|
return "drop";
|
|
140
153
|
default:
|