gsd-pi 2.63.0-dev.351157b → 2.63.0-dev.d04bbc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +4 -0
- package/dist/headless-query.js +11 -1
- package/dist/resources/extensions/gsd/auto/detect-stuck.js +27 -0
- package/dist/resources/extensions/gsd/auto/phases.js +34 -0
- package/dist/resources/extensions/gsd/auto/session.js +4 -0
- package/dist/resources/extensions/gsd/auto-model-selection.js +32 -0
- package/dist/resources/extensions/gsd/auto-post-unit.js +79 -0
- package/dist/resources/extensions/gsd/auto-timers.js +2 -1
- package/dist/resources/extensions/gsd/bootstrap/db-tools.js +87 -28
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +23 -0
- package/dist/resources/extensions/gsd/bootstrap/system-context.js +30 -2
- package/dist/resources/extensions/gsd/preferences-types.js +1 -0
- package/dist/resources/extensions/gsd/prompt-loader.js +7 -0
- package/dist/resources/extensions/gsd/prompts/system.md +3 -7
- package/dist/resources/extensions/gsd/safety/content-validator.js +73 -0
- package/dist/resources/extensions/gsd/safety/destructive-guard.js +34 -0
- package/dist/resources/extensions/gsd/safety/evidence-collector.js +109 -0
- package/dist/resources/extensions/gsd/safety/evidence-cross-ref.js +83 -0
- package/dist/resources/extensions/gsd/safety/file-change-validator.js +71 -0
- package/dist/resources/extensions/gsd/safety/git-checkpoint.js +91 -0
- package/dist/resources/extensions/gsd/safety/safety-harness.js +64 -0
- package/dist/resources/extensions/ollama/index.js +22 -10
- package/dist/resources/extensions/ollama/ollama-chat-provider.js +1 -1
- package/dist/update-cmd.js +4 -2
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +18 -18
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/server/app/_global-error.html +2 -2
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +18 -18
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +2 -2
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/dist/welcome-screen.js +1 -1
- package/package.json +1 -1
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts +2 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.d.ts.map +1 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js +46 -0
- package/packages/pi-coding-agent/dist/core/extensions/provider-registration.test.js.map +1 -0
- package/packages/pi-coding-agent/dist/core/model-registry.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/model-registry.js +11 -0
- package/packages/pi-coding-agent/dist/core/model-registry.js.map +1 -1
- package/packages/pi-coding-agent/dist/core/sdk.d.ts.map +1 -1
- package/packages/pi-coding-agent/dist/core/sdk.js +2 -3
- package/packages/pi-coding-agent/dist/core/sdk.js.map +1 -1
- package/packages/pi-coding-agent/src/core/extensions/provider-registration.test.ts +81 -0
- package/packages/pi-coding-agent/src/core/model-registry.ts +12 -0
- package/packages/pi-coding-agent/src/core/sdk.ts +2 -3
- package/src/resources/extensions/gsd/auto/detect-stuck.ts +27 -0
- package/src/resources/extensions/gsd/auto/phases.ts +39 -0
- package/src/resources/extensions/gsd/auto/session.ts +5 -0
- package/src/resources/extensions/gsd/auto-model-selection.ts +36 -0
- package/src/resources/extensions/gsd/auto-post-unit.ts +88 -0
- package/src/resources/extensions/gsd/auto-timers.ts +2 -1
- package/src/resources/extensions/gsd/bootstrap/db-tools.ts +86 -28
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +27 -0
- package/src/resources/extensions/gsd/bootstrap/system-context.ts +31 -2
- package/src/resources/extensions/gsd/preferences-types.ts +13 -0
- package/src/resources/extensions/gsd/prompt-loader.ts +8 -0
- package/src/resources/extensions/gsd/prompts/system.md +3 -7
- package/src/resources/extensions/gsd/safety/content-validator.ts +98 -0
- package/src/resources/extensions/gsd/safety/destructive-guard.ts +49 -0
- package/src/resources/extensions/gsd/safety/evidence-collector.ts +151 -0
- package/src/resources/extensions/gsd/safety/evidence-cross-ref.ts +120 -0
- package/src/resources/extensions/gsd/safety/file-change-validator.ts +108 -0
- package/src/resources/extensions/gsd/safety/git-checkpoint.ts +106 -0
- package/src/resources/extensions/gsd/safety/safety-harness.ts +105 -0
- package/src/resources/extensions/gsd/tests/complete-slice-string-coercion.test.ts +211 -0
- package/src/resources/extensions/gsd/tests/flat-rate-routing-guard.test.ts +50 -0
- package/src/resources/extensions/gsd/tests/git-checkpoint.test.ts +94 -0
- package/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts +42 -0
- package/src/resources/extensions/gsd/workflow-logger.ts +2 -1
- package/src/resources/extensions/ollama/index.ts +20 -11
- package/src/resources/extensions/ollama/ollama-auth-mode.test.ts +20 -0
- package/src/resources/extensions/ollama/ollama-chat-provider.ts +1 -1
- package/src/resources/extensions/ollama/tests/ollama-chat-provider-stream.test.ts +82 -0
- /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → vIq9fmvRUaFOpguoX5j4W}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{QmuF-eAbuU_2MQ03t38qr → vIq9fmvRUaFOpguoX5j4W}/_ssgManifest.js +0 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lightweight content validator for auto-mode safety harness.
|
|
3
|
+
* Validates that high-value unit outputs contain minimum expected content.
|
|
4
|
+
*
|
|
5
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
9
|
+
import { logWarning } from "../workflow-logger.js";
|
|
10
|
+
|
|
11
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/** A single content-quality finding reported by one of the unit validators. */
export interface ContentViolation {
  /** Only the literal "warning" is permitted by this type. */
  severity: "warning";
  /** Human-readable description of what the artifact is missing. */
  reason: string;
}
|
|
17
|
+
|
|
18
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Validate content quality for a completed unit.
 *
 * Looks up the validator registered for `unitType`, reads the artifact as
 * UTF-8 and runs the validator over its text.
 *
 * @param unitType - The type of unit that completed (e.g. "plan-slice")
 * @param artifactPath - Path to the primary artifact file, or null if none
 * @returns The violations found. An empty array is returned when the path is
 *   missing, the file does not exist, no validator is registered for the unit
 *   type, or reading/validating throws (the failure is logged as a warning).
 */
export function validateContent(
  unitType: string,
  artifactPath: string | null,
): ContentViolation[] {
  if (!artifactPath || !existsSync(artifactPath)) return [];

  // Own-property check: indexing a plain object with an arbitrary string would
  // otherwise resolve inherited names such as "toString" or "constructor" to
  // Object.prototype members and call them as if they were validators.
  if (!Object.prototype.hasOwnProperty.call(VALIDATORS, unitType)) return [];
  const validator = VALIDATORS[unitType];
  if (!validator) return [];

  try {
    const content = readFileSync(artifactPath, "utf-8");
    return validator(content);
  } catch (e) {
    // Anything can be thrown; only Error instances carry a message.
    const detail = e instanceof Error ? e.message : String(e);
    logWarning("safety", `content validation read failed: ${detail}`);
    return [];
  }
}
|
|
44
|
+
|
|
45
|
+
// ─── Validators ─────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/** Signature shared by all validators: artifact text in, violations out. */
type ContentValidatorFn = (content: string) => ContentViolation[];

/**
 * Dispatch table from unit type to its content validator.
 * Unit types without an entry here are not validated.
 */
const VALIDATORS: Record<string, ContentValidatorFn> = {
  "plan-slice": validatePlanSlice,
  "plan-milestone": validatePlanMilestone,
};
|
|
53
|
+
|
|
54
|
+
/**
 * Check a slice plan for the minimum expected structure.
 *
 * Three independent checks are made, each producing at most one warning:
 * at least two task checkboxes, a "Files" section heading, and some mention
 * of verification.
 *
 * @param content - Full text of the slice plan artifact
 * @returns One warning per failed check, in the order listed above
 */
function validatePlanSlice(content: string): ContentViolation[] {
  const violations: ContentViolation[] = [];

  // Task entries look like "- [ ] **T01 ...". Markdown checkboxes may be ticked
  // with either "x" or "X", so both are counted.
  const taskCount = (content.match(/- \[[ xX]\] \*\*T\d+/g) ?? []).length;
  if (taskCount < 2) {
    violations.push({
      severity: "warning",
      reason: `Slice plan has only ${taskCount} task(s) — expected at least 2`,
    });
  }

  // "## Files" is a prefix of "## Files Likely Touched", so one substring test
  // covers both heading variants.
  if (!content.includes("## Files")) {
    violations.push({
      severity: "warning",
      reason: "Slice plan missing 'Files Likely Touched' section",
    });
  }

  // Case-insensitive so "VERIFY" and similar casings are accepted as well.
  if (!/verify/i.test(content)) {
    violations.push({
      severity: "warning",
      reason: "Slice plan has no verification instructions",
    });
  }

  return violations;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Check a milestone roadmap for at least one slice heading.
 *
 * A slice heading is "##" followed by whitespace and "S<digits>".
 *
 * @param content - Full text of the milestone roadmap artifact
 * @returns A single warning when no slice heading is present, otherwise []
 */
function validatePlanMilestone(content: string): ContentViolation[] {
  const sliceHeadings = content.match(/##\s+S\d+/g);
  const sliceCount = sliceHeadings === null ? 0 : sliceHeadings.length;

  if (sliceCount >= 1) return [];

  return [
    {
      severity: "warning",
      reason: `Milestone roadmap has ${sliceCount} slice(s) — expected at least 1`,
    },
  ];
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Destructive command classifier for auto-mode safety harness.
|
|
3
|
+
* Classifies bash commands and warns on potentially destructive operations.
|
|
4
|
+
* Does NOT block — only classifies for logging/notification.
|
|
5
|
+
*
|
|
6
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ─── Pattern Definitions ────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/** A regular expression paired with the label reported when it matches. */
interface DestructivePattern {
  pattern: RegExp;
  label: string;
}

/**
 * Patterns tested against a raw bash command string.
 *
 * None of the expressions use the `g` or `y` flag, so `RegExp.test` keeps no
 * state between calls and the table can be shared safely.
 */
const DESTRUCTIVE_PATTERNS: readonly DestructivePattern[] = [
  { pattern: /\brm\s+(-[^\s]*[rfRF][^\s]*\s+|.*\s+-[^\s]*[rfRF])/, label: "recursive delete" },
  { pattern: /\bgit\s+push\s+.*--force/, label: "force push" },
  // `-f` may follow the remote/refspec ("git push origin main -f"). The skipped
  // span excludes | ; & so a flag belonging to a chained command is not
  // attributed to the push.
  { pattern: /\bgit\s+push\s+(?:[^|;&]*\s)?-f\b/, label: "force push" },
  { pattern: /\bgit\s+reset\s+--hard/, label: "hard reset" },
  { pattern: /\bgit\s+clean\s+-[^\s]*[fdxFDX]/, label: "git clean" },
  { pattern: /\bgit\s+checkout\s+--\s+\./, label: "discard all changes" },
  { pattern: /\bdrop\s+(database|table|index)\b/i, label: "SQL drop" },
  { pattern: /\btruncate\s+table\b/i, label: "SQL truncate" },
  // Allow option flags before the mode ("chmod -R 777") and the 4-digit form.
  { pattern: /\bchmod\s+(?:-\S+\s+)*0?777\b/, label: "world-writable permissions" },
  // Downloaders piped into a shell, optionally through sudo.
  { pattern: /\b(?:curl|wget)\s.*\|\s*(?:sudo\s+)?(?:bash|sh|zsh)\b/, label: "pipe to shell" },
];
|
|
28
|
+
|
|
29
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
30
|
+
|
|
31
|
+
/** Result of classifying one bash command. */
export interface CommandClassification {
  /** True when at least one destructive pattern matched. */
  destructive: boolean;
  /** Distinct labels of the patterns that matched, in pattern-table order. */
  labels: string[];
}
|
|
35
|
+
|
|
36
|
+
/**
 * Classify a bash command against the destructive-pattern table.
 *
 * Every pattern is tested; each label is reported once even when several
 * patterns share it, in the order the label was first matched.
 *
 * @param command - Raw command string as it will be passed to the shell
 * @returns The matched labels and whether any pattern matched
 */
export function classifyCommand(command: string): CommandClassification {
  // A Set keeps first-insertion order, which gives de-duplication for free.
  const matched = new Set<string>();
  for (const entry of DESTRUCTIVE_PATTERNS) {
    if (entry.pattern.test(command)) {
      matched.add(entry.label);
    }
  }

  const labels = [...matched];
  return { destructive: labels.length > 0, labels };
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real-time tool call evidence collector for auto-mode safety harness.
|
|
3
|
+
* Tracks every bash command, file write, and file edit during a unit execution.
|
|
4
|
+
* Evidence is compared against LLM completion claims in evidence-cross-ref.ts.
|
|
5
|
+
*
|
|
6
|
+
* Follows the same module-level Map pattern as auto-tool-tracking.ts.
|
|
7
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
/** Evidence of one bash tool call. */
export interface BashEvidence {
  kind: "bash";
  /** Empty string until the matching tool result has been recorded. */
  toolCallId: string;
  /** Command text taken from the tool input (`input.command`). */
  command: string;
  /** -1 until the tool result has been recorded. */
  exitCode: number;
  /** Leading part of the result text (at most 500 characters). */
  outputSnippet: string;
  /** `Date.now()` at the time the call was recorded. */
  timestamp: number;
}

/** Evidence of one file-write tool call. */
export interface FileWriteEvidence {
  kind: "write";
  /** Empty string until the matching tool result has been recorded. */
  toolCallId: string;
  /** Target path taken from `input.file_path`, falling back to `input.path`. */
  path: string;
  /** `Date.now()` at the time the call was recorded. */
  timestamp: number;
}

/** Evidence of one file-edit tool call. */
export interface FileEditEvidence {
  kind: "edit";
  /** Empty string until the matching tool result has been recorded. */
  toolCallId: string;
  /** Target path taken from `input.file_path`, falling back to `input.path`. */
  path: string;
  /** `Date.now()` at the time the call was recorded. */
  timestamp: number;
}

/** Any recorded evidence entry; discriminate on `kind`. */
export type EvidenceEntry = BashEvidence | FileWriteEvidence | FileEditEvidence;
|
|
36
|
+
|
|
37
|
+
// ─── Module State ───────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
/** Evidence recorded for the unit currently executing, in call order. */
let unitEvidence: EvidenceEntry[] = [];

// ─── Public API ─────────────────────────────────────────────────────────────

/** Start a fresh, empty evidence list. Intended to be called at unit start. */
export function resetEvidence(): void {
  unitEvidence = [];
}

/** Return the live evidence list for the current unit, typed as read-only. */
export function getEvidence(): readonly EvidenceEntry[] {
  return unitEvidence;
}

/** Return a new array holding only the bash entries, in recorded order. */
export function getBashEvidence(): readonly BashEvidence[] {
  const bashOnly: BashEvidence[] = [];
  for (const entry of unitEvidence) {
    if (entry.kind === "bash") bashOnly.push(entry);
  }
  return bashOnly;
}

/** Return the paths of every write and edit entry, in recorded order. */
export function getFilePaths(): string[] {
  const paths: string[] = [];
  for (const entry of unitEvidence) {
    if (entry.kind === "write" || entry.kind === "edit") {
      paths.push(entry.path);
    }
  }
  return paths;
}
|
|
64
|
+
|
|
65
|
+
// ─── Recording (called from register-hooks.ts) ─────────────────────────────
|
|
66
|
+
|
|
67
|
+
/**
 * Record a tool call at dispatch time (before execution).
 *
 * Only bash, write and edit calls are tracked; any other tool is ignored.
 * The tool name is compared case-insensitively, matching recordToolResult,
 * so a call and its result are always classified the same way.
 *
 * Entries start unresolved: `toolCallId` is "" and, for bash, `exitCode` is -1
 * and `outputSnippet` is "". recordToolResult fills these in afterwards.
 *
 * @param toolName - Name of the tool being invoked
 * @param input - Tool input; `command` is read for bash, `file_path` (or
 *   `path` as a fallback) for write and edit
 */
export function recordToolCall(toolName: string, input: Record<string, unknown>): void {
  const normalizedName = toolName.toLowerCase();

  if (normalizedName === "bash") {
    unitEvidence.push({
      kind: "bash",
      toolCallId: "",
      command: String(input.command ?? ""),
      exitCode: -1,
      outputSnippet: "",
      timestamp: Date.now(),
    });
    return;
  }

  if (normalizedName === "write" || normalizedName === "edit") {
    // Write and edit entries have the same shape apart from `kind`.
    unitEvidence.push({
      kind: normalizedName,
      toolCallId: "",
      path: String(input.file_path ?? input.path ?? ""),
      timestamp: Date.now(),
    });
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Attach an execution result to a previously recorded tool call.
 *
 * The newest entry of the same kind that has no tool call id yet is resolved.
 * For bash, the output snippet (first 500 characters) and exit code are
 * filled in too: the exit code comes from a "Command exited with code N"
 * line when present, otherwise 1 if `isError` is set and 0 if not.
 * Results for tools other than bash, write and edit are ignored, as are
 * results with no unresolved entry to attach to.
 *
 * @param toolCallId - Identifier of the completed tool call
 * @param toolName - Tool name; compared case-insensitively
 * @param result - Raw tool result
 * @param isError - Whether the tool reported a failure
 */
export function recordToolResult(
  toolCallId: string,
  toolName: string,
  result: unknown,
  isError: boolean,
): void {
  const kind = toolName.toLowerCase();

  switch (kind) {
    case "bash": {
      const pending = findLastUnresolved("bash") as BashEvidence | undefined;
      if (!pending) return;

      pending.toolCallId = toolCallId;
      const output = extractResultText(result);
      pending.outputSnippet = output.slice(0, 500);

      const reported = output.match(/Command exited with code (\d+)/);
      if (reported) {
        pending.exitCode = Number(reported[1]);
      } else {
        pending.exitCode = isError ? 1 : 0;
      }
      return;
    }
    case "write":
    case "edit": {
      const pending = findLastUnresolved(kind);
      if (pending) {
        pending.toolCallId = toolCallId;
      }
      return;
    }
    default:
      return;
  }
}
|
|
126
|
+
|
|
127
|
+
// ─── Internals ──────────────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
/**
 * Find the newest evidence entry of the given kind that has no tool call id.
 *
 * @param kind - Evidence kind to look for ("bash", "write" or "edit")
 * @returns The matching entry, or undefined when every entry of that kind is
 *   already resolved (or none exists)
 */
function findLastUnresolved(kind: string): EvidenceEntry | undefined {
  // Walk backwards so the most recently recorded entry wins.
  let index = unitEvidence.length;
  while (index-- > 0) {
    const candidate = unitEvidence[index];
    if (candidate.kind === kind && candidate.toolCallId === "") {
      return candidate;
    }
  }
  return undefined;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Extract displayable text from a tool result of unknown shape.
 *
 * Resolution order:
 *  1. a string result is returned unchanged;
 *  2. an object with a `content` array yields the text of all of its
 *     `{ type: "text", text: string }` blocks, joined with newlines;
 *  3. an object with a string `text` property yields that property;
 *  4. null/undefined yield ""; other objects are serialized as JSON;
 *     remaining primitives are converted with `String`.
 *
 * All text blocks are used (not only the first) so that a trailing block such
 * as an exit-status line is not lost. JSON is used for the object fallback
 * because `String(obj)` would produce the uninformative "[object Object]".
 *
 * @param result - Raw tool result
 * @returns The extracted text; never throws
 */
function extractResultText(result: unknown): string {
  if (typeof result === "string") return result;
  if (result === null || result === undefined) return "";
  if (typeof result !== "object") return String(result);

  const record = result as Record<string, unknown>;

  if (Array.isArray(record.content)) {
    const texts: string[] = [];
    for (const block of record.content as unknown[]) {
      if (typeof block !== "object" || block === null) continue;
      const candidate = block as Record<string, unknown>;
      if (candidate.type === "text" && typeof candidate.text === "string") {
        texts.push(candidate.text);
      }
    }
    if (texts.length > 0) return texts.join("\n");
  }

  if (typeof record.text === "string") return record.text;

  try {
    // JSON.stringify returns undefined for some inputs and throws on cycles.
    return JSON.stringify(result) ?? String(result);
  } catch {
    return String(result);
  }
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evidence cross-reference for auto-mode safety harness.
|
|
3
|
+
* Compares the LLM's claimed verification evidence (command + exitCode)
|
|
4
|
+
* against actual bash tool calls recorded by the evidence collector.
|
|
5
|
+
*
|
|
6
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { BashEvidence, EvidenceEntry } from "./evidence-collector.js";
|
|
10
|
+
|
|
11
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
12
|
+
|
|
13
|
+
/** One verification claim made by the LLM when completing a unit. */
export interface ClaimedEvidence {
  /** Command the LLM says it ran. */
  command: string;
  /** Exit code the LLM reports; -1 marks an entry that is skipped. */
  exitCode: number;
  /** Free-text verdict; entries containing "coerced from string" are skipped. */
  verdict: string;
}

/** A discrepancy between a claim and the recorded bash calls. */
export interface EvidenceMismatch {
  /** "warning" when no matching call exists, "error" on an exit-code conflict. */
  severity: "warning" | "error";
  /** The claim that could not be confirmed. */
  claimed: ClaimedEvidence;
  /** The recorded call the claim was matched to, or null if none matched. */
  actual: BashEvidence | null;
  /** Human-readable explanation of the mismatch. */
  reason: string;
}
|
|
25
|
+
|
|
26
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
27
|
+
|
|
28
|
+
/**
 * Cross-reference claimed verification evidence against recorded bash calls.
 *
 * Claims are skipped when their verdict contains "coerced from string", their
 * exit code is -1, or their command is empty or shorter than 3 characters.
 * Each remaining claim is matched to a recorded bash call:
 *  - no match            → "warning" mismatch with `actual: null`;
 *  - claim says exit 0 but the matched call's exit code is not 0
 *                        → "error" mismatch.
 *
 * @param claimedEvidence - Claims extracted from the LLM's completion report
 * @param actualEvidence - Evidence recorded during the unit (any kind)
 * @returns The mismatches found; an empty array means nothing was flagged
 */
export function crossReferenceEvidence(
  claimedEvidence: readonly ClaimedEvidence[],
  actualEvidence: readonly EvidenceEntry[],
): EvidenceMismatch[] {
  const executed: BashEvidence[] = [];
  for (const entry of actualEvidence) {
    if (entry.kind === "bash") executed.push(entry);
  }

  const found: EvidenceMismatch[] = [];

  claimedEvidence.forEach(claim => {
    // Coerced entries carry exitCode -1 and a marker verdict.
    const wasCoerced =
      Boolean(claim.verdict?.includes("coerced from string")) || claim.exitCode === -1;
    if (wasCoerced) return;

    // Too little text to match against anything meaningfully.
    const tooShort = !claim.command || claim.command.length < 3;
    if (tooShort) return;

    const recorded = findBestMatch(claim.command, executed);

    if (!recorded) {
      found.push({
        severity: "warning",
        claimed: claim,
        actual: null,
        reason: `No bash tool call found matching "${claim.command.slice(0, 80)}"`,
      });
      return;
    }

    // Only a claimed success contradicted by a non-zero result is an error.
    const claimsSuccess = claim.exitCode === 0;
    if (claimsSuccess && recorded.exitCode !== 0) {
      found.push({
        severity: "error",
        claimed: claim,
        actual: recorded,
        reason: `Claimed exitCode=0 but actual exitCode=${recorded.exitCode}`,
      });
    }
  });

  return found;
}
|
|
78
|
+
|
|
79
|
+
// ─── Internals ──────────────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
/**
 * Find the recorded bash call that best matches a claimed command.
 *
 * Matching is attempted in three passes, each preferring the MOST RECENT
 * recorded call so that a re-run after a fix supersedes the earlier failure:
 *  1. exact match after trimming both sides;
 *  2. substring match in either direction (the claim may be a shortened form
 *     of what was run, or vice versa);
 *  3. token overlap: the fraction of claimed tokens longer than 2 characters
 *     that also appear in the recorded command must be at least 0.5; the
 *     highest fraction wins.
 *
 * Recorded calls whose command is blank are ignored, because an empty string
 * is a substring of every claim and would otherwise match anything.
 *
 * @param claimedCommand - Command text taken from the LLM's claim
 * @param bashCalls - Recorded bash calls in execution order
 * @returns The best matching call, or null when nothing matches
 */
function findBestMatch(
  claimedCommand: string,
  bashCalls: readonly BashEvidence[],
): BashEvidence | null {
  const normalized = claimedCommand.trim();
  if (normalized === "") return null;

  // Newest first, with trimmed command text and blank commands removed.
  const candidates = bashCalls
    .map(call => ({ call, command: call.command.trim() }))
    .filter(candidate => candidate.command !== "")
    .reverse();

  // Pass 1: exact match.
  const exact = candidates.find(candidate => candidate.command === normalized);
  if (exact) return exact.call;

  // Pass 2: claimed is contained in actual, or actual in claimed.
  const partial = candidates.find(
    candidate =>
      candidate.command.includes(normalized) || normalized.includes(candidate.command),
  );
  if (partial) return partial.call;

  // Pass 3: significant token overlap.
  const claimedTokens = normalized.split(/\s+/).filter(token => token.length > 2);
  if (claimedTokens.length === 0) return null;

  let bestMatch: BashEvidence | null = null;
  let bestScore = 0;

  for (const { call, command } of candidates) {
    const callTokens = new Set(command.split(/\s+/));
    const matchCount = claimedTokens.filter(token => callTokens.has(token)).length;
    const score = matchCount / claimedTokens.length;
    // Strict ">" keeps the newest call when scores tie (newest is seen first).
    if (score >= 0.5 && score > bestScore) {
      bestScore = score;
      bestMatch = call;
    }
  }

  return bestMatch;
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-unit file change validator for auto-mode safety harness.
|
|
3
|
+
* Compares actual git diff against the task plan's expected output files.
|
|
4
|
+
*
|
|
5
|
+
* Uses tasks.expected_output (DB column, populated from per-task ## Expected Output)
|
|
6
|
+
* and tasks.files (from slice PLAN.md - Files: subline) as the expected set.
|
|
7
|
+
* Compares against git diff HEAD~1 --name-only after auto-commit.
|
|
8
|
+
*
|
|
9
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { execFileSync } from "node:child_process";
|
|
13
|
+
import { logWarning } from "../workflow-logger.js";
|
|
14
|
+
|
|
15
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
/** One discrepancy between planned and actually changed files. */
export interface FileViolation {
  /** "warning" for an unplanned change, "info" for a planned file left untouched. */
  severity: "info" | "warning";
  /** Path of the file concerned. */
  file: string;
  /** Human-readable explanation. */
  reason: string;
}

/** Full result of comparing the task plan against the last commit. */
export interface FileChangeAudit {
  /** Planned paths after stripping a leading "./" or "/". */
  expectedFiles: string[];
  /** Files changed in the last commit, excluding anything under .gsd/. */
  actualFiles: string[];
  /** Changed files that were not planned. */
  unexpectedFiles: string[];
  /** Planned files that were not changed. */
  missingFiles: string[];
  /** One entry per unexpected file, followed by one per missing file. */
  violations: FileViolation[];
}
|
|
30
|
+
|
|
31
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * Compare the files changed by the last commit with the files a task planned
 * to touch.
 *
 * Planned paths are the union of `expectedOutput` and `plannedFiles`, with a
 * leading "./" or "/" stripped. Changed paths under `.gsd/` are ignored.
 *
 * @param basePath - Working directory (worktree or project root)
 * @param expectedOutput - Paths from the task's expected output
 * @param plannedFiles - Paths from the task's planned file list
 * @returns The audit, or null when no files were planned or the changed-file
 *   list could not be obtained from git
 */
export function validateFileChanges(
  basePath: string,
  expectedOutput: string[],
  plannedFiles: string[],
): FileChangeAudit | null {
  const planned = new Set<string>([...expectedOutput, ...plannedFiles]);
  // Nothing planned means there is nothing to compare against.
  if (planned.size === 0) return null;

  const changed = getChangedFilesFromLastCommit(basePath);
  if (!changed) return null;

  // Internal .gsd bookkeeping files are not project source.
  const isInternal = (path: string): boolean =>
    path.startsWith(".gsd/") || path.startsWith(".gsd\\");
  const projectFiles = changed.filter(path => !isInternal(path));

  // Strip a leading "./" or "/" from each planned path.
  const normalizedExpected = new Set<string>();
  for (const path of planned) {
    normalizedExpected.add(path.replace(/^\.\//, "").replace(/^\//, ""));
  }
  const expectedFiles = [...normalizedExpected];

  const unexpectedFiles = projectFiles.filter(path => !normalizedExpected.has(path));
  const missingFiles = expectedFiles.filter(path => !projectFiles.includes(path));

  const violations: FileViolation[] = [
    ...unexpectedFiles.map(
      (file): FileViolation => ({
        severity: "warning",
        file,
        reason: "Modified but not in task plan's expected output",
      }),
    ),
    ...missingFiles.map(
      (file): FileViolation => ({
        severity: "info",
        file,
        reason: "Listed in task plan but not modified",
      }),
    ),
  ];

  return {
    expectedFiles,
    actualFiles: projectFiles,
    unexpectedFiles,
    missingFiles,
    violations,
  };
}
|
|
93
|
+
|
|
94
|
+
// ─── Internals ──────────────────────────────────────────────────────────────
|
|
95
|
+
|
|
96
|
+
/**
 * List the files that differ between HEAD~1 and HEAD.
 *
 * Runs `git diff --name-only HEAD~1 HEAD` in `basePath`.
 *
 * @param basePath - Directory in which git is executed
 * @returns The changed paths (empty when the diff is empty), or null when the
 *   git command fails; the failure is logged as a warning
 */
function getChangedFilesFromLastCommit(basePath: string): string[] | null {
  let listing: string;
  try {
    listing = execFileSync(
      "git",
      ["diff", "--name-only", "HEAD~1", "HEAD"],
      { cwd: basePath, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8" },
    ).trim();
  } catch (e) {
    logWarning("safety", `git diff failed in file-change-validator: ${(e as Error).message}`);
    return null;
  }

  if (!listing) return [];
  // One path per line; drop any empty lines.
  return listing.split("\n").filter(Boolean);
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-unit git checkpoint and rollback for auto-mode safety harness.
|
|
3
|
+
* Uses the existing refs/gsd/ namespace (already pruned by doctor).
|
|
4
|
+
*
|
|
5
|
+
* Creates a lightweight ref at HEAD before unit execution. On failure,
|
|
6
|
+
* the ref can be used to rollback the branch to the pre-unit state.
|
|
7
|
+
*
|
|
8
|
+
* Copyright (c) 2026 Jeremy McSpadden <jeremy@fluxlabs.net>
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { execFileSync } from "node:child_process";
|
|
12
|
+
import { logWarning } from "../workflow-logger.js";
|
|
13
|
+
|
|
14
|
+
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/** Ref namespace under which checkpoint refs are created; the sanitized unit id is appended. */
const CHECKPOINT_PREFIX = "refs/gsd/checkpoints/";
|
|
17
|
+
|
|
18
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Record the current HEAD under a checkpoint ref named after the unit.
 *
 * The ref name is the checkpoint prefix followed by `unitId` with every "/"
 * replaced by "-".
 *
 * @param basePath - Directory in which git is executed
 * @param unitId - Identifier of the unit about to run
 * @returns The SHA of HEAD, or null when HEAD cannot be resolved to at least
 *   7 characters or a git command fails (the failure is logged as a warning)
 */
export function createCheckpoint(basePath: string, unitId: string): string | null {
  try {
    const headSha = execFileSync("git", ["rev-parse", "HEAD"], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
      encoding: "utf-8",
    }).trim();

    const looksLikeSha = Boolean(headSha) && headSha.length >= 7;
    if (!looksLikeSha) return null;

    // "/" would create nested ref path segments, so flatten it to "-".
    const refName = `${CHECKPOINT_PREFIX}${unitId.split("/").join("-")}`;

    execFileSync("git", ["update-ref", refName, headSha], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });

    return headSha;
  } catch (e) {
    logWarning("safety", `checkpoint creation failed: ${(e as Error).message}`);
    return null;
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Roll the currently checked-out branch back to a checkpoint SHA.
 *
 * Refuses to act on a detached HEAD. Otherwise runs `git reset --hard <sha>`
 * and then removes the unit's checkpoint ref.
 *
 * WARNING: destructive. The hard reset discards commits and tracked changes
 * made since the checkpoint. Only call when the user has opted in via
 * safety_harness.auto_rollback or an explicit manual trigger.
 *
 * @param basePath - Directory in which git is executed
 * @param unitId - Identifier of the unit whose checkpoint ref is removed
 * @param sha - Commit to reset to
 * @returns true on success; false on detached HEAD or when a git command
 *   fails (both cases are logged as warnings)
 */
export function rollbackToCheckpoint(
  basePath: string,
  unitId: string,
  sha: string,
): boolean {
  try {
    // `--abbrev-ref HEAD` prints the literal "HEAD" when no branch is checked out.
    const currentBranch = execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
      encoding: "utf-8",
    }).trim();

    const detached = currentBranch === "" || currentBranch === "HEAD";
    if (detached) {
      logWarning("safety", "rollback: detached HEAD state, cannot rollback");
      return false;
    }

    // A hard reset moves the branch pointer and the working tree together, and
    // unlike `git branch -f` it is permitted on the checked-out branch.
    execFileSync("git", ["reset", "--hard", sha], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });

    // The checkpoint has served its purpose; drop the ref.
    cleanupCheckpoint(basePath, unitId);

    return true;
  } catch (e) {
    logWarning("safety", `rollback failed: ${(e as Error).message}`);
    return false;
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Delete the checkpoint ref belonging to a unit.
 *
 * Best-effort: any failure is ignored.
 *
 * @param basePath - Directory in which git is executed
 * @param unitId - Identifier of the unit; "/" is replaced by "-" to form the
 *   ref name, as when the checkpoint was created
 */
export function cleanupCheckpoint(basePath: string, unitId: string): void {
  try {
    const refName = `${CHECKPOINT_PREFIX}${unitId.split("/").join("-")}`;
    execFileSync("git", ["update-ref", "-d", refName], {
      cwd: basePath,
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch {
    // Non-fatal — ref may already have been cleaned up
  }
}
|