@interf/compiler 0.33.0 → 0.50.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -226
- package/dist/cli/commands/agents.js +1 -32
- package/dist/cli/commands/benchmark.d.ts +2 -3
- package/dist/cli/commands/benchmark.js +1 -31
- package/dist/cli/commands/build-plan.js +26 -50
- package/dist/cli/commands/build.d.ts +2 -3
- package/dist/cli/commands/build.js +1 -31
- package/dist/cli/commands/graphs.js +177 -32
- package/dist/cli/commands/mcp.d.ts +1 -0
- package/dist/cli/commands/mcp.js +223 -126
- package/dist/cli/commands/project.js +10 -36
- package/dist/cli/commands/reset.d.ts +2 -3
- package/dist/cli/commands/reset.js +1 -22
- package/dist/cli/commands/runs.js +86 -33
- package/dist/cli/commands/status.js +3 -24
- package/dist/cli/commands/traces.js +1 -29
- package/dist/cli/commands/wizard.js +17 -29
- package/dist/cli/lib/http-client.d.ts +39 -0
- package/dist/cli/lib/http-client.js +73 -0
- package/dist/packages/build-plans/authoring/brief.d.ts +25 -4
- package/dist/packages/build-plans/authoring/build-plan-authoring.d.ts +42 -1
- package/dist/packages/build-plans/authoring/build-plan-authoring.js +470 -63
- package/dist/packages/build-plans/authoring/build-plan-edit-session.d.ts +9 -0
- package/dist/packages/build-plans/authoring/build-plan-edit-session.js +27 -10
- package/dist/packages/build-plans/authoring/build-plan-improvement.js +62 -8
- package/dist/packages/build-plans/authoring/lib/build-plan-edit-utils.d.ts +1 -0
- package/dist/packages/build-plans/package/build-plan-definitions.d.ts +0 -1
- package/dist/packages/build-plans/package/build-plan-definitions.js +5 -3
- package/dist/packages/build-plans/package/build-plan-stage-runner.d.ts +1 -0
- package/dist/packages/build-plans/package/build-plan-stage-runner.js +2 -1
- package/dist/packages/build-plans/package/builtin-build-plan.d.ts +2 -2
- package/dist/packages/build-plans/package/builtin-build-plan.js +3 -3
- package/dist/packages/build-plans/package/context-interface.d.ts +3 -0
- package/dist/packages/build-plans/package/context-interface.js +5 -5
- package/dist/packages/build-plans/package/interf-build-plan-package.js +22 -22
- package/dist/packages/build-plans/package/local-build-plans.d.ts +10 -5
- package/dist/packages/build-plans/package/local-build-plans.js +57 -32
- package/dist/packages/contracts/index.d.ts +4 -3
- package/dist/packages/contracts/index.js +2 -1
- package/dist/packages/contracts/lib/context-graph-layer.d.ts +161 -0
- package/dist/packages/contracts/lib/context-graph-layer.js +216 -0
- package/dist/packages/contracts/lib/project-paths.d.ts +7 -0
- package/dist/packages/contracts/lib/project-paths.js +9 -0
- package/dist/packages/contracts/lib/project-schema.d.ts +264 -1
- package/dist/packages/contracts/lib/project-schema.js +38 -13
- package/dist/packages/contracts/lib/schema.d.ts +556 -23
- package/dist/packages/contracts/lib/schema.js +279 -18
- package/dist/packages/contracts/utils/filesystem.d.ts +1 -0
- package/dist/packages/contracts/utils/filesystem.js +29 -1
- package/dist/packages/projects/lib/schema.d.ts +6 -8
- package/dist/packages/projects/lib/schema.js +3 -1
- package/dist/packages/projects/source-config.d.ts +0 -5
- package/dist/packages/projects/source-config.js +9 -22
- package/dist/packages/runtime/actions/fields.d.ts +4 -0
- package/dist/packages/runtime/actions/form-builders.js +79 -31
- package/dist/packages/runtime/actions/form-validators.js +9 -3
- package/dist/packages/runtime/actions/helpers.js +3 -3
- package/dist/packages/runtime/actions/registry.d.ts +1 -1
- package/dist/packages/runtime/actions/registry.js +1 -1
- package/dist/packages/runtime/actions/requests.d.ts +1 -1
- package/dist/packages/runtime/actions/requests.js +12 -6
- package/dist/packages/runtime/actions/schemas.d.ts +7 -0
- package/dist/packages/runtime/actions/schemas.js +1 -0
- package/dist/packages/runtime/agent-handoff.js +8 -7
- package/dist/packages/runtime/agents/lib/execution-profile.d.ts +14 -0
- package/dist/packages/runtime/agents/lib/execution-profile.js +23 -0
- package/dist/packages/runtime/agents/lib/execution.js +14 -8
- package/dist/packages/runtime/agents/lib/executors.d.ts +1 -0
- package/dist/packages/runtime/agents/lib/executors.js +11 -2
- package/dist/packages/runtime/agents/lib/logs.d.ts +10 -0
- package/dist/packages/runtime/agents/lib/logs.js +32 -8
- package/dist/packages/runtime/agents/lib/preflight.js +4 -1
- package/dist/packages/runtime/agents/lib/render.d.ts +18 -0
- package/dist/packages/runtime/agents/lib/render.js +44 -18
- package/dist/packages/runtime/agents/lib/shell-templates.js +105 -63
- package/dist/packages/runtime/agents/lib/shells.d.ts +29 -0
- package/dist/packages/runtime/agents/lib/shells.js +158 -32
- package/dist/packages/runtime/agents/lib/source-context-scan.d.ts +10 -0
- package/dist/packages/runtime/agents/lib/source-context-scan.js +388 -0
- package/dist/packages/runtime/agents/lib/status.js +1 -14
- package/dist/packages/runtime/agents/lib/string-utils.d.ts +16 -0
- package/dist/packages/runtime/agents/lib/string-utils.js +36 -0
- package/dist/packages/runtime/agents/lib/types.d.ts +1 -0
- package/dist/packages/runtime/agents/providers/codex.js +2 -0
- package/dist/packages/runtime/agents/role-executors.js +2 -1
- package/dist/packages/runtime/auth/session-store.js +11 -3
- package/dist/packages/runtime/benchmark-question-draft.d.ts +3 -0
- package/dist/packages/runtime/benchmark-question-draft.js +57 -28
- package/dist/packages/runtime/build/artifact-status.d.ts +1 -1
- package/dist/packages/runtime/build/artifact-status.js +1 -1
- package/dist/packages/runtime/build/build-evidence.d.ts +2 -1
- package/dist/packages/runtime/build/build-evidence.js +11 -5
- package/dist/packages/runtime/build/build-pipeline.js +89 -5
- package/dist/packages/runtime/build/build-stage-plan.js +3 -1
- package/dist/packages/runtime/build/build-stage-runner.js +169 -32
- package/dist/packages/runtime/build/build-target.d.ts +3 -0
- package/dist/packages/runtime/build/build-target.js +25 -1
- package/dist/packages/runtime/build/check-evaluator.d.ts +1 -1
- package/dist/packages/runtime/build/check-evaluator.js +655 -4
- package/dist/packages/runtime/build/context-graph-paths.d.ts +13 -0
- package/dist/packages/runtime/build/context-graph-paths.js +27 -0
- package/dist/packages/runtime/build/index.d.ts +2 -2
- package/dist/packages/runtime/build/index.js +2 -2
- package/dist/packages/runtime/build/inspect-map.d.ts +10 -0
- package/dist/packages/runtime/build/inspect-map.js +270 -0
- package/dist/packages/runtime/build/lib/schema.d.ts +246 -53
- package/dist/packages/runtime/build/lib/schema.js +173 -15
- package/dist/packages/runtime/build/native-entrypoint.d.ts +2 -0
- package/dist/packages/runtime/build/native-entrypoint.js +286 -0
- package/dist/packages/runtime/build/runtime-contracts.js +9 -3
- package/dist/packages/runtime/build/runtime-log-paths.d.ts +3 -0
- package/dist/packages/runtime/build/runtime-log-paths.js +16 -0
- package/dist/packages/runtime/build/runtime-prompt.js +6 -4
- package/dist/packages/runtime/build/runtime-runs.js +63 -10
- package/dist/packages/runtime/build/runtime-types.d.ts +4 -1
- package/dist/packages/runtime/build/runtime.d.ts +3 -1
- package/dist/packages/runtime/build/runtime.js +3 -1
- package/dist/packages/runtime/build/source-files.js +11 -2
- package/dist/packages/runtime/build/source-inventory.d.ts +1 -0
- package/dist/packages/runtime/build/source-inventory.js +246 -7
- package/dist/packages/runtime/build/source-manifest.d.ts +11 -0
- package/dist/packages/runtime/build/source-manifest.js +30 -2
- package/dist/packages/runtime/build/stage-evidence.js +80 -11
- package/dist/packages/runtime/build/stage-manifest.d.ts +45 -0
- package/dist/packages/runtime/build/stage-manifest.js +1125 -0
- package/dist/packages/runtime/build/stage-reuse.js +12 -0
- package/dist/packages/runtime/build/stage-session.d.ts +81 -0
- package/dist/packages/runtime/build/stage-session.js +308 -0
- package/dist/packages/runtime/build/state-io.js +10 -11
- package/dist/packages/runtime/build/state-view.js +1 -1
- package/dist/packages/runtime/build/state.d.ts +1 -1
- package/dist/packages/runtime/build/state.js +1 -1
- package/dist/packages/runtime/build/summary-coverage-index.d.ts +21 -0
- package/dist/packages/runtime/build/summary-coverage-index.js +189 -0
- package/dist/packages/runtime/build/traces.js +3 -3
- package/dist/packages/runtime/build/validate-context-graph.d.ts +1 -1
- package/dist/packages/runtime/build/validate-context-graph.js +5 -5
- package/dist/packages/runtime/build/validate.d.ts +1 -1
- package/dist/packages/runtime/build/validate.js +1 -1
- package/dist/packages/runtime/client.d.ts +3 -3
- package/dist/packages/runtime/client.js +8 -13
- package/dist/packages/runtime/context-checks.js +13 -0
- package/dist/packages/runtime/context-graph-scaffold.js +2 -1
- package/dist/packages/runtime/context-graph-semantic-graph.d.ts +9 -0
- package/dist/packages/runtime/context-graph-semantic-graph.js +416 -0
- package/dist/packages/runtime/execution/lib/schema.d.ts +34 -31
- package/dist/packages/runtime/index.d.ts +2 -2
- package/dist/packages/runtime/index.js +1 -1
- package/dist/packages/runtime/native-run-handlers.d.ts +38 -0
- package/dist/packages/runtime/native-run-handlers.js +52 -33
- package/dist/packages/runtime/plan-artifact-contract.js +1 -1
- package/dist/packages/runtime/project-source-state.d.ts +4 -4
- package/dist/packages/runtime/project-source-state.js +5 -2
- package/dist/packages/runtime/project-store.d.ts +5 -0
- package/dist/packages/runtime/project-store.js +30 -3
- package/dist/packages/runtime/requested-artifacts.js +1 -1
- package/dist/packages/runtime/run-observability.js +9 -4
- package/dist/packages/runtime/runtime-action-proposals.js +3 -3
- package/dist/packages/runtime/runtime-build-plans.js +47 -3
- package/dist/packages/runtime/runtime-build-runs.js +9 -16
- package/dist/packages/runtime/runtime-caches.d.ts +26 -0
- package/dist/packages/runtime/runtime-caches.js +47 -0
- package/dist/packages/runtime/runtime-jobs.js +6 -6
- package/dist/packages/runtime/runtime-project-mutations.js +1 -0
- package/dist/packages/runtime/runtime-project-reads.d.ts +4 -1
- package/dist/packages/runtime/runtime-project-reads.js +229 -36
- package/dist/packages/runtime/runtime-proposal-helpers.js +6 -6
- package/dist/packages/runtime/runtime-resource-builders.d.ts +4 -2
- package/dist/packages/runtime/runtime-resource-builders.js +16 -14
- package/dist/packages/runtime/runtime-status.d.ts +14 -0
- package/dist/packages/runtime/runtime-status.js +15 -0
- package/dist/packages/runtime/runtime-verify-runs.js +6 -5
- package/dist/packages/runtime/runtime.d.ts +439 -22
- package/dist/packages/runtime/runtime.js +16 -2
- package/dist/packages/runtime/schemas/actions.d.ts +24 -0
- package/dist/packages/runtime/schemas/agents.d.ts +28 -0
- package/dist/packages/runtime/schemas/agents.js +33 -0
- package/dist/packages/runtime/schemas/build-plans.d.ts +181 -8
- package/dist/packages/runtime/schemas/build-plans.js +36 -2
- package/dist/packages/runtime/schemas/context-graphs.d.ts +1522 -0
- package/dist/packages/runtime/schemas/context-graphs.js +110 -0
- package/dist/packages/runtime/schemas/files.d.ts +7 -347
- package/dist/packages/runtime/schemas/files.js +1 -24
- package/dist/packages/runtime/schemas/index.d.ts +1 -0
- package/dist/packages/runtime/schemas/index.js +1 -0
- package/dist/packages/runtime/schemas/jobs.js +4 -0
- package/dist/packages/runtime/schemas/projects.d.ts +48 -21
- package/dist/packages/runtime/schemas/projects.js +34 -10
- package/dist/packages/runtime/schemas/runs.d.ts +1009 -240
- package/dist/packages/runtime/schemas/runs.js +17 -0
- package/dist/packages/runtime/service/openapi.js +1 -0
- package/dist/packages/runtime/service/operations.d.ts +1666 -145
- package/dist/packages/runtime/service/operations.js +147 -17
- package/dist/packages/runtime/service/routes.d.ts +11 -3
- package/dist/packages/runtime/service/routes.js +11 -3
- package/dist/packages/runtime/service/server-app-boot.js +2 -2
- package/dist/packages/runtime/service/server-helpers.d.ts +11 -0
- package/dist/packages/runtime/service/server-helpers.js +19 -0
- package/dist/packages/runtime/service/server-routes-action-proposals.js +4 -2
- package/dist/packages/runtime/service/server-routes-agents.js +19 -85
- package/dist/packages/runtime/service/server-routes-build-plans.js +14 -11
- package/dist/packages/runtime/service/server-routes-project-context.js +102 -7
- package/dist/packages/runtime/service/server-routes-project-jobs.js +19 -12
- package/dist/packages/runtime/service/server-routes-project-runs.js +5 -2
- package/dist/packages/runtime/service/server-routes-projects.js +6 -2
- package/dist/packages/runtime/service/server-routes-runs.js +11 -4
- package/dist/packages/runtime/verify/lib/schema.js +12 -0
- package/dist/packages/runtime/verify/test-file-guard.d.ts +2 -0
- package/dist/packages/runtime/verify/test-file-guard.js +29 -0
- package/dist/packages/runtime/verify/verify-execution.d.ts +7 -0
- package/dist/packages/runtime/verify/verify-execution.js +109 -35
- package/dist/packages/runtime/verify/verify-paths.d.ts +1 -0
- package/dist/packages/runtime/verify/verify-paths.js +4 -0
- package/dist/packages/runtime/verify/verify-specs.js +49 -39
- package/dist/packages/runtime/wire-schemas.d.ts +1 -1
- package/dist/packages/runtime/wire-schemas.js +1 -1
- package/package.json +2 -8
- package/public-repo/CONTRIBUTING.md +10 -3
- package/public-repo/README.md +122 -226
- package/public-repo/build-plans/interf-default/README.md +15 -12
- package/public-repo/build-plans/interf-default/build/stages/entrypoint/SKILL.md +74 -0
- package/public-repo/build-plans/interf-default/build/stages/knowledge/SKILL.md +95 -0
- package/public-repo/build-plans/interf-default/build/stages/summarize/SKILL.md +38 -5
- package/public-repo/build-plans/interf-default/build-plan.json +27 -23
- package/public-repo/build-plans/interf-default/build-plan.schema.json +24 -20
- package/public-repo/build-plans/interf-default/use/query/SKILL.md +8 -7
- package/public-repo/openapi/local-service.openapi.json +11637 -4213
- package/public-repo/skills/interf/SKILL.md +174 -134
- package/dist/packages/runtime/build/runtime-paths.d.ts +0 -8
- package/dist/packages/runtime/build/runtime-paths.js +0 -26
- package/dist/packages/runtime/build/state-paths.d.ts +0 -7
- package/dist/packages/runtime/build/state-paths.js +0 -22
- package/public-repo/build-plans/interf-default/build/stages/shape/SKILL.md +0 -34
- package/public-repo/build-plans/interf-default/build/stages/structure/SKILL.md +0 -28
|
@@ -80,7 +80,7 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
|
|
|
80
80
|
if (method === "GET") {
|
|
81
81
|
const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
|
|
82
82
|
if (!context?.build_evidence) {
|
|
83
|
-
sendError(res, 404, "
|
|
83
|
+
sendError(res, 404, "Graph diagnostics not found.");
|
|
84
84
|
}
|
|
85
85
|
else {
|
|
86
86
|
sendJson(res, 200, context.build_evidence);
|
|
@@ -88,6 +88,46 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
|
|
|
88
88
|
return true;
|
|
89
89
|
}
|
|
90
90
|
}
|
|
91
|
+
if (subPath === PROJECT_SUBRESOURCES.contextGraphSessions) {
|
|
92
|
+
if (method === "GET") {
|
|
93
|
+
const sessions = runtime.listLatestContextGraphStageSessions(storedProject.projectDataDir, storedProject.id);
|
|
94
|
+
sendJson(res, 200, { sessions });
|
|
95
|
+
return true;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
if (subPath.startsWith(`${PROJECT_SUBRESOURCES.contextGraphSessions}/`)) {
|
|
99
|
+
if (method === "GET") {
|
|
100
|
+
const rawStageRunId = subPath.slice(PROJECT_SUBRESOURCES.contextGraphSessions.length + 1);
|
|
101
|
+
let stageRunId;
|
|
102
|
+
try {
|
|
103
|
+
stageRunId = decodeURIComponent(rawStageRunId);
|
|
104
|
+
}
|
|
105
|
+
catch {
|
|
106
|
+
sendError(res, 400, "Stage execution session id is not valid URI-encoded UTF-8.");
|
|
107
|
+
return true;
|
|
108
|
+
}
|
|
109
|
+
const session = runtime
|
|
110
|
+
.listLatestContextGraphStageSessions(storedProject.projectDataDir, storedProject.id)
|
|
111
|
+
.find((entry) => entry.stage_run_id === stageRunId) ?? null;
|
|
112
|
+
if (!session)
|
|
113
|
+
sendError(res, 404, `Stage execution session not found: ${stageRunId}`);
|
|
114
|
+
else
|
|
115
|
+
sendJson(res, 200, { session });
|
|
116
|
+
return true;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
if (subPath === PROJECT_SUBRESOURCES.contextGraphEntrypoint) {
|
|
120
|
+
if (method === "GET") {
|
|
121
|
+
const handoff = runtime.getContextGraphHandoff(storedProject.projectDataDir, storedProject.id);
|
|
122
|
+
if (!handoff) {
|
|
123
|
+
sendError(res, 404, "Context Graph handoff not found.");
|
|
124
|
+
}
|
|
125
|
+
else {
|
|
126
|
+
sendJson(res, 200, handoff);
|
|
127
|
+
}
|
|
128
|
+
return true;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
91
131
|
// GET /v1/projects/<id>/context-graph/traces — first-class traces surface.
|
|
92
132
|
// Returns a derived Traces wire shape rolled up from the current
|
|
93
133
|
// Context Graph's artifacts + Checks + source file index.
|
|
@@ -117,20 +157,75 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
|
|
|
117
157
|
return true;
|
|
118
158
|
}
|
|
119
159
|
}
|
|
160
|
+
if (subPath === PROJECT_SUBRESOURCES.contextGraphStages) {
|
|
161
|
+
if (method === "GET") {
|
|
162
|
+
const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
|
|
163
|
+
if (!context) {
|
|
164
|
+
sendError(res, 404, "Context Graph not found.");
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
sendJson(res, 200, {
|
|
168
|
+
stages: context.stage_summaries,
|
|
169
|
+
readiness: context.readiness_rollup,
|
|
170
|
+
primary_metrics: context.primary_metrics,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
return true;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
if (subPath === PROJECT_SUBRESOURCES.contextGraphResources) {
|
|
177
|
+
if (method === "GET") {
|
|
178
|
+
const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
|
|
179
|
+
if (!context) {
|
|
180
|
+
sendError(res, 404, "Context Graph not found.");
|
|
181
|
+
}
|
|
182
|
+
else {
|
|
183
|
+
sendJson(res, 200, {
|
|
184
|
+
resources: context.resources,
|
|
185
|
+
entrypoints: context.entrypoints,
|
|
186
|
+
graph_outputs: context.graph_manifest?.graph_outputs ?? null,
|
|
187
|
+
});
|
|
188
|
+
}
|
|
189
|
+
return true;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
if (subPath.startsWith(`${PROJECT_SUBRESOURCES.contextGraphResources}/`)) {
|
|
193
|
+
if (method === "GET") {
|
|
194
|
+
const rawResourceId = subPath.slice(PROJECT_SUBRESOURCES.contextGraphResources.length + 1);
|
|
195
|
+
let resourceId;
|
|
196
|
+
try {
|
|
197
|
+
resourceId = decodeURIComponent(rawResourceId);
|
|
198
|
+
}
|
|
199
|
+
catch {
|
|
200
|
+
sendError(res, 400, "Resource id is not valid URI-encoded UTF-8.");
|
|
201
|
+
return true;
|
|
202
|
+
}
|
|
203
|
+
const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
|
|
204
|
+
const resource = context?.resources.find((entry) => entry.id === resourceId) ?? null;
|
|
205
|
+
if (!resource) {
|
|
206
|
+
sendError(res, 404, `Context Graph resource not found: ${resourceId}`);
|
|
207
|
+
}
|
|
208
|
+
else {
|
|
209
|
+
const stage = resource.stage_id
|
|
210
|
+
? context?.stage_summaries.find((entry) => entry.stage_id === resource.stage_id) ?? null
|
|
211
|
+
: null;
|
|
212
|
+
sendJson(res, 200, {
|
|
213
|
+
resource,
|
|
214
|
+
stage,
|
|
215
|
+
linked_resources: context?.resources.filter((entry) => resource.links.includes(entry.path ?? entry.id)) ?? [],
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
return true;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
120
221
|
// GET /v1/projects/<id>/source-state — manifest-backed drift verdict for
|
|
121
222
|
// the Project's Source binding.
|
|
122
223
|
if (subPath === PROJECT_SUBRESOURCES.sourceState) {
|
|
123
224
|
if (method === "GET") {
|
|
124
|
-
// The per-run SourceState record carries the historical file index;
|
|
125
|
-
// for v1 we treat the latest Build id as the graph pointer and
|
|
126
|
-
// leave the hash comparison to a follow-up enhancement. For now the
|
|
127
|
-
// verdict is `unknown` until a Build has produced a Context Graph.
|
|
128
225
|
const latestContext = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
|
|
129
226
|
const state = computeProjectSourceState({
|
|
130
227
|
projectId: storedProject.id,
|
|
131
|
-
sourceFolderPath: storedProject.source.locator,
|
|
132
228
|
contextGraphPath: latestContext?.path ?? storedProject.contextGraphPath,
|
|
133
|
-
lastGraphIndexHash: null,
|
|
134
229
|
});
|
|
135
230
|
sendJson(res, 200, state);
|
|
136
231
|
return true;
|
|
@@ -2,6 +2,19 @@ import { PROJECT_SUBRESOURCES } from "./routes.js";
|
|
|
2
2
|
import { readJsonBody, sendError, sendJson, } from "./server-helpers.js";
|
|
3
3
|
import { writeBenchmarkSpecsForProject } from "../../projects/source-config.js";
|
|
4
4
|
import { loadTestSpec } from "../verify/verify-specs.js";
|
|
5
|
+
import { ProjectBenchmarkQuestionDraftCreateRequestSchema, ProjectBuildPlanAuthoringCreateRequestSchema, ProjectResetRequestSchema, } from "../schemas/index.js";
|
|
6
|
+
function projectScopedJobRequest(storedProject, body) {
|
|
7
|
+
const scopedRequest = ProjectBuildPlanAuthoringCreateRequestSchema.parse(body ?? {});
|
|
8
|
+
const explicitIntent = typeof scopedRequest.intent === "string" && scopedRequest.intent.trim().length > 0
|
|
9
|
+
? scopedRequest.intent
|
|
10
|
+
: null;
|
|
11
|
+
return {
|
|
12
|
+
...scopedRequest,
|
|
13
|
+
project: storedProject.id,
|
|
14
|
+
source_folder_path: storedProject.source.locator,
|
|
15
|
+
intent: explicitIntent ?? storedProject.intent,
|
|
16
|
+
};
|
|
17
|
+
}
|
|
5
18
|
/**
|
|
6
19
|
* Async/structured mutations on a project: Build Plan authoring
|
|
7
20
|
* and improvement jobs, benchmark-question drafts, Build Plan change deltas,
|
|
@@ -14,11 +27,7 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
|
|
|
14
27
|
if (method === "POST") {
|
|
15
28
|
try {
|
|
16
29
|
const body = (await readJsonBody(req));
|
|
17
|
-
const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir,
|
|
18
|
-
...(body ?? {}),
|
|
19
|
-
project: storedProject.id,
|
|
20
|
-
source_folder_path: storedProject.source.locator,
|
|
21
|
-
});
|
|
30
|
+
const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, projectScopedJobRequest(storedProject, body));
|
|
22
31
|
sendJson(res, 202, job);
|
|
23
32
|
}
|
|
24
33
|
catch (error) {
|
|
@@ -31,11 +40,7 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
|
|
|
31
40
|
if (method === "POST") {
|
|
32
41
|
try {
|
|
33
42
|
const body = (await readJsonBody(req));
|
|
34
|
-
const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir,
|
|
35
|
-
...(body ?? {}),
|
|
36
|
-
project: storedProject.id,
|
|
37
|
-
source_folder_path: storedProject.source.locator,
|
|
38
|
-
}, "build-plan-improvement");
|
|
43
|
+
const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, projectScopedJobRequest(storedProject, body), "build-plan-improvement");
|
|
39
44
|
sendJson(res, 202, job);
|
|
40
45
|
}
|
|
41
46
|
catch (error) {
|
|
@@ -48,8 +53,9 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
|
|
|
48
53
|
if (method === "POST") {
|
|
49
54
|
try {
|
|
50
55
|
const body = (await readJsonBody(req));
|
|
56
|
+
const scopedRequest = ProjectBenchmarkQuestionDraftCreateRequestSchema.parse(body ?? {});
|
|
51
57
|
const job = await runtime.createBenchmarkQuestionDraftRun(storedProject.projectDataDir, {
|
|
52
|
-
...
|
|
58
|
+
...scopedRequest,
|
|
53
59
|
project: storedProject.id,
|
|
54
60
|
source_folder_path: storedProject.source.locator,
|
|
55
61
|
});
|
|
@@ -116,7 +122,8 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
|
|
|
116
122
|
if (method === "POST") {
|
|
117
123
|
try {
|
|
118
124
|
const body = (await readJsonBody(req));
|
|
119
|
-
const
|
|
125
|
+
const scopedRequest = ProjectResetRequestSchema.parse(body ?? {});
|
|
126
|
+
const request = { ...scopedRequest, project: storedProject.id };
|
|
120
127
|
const result = runtime.applyReset(storedProject.projectDataDir, request);
|
|
121
128
|
sendJson(res, 200, result);
|
|
122
129
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { PROJECT_SUBRESOURCES } from "./routes.js";
|
|
2
2
|
import { readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
|
|
3
3
|
import { assertCanRunBenchmark } from "../entitlement-guard.js";
|
|
4
|
+
import { ProjectBuildRunCreateRequestSchema, ProjectVerifyRunCreateRequestSchema, } from "../schemas/index.js";
|
|
4
5
|
/**
|
|
5
6
|
* Build/verify run mutations and the per-Project runs listing.
|
|
6
7
|
* Run-id lookups for an unknown project are handled by the
|
|
@@ -16,7 +17,8 @@ export async function tryHandleProjectRuns(req, res, runtime, ctx, method) {
|
|
|
16
17
|
}
|
|
17
18
|
try {
|
|
18
19
|
const body = (await readJsonBody(req));
|
|
19
|
-
const
|
|
20
|
+
const scopedRequest = ProjectBuildRunCreateRequestSchema.parse(body ?? {});
|
|
21
|
+
const request = { ...scopedRequest, project: storedProject.id };
|
|
20
22
|
const idempotencyKeyRaw = req.headers["x-interf-idempotency-key"];
|
|
21
23
|
const idempotencyKey = Array.isArray(idempotencyKeyRaw)
|
|
22
24
|
? idempotencyKeyRaw[0]
|
|
@@ -64,7 +66,8 @@ export async function tryHandleProjectRuns(req, res, runtime, ctx, method) {
|
|
|
64
66
|
.filter((run) => !run.parent_run_id).length;
|
|
65
67
|
assertCanRunBenchmark(runtime.currentAccount, existingCount);
|
|
66
68
|
const body = (await readJsonBody(req));
|
|
67
|
-
const
|
|
69
|
+
const scopedRequest = ProjectVerifyRunCreateRequestSchema.parse(body ?? {});
|
|
70
|
+
const request = { ...scopedRequest, project: storedProject.id };
|
|
68
71
|
const resource = await runtime.createVerifyRun(storedProject.projectDataDir, request);
|
|
69
72
|
sendJson(res, 201, resource);
|
|
70
73
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ProjectCreateRequestSchema, ProjectUpdateRequestSchema, } from "../schemas/index.js";
|
|
2
2
|
import { LOCAL_SERVICE_ROUTES } from "./routes.js";
|
|
3
|
-
import { readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
|
|
3
|
+
import { decodeOr400, readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
|
|
4
4
|
import { createStoredProject, deleteStoredProject, getStoredProject, listStoredProjects, projectWireShape, updateStoredProject, } from "../project-store.js";
|
|
5
5
|
import { tryHandleProjectRuns, } from "./server-routes-project-runs.js";
|
|
6
6
|
import { tryHandleProjectJobs } from "./server-routes-project-jobs.js";
|
|
@@ -19,6 +19,7 @@ export async function tryHandleProjectCollection(req, res, runtime, path, method
|
|
|
19
19
|
const stored = createStoredProject(runtime, {
|
|
20
20
|
id: body.id,
|
|
21
21
|
source: { kind: "local-folder", locator: body.source.locator },
|
|
22
|
+
intent: body.intent,
|
|
22
23
|
build_plan_id: body.build_plan_id,
|
|
23
24
|
});
|
|
24
25
|
sendJson(res, 201, projectWireShape(stored));
|
|
@@ -41,7 +42,9 @@ export async function tryHandlePerProject(req, res, runtime, path, method) {
|
|
|
41
42
|
const slashIndex = tail.indexOf("/");
|
|
42
43
|
const projectId = slashIndex === -1 ? tail : tail.slice(0, slashIndex);
|
|
43
44
|
const subPath = slashIndex === -1 ? "" : tail.slice(slashIndex + 1);
|
|
44
|
-
const decodedProjectId =
|
|
45
|
+
const decodedProjectId = decodeOr400(res, projectId, "Project id");
|
|
46
|
+
if (decodedProjectId === null)
|
|
47
|
+
return true;
|
|
45
48
|
const storedProject = getStoredProject(decodedProjectId);
|
|
46
49
|
if (!storedProject) {
|
|
47
50
|
sendError(res, 404, `Project not found: ${decodedProjectId}`);
|
|
@@ -63,6 +66,7 @@ export async function tryHandlePerProject(req, res, runtime, path, method) {
|
|
|
63
66
|
try {
|
|
64
67
|
const body = ProjectUpdateRequestSchema.parse(await readJsonBody(req));
|
|
65
68
|
const updated = updateStoredProject(decodedProjectId, {
|
|
69
|
+
intent: body.intent,
|
|
66
70
|
build_plan_id: body.build_plan_id,
|
|
67
71
|
});
|
|
68
72
|
sendJson(res, 200, projectWireShape(updated));
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { LOCAL_SERVICE_ROUTES, RUN_SUBRESOURCES } from "./routes.js";
|
|
2
|
-
import { parseRequestUrl, sendError, sendJson } from "./server-helpers.js";
|
|
2
|
+
import { decodeOr400, parseRequestUrl, sendError, sendJson } from "./server-helpers.js";
|
|
3
3
|
import { isTraversalRelativePath, safeApiFilePath, sendApiFile, } from "./server-api-files.js";
|
|
4
4
|
import { findInstanceBuildRun, findInstanceJob, findInstanceRun, findInstanceVerifyRun, listInstanceRuns, } from "./server-instance-helpers.js";
|
|
5
5
|
import { runObservabilityToStatus } from "../run-observability.js";
|
|
@@ -10,7 +10,9 @@ export function tryHandleRuns(req, res, runtime, path, method) {
|
|
|
10
10
|
}
|
|
11
11
|
const observableRunMatch = path.match(/^\/v1\/runs\/([^/]+)(?:\/([^/]+))?$/);
|
|
12
12
|
if (observableRunMatch?.[1]) {
|
|
13
|
-
const runId =
|
|
13
|
+
const runId = decodeOr400(res, observableRunMatch[1], "Run id");
|
|
14
|
+
if (runId === null)
|
|
15
|
+
return true;
|
|
14
16
|
const child = observableRunMatch[2];
|
|
15
17
|
if (method === "GET" && !child) {
|
|
16
18
|
const run = findInstanceRun(runtime, runId);
|
|
@@ -80,7 +82,10 @@ export function tryHandleRuns(req, res, runtime, path, method) {
|
|
|
80
82
|
}
|
|
81
83
|
const verifyRunMatch = path.match(/^\/v1\/verify-runs\/([^/]+)$/);
|
|
82
84
|
if (method === "GET" && verifyRunMatch?.[1]) {
|
|
83
|
-
const
|
|
85
|
+
const verifyRunId = decodeOr400(res, verifyRunMatch[1], "Verify run id");
|
|
86
|
+
if (verifyRunId === null)
|
|
87
|
+
return true;
|
|
88
|
+
const found = findInstanceVerifyRun(runtime, verifyRunId);
|
|
84
89
|
if (!found)
|
|
85
90
|
sendError(res, 404, "Verify run not found.");
|
|
86
91
|
else
|
|
@@ -89,7 +94,9 @@ export function tryHandleRuns(req, res, runtime, path, method) {
|
|
|
89
94
|
}
|
|
90
95
|
const jobMatch = path.match(/^\/v1\/jobs\/([^/]+)(?:\/([^/]+))?$/);
|
|
91
96
|
if (jobMatch?.[1]) {
|
|
92
|
-
const runId =
|
|
97
|
+
const runId = decodeOr400(res, jobMatch[1], "Job run id");
|
|
98
|
+
if (runId === null)
|
|
99
|
+
return true;
|
|
93
100
|
const child = jobMatch[2];
|
|
94
101
|
const found = findInstanceJob(runtime, runId);
|
|
95
102
|
if (method === "GET" && !child) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { RuntimeExecutorInfoSchema, TestCaseExpectSchema, TestTargetTypeSchema, ProjectIdSchema, } from "../../../contracts/lib/schema.js";
|
|
3
|
+
import { isSafeRelativeTestFile } from "../test-file-guard.js";
|
|
3
4
|
const TestCaseCoreSchema = z.object({
|
|
4
5
|
id: z.string().regex(/^[a-z0-9][a-z0-9-]{0,79}$/),
|
|
5
6
|
question: z.string().min(1),
|
|
@@ -14,6 +15,17 @@ const TestCaseCoreSchema = z.object({
|
|
|
14
15
|
message: "Test cases need at least one of file, answer, or expect.",
|
|
15
16
|
});
|
|
16
17
|
}
|
|
18
|
+
// H2: a `file` target is a project-relative output path that is later joined
|
|
19
|
+
// onto the (sandboxed) target directory. Reject absolute or `..`-escaping
|
|
20
|
+
// values at parse time so they can never reach `path.join`. Centralizing the
|
|
21
|
+
// guard here means H1 (verify-execution join) and H3 (spec writer) inherit it.
|
|
22
|
+
if (value.file !== undefined && !isSafeRelativeTestFile(value.file)) {
|
|
23
|
+
ctx.addIssue({
|
|
24
|
+
code: z.ZodIssueCode.custom,
|
|
25
|
+
path: ["file"],
|
|
26
|
+
message: "Test case file must be a relative path without '..' or a leading '/'.",
|
|
27
|
+
});
|
|
28
|
+
}
|
|
17
29
|
});
|
|
18
30
|
export const TestCaseSchema = TestCaseCoreSchema;
|
|
19
31
|
const TestSpecCoreSchema = z.object({
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// Pure, Node-free guard for a `testCase.file` value. A test file is a
|
|
2
|
+
// project-relative output path inside the (sandboxed) target directory, so it
|
|
3
|
+
// must never be absolute, never escape via `..`, and stay within a conservative
|
|
4
|
+
// character set. This is the single guard both the schema (H2) and the spec
|
|
5
|
+
// writer (H3) consume so a malicious `file` cannot reach `path.join` and a later
|
|
6
|
+
// `existsSync`/`readFileSync`.
|
|
7
|
+
//
|
|
8
|
+
// This module imports no Node builtins so it is safe to pull into the schemas
|
|
9
|
+
// barrel (`@interf/compiler/schemas`) and bundle into the renderer. The
|
|
10
|
+
// filesystem-aware path helpers live in `verify-paths.ts`, which re-exports
|
|
11
|
+
// these guards for server-side callers.
|
|
12
|
+
// Conservative allow-list: ASCII letters, digits, `.`, `_`, `/` and `-` only.
const TEST_FILE_PATTERN = /^[A-Za-z0-9._/-]+$/;
/**
 * Decide whether `file` is acceptable as a `testCase.file` value.
 *
 * A value is accepted only when it is a non-empty string that does not start
 * with `/`, consists solely of characters matched by `TEST_FILE_PATTERN`, and
 * has no `..` path segment. Everything else, including non-string input, is
 * rejected.
 *
 * @param file Candidate value; expected to be a string.
 * @returns `true` when every check passes, otherwise `false`.
 */
export function isSafeRelativeTestFile(file) {
    // Non-string input (undefined, null, numbers, objects) can never be a valid
    // path; reject it instead of throwing a TypeError on `.length`/`.startsWith`.
    if (typeof file !== "string" || file.length === 0)
        return false;
    if (file.startsWith("/"))
        return false;
    if (!TEST_FILE_PATTERN.test(file))
        return false;
    // Reject `..` as a whole path segment (e.g. `..`, `../x`, `a/../b`, `a/..`).
    // A `..` embedded in a longer segment (`a..b`) is not a traversal and passes.
    if (file.split("/").some((segment) => segment === ".."))
        return false;
    return true;
}
|
|
25
|
+
/**
 * Throwing counterpart of `isSafeRelativeTestFile`.
 *
 * @param file Candidate `testCase.file` value.
 * @throws {Error} `Invalid test case file: <file>` when the guard rejects it.
 */
export function assertSafeRelativeTestFile(file) {
    // Happy path first: an accepted value needs no further work.
    if (isSafeRelativeTestFile(file))
        return;
    throw new Error(`Invalid test case file: ${file}`);
}
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
import { type AgentExecutor } from "../agents/lib/executors.js";
|
|
2
2
|
import type { TestTargetRun, TestTargetCandidate, LoadedTestSpec } from "./verify-types.js";
|
|
3
3
|
import { type TestSandboxRetentionMode } from "./verify-sandbox.js";
|
|
4
|
+
export declare function resolveTargetFilePath(targetPath: string, file: string): string;
|
|
5
|
+
interface TestJudgeVerdict {
|
|
6
|
+
pass: boolean;
|
|
7
|
+
summary: string;
|
|
8
|
+
}
|
|
9
|
+
export declare function readTestJudgeVerdictFromStatus(statusPath: string): TestJudgeVerdict | null;
|
|
4
10
|
export declare function runTargetTests(sourcePath: string, spec: LoadedTestSpec, targets: TestTargetCandidate[]): TestTargetRun;
|
|
5
11
|
export declare function runTargetTestsWithJudge(sourcePath: string, spec: LoadedTestSpec, targets: TestTargetCandidate[], executor: AgentExecutor, options?: {
|
|
6
12
|
preserveSandboxes?: TestSandboxRetentionMode;
|
|
@@ -12,3 +18,4 @@ export declare function runTargetTestsAuto(sourcePath: string, spec: LoadedTestS
|
|
|
12
18
|
artifactRootPath?: string;
|
|
13
19
|
}): Promise<TestTargetRun>;
|
|
14
20
|
export declare function saveTargetTestRun(artifactRootPath: string, result: TestTargetRun): string;
|
|
21
|
+
export {};
|
|
@@ -1,9 +1,21 @@
|
|
|
1
1
|
import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync, } from "node:fs";
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
|
+
import { assertPathWithinRoot } from "../../contracts/utils/path-guards.js";
|
|
4
5
|
import { buildRuntimeExecutorInfo } from "../agents/lib/executors.js";
|
|
5
6
|
import { targetTestRunGitignorePath, targetTestRunsPath, targetTestSandboxGitignorePath, targetTestSandboxesPath, normalizeTestId, } from "./verify-paths.js";
|
|
6
7
|
import { createTestSandbox, } from "./verify-sandbox.js";
|
|
8
|
+
import { freezePreservedShell } from "../agents/lib/shell-fs.js";
|
|
9
|
+
// H1: `testCase.file` is a project-relative output path joined onto the target
|
|
10
|
+
// directory. `path.join` resolves `..` at string level, so a malicious file can
|
|
11
|
+
// escape the (sandboxed) target and be read via existsSync/readFileSync. Resolve
|
|
12
|
+
// the join and assert it stays within target.path before any filesystem access.
|
|
13
|
+
// Centralized so every read of a test-case file inherits the bound (ENGINE-POLICY
|
|
14
|
+
// rule 7). The schema/spec-writer guards (H2/H3) are defense-in-depth ahead of
|
|
15
|
+
// this; this is the last line before the filesystem.
|
|
16
|
+
export function resolveTargetFilePath(targetPath, file) {
    // Build the candidate path first, then pass it through the root guard.
    // Whatever `assertPathWithinRoot` returns is handed back unchanged.
    const joinedPath = join(targetPath, file);
    return assertPathWithinRoot(targetPath, joinedPath, "test case file");
}
|
|
7
19
|
/**
 * Count the whitespace-separated tokens in `content`.
 *
 * @param content Text to measure.
 * @returns Number of non-empty tokens; `0` for empty or all-whitespace input.
 */
function parseWords(content) {
    const tokens = content.trim().split(/\s+/);
    let wordCount = 0;
    for (const token of tokens) {
        // Splitting an empty string yields one empty token; do not count it.
        if (token)
            wordCount += 1;
    }
    return wordCount;
}
|
|
@@ -66,7 +78,7 @@ function runTestCase(target, testCase) {
|
|
|
66
78
|
if (!testCase.file) {
|
|
67
79
|
throw new Error(`Test case "${testCase.id}" requires an executor because it has no file target.`);
|
|
68
80
|
}
|
|
69
|
-
const outputPath =
|
|
81
|
+
const outputPath = resolveTargetFilePath(target.path, testCase.file);
|
|
70
82
|
const checks = [];
|
|
71
83
|
if (!existsSync(outputPath)) {
|
|
72
84
|
checks.push({
|
|
@@ -133,7 +145,13 @@ function readTestJudgeVerdict(verdictPath) {
|
|
|
133
145
|
summary: typeof raw.summary === "string" ? raw.summary : "",
|
|
134
146
|
};
|
|
135
147
|
}
|
|
136
|
-
|
|
148
|
+
// M13: verdict.json is the single source of truth for a judge pass/fail. This is
|
|
149
|
+
// a strict fallback for when the agent omitted the JSON file: only an explicit
|
|
150
|
+
// `pass=true` / `pass=false` token on a terminal DONE:/BLOCKED:/ERROR: line is
|
|
151
|
+
// honored. The prompt mandates `DONE: pass=true|false - <summary>`, so a benign
|
|
152
|
+
// status line that merely mentions a match must NOT be read as a pass. A terminal
|
|
153
|
+
// line with no explicit pass token yields no verdict (treated as missing/invalid).
|
|
154
|
+
export function readTestJudgeVerdictFromStatus(statusPath) {
|
|
137
155
|
if (!existsSync(statusPath))
|
|
138
156
|
return null;
|
|
139
157
|
const lines = readFileSync(statusPath, "utf8")
|
|
@@ -148,53 +166,98 @@ function readTestJudgeVerdictFromStatus(statusPath) {
|
|
|
148
166
|
continue;
|
|
149
167
|
const normalized = line.toLowerCase();
|
|
150
168
|
const summary = line.replace(/^(DONE|BLOCKED|ERROR):\s*/i, "").trim();
|
|
151
|
-
if (
|
|
152
|
-
return { pass: true, summary };
|
|
153
|
-
}
|
|
154
|
-
if (normalized.includes("pass=false")) {
|
|
155
|
-
return { pass: false, summary };
|
|
156
|
-
}
|
|
157
|
-
if (/values match expected|matches expected|candidate matches|answer matches|expected values match/i.test(line)) {
|
|
169
|
+
if (/\bpass=true\b/.test(normalized)) {
|
|
158
170
|
return { pass: true, summary };
|
|
159
171
|
}
|
|
160
|
-
if (
|
|
172
|
+
if (/\bpass=false\b/.test(normalized)) {
|
|
161
173
|
return { pass: false, summary };
|
|
162
174
|
}
|
|
175
|
+
// Terminal line reached without an explicit pass token: no trustworthy
|
|
176
|
+
// verdict. Stop scanning so an earlier, benign line cannot be misread.
|
|
177
|
+
return null;
|
|
163
178
|
}
|
|
164
179
|
return null;
|
|
165
180
|
}
|
|
166
|
-
async function runTargetTestsJudge(testCase, executor, candidateLabel, candidateContent
|
|
167
|
-
|
|
181
|
+
/**
 * Run the judge agent for one test case and collect its verdict.
 *
 * @param testCase Test case forwarded to `buildTestJudgePrompt`.
 * @param executor Agent executor; only `execute(dir, prompt, options)` is used.
 * @param candidateLabel Label for the candidate, forwarded to the prompt builder.
 * @param candidateContent Candidate text being judged.
 * @param preservedShellRoot Optional durable directory for the judge shell.
 *   When supplied, the judge shell is created at this path and preserved
 *   (frozen) on finish so the judge execution is inspectable: prompt, reasoning
 *   transcript, the candidate it judged, and the JSON verdict all survive. Lives
 *   under the benchmark sandbox so it is preserved with the sandbox on failure.
 *   When omitted, the shell is an ephemeral temp dir removed on finish.
 * @returns `{ verdict, error }`: `verdict` is the verdict that was read, or
 *   `null`; `error` is the first failure message, or `null` when none occurred.
 */
async function runTargetTestsJudge(testCase, executor, candidateLabel, candidateContent, preservedShellRoot) {
    const preserve = Boolean(preservedShellRoot);
    const tempDir = preservedShellRoot
        ? (mkdirSync(preservedShellRoot, { recursive: true }), preservedShellRoot)
        : mkdtempSync(join(tmpdir(), "interf-test-judge-"));
    // The preserved shell needs a runtime/ dir up front: freezePreservedShell
    // writes its manifest to runtime/preserved-shell.json, and reasoning is teed
    // there too. (The ephemeral path doesn't need it.)
    if (preserve) {
        mkdirSync(join(tempDir, "runtime"), { recursive: true });
    }
    let executionError = null;
    let verdict = null;
    try {
        const verdictPath = join(tempDir, "verdict.json");
        const statusPath = join(tempDir, "judge.status.log");
        if (preserve) {
            // The durable shell path is deterministic and `mkdirSync(..., { recursive })`
            // keeps whatever an earlier judge run left there. Drop any leftover
            // verdict/status files so only this execution's output can be read below;
            // otherwise a stale verdict could be taken for the current one when the
            // agent writes nothing. `force` makes a missing file a no-op.
            rmSync(verdictPath, { force: true });
            rmSync(statusPath, { force: true });
        }
        const prompt = buildTestJudgePrompt(testCase, candidateLabel, candidateContent, verdictPath);
        // Preserve the rendered prompt and the candidate it judged so the preserved
        // shell holds everything needed to reproduce the verdict, not just the output.
        if (preserve) {
            writeFileSync(join(tempDir, "prompt.txt"), `${prompt}\n`);
            writeFileSync(join(tempDir, "candidate.txt"), `${candidateContent}\n`);
        }
        // Same canonical convention as the stage path: reasoning is teed into the
        // shell's runtime/ dir so it is preserved when the shell is frozen.
        const reasoningLogPath = preserve ? join(tempDir, "runtime", "agent-reasoning.jsonl") : null;
        try {
            await executor.execute(tempDir, prompt, {
                statusLogPath: statusPath,
                reasoningLogPath,
            });
        }
        catch (error) {
            executionError = error instanceof Error ? error.message : String(error);
        }
        // M12: first error wins. If execute() already failed, do not read verdict
        // files — a throw in verdict-reading would otherwise overwrite the real
        // execution failure and mask why the judge never ran. Only read the verdict
        // when execution itself reported no error.
        if (!executionError) {
            try {
                verdict = readTestJudgeVerdict(verdictPath);
                if (!verdict) {
                    verdict = readTestJudgeVerdictFromStatus(statusPath);
                }
            }
            catch (error) {
                executionError = error instanceof Error ? error.message : String(error);
            }
        }
    }
    finally {
        // Preserve a durable judge shell (freeze materializes symlinks + writes the
        // preserved-shell manifest, path unchanged); only remove an ephemeral one.
        if (preserve) {
            freezePreservedShell(tempDir, "test-judge");
        }
        else {
            rmSync(tempDir, { recursive: true, force: true });
        }
    }
    return { verdict, error: executionError };
}
|
|
197
|
-
|
|
250
|
+
/**
 * Build the durable judge-shell directory for one judged case.
 *
 * The directory sits under `<targetPath>/.interf/runtime/test-judge/` and is
 * named `<case>-<candidate>`, so several judged cases in one target get
 * separate directories. Each half is passed through `normalizeTestId`; an
 * empty result falls back to the literal `case` / `candidate`.
 *
 * @param targetPath Root of the target the judge shell belongs to.
 * @param testCase Test case whose `id` forms the first half of the name.
 * @param candidateLabel Candidate label forming the second half of the name.
 * @returns The joined directory path. Nothing is created on disk here.
 */
function judgeShellRoot(targetPath, testCase, candidateLabel) {
    const casePart = normalizeTestId(testCase.id) || "case";
    const candidatePart = normalizeTestId(candidateLabel) || "candidate";
    return join(targetPath, ".interf", "runtime", "test-judge", `${casePart}-${candidatePart}`);
}
|
|
260
|
+
function buildTestQueryPrompt(target, testCase, answerPath, tracePath, retryReason) {
|
|
198
261
|
const header = target.type === "context-graph"
|
|
199
262
|
? [
|
|
200
263
|
"You are running an Interf benchmark inside an isolated sandboxed context-graph copy.",
|
|
@@ -217,10 +280,13 @@ function buildTestQueryPrompt(target, testCase, answerPath, tracePath) {
|
|
|
217
280
|
...header,
|
|
218
281
|
"Emit only STATUS:, DONE:, BLOCKED:, or ERROR: lines.",
|
|
219
282
|
"Do not ask follow-up questions.",
|
|
283
|
+
"Create both required output files before printing DONE.",
|
|
284
|
+
"The output paths are absolute temp file paths outside the Context Graph sandbox; write them exactly as given.",
|
|
220
285
|
`Write the answer to ${JSON.stringify(answerPath)}.`,
|
|
221
286
|
`Write the trace to ${JSON.stringify(tracePath)} with keys: case_id, target, artifacts_consulted, source_paths_read, used_source_files, answer_summary.`,
|
|
222
287
|
`Set \`case_id\` to ${JSON.stringify(testCase.id)}.`,
|
|
223
288
|
`Set \`target\` to ${JSON.stringify(target.type)}.`,
|
|
289
|
+
...(retryReason ? [`Retry reason: ${retryReason}`] : []),
|
|
224
290
|
`Question: ${testCase.question}`,
|
|
225
291
|
].join("\n");
|
|
226
292
|
}
|
|
@@ -228,20 +294,26 @@ async function runLiveTestCase(target, testCase, executor) {
|
|
|
228
294
|
const tempDir = mkdtempSync(join(tmpdir(), "interf-test-live-"));
|
|
229
295
|
const answerPath = join(tempDir, "answer.md");
|
|
230
296
|
const tracePath = join(tempDir, "trace.json");
|
|
231
|
-
const statusPath = join(tempDir, "status.log");
|
|
232
|
-
const eventPath = join(tempDir, "events.ndjson");
|
|
233
|
-
const prompt = buildTestQueryPrompt(target, testCase, answerPath, tracePath);
|
|
234
297
|
let executionError = null;
|
|
235
298
|
let code = -1;
|
|
236
299
|
try {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
300
|
+
for (let attempt = 1; attempt <= 2; attempt += 1) {
|
|
301
|
+
const statusPath = join(tempDir, `status-${attempt}.log`);
|
|
302
|
+
const eventPath = join(tempDir, `events-${attempt}.ndjson`);
|
|
303
|
+
const prompt = buildTestQueryPrompt(target, testCase, answerPath, tracePath, attempt === 1
|
|
304
|
+
? undefined
|
|
305
|
+
: `Attempt ${attempt - 1} exited without writing ${!existsSync(answerPath) ? "the answer file" : "the trace file"}. Write both files before DONE.`);
|
|
306
|
+
try {
|
|
307
|
+
code = await executor.execute(target.path, prompt, {
|
|
308
|
+
eventLogPath: eventPath,
|
|
309
|
+
statusLogPath: statusPath,
|
|
310
|
+
});
|
|
311
|
+
}
|
|
312
|
+
catch (error) {
|
|
313
|
+
executionError = error instanceof Error ? error.message : String(error);
|
|
314
|
+
}
|
|
315
|
+
if (existsSync(answerPath) && existsSync(tracePath))
|
|
316
|
+
break;
|
|
245
317
|
}
|
|
246
318
|
const checks = [];
|
|
247
319
|
if (!existsSync(answerPath)) {
|
|
@@ -294,7 +366,8 @@ async function runLiveTestCase(target, testCase, executor) {
|
|
|
294
366
|
});
|
|
295
367
|
}
|
|
296
368
|
if (testCase.answer) {
|
|
297
|
-
const
|
|
369
|
+
const candidateLabel = `generated answer for ${testCase.id}`;
|
|
370
|
+
const judged = await runTargetTestsJudge(testCase, executor, candidateLabel, answer, judgeShellRoot(target.path, testCase, candidateLabel));
|
|
298
371
|
checks.push({
|
|
299
372
|
label: "judge verdict",
|
|
300
373
|
ok: judged.verdict?.pass === true,
|
|
@@ -324,7 +397,7 @@ async function runTestCaseWithJudge(target, testCase, executor) {
|
|
|
324
397
|
if (!testCase.file) {
|
|
325
398
|
return runLiveTestCase(target, testCase, executor);
|
|
326
399
|
}
|
|
327
|
-
const outputPath =
|
|
400
|
+
const outputPath = resolveTargetFilePath(target.path, testCase.file);
|
|
328
401
|
const checks = [];
|
|
329
402
|
if (!existsSync(outputPath)) {
|
|
330
403
|
checks.push({
|
|
@@ -353,7 +426,8 @@ async function runTestCaseWithJudge(target, testCase, executor) {
|
|
|
353
426
|
});
|
|
354
427
|
checks.push(...evaluated.checks);
|
|
355
428
|
if (testCase.answer) {
|
|
356
|
-
const
|
|
429
|
+
const candidateLabel = `Context Graph file ${outputPath}`;
|
|
430
|
+
const judged = await runTargetTestsJudge(testCase, executor, candidateLabel, content, judgeShellRoot(target.path, testCase, candidateLabel));
|
|
357
431
|
checks.push({
|
|
358
432
|
label: "judge verdict",
|
|
359
433
|
ok: judged.verdict?.pass === true,
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { TestTargetType } from "./verify-types.js";
|
|
2
2
|
export declare const TEST_SPEC_EXTENSIONS: Set<string>;
|
|
3
|
+
export { isSafeRelativeTestFile, assertSafeRelativeTestFile, } from "./test-file-guard.js";
|
|
3
4
|
export declare function testSpecRootPath(sourcePath: string): string;
|
|
4
5
|
export declare function testSpecTypePath(sourcePath: string, type: TestTargetType): string;
|
|
5
6
|
export declare function targetTestRunsPath(contextGraphPath: string, type: TestTargetType): string;
|
|
@@ -4,6 +4,10 @@ import { targetTestRunsRootForContextGraph, targetTestSandboxesRootForContextGra
|
|
|
4
4
|
import { asProjectDataDir, projectTestsSpecsRoot, } from "../../contracts/lib/project-paths.js";
|
|
5
5
|
const TEST_ID_PATTERN = /^[a-z0-9][a-z0-9-]{0,79}$/;
|
|
6
6
|
export const TEST_SPEC_EXTENSIONS = new Set([".json"]);
|
|
7
|
+
// The `testCase.file` safelist guard lives in a Node-free module so the schemas
|
|
8
|
+
// barrel can import it without pulling Node builtins into the renderer bundle.
|
|
9
|
+
// Re-exported here for server-side path callers.
|
|
10
|
+
export { isSafeRelativeTestFile, assertSafeRelativeTestFile, } from "./test-file-guard.js";
|
|
7
11
|
/**
 * Return the test-spec root for a source path.
 *
 * @param sourcePath Path handed to `asProjectDataDir`.
 * @returns The result of `projectTestsSpecsRoot` for that data dir.
 */
export function testSpecRootPath(sourcePath) {
    const dataDir = asProjectDataDir(sourcePath);
    return projectTestsSpecsRoot(dataDir);
}
|