@interf/compiler 0.33.0 → 0.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. package/README.md +122 -226
  2. package/dist/cli/commands/agents.js +1 -32
  3. package/dist/cli/commands/benchmark.d.ts +2 -3
  4. package/dist/cli/commands/benchmark.js +1 -31
  5. package/dist/cli/commands/build-plan.js +26 -50
  6. package/dist/cli/commands/build.d.ts +2 -3
  7. package/dist/cli/commands/build.js +1 -31
  8. package/dist/cli/commands/graphs.js +177 -32
  9. package/dist/cli/commands/mcp.d.ts +1 -0
  10. package/dist/cli/commands/mcp.js +223 -126
  11. package/dist/cli/commands/project.js +10 -36
  12. package/dist/cli/commands/reset.d.ts +2 -3
  13. package/dist/cli/commands/reset.js +1 -22
  14. package/dist/cli/commands/runs.js +86 -33
  15. package/dist/cli/commands/status.js +3 -24
  16. package/dist/cli/commands/traces.js +1 -29
  17. package/dist/cli/commands/wizard.js +17 -29
  18. package/dist/cli/lib/http-client.d.ts +39 -0
  19. package/dist/cli/lib/http-client.js +73 -0
  20. package/dist/packages/build-plans/authoring/brief.d.ts +25 -4
  21. package/dist/packages/build-plans/authoring/build-plan-authoring.d.ts +42 -1
  22. package/dist/packages/build-plans/authoring/build-plan-authoring.js +470 -63
  23. package/dist/packages/build-plans/authoring/build-plan-edit-session.d.ts +9 -0
  24. package/dist/packages/build-plans/authoring/build-plan-edit-session.js +27 -10
  25. package/dist/packages/build-plans/authoring/build-plan-improvement.js +62 -8
  26. package/dist/packages/build-plans/authoring/lib/build-plan-edit-utils.d.ts +1 -0
  27. package/dist/packages/build-plans/package/build-plan-definitions.d.ts +0 -1
  28. package/dist/packages/build-plans/package/build-plan-definitions.js +5 -3
  29. package/dist/packages/build-plans/package/build-plan-stage-runner.d.ts +1 -0
  30. package/dist/packages/build-plans/package/build-plan-stage-runner.js +2 -1
  31. package/dist/packages/build-plans/package/builtin-build-plan.d.ts +2 -2
  32. package/dist/packages/build-plans/package/builtin-build-plan.js +3 -3
  33. package/dist/packages/build-plans/package/context-interface.d.ts +3 -0
  34. package/dist/packages/build-plans/package/context-interface.js +5 -5
  35. package/dist/packages/build-plans/package/interf-build-plan-package.js +22 -22
  36. package/dist/packages/build-plans/package/local-build-plans.d.ts +10 -5
  37. package/dist/packages/build-plans/package/local-build-plans.js +57 -32
  38. package/dist/packages/contracts/index.d.ts +4 -3
  39. package/dist/packages/contracts/index.js +2 -1
  40. package/dist/packages/contracts/lib/context-graph-layer.d.ts +161 -0
  41. package/dist/packages/contracts/lib/context-graph-layer.js +216 -0
  42. package/dist/packages/contracts/lib/project-paths.d.ts +7 -0
  43. package/dist/packages/contracts/lib/project-paths.js +9 -0
  44. package/dist/packages/contracts/lib/project-schema.d.ts +264 -1
  45. package/dist/packages/contracts/lib/project-schema.js +38 -13
  46. package/dist/packages/contracts/lib/schema.d.ts +556 -23
  47. package/dist/packages/contracts/lib/schema.js +279 -18
  48. package/dist/packages/contracts/utils/filesystem.d.ts +1 -0
  49. package/dist/packages/contracts/utils/filesystem.js +29 -1
  50. package/dist/packages/projects/lib/schema.d.ts +6 -8
  51. package/dist/packages/projects/lib/schema.js +3 -1
  52. package/dist/packages/projects/source-config.d.ts +0 -5
  53. package/dist/packages/projects/source-config.js +9 -22
  54. package/dist/packages/runtime/actions/fields.d.ts +4 -0
  55. package/dist/packages/runtime/actions/form-builders.js +79 -31
  56. package/dist/packages/runtime/actions/form-validators.js +9 -3
  57. package/dist/packages/runtime/actions/helpers.js +3 -3
  58. package/dist/packages/runtime/actions/registry.d.ts +1 -1
  59. package/dist/packages/runtime/actions/registry.js +1 -1
  60. package/dist/packages/runtime/actions/requests.d.ts +1 -1
  61. package/dist/packages/runtime/actions/requests.js +12 -6
  62. package/dist/packages/runtime/actions/schemas.d.ts +7 -0
  63. package/dist/packages/runtime/actions/schemas.js +1 -0
  64. package/dist/packages/runtime/agent-handoff.js +8 -7
  65. package/dist/packages/runtime/agents/lib/execution-profile.d.ts +14 -0
  66. package/dist/packages/runtime/agents/lib/execution-profile.js +23 -0
  67. package/dist/packages/runtime/agents/lib/execution.js +14 -8
  68. package/dist/packages/runtime/agents/lib/executors.d.ts +1 -0
  69. package/dist/packages/runtime/agents/lib/executors.js +11 -2
  70. package/dist/packages/runtime/agents/lib/logs.d.ts +10 -0
  71. package/dist/packages/runtime/agents/lib/logs.js +32 -8
  72. package/dist/packages/runtime/agents/lib/preflight.js +4 -1
  73. package/dist/packages/runtime/agents/lib/render.d.ts +18 -0
  74. package/dist/packages/runtime/agents/lib/render.js +44 -18
  75. package/dist/packages/runtime/agents/lib/shell-templates.js +105 -63
  76. package/dist/packages/runtime/agents/lib/shells.d.ts +29 -0
  77. package/dist/packages/runtime/agents/lib/shells.js +158 -32
  78. package/dist/packages/runtime/agents/lib/source-context-scan.d.ts +10 -0
  79. package/dist/packages/runtime/agents/lib/source-context-scan.js +388 -0
  80. package/dist/packages/runtime/agents/lib/status.js +1 -14
  81. package/dist/packages/runtime/agents/lib/string-utils.d.ts +16 -0
  82. package/dist/packages/runtime/agents/lib/string-utils.js +36 -0
  83. package/dist/packages/runtime/agents/lib/types.d.ts +1 -0
  84. package/dist/packages/runtime/agents/providers/codex.js +2 -0
  85. package/dist/packages/runtime/agents/role-executors.js +2 -1
  86. package/dist/packages/runtime/auth/session-store.js +11 -3
  87. package/dist/packages/runtime/benchmark-question-draft.d.ts +3 -0
  88. package/dist/packages/runtime/benchmark-question-draft.js +57 -28
  89. package/dist/packages/runtime/build/artifact-status.d.ts +1 -1
  90. package/dist/packages/runtime/build/artifact-status.js +1 -1
  91. package/dist/packages/runtime/build/build-evidence.d.ts +2 -1
  92. package/dist/packages/runtime/build/build-evidence.js +11 -5
  93. package/dist/packages/runtime/build/build-pipeline.js +89 -5
  94. package/dist/packages/runtime/build/build-stage-plan.js +3 -1
  95. package/dist/packages/runtime/build/build-stage-runner.js +169 -32
  96. package/dist/packages/runtime/build/build-target.d.ts +3 -0
  97. package/dist/packages/runtime/build/build-target.js +25 -1
  98. package/dist/packages/runtime/build/check-evaluator.d.ts +1 -1
  99. package/dist/packages/runtime/build/check-evaluator.js +655 -4
  100. package/dist/packages/runtime/build/context-graph-paths.d.ts +13 -0
  101. package/dist/packages/runtime/build/context-graph-paths.js +27 -0
  102. package/dist/packages/runtime/build/index.d.ts +2 -2
  103. package/dist/packages/runtime/build/index.js +2 -2
  104. package/dist/packages/runtime/build/inspect-map.d.ts +10 -0
  105. package/dist/packages/runtime/build/inspect-map.js +270 -0
  106. package/dist/packages/runtime/build/lib/schema.d.ts +246 -53
  107. package/dist/packages/runtime/build/lib/schema.js +173 -15
  108. package/dist/packages/runtime/build/native-entrypoint.d.ts +2 -0
  109. package/dist/packages/runtime/build/native-entrypoint.js +286 -0
  110. package/dist/packages/runtime/build/runtime-contracts.js +9 -3
  111. package/dist/packages/runtime/build/runtime-log-paths.d.ts +3 -0
  112. package/dist/packages/runtime/build/runtime-log-paths.js +16 -0
  113. package/dist/packages/runtime/build/runtime-prompt.js +6 -4
  114. package/dist/packages/runtime/build/runtime-runs.js +63 -10
  115. package/dist/packages/runtime/build/runtime-types.d.ts +4 -1
  116. package/dist/packages/runtime/build/runtime.d.ts +3 -1
  117. package/dist/packages/runtime/build/runtime.js +3 -1
  118. package/dist/packages/runtime/build/source-files.js +11 -2
  119. package/dist/packages/runtime/build/source-inventory.d.ts +1 -0
  120. package/dist/packages/runtime/build/source-inventory.js +246 -7
  121. package/dist/packages/runtime/build/source-manifest.d.ts +11 -0
  122. package/dist/packages/runtime/build/source-manifest.js +30 -2
  123. package/dist/packages/runtime/build/stage-evidence.js +80 -11
  124. package/dist/packages/runtime/build/stage-manifest.d.ts +45 -0
  125. package/dist/packages/runtime/build/stage-manifest.js +1125 -0
  126. package/dist/packages/runtime/build/stage-reuse.js +12 -0
  127. package/dist/packages/runtime/build/stage-session.d.ts +81 -0
  128. package/dist/packages/runtime/build/stage-session.js +308 -0
  129. package/dist/packages/runtime/build/state-io.js +10 -11
  130. package/dist/packages/runtime/build/state-view.js +1 -1
  131. package/dist/packages/runtime/build/state.d.ts +1 -1
  132. package/dist/packages/runtime/build/state.js +1 -1
  133. package/dist/packages/runtime/build/summary-coverage-index.d.ts +21 -0
  134. package/dist/packages/runtime/build/summary-coverage-index.js +189 -0
  135. package/dist/packages/runtime/build/traces.js +3 -3
  136. package/dist/packages/runtime/build/validate-context-graph.d.ts +1 -1
  137. package/dist/packages/runtime/build/validate-context-graph.js +5 -5
  138. package/dist/packages/runtime/build/validate.d.ts +1 -1
  139. package/dist/packages/runtime/build/validate.js +1 -1
  140. package/dist/packages/runtime/client.d.ts +3 -3
  141. package/dist/packages/runtime/client.js +8 -13
  142. package/dist/packages/runtime/context-checks.js +13 -0
  143. package/dist/packages/runtime/context-graph-scaffold.js +2 -1
  144. package/dist/packages/runtime/context-graph-semantic-graph.d.ts +9 -0
  145. package/dist/packages/runtime/context-graph-semantic-graph.js +416 -0
  146. package/dist/packages/runtime/execution/lib/schema.d.ts +34 -31
  147. package/dist/packages/runtime/index.d.ts +2 -2
  148. package/dist/packages/runtime/index.js +1 -1
  149. package/dist/packages/runtime/native-run-handlers.d.ts +38 -0
  150. package/dist/packages/runtime/native-run-handlers.js +52 -33
  151. package/dist/packages/runtime/plan-artifact-contract.js +1 -1
  152. package/dist/packages/runtime/project-source-state.d.ts +4 -4
  153. package/dist/packages/runtime/project-source-state.js +5 -2
  154. package/dist/packages/runtime/project-store.d.ts +5 -0
  155. package/dist/packages/runtime/project-store.js +30 -3
  156. package/dist/packages/runtime/requested-artifacts.js +1 -1
  157. package/dist/packages/runtime/run-observability.js +9 -4
  158. package/dist/packages/runtime/runtime-action-proposals.js +3 -3
  159. package/dist/packages/runtime/runtime-build-plans.js +47 -3
  160. package/dist/packages/runtime/runtime-build-runs.js +9 -16
  161. package/dist/packages/runtime/runtime-caches.d.ts +26 -0
  162. package/dist/packages/runtime/runtime-caches.js +47 -0
  163. package/dist/packages/runtime/runtime-jobs.js +6 -6
  164. package/dist/packages/runtime/runtime-project-mutations.js +1 -0
  165. package/dist/packages/runtime/runtime-project-reads.d.ts +4 -1
  166. package/dist/packages/runtime/runtime-project-reads.js +229 -36
  167. package/dist/packages/runtime/runtime-proposal-helpers.js +6 -6
  168. package/dist/packages/runtime/runtime-resource-builders.d.ts +4 -2
  169. package/dist/packages/runtime/runtime-resource-builders.js +16 -14
  170. package/dist/packages/runtime/runtime-status.d.ts +14 -0
  171. package/dist/packages/runtime/runtime-status.js +15 -0
  172. package/dist/packages/runtime/runtime-verify-runs.js +6 -5
  173. package/dist/packages/runtime/runtime.d.ts +439 -22
  174. package/dist/packages/runtime/runtime.js +16 -2
  175. package/dist/packages/runtime/schemas/actions.d.ts +24 -0
  176. package/dist/packages/runtime/schemas/agents.d.ts +28 -0
  177. package/dist/packages/runtime/schemas/agents.js +33 -0
  178. package/dist/packages/runtime/schemas/build-plans.d.ts +181 -8
  179. package/dist/packages/runtime/schemas/build-plans.js +36 -2
  180. package/dist/packages/runtime/schemas/context-graphs.d.ts +1522 -0
  181. package/dist/packages/runtime/schemas/context-graphs.js +110 -0
  182. package/dist/packages/runtime/schemas/files.d.ts +7 -347
  183. package/dist/packages/runtime/schemas/files.js +1 -24
  184. package/dist/packages/runtime/schemas/index.d.ts +1 -0
  185. package/dist/packages/runtime/schemas/index.js +1 -0
  186. package/dist/packages/runtime/schemas/jobs.js +4 -0
  187. package/dist/packages/runtime/schemas/projects.d.ts +48 -21
  188. package/dist/packages/runtime/schemas/projects.js +34 -10
  189. package/dist/packages/runtime/schemas/runs.d.ts +1009 -240
  190. package/dist/packages/runtime/schemas/runs.js +17 -0
  191. package/dist/packages/runtime/service/openapi.js +1 -0
  192. package/dist/packages/runtime/service/operations.d.ts +1666 -145
  193. package/dist/packages/runtime/service/operations.js +147 -17
  194. package/dist/packages/runtime/service/routes.d.ts +11 -3
  195. package/dist/packages/runtime/service/routes.js +11 -3
  196. package/dist/packages/runtime/service/server-app-boot.js +2 -2
  197. package/dist/packages/runtime/service/server-helpers.d.ts +11 -0
  198. package/dist/packages/runtime/service/server-helpers.js +19 -0
  199. package/dist/packages/runtime/service/server-routes-action-proposals.js +4 -2
  200. package/dist/packages/runtime/service/server-routes-agents.js +19 -85
  201. package/dist/packages/runtime/service/server-routes-build-plans.js +14 -11
  202. package/dist/packages/runtime/service/server-routes-project-context.js +102 -7
  203. package/dist/packages/runtime/service/server-routes-project-jobs.js +19 -12
  204. package/dist/packages/runtime/service/server-routes-project-runs.js +5 -2
  205. package/dist/packages/runtime/service/server-routes-projects.js +6 -2
  206. package/dist/packages/runtime/service/server-routes-runs.js +11 -4
  207. package/dist/packages/runtime/verify/lib/schema.js +12 -0
  208. package/dist/packages/runtime/verify/test-file-guard.d.ts +2 -0
  209. package/dist/packages/runtime/verify/test-file-guard.js +29 -0
  210. package/dist/packages/runtime/verify/verify-execution.d.ts +7 -0
  211. package/dist/packages/runtime/verify/verify-execution.js +109 -35
  212. package/dist/packages/runtime/verify/verify-paths.d.ts +1 -0
  213. package/dist/packages/runtime/verify/verify-paths.js +4 -0
  214. package/dist/packages/runtime/verify/verify-specs.js +49 -39
  215. package/dist/packages/runtime/wire-schemas.d.ts +1 -1
  216. package/dist/packages/runtime/wire-schemas.js +1 -1
  217. package/package.json +2 -8
  218. package/public-repo/CONTRIBUTING.md +10 -3
  219. package/public-repo/README.md +122 -226
  220. package/public-repo/build-plans/interf-default/README.md +15 -12
  221. package/public-repo/build-plans/interf-default/build/stages/entrypoint/SKILL.md +74 -0
  222. package/public-repo/build-plans/interf-default/build/stages/knowledge/SKILL.md +95 -0
  223. package/public-repo/build-plans/interf-default/build/stages/summarize/SKILL.md +38 -5
  224. package/public-repo/build-plans/interf-default/build-plan.json +27 -23
  225. package/public-repo/build-plans/interf-default/build-plan.schema.json +24 -20
  226. package/public-repo/build-plans/interf-default/use/query/SKILL.md +8 -7
  227. package/public-repo/openapi/local-service.openapi.json +11637 -4213
  228. package/public-repo/skills/interf/SKILL.md +174 -134
  229. package/dist/packages/runtime/build/runtime-paths.d.ts +0 -8
  230. package/dist/packages/runtime/build/runtime-paths.js +0 -26
  231. package/dist/packages/runtime/build/state-paths.d.ts +0 -7
  232. package/dist/packages/runtime/build/state-paths.js +0 -22
  233. package/public-repo/build-plans/interf-default/build/stages/shape/SKILL.md +0 -34
  234. package/public-repo/build-plans/interf-default/build/stages/structure/SKILL.md +0 -28
@@ -80,7 +80,7 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
80
80
  if (method === "GET") {
81
81
  const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
82
82
  if (!context?.build_evidence) {
83
- sendError(res, 404, "Build evidence not found.");
83
+ sendError(res, 404, "Graph diagnostics not found.");
84
84
  }
85
85
  else {
86
86
  sendJson(res, 200, context.build_evidence);
@@ -88,6 +88,46 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
88
88
  return true;
89
89
  }
90
90
  }
91
+ if (subPath === PROJECT_SUBRESOURCES.contextGraphSessions) {
92
+ if (method === "GET") {
93
+ const sessions = runtime.listLatestContextGraphStageSessions(storedProject.projectDataDir, storedProject.id);
94
+ sendJson(res, 200, { sessions });
95
+ return true;
96
+ }
97
+ }
98
+ if (subPath.startsWith(`${PROJECT_SUBRESOURCES.contextGraphSessions}/`)) {
99
+ if (method === "GET") {
100
+ const rawStageRunId = subPath.slice(PROJECT_SUBRESOURCES.contextGraphSessions.length + 1);
101
+ let stageRunId;
102
+ try {
103
+ stageRunId = decodeURIComponent(rawStageRunId);
104
+ }
105
+ catch {
106
+ sendError(res, 400, "Stage execution session id is not valid URI-encoded UTF-8.");
107
+ return true;
108
+ }
109
+ const session = runtime
110
+ .listLatestContextGraphStageSessions(storedProject.projectDataDir, storedProject.id)
111
+ .find((entry) => entry.stage_run_id === stageRunId) ?? null;
112
+ if (!session)
113
+ sendError(res, 404, `Stage execution session not found: ${stageRunId}`);
114
+ else
115
+ sendJson(res, 200, { session });
116
+ return true;
117
+ }
118
+ }
119
+ if (subPath === PROJECT_SUBRESOURCES.contextGraphEntrypoint) {
120
+ if (method === "GET") {
121
+ const handoff = runtime.getContextGraphHandoff(storedProject.projectDataDir, storedProject.id);
122
+ if (!handoff) {
123
+ sendError(res, 404, "Context Graph handoff not found.");
124
+ }
125
+ else {
126
+ sendJson(res, 200, handoff);
127
+ }
128
+ return true;
129
+ }
130
+ }
91
131
  // GET /v1/projects/<id>/context-graph/traces — first-class traces surface.
92
132
  // Returns a derived Traces wire shape rolled up from the current
93
133
  // Context Graph's artifacts + Checks + source file index.
@@ -117,20 +157,75 @@ export async function tryHandleProjectContext(req, res, runtime, ctx, method) {
117
157
  return true;
118
158
  }
119
159
  }
160
+ if (subPath === PROJECT_SUBRESOURCES.contextGraphStages) {
161
+ if (method === "GET") {
162
+ const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
163
+ if (!context) {
164
+ sendError(res, 404, "Context Graph not found.");
165
+ }
166
+ else {
167
+ sendJson(res, 200, {
168
+ stages: context.stage_summaries,
169
+ readiness: context.readiness_rollup,
170
+ primary_metrics: context.primary_metrics,
171
+ });
172
+ }
173
+ return true;
174
+ }
175
+ }
176
+ if (subPath === PROJECT_SUBRESOURCES.contextGraphResources) {
177
+ if (method === "GET") {
178
+ const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
179
+ if (!context) {
180
+ sendError(res, 404, "Context Graph not found.");
181
+ }
182
+ else {
183
+ sendJson(res, 200, {
184
+ resources: context.resources,
185
+ entrypoints: context.entrypoints,
186
+ graph_outputs: context.graph_manifest?.graph_outputs ?? null,
187
+ });
188
+ }
189
+ return true;
190
+ }
191
+ }
192
+ if (subPath.startsWith(`${PROJECT_SUBRESOURCES.contextGraphResources}/`)) {
193
+ if (method === "GET") {
194
+ const rawResourceId = subPath.slice(PROJECT_SUBRESOURCES.contextGraphResources.length + 1);
195
+ let resourceId;
196
+ try {
197
+ resourceId = decodeURIComponent(rawResourceId);
198
+ }
199
+ catch {
200
+ sendError(res, 400, "Resource id is not valid URI-encoded UTF-8.");
201
+ return true;
202
+ }
203
+ const context = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
204
+ const resource = context?.resources.find((entry) => entry.id === resourceId) ?? null;
205
+ if (!resource) {
206
+ sendError(res, 404, `Context Graph resource not found: ${resourceId}`);
207
+ }
208
+ else {
209
+ const stage = resource.stage_id
210
+ ? context?.stage_summaries.find((entry) => entry.stage_id === resource.stage_id) ?? null
211
+ : null;
212
+ sendJson(res, 200, {
213
+ resource,
214
+ stage,
215
+ linked_resources: context?.resources.filter((entry) => resource.links.includes(entry.path ?? entry.id)) ?? [],
216
+ });
217
+ }
218
+ return true;
219
+ }
220
+ }
120
221
  // GET /v1/projects/<id>/source-state — manifest-backed drift verdict for
121
222
  // the Project's Source binding.
122
223
  if (subPath === PROJECT_SUBRESOURCES.sourceState) {
123
224
  if (method === "GET") {
124
- // The per-run SourceState record carries the historical file index;
125
- // for v1 we treat the latest Build id as the graph pointer and
126
- // leave the hash comparison to a follow-up enhancement. For now the
127
- // verdict is `unknown` until a Build has produced a Context Graph.
128
225
  const latestContext = runtime.getLatestContextGraph(storedProject.projectDataDir, storedProject.id);
129
226
  const state = computeProjectSourceState({
130
227
  projectId: storedProject.id,
131
- sourceFolderPath: storedProject.source.locator,
132
228
  contextGraphPath: latestContext?.path ?? storedProject.contextGraphPath,
133
- lastGraphIndexHash: null,
134
229
  });
135
230
  sendJson(res, 200, state);
136
231
  return true;
@@ -2,6 +2,19 @@ import { PROJECT_SUBRESOURCES } from "./routes.js";
2
2
  import { readJsonBody, sendError, sendJson, } from "./server-helpers.js";
3
3
  import { writeBenchmarkSpecsForProject } from "../../projects/source-config.js";
4
4
  import { loadTestSpec } from "../verify/verify-specs.js";
5
+ import { ProjectBenchmarkQuestionDraftCreateRequestSchema, ProjectBuildPlanAuthoringCreateRequestSchema, ProjectResetRequestSchema, } from "../schemas/index.js";
6
+ function projectScopedJobRequest(storedProject, body) {
7
+ const scopedRequest = ProjectBuildPlanAuthoringCreateRequestSchema.parse(body ?? {});
8
+ const explicitIntent = typeof scopedRequest.intent === "string" && scopedRequest.intent.trim().length > 0
9
+ ? scopedRequest.intent
10
+ : null;
11
+ return {
12
+ ...scopedRequest,
13
+ project: storedProject.id,
14
+ source_folder_path: storedProject.source.locator,
15
+ intent: explicitIntent ?? storedProject.intent,
16
+ };
17
+ }
5
18
  /**
6
19
  * Async/structured mutations on a project: Build Plan authoring
7
20
  * and improvement jobs, benchmark-question drafts, Build Plan change deltas,
@@ -14,11 +27,7 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
14
27
  if (method === "POST") {
15
28
  try {
16
29
  const body = (await readJsonBody(req));
17
- const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, {
18
- ...(body ?? {}),
19
- project: storedProject.id,
20
- source_folder_path: storedProject.source.locator,
21
- });
30
+ const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, projectScopedJobRequest(storedProject, body));
22
31
  sendJson(res, 202, job);
23
32
  }
24
33
  catch (error) {
@@ -31,11 +40,7 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
31
40
  if (method === "POST") {
32
41
  try {
33
42
  const body = (await readJsonBody(req));
34
- const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, {
35
- ...(body ?? {}),
36
- project: storedProject.id,
37
- source_folder_path: storedProject.source.locator,
38
- }, "build-plan-improvement");
43
+ const job = await runtime.createBuildPlanAuthoringRun(storedProject.projectDataDir, projectScopedJobRequest(storedProject, body), "build-plan-improvement");
39
44
  sendJson(res, 202, job);
40
45
  }
41
46
  catch (error) {
@@ -48,8 +53,9 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
48
53
  if (method === "POST") {
49
54
  try {
50
55
  const body = (await readJsonBody(req));
56
+ const scopedRequest = ProjectBenchmarkQuestionDraftCreateRequestSchema.parse(body ?? {});
51
57
  const job = await runtime.createBenchmarkQuestionDraftRun(storedProject.projectDataDir, {
52
- ...(body ?? {}),
58
+ ...scopedRequest,
53
59
  project: storedProject.id,
54
60
  source_folder_path: storedProject.source.locator,
55
61
  });
@@ -116,7 +122,8 @@ export async function tryHandleProjectJobs(req, res, runtime, ctx, method) {
116
122
  if (method === "POST") {
117
123
  try {
118
124
  const body = (await readJsonBody(req));
119
- const request = { project: storedProject.id, scope: "build", ...(body ?? {}) };
125
+ const scopedRequest = ProjectResetRequestSchema.parse(body ?? {});
126
+ const request = { ...scopedRequest, project: storedProject.id };
120
127
  const result = runtime.applyReset(storedProject.projectDataDir, request);
121
128
  sendJson(res, 200, result);
122
129
  }
@@ -1,6 +1,7 @@
1
1
  import { PROJECT_SUBRESOURCES } from "./routes.js";
2
2
  import { readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
3
3
  import { assertCanRunBenchmark } from "../entitlement-guard.js";
4
+ import { ProjectBuildRunCreateRequestSchema, ProjectVerifyRunCreateRequestSchema, } from "../schemas/index.js";
4
5
  /**
5
6
  * Build/verify run mutations and the per-Project runs listing.
6
7
  * Run-id lookups for an unknown project are handled by the
@@ -16,7 +17,8 @@ export async function tryHandleProjectRuns(req, res, runtime, ctx, method) {
16
17
  }
17
18
  try {
18
19
  const body = (await readJsonBody(req));
19
- const request = { project: storedProject.id, ...(body ?? {}) };
20
+ const scopedRequest = ProjectBuildRunCreateRequestSchema.parse(body ?? {});
21
+ const request = { ...scopedRequest, project: storedProject.id };
20
22
  const idempotencyKeyRaw = req.headers["x-interf-idempotency-key"];
21
23
  const idempotencyKey = Array.isArray(idempotencyKeyRaw)
22
24
  ? idempotencyKeyRaw[0]
@@ -64,7 +66,8 @@ export async function tryHandleProjectRuns(req, res, runtime, ctx, method) {
64
66
  .filter((run) => !run.parent_run_id).length;
65
67
  assertCanRunBenchmark(runtime.currentAccount, existingCount);
66
68
  const body = (await readJsonBody(req));
67
- const request = { project: storedProject.id, ...(body ?? {}) };
69
+ const scopedRequest = ProjectVerifyRunCreateRequestSchema.parse(body ?? {});
70
+ const request = { ...scopedRequest, project: storedProject.id };
68
71
  const resource = await runtime.createVerifyRun(storedProject.projectDataDir, request);
69
72
  sendJson(res, 201, resource);
70
73
  }
@@ -1,6 +1,6 @@
1
1
  import { ProjectCreateRequestSchema, ProjectUpdateRequestSchema, } from "../schemas/index.js";
2
2
  import { LOCAL_SERVICE_ROUTES } from "./routes.js";
3
- import { readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
3
+ import { decodeOr400, readJsonBody, sendError, sendErrorResponse, sendJson, } from "./server-helpers.js";
4
4
  import { createStoredProject, deleteStoredProject, getStoredProject, listStoredProjects, projectWireShape, updateStoredProject, } from "../project-store.js";
5
5
  import { tryHandleProjectRuns, } from "./server-routes-project-runs.js";
6
6
  import { tryHandleProjectJobs } from "./server-routes-project-jobs.js";
@@ -19,6 +19,7 @@ export async function tryHandleProjectCollection(req, res, runtime, path, method
19
19
  const stored = createStoredProject(runtime, {
20
20
  id: body.id,
21
21
  source: { kind: "local-folder", locator: body.source.locator },
22
+ intent: body.intent,
22
23
  build_plan_id: body.build_plan_id,
23
24
  });
24
25
  sendJson(res, 201, projectWireShape(stored));
@@ -41,7 +42,9 @@ export async function tryHandlePerProject(req, res, runtime, path, method) {
41
42
  const slashIndex = tail.indexOf("/");
42
43
  const projectId = slashIndex === -1 ? tail : tail.slice(0, slashIndex);
43
44
  const subPath = slashIndex === -1 ? "" : tail.slice(slashIndex + 1);
44
- const decodedProjectId = decodeURIComponent(projectId);
45
+ const decodedProjectId = decodeOr400(res, projectId, "Project id");
46
+ if (decodedProjectId === null)
47
+ return true;
45
48
  const storedProject = getStoredProject(decodedProjectId);
46
49
  if (!storedProject) {
47
50
  sendError(res, 404, `Project not found: ${decodedProjectId}`);
@@ -63,6 +66,7 @@ export async function tryHandlePerProject(req, res, runtime, path, method) {
63
66
  try {
64
67
  const body = ProjectUpdateRequestSchema.parse(await readJsonBody(req));
65
68
  const updated = updateStoredProject(decodedProjectId, {
69
+ intent: body.intent,
66
70
  build_plan_id: body.build_plan_id,
67
71
  });
68
72
  sendJson(res, 200, projectWireShape(updated));
@@ -1,5 +1,5 @@
1
1
  import { LOCAL_SERVICE_ROUTES, RUN_SUBRESOURCES } from "./routes.js";
2
- import { parseRequestUrl, sendError, sendJson } from "./server-helpers.js";
2
+ import { decodeOr400, parseRequestUrl, sendError, sendJson } from "./server-helpers.js";
3
3
  import { isTraversalRelativePath, safeApiFilePath, sendApiFile, } from "./server-api-files.js";
4
4
  import { findInstanceBuildRun, findInstanceJob, findInstanceRun, findInstanceVerifyRun, listInstanceRuns, } from "./server-instance-helpers.js";
5
5
  import { runObservabilityToStatus } from "../run-observability.js";
@@ -10,7 +10,9 @@ export function tryHandleRuns(req, res, runtime, path, method) {
10
10
  }
11
11
  const observableRunMatch = path.match(/^\/v1\/runs\/([^/]+)(?:\/([^/]+))?$/);
12
12
  if (observableRunMatch?.[1]) {
13
- const runId = decodeURIComponent(observableRunMatch[1]);
13
+ const runId = decodeOr400(res, observableRunMatch[1], "Run id");
14
+ if (runId === null)
15
+ return true;
14
16
  const child = observableRunMatch[2];
15
17
  if (method === "GET" && !child) {
16
18
  const run = findInstanceRun(runtime, runId);
@@ -80,7 +82,10 @@ export function tryHandleRuns(req, res, runtime, path, method) {
80
82
  }
81
83
  const verifyRunMatch = path.match(/^\/v1\/verify-runs\/([^/]+)$/);
82
84
  if (method === "GET" && verifyRunMatch?.[1]) {
83
- const found = findInstanceVerifyRun(runtime, decodeURIComponent(verifyRunMatch[1]));
85
+ const verifyRunId = decodeOr400(res, verifyRunMatch[1], "Verify run id");
86
+ if (verifyRunId === null)
87
+ return true;
88
+ const found = findInstanceVerifyRun(runtime, verifyRunId);
84
89
  if (!found)
85
90
  sendError(res, 404, "Verify run not found.");
86
91
  else
@@ -89,7 +94,9 @@ export function tryHandleRuns(req, res, runtime, path, method) {
89
94
  }
90
95
  const jobMatch = path.match(/^\/v1\/jobs\/([^/]+)(?:\/([^/]+))?$/);
91
96
  if (jobMatch?.[1]) {
92
- const runId = decodeURIComponent(jobMatch[1]);
97
+ const runId = decodeOr400(res, jobMatch[1], "Job run id");
98
+ if (runId === null)
99
+ return true;
93
100
  const child = jobMatch[2];
94
101
  const found = findInstanceJob(runtime, runId);
95
102
  if (method === "GET" && !child) {
@@ -1,5 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { RuntimeExecutorInfoSchema, TestCaseExpectSchema, TestTargetTypeSchema, ProjectIdSchema, } from "../../../contracts/lib/schema.js";
3
+ import { isSafeRelativeTestFile } from "../test-file-guard.js";
3
4
  const TestCaseCoreSchema = z.object({
4
5
  id: z.string().regex(/^[a-z0-9][a-z0-9-]{0,79}$/),
5
6
  question: z.string().min(1),
@@ -14,6 +15,17 @@ const TestCaseCoreSchema = z.object({
14
15
  message: "Test cases need at least one of file, answer, or expect.",
15
16
  });
16
17
  }
18
+ // H2: a `file` target is a project-relative output path that is later joined
19
+ // onto the (sandboxed) target directory. Reject absolute or `..`-escaping
20
+ // values at parse time so they can never reach `path.join`. Centralizing the
21
+ // guard here means H1 (verify-execution join) and H3 (spec writer) inherit it.
22
+ if (value.file !== undefined && !isSafeRelativeTestFile(value.file)) {
23
+ ctx.addIssue({
24
+ code: z.ZodIssueCode.custom,
25
+ path: ["file"],
26
+ message: "Test case file must be a relative path without '..' or a leading '/'.",
27
+ });
28
+ }
17
29
  });
18
30
  export const TestCaseSchema = TestCaseCoreSchema;
19
31
  const TestSpecCoreSchema = z.object({
@@ -0,0 +1,2 @@
1
+ export declare function isSafeRelativeTestFile(file: string): boolean;
2
+ export declare function assertSafeRelativeTestFile(file: string): void;
@@ -0,0 +1,29 @@
1
+ // Pure, Node-free guard for a `testCase.file` value. A test file is a
2
+ // project-relative output path inside the (sandboxed) target directory, so it
3
+ // must never be absolute, never escape via `..`, and stay within a conservative
4
+ // character set. This is the single guard both the schema (H2) and the spec
5
+ // writer (H3) consume so a malicious `file` cannot reach `path.join` and a later
6
+ // `existsSync`/`readFileSync`.
7
+ //
8
+ // This module imports no Node builtins so it is safe to pull into the schemas
9
+ // barrel (`@interf/compiler/schemas`) and bundle into the renderer. The
10
+ // filesystem-aware path helpers live in `verify-paths.ts`, which re-exports
11
+ // these guards for server-side callers.
12
/**
 * Conservative safelist for a `testCase.file` value: ASCII letters, digits,
 * `.`, `_`, `-`, and `/` as the only separator. Anything else (spaces,
 * backslashes, colons, control characters, ...) is rejected.
 */
const TEST_FILE_PATTERN = /^[A-Za-z0-9._/-]+$/;
/**
 * Report whether `file` is acceptable as a project-relative test-case file.
 *
 * A value is accepted only when it is a non-empty string, does not start with
 * `/`, consists solely of safelisted characters, and has no `..` path segment.
 *
 * @param file - Candidate `testCase.file` value (possibly untrusted).
 * @returns `true` when the value passes every check, otherwise `false`.
 */
export function isSafeRelativeTestFile(file) {
    // This predicate is the guard for untrusted input and the compiled module
    // has no runtime type enforcement, so a non-string must yield `false`
    // rather than a TypeError from `.length` / `.startsWith`.
    if (typeof file !== "string" || file.length === 0)
        return false;
    if (file.startsWith("/"))
        return false;
    if (!TEST_FILE_PATTERN.test(file))
        return false;
    // Reject `..` as a whole path segment (e.g. `..`, `../x`, `a/../b`, `a/..`);
    // a name that merely contains two dots (`a..b`) is still allowed.
    return !file.split("/").includes("..");
}
25
/**
 * Throwing counterpart of `isSafeRelativeTestFile`.
 *
 * @param file - Candidate `testCase.file` value.
 * @throws {Error} When `isSafeRelativeTestFile(file)` returns a falsy result;
 *   the message embeds the rejected value.
 */
export function assertSafeRelativeTestFile(file) {
    const accepted = isSafeRelativeTestFile(file);
    if (accepted) {
        return;
    }
    throw new Error(`Invalid test case file: ${file}`);
}
@@ -1,6 +1,12 @@
1
1
  import { type AgentExecutor } from "../agents/lib/executors.js";
2
2
  import type { TestTargetRun, TestTargetCandidate, LoadedTestSpec } from "./verify-types.js";
3
3
  import { type TestSandboxRetentionMode } from "./verify-sandbox.js";
4
+ export declare function resolveTargetFilePath(targetPath: string, file: string): string;
5
+ interface TestJudgeVerdict {
6
+ pass: boolean;
7
+ summary: string;
8
+ }
9
+ export declare function readTestJudgeVerdictFromStatus(statusPath: string): TestJudgeVerdict | null;
4
10
  export declare function runTargetTests(sourcePath: string, spec: LoadedTestSpec, targets: TestTargetCandidate[]): TestTargetRun;
5
11
  export declare function runTargetTestsWithJudge(sourcePath: string, spec: LoadedTestSpec, targets: TestTargetCandidate[], executor: AgentExecutor, options?: {
6
12
  preserveSandboxes?: TestSandboxRetentionMode;
@@ -12,3 +18,4 @@ export declare function runTargetTestsAuto(sourcePath: string, spec: LoadedTestS
12
18
  artifactRootPath?: string;
13
19
  }): Promise<TestTargetRun>;
14
20
  export declare function saveTargetTestRun(artifactRootPath: string, result: TestTargetRun): string;
21
+ export {};
@@ -1,9 +1,21 @@
1
1
  import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync, } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { tmpdir } from "node:os";
4
+ import { assertPathWithinRoot } from "../../contracts/utils/path-guards.js";
4
5
  import { buildRuntimeExecutorInfo } from "../agents/lib/executors.js";
5
6
  import { targetTestRunGitignorePath, targetTestRunsPath, targetTestSandboxGitignorePath, targetTestSandboxesPath, normalizeTestId, } from "./verify-paths.js";
6
7
  import { createTestSandbox, } from "./verify-sandbox.js";
8
+ import { freezePreservedShell } from "../agents/lib/shell-fs.js";
9
/**
 * H1: build the on-disk path for a `testCase.file` and bound it to the target.
 *
 * `testCase.file` is a project-relative output path joined onto the target
 * directory. `path.join` resolves `..` at string level, so a malicious file
 * could otherwise escape the (sandboxed) target and be read through
 * existsSync/readFileSync. The joined path is therefore handed to
 * `assertPathWithinRoot` before any filesystem access. Centralized so every
 * read of a test-case file inherits the bound (ENGINE-POLICY rule 7). The
 * schema/spec-writer guards (H2/H3) are defense-in-depth ahead of this; this is
 * the last line before the filesystem.
 *
 * @param targetPath - Root directory of the target under test.
 * @param file - The `testCase.file` value to resolve.
 * @returns Whatever `assertPathWithinRoot` returns for the joined path
 *   (declared as a string path in the typings).
 */
export function resolveTargetFilePath(targetPath, file) {
    const joinedPath = join(targetPath, file);
    return assertPathWithinRoot(targetPath, joinedPath, "test case file");
}
7
19
  function parseWords(content) {
8
20
  return content.trim().split(/\s+/).filter(Boolean).length;
9
21
  }
@@ -66,7 +78,7 @@ function runTestCase(target, testCase) {
66
78
  if (!testCase.file) {
67
79
  throw new Error(`Test case "${testCase.id}" requires an executor because it has no file target.`);
68
80
  }
69
- const outputPath = join(target.path, testCase.file);
81
+ const outputPath = resolveTargetFilePath(target.path, testCase.file);
70
82
  const checks = [];
71
83
  if (!existsSync(outputPath)) {
72
84
  checks.push({
@@ -133,7 +145,13 @@ function readTestJudgeVerdict(verdictPath) {
133
145
  summary: typeof raw.summary === "string" ? raw.summary : "",
134
146
  };
135
147
  }
136
- function readTestJudgeVerdictFromStatus(statusPath) {
148
+ // M13: verdict.json is the single source of truth for a judge pass/fail. This is
149
+ // a strict fallback for when the agent omitted the JSON file: only an explicit
150
+ // `pass=true` / `pass=false` token on a terminal DONE:/BLOCKED:/ERROR: line is
151
+ // honored. The prompt mandates `DONE: pass=true|false - <summary>`, so a benign
152
+ // status line that merely mentions a match must NOT be read as a pass. A terminal
153
+ // line with no explicit pass token yields no verdict (treated as missing/invalid).
154
+ export function readTestJudgeVerdictFromStatus(statusPath) {
137
155
  if (!existsSync(statusPath))
138
156
  return null;
139
157
  const lines = readFileSync(statusPath, "utf8")
@@ -148,53 +166,98 @@ function readTestJudgeVerdictFromStatus(statusPath) {
148
166
  continue;
149
167
  const normalized = line.toLowerCase();
150
168
  const summary = line.replace(/^(DONE|BLOCKED|ERROR):\s*/i, "").trim();
151
- if (normalized.includes("pass=true")) {
152
- return { pass: true, summary };
153
- }
154
- if (normalized.includes("pass=false")) {
155
- return { pass: false, summary };
156
- }
157
- if (/values match expected|matches expected|candidate matches|answer matches|expected values match/i.test(line)) {
169
+ if (/\bpass=true\b/.test(normalized)) {
158
170
  return { pass: true, summary };
159
171
  }
160
- if (/does not match|do not match|mismatch|candidate fails|answer fails|expected values do not match/i.test(line)) {
172
+ if (/\bpass=false\b/.test(normalized)) {
161
173
  return { pass: false, summary };
162
174
  }
175
+ // Terminal line reached without an explicit pass token: no trustworthy
176
+ // verdict. Stop scanning so an earlier, benign line cannot be misread.
177
+ return null;
163
178
  }
164
179
  return null;
165
180
  }
166
- async function runTargetTestsJudge(testCase, executor, candidateLabel, candidateContent) {
167
- const tempDir = mkdtempSync(join(tmpdir(), "interf-test-judge-"));
181
/**
 * Run the judge agent against one candidate and collect its verdict.
 *
 * @param testCase - Test case forwarded to `buildTestJudgePrompt`.
 * @param executor - Agent executor; `executor.execute(dir, prompt, options)` is awaited.
 * @param candidateLabel - Human-readable label for the candidate, forwarded to the prompt builder.
 * @param candidateContent - The candidate text being judged.
 * @param preservedShellRoot - When supplied, the judge shell is created at this
 *   durable path and preserved (frozen) on finish so the judge execution is
 *   inspectable: prompt, reasoning transcript, the candidate it judged, and the
 *   JSON verdict all survive. Lives under the benchmark sandbox so it is
 *   preserved with the sandbox on failure. When omitted, the shell is an
 *   ephemeral /tmp dir removed on finish.
 * @returns `{ verdict, error }`: `verdict` is the parsed verdict or `null`;
 *   `error` is the first captured error message or `null`.
 */
async function runTargetTestsJudge(testCase, executor, candidateLabel, candidateContent, preservedShellRoot) {
    const preserve = Boolean(preservedShellRoot);
    const tempDir = preservedShellRoot
        ? (mkdirSync(preservedShellRoot, { recursive: true }), preservedShellRoot)
        : mkdtempSync(join(tmpdir(), "interf-test-judge-"));
    // The preserved shell needs a runtime/ dir up front: freezePreservedShell
    // writes its manifest to runtime/preserved-shell.json, and reasoning is teed
    // there too. (The ephemeral path doesn't need it.)
    if (preserve) {
        mkdirSync(join(tempDir, "runtime"), { recursive: true });
    }
    let executionError = null;
    let verdict = null;
    try {
        const verdictPath = join(tempDir, "verdict.json");
        const statusPath = join(tempDir, "judge.status.log");
        // A durable shell path may already exist from an earlier judge run
        // (mkdirSync with `recursive` accepts that silently). Drop any leftover
        // verdict/status output first so a run that writes nothing cannot be
        // scored from a previous run's files. `force` ignores missing files.
        if (preserve) {
            rmSync(verdictPath, { force: true });
            rmSync(statusPath, { force: true });
        }
        const prompt = buildTestJudgePrompt(testCase, candidateLabel, candidateContent, verdictPath);
        // Preserve the rendered prompt and the candidate it judged so the preserved
        // shell holds everything needed to reproduce the verdict, not just the output.
        if (preserve) {
            writeFileSync(join(tempDir, "prompt.txt"), `${prompt}\n`);
            writeFileSync(join(tempDir, "candidate.txt"), `${candidateContent}\n`);
        }
        // Same canonical convention as the stage path: reasoning is teed into the
        // shell's runtime/ dir so it is preserved when the shell is frozen.
        const reasoningLogPath = preserve ? join(tempDir, "runtime", "agent-reasoning.jsonl") : null;
        try {
            await executor.execute(tempDir, prompt, {
                statusLogPath: statusPath,
                reasoningLogPath,
            });
        }
        catch (error) {
            executionError = error instanceof Error ? error.message : String(error);
        }
        // M12: first error wins. If execute() already failed, do not read verdict
        // files — a throw in verdict-reading would otherwise overwrite the real
        // execution failure and mask why the judge never ran. Only read the verdict
        // when execution itself reported no error.
        if (!executionError) {
            try {
                verdict = readTestJudgeVerdict(verdictPath);
                if (!verdict) {
                    verdict = readTestJudgeVerdictFromStatus(statusPath);
                }
            }
            catch (error) {
                executionError = error instanceof Error ? error.message : String(error);
            }
        }
    }
    finally {
        // Preserve a durable judge shell (freeze materializes symlinks + writes the
        // preserved-shell manifest, path unchanged); only remove an ephemeral one.
        if (preserve) {
            freezePreservedShell(tempDir, "test-judge");
        }
        else {
            rmSync(tempDir, { recursive: true, force: true });
        }
    }
    return { verdict, error: executionError };
}
197
- function buildTestQueryPrompt(target, testCase, answerPath, tracePath) {
250
/**
 * Compute the durable judge-shell directory for one judged case inside a
 * benchmark sandbox target.
 *
 * The directory sits under `<targetPath>/.interf/runtime/test-judge/`, beside
 * the sandbox's other preserved runtime artifacts, so a preserved sandbox
 * carries the judge execution too. The final segment is
 * `<case>-<candidate>`, where each part is the `normalizeTestId` result for the
 * case id / candidate label, or the literal `case` / `candidate` when that
 * result is falsy. Keying by both is meant to keep multiple judged cases in one
 * target from colliding.
 *
 * @param targetPath - Root directory of the sandbox target.
 * @param testCase - Test case whose `id` contributes the first slug part.
 * @param candidateLabel - Candidate label contributing the second slug part.
 * @returns The joined judge-shell path.
 */
function judgeShellRoot(targetPath, testCase, candidateLabel) {
    const casePart = normalizeTestId(testCase.id) || "case";
    const candidatePart = normalizeTestId(candidateLabel) || "candidate";
    const slug = `${casePart}-${candidatePart}`;
    return join(targetPath, ".interf", "runtime", "test-judge", slug);
}
260
+ function buildTestQueryPrompt(target, testCase, answerPath, tracePath, retryReason) {
198
261
  const header = target.type === "context-graph"
199
262
  ? [
200
263
  "You are running an Interf benchmark inside an isolated sandboxed context-graph copy.",
@@ -217,10 +280,13 @@ function buildTestQueryPrompt(target, testCase, answerPath, tracePath) {
217
280
  ...header,
218
281
  "Emit only STATUS:, DONE:, BLOCKED:, or ERROR: lines.",
219
282
  "Do not ask follow-up questions.",
283
+ "Create both required output files before printing DONE.",
284
+ "The output paths are absolute temp file paths outside the Context Graph sandbox; write them exactly as given.",
220
285
  `Write the answer to ${JSON.stringify(answerPath)}.`,
221
286
  `Write the trace to ${JSON.stringify(tracePath)} with keys: case_id, target, artifacts_consulted, source_paths_read, used_source_files, answer_summary.`,
222
287
  `Set \`case_id\` to ${JSON.stringify(testCase.id)}.`,
223
288
  `Set \`target\` to ${JSON.stringify(target.type)}.`,
289
+ ...(retryReason ? [`Retry reason: ${retryReason}`] : []),
224
290
  `Question: ${testCase.question}`,
225
291
  ].join("\n");
226
292
  }
@@ -228,20 +294,26 @@ async function runLiveTestCase(target, testCase, executor) {
228
294
  const tempDir = mkdtempSync(join(tmpdir(), "interf-test-live-"));
229
295
  const answerPath = join(tempDir, "answer.md");
230
296
  const tracePath = join(tempDir, "trace.json");
231
- const statusPath = join(tempDir, "status.log");
232
- const eventPath = join(tempDir, "events.ndjson");
233
- const prompt = buildTestQueryPrompt(target, testCase, answerPath, tracePath);
234
297
  let executionError = null;
235
298
  let code = -1;
236
299
  try {
237
- try {
238
- code = await executor.execute(target.path, prompt, {
239
- eventLogPath: eventPath,
240
- statusLogPath: statusPath,
241
- });
242
- }
243
- catch (error) {
244
- executionError = error instanceof Error ? error.message : String(error);
300
+ for (let attempt = 1; attempt <= 2; attempt += 1) {
301
+ const statusPath = join(tempDir, `status-${attempt}.log`);
302
+ const eventPath = join(tempDir, `events-${attempt}.ndjson`);
303
+ const prompt = buildTestQueryPrompt(target, testCase, answerPath, tracePath, attempt === 1
304
+ ? undefined
305
+ : `Attempt ${attempt - 1} exited without writing ${!existsSync(answerPath) ? "the answer file" : "the trace file"}. Write both files before DONE.`);
306
+ try {
307
+ code = await executor.execute(target.path, prompt, {
308
+ eventLogPath: eventPath,
309
+ statusLogPath: statusPath,
310
+ });
311
+ }
312
+ catch (error) {
313
+ executionError = error instanceof Error ? error.message : String(error);
314
+ }
315
+ if (existsSync(answerPath) && existsSync(tracePath))
316
+ break;
245
317
  }
246
318
  const checks = [];
247
319
  if (!existsSync(answerPath)) {
@@ -294,7 +366,8 @@ async function runLiveTestCase(target, testCase, executor) {
294
366
  });
295
367
  }
296
368
  if (testCase.answer) {
297
- const judged = await runTargetTestsJudge(testCase, executor, `generated answer for ${testCase.id}`, answer);
369
+ const candidateLabel = `generated answer for ${testCase.id}`;
370
+ const judged = await runTargetTestsJudge(testCase, executor, candidateLabel, answer, judgeShellRoot(target.path, testCase, candidateLabel));
298
371
  checks.push({
299
372
  label: "judge verdict",
300
373
  ok: judged.verdict?.pass === true,
@@ -324,7 +397,7 @@ async function runTestCaseWithJudge(target, testCase, executor) {
324
397
  if (!testCase.file) {
325
398
  return runLiveTestCase(target, testCase, executor);
326
399
  }
327
- const outputPath = join(target.path, testCase.file);
400
+ const outputPath = resolveTargetFilePath(target.path, testCase.file);
328
401
  const checks = [];
329
402
  if (!existsSync(outputPath)) {
330
403
  checks.push({
@@ -353,7 +426,8 @@ async function runTestCaseWithJudge(target, testCase, executor) {
353
426
  });
354
427
  checks.push(...evaluated.checks);
355
428
  if (testCase.answer) {
356
- const judged = await runTargetTestsJudge(testCase, executor, `Context Graph file ${outputPath}`, content);
429
+ const candidateLabel = `Context Graph file ${outputPath}`;
430
+ const judged = await runTargetTestsJudge(testCase, executor, candidateLabel, content, judgeShellRoot(target.path, testCase, candidateLabel));
357
431
  checks.push({
358
432
  label: "judge verdict",
359
433
  ok: judged.verdict?.pass === true,
@@ -1,5 +1,6 @@
1
1
  import type { TestTargetType } from "./verify-types.js";
2
2
  export declare const TEST_SPEC_EXTENSIONS: Set<string>;
3
+ export { isSafeRelativeTestFile, assertSafeRelativeTestFile, } from "./test-file-guard.js";
3
4
  export declare function testSpecRootPath(sourcePath: string): string;
4
5
  export declare function testSpecTypePath(sourcePath: string, type: TestTargetType): string;
5
6
  export declare function targetTestRunsPath(contextGraphPath: string, type: TestTargetType): string;
@@ -4,6 +4,10 @@ import { targetTestRunsRootForContextGraph, targetTestSandboxesRootForContextGra
4
4
  import { asProjectDataDir, projectTestsSpecsRoot, } from "../../contracts/lib/project-paths.js";
5
5
  const TEST_ID_PATTERN = /^[a-z0-9][a-z0-9-]{0,79}$/;
6
6
  export const TEST_SPEC_EXTENSIONS = new Set([".json"]);
7
+ // The `testCase.file` safelist guard lives in a Node-free module so the schemas
8
+ // barrel can import it without pulling Node builtins into the renderer bundle.
9
+ // Re-exported here for server-side path callers.
10
+ export { isSafeRelativeTestFile, assertSafeRelativeTestFile, } from "./test-file-guard.js";
7
11
  export function testSpecRootPath(sourcePath) {
8
12
  return projectTestsSpecsRoot(asProjectDataDir(sourcePath));
9
13
  }