@interf/compiler 0.3.4 → 0.4.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (385)
  1. package/README.md +131 -149
  2. package/builtin-workflows/interf/README.md +19 -0
  3. package/builtin-workflows/interf/compile/stages/shape/SKILL.md +28 -0
  4. package/builtin-workflows/interf/compile/stages/structure/SKILL.md +18 -0
  5. package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +18 -0
  6. package/builtin-workflows/interf/improve/SKILL.md +18 -0
  7. package/builtin-workflows/interf/use/query/SKILL.md +28 -0
  8. package/builtin-workflows/interf/workflow.json +120 -0
  9. package/builtin-workflows/interf/workspace.schema.json +75 -0
  10. package/dist/bin.d.ts +0 -1
  11. package/dist/bin.js +0 -1
  12. package/dist/commands/compile-controller.d.ts +14 -0
  13. package/dist/commands/compile-controller.js +409 -0
  14. package/dist/commands/compile.d.ts +8 -9
  15. package/dist/commands/compile.js +151 -153
  16. package/dist/commands/create-workflow-wizard.d.ts +4 -2
  17. package/dist/commands/create-workflow-wizard.js +33 -31
  18. package/dist/commands/create.d.ts +0 -1
  19. package/dist/commands/create.js +15 -11
  20. package/dist/commands/default.d.ts +0 -1
  21. package/dist/commands/default.js +0 -1
  22. package/dist/commands/doctor.d.ts +0 -1
  23. package/dist/commands/doctor.js +1 -16
  24. package/dist/commands/executor-flow.d.ts +0 -1
  25. package/dist/commands/executor-flow.js +0 -1
  26. package/dist/commands/init.d.ts +0 -1
  27. package/dist/commands/init.js +71 -14
  28. package/dist/commands/list.d.ts +0 -1
  29. package/dist/commands/list.js +0 -1
  30. package/dist/commands/reset.d.ts +0 -1
  31. package/dist/commands/reset.js +0 -1
  32. package/dist/commands/source-config-wizard.d.ts +28 -7
  33. package/dist/commands/source-config-wizard.js +159 -63
  34. package/dist/commands/status.d.ts +0 -1
  35. package/dist/commands/status.js +0 -1
  36. package/dist/commands/test-flow.d.ts +20 -10
  37. package/dist/commands/test-flow.js +89 -23
  38. package/dist/commands/test.d.ts +0 -1
  39. package/dist/commands/test.js +36 -72
  40. package/dist/commands/verify.d.ts +0 -1
  41. package/dist/commands/verify.js +0 -1
  42. package/dist/commands/workspace-flow.d.ts +3 -3
  43. package/dist/commands/workspace-flow.js +30 -12
  44. package/dist/index.d.ts +5 -6
  45. package/dist/index.js +3 -4
  46. package/dist/lib/agent-args.d.ts +0 -1
  47. package/dist/lib/agent-args.js +0 -1
  48. package/dist/lib/agent-constants.d.ts +0 -1
  49. package/dist/lib/agent-constants.js +0 -1
  50. package/dist/lib/agent-detection.d.ts +0 -1
  51. package/dist/lib/agent-detection.js +0 -1
  52. package/dist/lib/agent-execution.d.ts +0 -1
  53. package/dist/lib/agent-execution.js +47 -12
  54. package/dist/lib/agent-logs.d.ts +0 -1
  55. package/dist/lib/agent-logs.js +0 -1
  56. package/dist/lib/agent-preflight.d.ts +0 -1
  57. package/dist/lib/agent-preflight.js +1 -2
  58. package/dist/lib/agent-render.d.ts +0 -1
  59. package/dist/lib/agent-render.js +0 -1
  60. package/dist/lib/agent-shells.d.ts +30 -3
  61. package/dist/lib/agent-shells.js +517 -54
  62. package/dist/lib/agent-status.d.ts +0 -1
  63. package/dist/lib/agent-status.js +0 -1
  64. package/dist/lib/agent-types.d.ts +0 -1
  65. package/dist/lib/agent-types.js +0 -1
  66. package/dist/lib/agents.d.ts +0 -9
  67. package/dist/lib/agents.js +0 -9
  68. package/dist/lib/chart-guidance.d.ts +1 -0
  69. package/dist/lib/chart-guidance.js +8 -0
  70. package/dist/lib/config.d.ts +0 -3
  71. package/dist/lib/config.js +0 -5
  72. package/dist/lib/discovery.d.ts +0 -1
  73. package/dist/lib/discovery.js +0 -1
  74. package/dist/lib/execution-profile.d.ts +0 -1
  75. package/dist/lib/execution-profile.js +0 -1
  76. package/dist/lib/executors.d.ts +0 -1
  77. package/dist/lib/executors.js +0 -1
  78. package/dist/lib/filesystem.d.ts +0 -1
  79. package/dist/lib/filesystem.js +0 -1
  80. package/dist/lib/interf-bootstrap.d.ts +0 -1
  81. package/dist/lib/interf-bootstrap.js +6 -2
  82. package/dist/lib/interf-detect.d.ts +2 -2
  83. package/dist/lib/interf-detect.js +25 -8
  84. package/dist/lib/interf-scaffold.d.ts +0 -1
  85. package/dist/lib/interf-scaffold.js +54 -21
  86. package/dist/lib/interf-workflow-package.d.ts +18 -1
  87. package/dist/lib/interf-workflow-package.js +164 -23
  88. package/dist/lib/interf.d.ts +1 -2
  89. package/dist/lib/interf.js +1 -2
  90. package/dist/lib/local-workflows.d.ts +10 -5
  91. package/dist/lib/local-workflows.js +208 -97
  92. package/dist/lib/logger.d.ts +0 -1
  93. package/dist/lib/logger.js +0 -1
  94. package/dist/lib/obsidian.d.ts +0 -1
  95. package/dist/lib/obsidian.js +0 -1
  96. package/dist/lib/parse.d.ts +0 -1
  97. package/dist/lib/parse.js +0 -1
  98. package/dist/lib/registry.d.ts +0 -1
  99. package/dist/lib/registry.js +0 -1
  100. package/dist/lib/runtime-acceptance.d.ts +0 -1
  101. package/dist/lib/runtime-acceptance.js +1 -2
  102. package/dist/lib/runtime-contracts.d.ts +0 -1
  103. package/dist/lib/runtime-contracts.js +14 -8
  104. package/dist/lib/runtime-paths.d.ts +0 -1
  105. package/dist/lib/runtime-paths.js +5 -10
  106. package/dist/lib/runtime-prompt.d.ts +0 -1
  107. package/dist/lib/runtime-prompt.js +8 -23
  108. package/dist/lib/runtime-reconcile.d.ts +0 -1
  109. package/dist/lib/runtime-reconcile.js +7 -3
  110. package/dist/lib/runtime-runs.d.ts +0 -1
  111. package/dist/lib/runtime-runs.js +4 -5
  112. package/dist/lib/runtime-types.d.ts +0 -1
  113. package/dist/lib/runtime-types.js +0 -1
  114. package/dist/lib/runtime.d.ts +0 -1
  115. package/dist/lib/runtime.js +0 -1
  116. package/dist/lib/schema.d.ts +356 -51
  117. package/dist/lib/schema.js +189 -54
  118. package/dist/lib/source-config.d.ts +17 -8
  119. package/dist/lib/source-config.js +125 -11
  120. package/dist/lib/state-artifacts.d.ts +0 -1
  121. package/dist/lib/state-artifacts.js +0 -1
  122. package/dist/lib/state-health.d.ts +0 -1
  123. package/dist/lib/state-health.js +2 -3
  124. package/dist/lib/state-io.d.ts +1 -1
  125. package/dist/lib/state-io.js +9 -10
  126. package/dist/lib/state-paths.d.ts +0 -1
  127. package/dist/lib/state-paths.js +5 -11
  128. package/dist/lib/state-view.d.ts +0 -1
  129. package/dist/lib/state-view.js +7 -8
  130. package/dist/lib/state.d.ts +0 -1
  131. package/dist/lib/state.js +0 -1
  132. package/dist/lib/summarize-plan.d.ts +0 -1
  133. package/dist/lib/summarize-plan.js +3 -3
  134. package/dist/lib/test-execution.d.ts +14 -0
  135. package/dist/lib/{benchmark-execution.js → test-execution.js} +122 -120
  136. package/dist/lib/test-matrices.d.ts +90 -0
  137. package/dist/lib/test-matrices.js +96 -0
  138. package/dist/lib/test-paths.d.ts +12 -0
  139. package/dist/lib/test-paths.js +44 -0
  140. package/dist/lib/test-profile-presets.d.ts +57 -0
  141. package/dist/lib/test-profile-presets.js +50 -0
  142. package/dist/lib/test-sandbox.d.ts +11 -0
  143. package/dist/lib/{benchmark-sandbox.js → test-sandbox.js} +18 -13
  144. package/dist/lib/test-specs.d.ts +7 -0
  145. package/dist/lib/test-specs.js +114 -0
  146. package/dist/lib/test-targets.d.ts +5 -0
  147. package/dist/lib/test-targets.js +38 -0
  148. package/dist/lib/test-types.d.ts +17 -0
  149. package/dist/lib/test-types.js +1 -0
  150. package/dist/lib/test.d.ts +4 -0
  151. package/dist/lib/test.js +3 -0
  152. package/dist/lib/user-config.d.ts +0 -1
  153. package/dist/lib/user-config.js +0 -1
  154. package/dist/lib/util.d.ts +0 -2
  155. package/dist/lib/util.js +0 -2
  156. package/dist/lib/validate-helpers.d.ts +0 -1
  157. package/dist/lib/validate-helpers.js +0 -1
  158. package/dist/lib/validate-workspace.d.ts +0 -1
  159. package/dist/lib/validate-workspace.js +34 -25
  160. package/dist/lib/validate.d.ts +0 -1
  161. package/dist/lib/validate.js +55 -9
  162. package/dist/lib/workflow-abi.d.ts +138 -0
  163. package/dist/lib/workflow-abi.js +181 -0
  164. package/dist/lib/workflow-definitions.d.ts +26 -5
  165. package/dist/lib/workflow-definitions.js +105 -168
  166. package/dist/lib/workflow-helpers.d.ts +1 -2
  167. package/dist/lib/workflow-helpers.js +32 -21
  168. package/dist/lib/workflow-improvement.d.ts +22 -0
  169. package/dist/lib/workflow-improvement.js +396 -0
  170. package/dist/lib/workflow-review-paths.d.ts +10 -0
  171. package/dist/lib/workflow-review-paths.js +27 -0
  172. package/dist/lib/workflow-stage-runner.d.ts +1 -1
  173. package/dist/lib/workflow-stage-runner.js +4 -1
  174. package/dist/lib/workflows.d.ts +1 -2
  175. package/dist/lib/workflows.js +1 -2
  176. package/dist/lib/workspace-compile.d.ts +0 -1
  177. package/dist/lib/workspace-compile.js +146 -109
  178. package/dist/lib/workspace-home.d.ts +5 -0
  179. package/dist/lib/workspace-home.js +32 -0
  180. package/dist/lib/workspace-layout.d.ts +2 -0
  181. package/dist/lib/workspace-layout.js +60 -0
  182. package/dist/lib/workspace-paths.d.ts +41 -0
  183. package/dist/lib/workspace-paths.js +107 -0
  184. package/dist/lib/workspace-raw.d.ts +20 -2
  185. package/dist/lib/workspace-raw.js +6 -8
  186. package/dist/lib/workspace-reset.d.ts +0 -1
  187. package/dist/lib/workspace-reset.js +27 -5
  188. package/dist/lib/workspace-schema.d.ts +1 -10
  189. package/dist/lib/workspace-schema.js +16 -74
  190. package/package.json +16 -15
  191. package/dist/bin.d.ts.map +0 -1
  192. package/dist/bin.js.map +0 -1
  193. package/dist/commands/compile.d.ts.map +0 -1
  194. package/dist/commands/compile.js.map +0 -1
  195. package/dist/commands/create-workflow-wizard.d.ts.map +0 -1
  196. package/dist/commands/create-workflow-wizard.js.map +0 -1
  197. package/dist/commands/create.d.ts.map +0 -1
  198. package/dist/commands/create.js.map +0 -1
  199. package/dist/commands/default.d.ts.map +0 -1
  200. package/dist/commands/default.js.map +0 -1
  201. package/dist/commands/doctor.d.ts.map +0 -1
  202. package/dist/commands/doctor.js.map +0 -1
  203. package/dist/commands/executor-flow.d.ts.map +0 -1
  204. package/dist/commands/executor-flow.js.map +0 -1
  205. package/dist/commands/init.d.ts.map +0 -1
  206. package/dist/commands/init.js.map +0 -1
  207. package/dist/commands/list.d.ts.map +0 -1
  208. package/dist/commands/list.js.map +0 -1
  209. package/dist/commands/reset.d.ts.map +0 -1
  210. package/dist/commands/reset.js.map +0 -1
  211. package/dist/commands/source-config-wizard.d.ts.map +0 -1
  212. package/dist/commands/source-config-wizard.js.map +0 -1
  213. package/dist/commands/status.d.ts.map +0 -1
  214. package/dist/commands/status.js.map +0 -1
  215. package/dist/commands/test-flow.d.ts.map +0 -1
  216. package/dist/commands/test-flow.js.map +0 -1
  217. package/dist/commands/test.d.ts.map +0 -1
  218. package/dist/commands/test.js.map +0 -1
  219. package/dist/commands/verify.d.ts.map +0 -1
  220. package/dist/commands/verify.js.map +0 -1
  221. package/dist/commands/workspace-flow.d.ts.map +0 -1
  222. package/dist/commands/workspace-flow.js.map +0 -1
  223. package/dist/index.d.ts.map +0 -1
  224. package/dist/index.js.map +0 -1
  225. package/dist/lib/agent-args.d.ts.map +0 -1
  226. package/dist/lib/agent-args.js.map +0 -1
  227. package/dist/lib/agent-constants.d.ts.map +0 -1
  228. package/dist/lib/agent-constants.js.map +0 -1
  229. package/dist/lib/agent-detection.d.ts.map +0 -1
  230. package/dist/lib/agent-detection.js.map +0 -1
  231. package/dist/lib/agent-execution.d.ts.map +0 -1
  232. package/dist/lib/agent-execution.js.map +0 -1
  233. package/dist/lib/agent-logs.d.ts.map +0 -1
  234. package/dist/lib/agent-logs.js.map +0 -1
  235. package/dist/lib/agent-preflight.d.ts.map +0 -1
  236. package/dist/lib/agent-preflight.js.map +0 -1
  237. package/dist/lib/agent-render.d.ts.map +0 -1
  238. package/dist/lib/agent-render.js.map +0 -1
  239. package/dist/lib/agent-shells.d.ts.map +0 -1
  240. package/dist/lib/agent-shells.js.map +0 -1
  241. package/dist/lib/agent-skills.d.ts +0 -21
  242. package/dist/lib/agent-skills.d.ts.map +0 -1
  243. package/dist/lib/agent-skills.js +0 -215
  244. package/dist/lib/agent-skills.js.map +0 -1
  245. package/dist/lib/agent-status.d.ts.map +0 -1
  246. package/dist/lib/agent-status.js.map +0 -1
  247. package/dist/lib/agent-types.d.ts.map +0 -1
  248. package/dist/lib/agent-types.js.map +0 -1
  249. package/dist/lib/agents.d.ts.map +0 -1
  250. package/dist/lib/agents.js.map +0 -1
  251. package/dist/lib/benchmark-execution.d.ts +0 -13
  252. package/dist/lib/benchmark-execution.d.ts.map +0 -1
  253. package/dist/lib/benchmark-execution.js.map +0 -1
  254. package/dist/lib/benchmark-paths.d.ts +0 -13
  255. package/dist/lib/benchmark-paths.d.ts.map +0 -1
  256. package/dist/lib/benchmark-paths.js +0 -44
  257. package/dist/lib/benchmark-paths.js.map +0 -1
  258. package/dist/lib/benchmark-sandbox.d.ts +0 -12
  259. package/dist/lib/benchmark-sandbox.d.ts.map +0 -1
  260. package/dist/lib/benchmark-sandbox.js.map +0 -1
  261. package/dist/lib/benchmark-specs.d.ts +0 -8
  262. package/dist/lib/benchmark-specs.d.ts.map +0 -1
  263. package/dist/lib/benchmark-specs.js +0 -115
  264. package/dist/lib/benchmark-specs.js.map +0 -1
  265. package/dist/lib/benchmark-targets.d.ts +0 -5
  266. package/dist/lib/benchmark-targets.d.ts.map +0 -1
  267. package/dist/lib/benchmark-targets.js +0 -38
  268. package/dist/lib/benchmark-targets.js.map +0 -1
  269. package/dist/lib/benchmark-types.d.ts +0 -18
  270. package/dist/lib/benchmark-types.d.ts.map +0 -1
  271. package/dist/lib/benchmark-types.js +0 -2
  272. package/dist/lib/benchmark-types.js.map +0 -1
  273. package/dist/lib/benchmark.d.ts +0 -5
  274. package/dist/lib/benchmark.d.ts.map +0 -1
  275. package/dist/lib/benchmark.js +0 -4
  276. package/dist/lib/benchmark.js.map +0 -1
  277. package/dist/lib/config.d.ts.map +0 -1
  278. package/dist/lib/config.js.map +0 -1
  279. package/dist/lib/discovery.d.ts.map +0 -1
  280. package/dist/lib/discovery.js.map +0 -1
  281. package/dist/lib/eval-packs.d.ts +0 -158
  282. package/dist/lib/eval-packs.d.ts.map +0 -1
  283. package/dist/lib/eval-packs.js +0 -149
  284. package/dist/lib/eval-packs.js.map +0 -1
  285. package/dist/lib/execution-profile.d.ts.map +0 -1
  286. package/dist/lib/execution-profile.js.map +0 -1
  287. package/dist/lib/executors.d.ts.map +0 -1
  288. package/dist/lib/executors.js.map +0 -1
  289. package/dist/lib/filesystem.d.ts.map +0 -1
  290. package/dist/lib/filesystem.js.map +0 -1
  291. package/dist/lib/interf-bootstrap.d.ts.map +0 -1
  292. package/dist/lib/interf-bootstrap.js.map +0 -1
  293. package/dist/lib/interf-detect.d.ts.map +0 -1
  294. package/dist/lib/interf-detect.js.map +0 -1
  295. package/dist/lib/interf-scaffold.d.ts.map +0 -1
  296. package/dist/lib/interf-scaffold.js.map +0 -1
  297. package/dist/lib/interf-workflow-package.d.ts.map +0 -1
  298. package/dist/lib/interf-workflow-package.js.map +0 -1
  299. package/dist/lib/interf.d.ts.map +0 -1
  300. package/dist/lib/interf.js.map +0 -1
  301. package/dist/lib/local-workflows.d.ts.map +0 -1
  302. package/dist/lib/local-workflows.js.map +0 -1
  303. package/dist/lib/logger.d.ts.map +0 -1
  304. package/dist/lib/logger.js.map +0 -1
  305. package/dist/lib/obsidian.d.ts.map +0 -1
  306. package/dist/lib/obsidian.js.map +0 -1
  307. package/dist/lib/parse.d.ts.map +0 -1
  308. package/dist/lib/parse.js.map +0 -1
  309. package/dist/lib/registry.d.ts.map +0 -1
  310. package/dist/lib/registry.js.map +0 -1
  311. package/dist/lib/runtime-acceptance.d.ts.map +0 -1
  312. package/dist/lib/runtime-acceptance.js.map +0 -1
  313. package/dist/lib/runtime-contracts.d.ts.map +0 -1
  314. package/dist/lib/runtime-contracts.js.map +0 -1
  315. package/dist/lib/runtime-paths.d.ts.map +0 -1
  316. package/dist/lib/runtime-paths.js.map +0 -1
  317. package/dist/lib/runtime-prompt.d.ts.map +0 -1
  318. package/dist/lib/runtime-prompt.js.map +0 -1
  319. package/dist/lib/runtime-reconcile.d.ts.map +0 -1
  320. package/dist/lib/runtime-reconcile.js.map +0 -1
  321. package/dist/lib/runtime-runs.d.ts.map +0 -1
  322. package/dist/lib/runtime-runs.js.map +0 -1
  323. package/dist/lib/runtime-types.d.ts.map +0 -1
  324. package/dist/lib/runtime-types.js.map +0 -1
  325. package/dist/lib/runtime.d.ts.map +0 -1
  326. package/dist/lib/runtime.js.map +0 -1
  327. package/dist/lib/schema.d.ts.map +0 -1
  328. package/dist/lib/schema.js.map +0 -1
  329. package/dist/lib/source-config.d.ts.map +0 -1
  330. package/dist/lib/source-config.js.map +0 -1
  331. package/dist/lib/state-artifacts.d.ts.map +0 -1
  332. package/dist/lib/state-artifacts.js.map +0 -1
  333. package/dist/lib/state-health.d.ts.map +0 -1
  334. package/dist/lib/state-health.js.map +0 -1
  335. package/dist/lib/state-io.d.ts.map +0 -1
  336. package/dist/lib/state-io.js.map +0 -1
  337. package/dist/lib/state-paths.d.ts.map +0 -1
  338. package/dist/lib/state-paths.js.map +0 -1
  339. package/dist/lib/state-view.d.ts.map +0 -1
  340. package/dist/lib/state-view.js.map +0 -1
  341. package/dist/lib/state.d.ts.map +0 -1
  342. package/dist/lib/state.js.map +0 -1
  343. package/dist/lib/summarize-plan.d.ts.map +0 -1
  344. package/dist/lib/summarize-plan.js.map +0 -1
  345. package/dist/lib/user-config.d.ts.map +0 -1
  346. package/dist/lib/user-config.js.map +0 -1
  347. package/dist/lib/util.d.ts.map +0 -1
  348. package/dist/lib/util.js.map +0 -1
  349. package/dist/lib/validate-helpers.d.ts.map +0 -1
  350. package/dist/lib/validate-helpers.js.map +0 -1
  351. package/dist/lib/validate-workspace.d.ts.map +0 -1
  352. package/dist/lib/validate-workspace.js.map +0 -1
  353. package/dist/lib/validate.d.ts.map +0 -1
  354. package/dist/lib/validate.js.map +0 -1
  355. package/dist/lib/workflow-definitions.d.ts.map +0 -1
  356. package/dist/lib/workflow-definitions.js.map +0 -1
  357. package/dist/lib/workflow-helpers.d.ts.map +0 -1
  358. package/dist/lib/workflow-helpers.js.map +0 -1
  359. package/dist/lib/workflow-stage-runner.d.ts.map +0 -1
  360. package/dist/lib/workflow-stage-runner.js.map +0 -1
  361. package/dist/lib/workflow-starter-docs.d.ts +0 -7
  362. package/dist/lib/workflow-starter-docs.d.ts.map +0 -1
  363. package/dist/lib/workflow-starter-docs.js +0 -3
  364. package/dist/lib/workflow-starter-docs.js.map +0 -1
  365. package/dist/lib/workflows.d.ts.map +0 -1
  366. package/dist/lib/workflows.js.map +0 -1
  367. package/dist/lib/workspace-compile.d.ts.map +0 -1
  368. package/dist/lib/workspace-compile.js.map +0 -1
  369. package/dist/lib/workspace-docs.d.ts +0 -3
  370. package/dist/lib/workspace-docs.d.ts.map +0 -1
  371. package/dist/lib/workspace-docs.js +0 -82
  372. package/dist/lib/workspace-docs.js.map +0 -1
  373. package/dist/lib/workspace-raw.d.ts.map +0 -1
  374. package/dist/lib/workspace-raw.js.map +0 -1
  375. package/dist/lib/workspace-reset.d.ts.map +0 -1
  376. package/dist/lib/workspace-reset.js.map +0 -1
  377. package/dist/lib/workspace-schema.d.ts.map +0 -1
  378. package/dist/lib/workspace-schema.js.map +0 -1
  379. package/skills/benchmark/SKILL.md +0 -122
  380. package/skills/workflow/create/SKILL.md +0 -141
  381. package/skills/workspace/shape/SKILL.md +0 -15
  382. package/skills/workspace/structure/SKILL.md +0 -15
  383. package/skills/workspace/summarize/SKILL.md +0 -15
  384. package/templates/workspace/README.md +0 -24
  385. package/templates/workspace/interfignore +0 -2
@@ -2,19 +2,19 @@ import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync
2
2
  import { join } from "node:path";
3
3
  import { tmpdir } from "node:os";
4
4
  import { buildRuntimeExecutorInfo } from "./executors.js";
5
- import { benchmarkRunGitignorePath, benchmarkRunsPath, benchmarkSandboxGitignorePath, benchmarkSandboxesPath, normalizeBenchmarkId, } from "./benchmark-paths.js";
6
- import { createBenchmarkSandbox, } from "./benchmark-sandbox.js";
5
+ import { targetTestRunGitignorePath, targetTestRunsPath, targetTestSandboxGitignorePath, targetTestSandboxesPath, normalizeTestId, } from "./test-paths.js";
6
+ import { createTestSandbox, } from "./test-sandbox.js";
7
7
  function parseWords(content) {
8
8
  return content.trim().split(/\s+/).filter(Boolean).length;
9
9
  }
10
10
  function normalizeText(content) {
11
11
  return content.toLowerCase().replace(/\s+/g, " ").trim();
12
12
  }
13
- function evaluateTextExpect(benchmarkCase, content) {
13
+ function evaluateTextExpect(testCase, content) {
14
14
  const checks = [];
15
15
  const normalized = normalizeText(content);
16
16
  const wordCount = parseWords(content);
17
- for (const phrase of benchmarkCase.expect?.must_include ?? []) {
17
+ for (const phrase of testCase.expect?.must_include ?? []) {
18
18
  const ok = normalized.includes(normalizeText(phrase));
19
19
  checks.push({
20
20
  label: `must include "${phrase}"`,
@@ -22,7 +22,7 @@ function evaluateTextExpect(benchmarkCase, content) {
22
22
  detail: ok ? "present" : "missing",
23
23
  });
24
24
  }
25
- for (const options of benchmarkCase.expect?.must_include_one_of ?? []) {
25
+ for (const options of testCase.expect?.must_include_one_of ?? []) {
26
26
  const ok = options.some((phrase) => normalized.includes(normalizeText(phrase)));
27
27
  checks.push({
28
28
  label: `must include one of [${options.map((phrase) => `"${phrase}"`).join(", ")}]`,
@@ -30,7 +30,7 @@ function evaluateTextExpect(benchmarkCase, content) {
30
30
  detail: ok ? "present" : "missing",
31
31
  });
32
32
  }
33
- for (const phrase of benchmarkCase.expect?.must_not_include ?? []) {
33
+ for (const phrase of testCase.expect?.must_not_include ?? []) {
34
34
  const ok = !normalized.includes(normalizeText(phrase));
35
35
  checks.push({
36
36
  label: `must not include "${phrase}"`,
@@ -38,18 +38,18 @@ function evaluateTextExpect(benchmarkCase, content) {
38
38
  detail: ok ? "absent" : "present",
39
39
  });
40
40
  }
41
- if (typeof benchmarkCase.expect?.min_words === "number") {
42
- const ok = wordCount >= benchmarkCase.expect.min_words;
41
+ if (typeof testCase.expect?.min_words === "number") {
42
+ const ok = wordCount >= testCase.expect.min_words;
43
43
  checks.push({
44
- label: `min words ${benchmarkCase.expect.min_words}`,
44
+ label: `min words ${testCase.expect.min_words}`,
45
45
  ok,
46
46
  detail: `${wordCount} words`,
47
47
  });
48
48
  }
49
- if (typeof benchmarkCase.expect?.max_words === "number") {
50
- const ok = wordCount <= benchmarkCase.expect.max_words;
49
+ if (typeof testCase.expect?.max_words === "number") {
50
+ const ok = wordCount <= testCase.expect.max_words;
51
51
  checks.push({
52
- label: `max words ${benchmarkCase.expect.max_words}`,
52
+ label: `max words ${testCase.expect.max_words}`,
53
53
  ok,
54
54
  detail: `${wordCount} words`,
55
55
  });
@@ -59,25 +59,25 @@ function evaluateTextExpect(benchmarkCase, content) {
59
59
  wordCount,
60
60
  };
61
61
  }
62
- function benchmarkCaseNeedsExecutor(benchmarkCase) {
63
- return !benchmarkCase.file || Boolean(benchmarkCase.answer);
62
+ function testCaseNeedsExecutor(testCase) {
63
+ return !testCase.file || Boolean(testCase.answer);
64
64
  }
65
- function runBenchmarkCase(target, benchmarkCase) {
66
- if (!benchmarkCase.file) {
67
- throw new Error(`Benchmark case "${benchmarkCase.id}" requires an executor because it has no file target.`);
65
+ function runTestCase(target, testCase) {
66
+ if (!testCase.file) {
67
+ throw new Error(`Test case "${testCase.id}" requires an executor because it has no file target.`);
68
68
  }
69
- const outputPath = join(target.path, benchmarkCase.file);
69
+ const outputPath = join(target.path, testCase.file);
70
70
  const checks = [];
71
71
  if (!existsSync(outputPath)) {
72
72
  checks.push({
73
73
  label: "file exists",
74
74
  ok: false,
75
- detail: `Missing ${benchmarkCase.file}`,
75
+ detail: `Missing ${testCase.file}`,
76
76
  });
77
77
  return {
78
- caseId: benchmarkCase.id,
79
- question: benchmarkCase.question,
80
- ...(benchmarkCase.file ? { file: benchmarkCase.file } : {}),
78
+ caseId: testCase.id,
79
+ question: testCase.question,
80
+ ...(testCase.file ? { file: testCase.file } : {}),
81
81
  ok: false,
82
82
  wordCount: 0,
83
83
  passedChecks: 0,
@@ -88,17 +88,17 @@ function runBenchmarkCase(target, benchmarkCase) {
88
88
  checks.push({
89
89
  label: "file exists",
90
90
  ok: true,
91
- detail: `Found ${benchmarkCase.file}`,
91
+ detail: `Found ${testCase.file}`,
92
92
  });
93
93
  const content = readFileSync(outputPath, "utf8");
94
- const evaluated = evaluateTextExpect(benchmarkCase, content);
94
+ const evaluated = evaluateTextExpect(testCase, content);
95
95
  const wordCount = evaluated.wordCount;
96
96
  checks.push(...evaluated.checks);
97
97
  const passedChecks = checks.filter((check) => check.ok).length;
98
98
  return {
99
- caseId: benchmarkCase.id,
100
- question: benchmarkCase.question,
101
- ...(benchmarkCase.file ? { file: benchmarkCase.file } : {}),
99
+ caseId: testCase.id,
100
+ question: testCase.question,
101
+ ...(testCase.file ? { file: testCase.file } : {}),
102
102
  ok: passedChecks === checks.length,
103
103
  wordCount,
104
104
  passedChecks,
@@ -106,25 +106,25 @@ function runBenchmarkCase(target, benchmarkCase) {
106
106
  checks,
107
107
  };
108
108
  }
109
- function buildBenchmarkJudgePrompt(benchmarkCase, candidateLabel, candidateContent, verdictPath) {
109
+ function buildTestJudgePrompt(testCase, candidateLabel, candidateContent, verdictPath) {
110
110
  return [
111
- "You are judging whether one Interf benchmark answer passes.",
111
+ "You are judging whether one Interf test answer passes.",
112
112
  "Do not browse other files or ask follow-up questions.",
113
- "Judge only from the benchmark rule and the candidate answer below.",
113
+ "Judge only from the truth-check rule and the candidate answer below.",
114
114
  "Emit only STATUS:, DONE:, BLOCKED:, or ERROR: lines.",
115
115
  `Write JSON to ${JSON.stringify(verdictPath)} with keys: pass (boolean), summary (string).`,
116
116
  "Before finishing, write the JSON verdict file.",
117
117
  "Final line must be `DONE: pass=true - <short summary>` or `DONE: pass=false - <short summary>`.",
118
- `Question: ${benchmarkCase.question}`,
119
- `Expected answer: ${benchmarkCase.answer ?? "The answer clearly satisfies the question."}`,
120
- `Strictness: ${benchmarkCase.strictness ?? "approximate"}`,
118
+ `Question: ${testCase.question}`,
119
+ `Expected answer: ${testCase.answer ?? "The answer clearly satisfies the question."}`,
120
+ `Strictness: ${testCase.strictness ?? "approximate"}`,
121
121
  `Candidate: ${candidateLabel}`,
122
122
  "Candidate answer starts after the next line and ends at `END CANDIDATE`.",
123
123
  candidateContent,
124
124
  "END CANDIDATE",
125
125
  ].join("\n");
126
126
  }
127
- function readBenchmarkJudgeVerdict(verdictPath) {
127
+ function readTestJudgeVerdict(verdictPath) {
128
128
  if (!existsSync(verdictPath))
129
129
  return null;
130
130
  const raw = JSON.parse(readFileSync(verdictPath, "utf8"));
@@ -133,7 +133,7 @@ function readBenchmarkJudgeVerdict(verdictPath) {
133
133
  summary: typeof raw.summary === "string" ? raw.summary : "",
134
134
  };
135
135
  }
136
- function readBenchmarkJudgeVerdictFromStatus(statusPath) {
136
+ function readTestJudgeVerdictFromStatus(statusPath) {
137
137
  if (!existsSync(statusPath))
138
138
  return null;
139
139
  const lines = readFileSync(statusPath, "utf8")
@@ -163,14 +163,14 @@ function readBenchmarkJudgeVerdictFromStatus(statusPath) {
163
163
  }
164
164
  return null;
165
165
  }
166
- async function runBenchmarkJudge(benchmarkCase, executor, candidateLabel, candidateContent) {
167
- const tempDir = mkdtempSync(join(tmpdir(), "interf-benchmark-judge-"));
166
+ async function runTargetTestsJudge(testCase, executor, candidateLabel, candidateContent) {
167
+ const tempDir = mkdtempSync(join(tmpdir(), "interf-test-judge-"));
168
168
  let executionError = null;
169
169
  let verdict = null;
170
170
  try {
171
171
  const verdictPath = join(tempDir, "verdict.json");
172
172
  const statusPath = join(tempDir, "judge.status.log");
173
- const prompt = buildBenchmarkJudgePrompt(benchmarkCase, candidateLabel, candidateContent, verdictPath);
173
+ const prompt = buildTestJudgePrompt(testCase, candidateLabel, candidateContent, verdictPath);
174
174
  try {
175
175
  await executor.execute(tempDir, prompt, {
176
176
  statusLogPath: statusPath,
@@ -180,9 +180,9 @@ async function runBenchmarkJudge(benchmarkCase, executor, candidateLabel, candid
180
180
  executionError = error instanceof Error ? error.message : String(error);
181
181
  }
182
182
  try {
183
- verdict = readBenchmarkJudgeVerdict(verdictPath);
183
+ verdict = readTestJudgeVerdict(verdictPath);
184
184
  if (!verdict) {
185
- verdict = readBenchmarkJudgeVerdictFromStatus(statusPath);
185
+ verdict = readTestJudgeVerdictFromStatus(statusPath);
186
186
  }
187
187
  }
188
188
  catch (error) {
@@ -194,22 +194,23 @@ async function runBenchmarkJudge(benchmarkCase, executor, candidateLabel, candid
194
194
  }
195
195
  return { verdict, error: executionError };
196
196
  }
197
- function buildBenchmarkQueryPrompt(target, benchmarkCase, answerPath, tracePath) {
197
+ function buildTestQueryPrompt(target, testCase, answerPath, tracePath) {
198
198
  const header = target.type === "workspace"
199
199
  ? [
200
- "You are running an Interf benchmark inside an isolated sandboxed compiled workspace.",
200
+ "You are running an Interf test inside an isolated sandboxed compiled workspace.",
201
201
  "Read `AGENTS.md` first.",
202
202
  "Use the local native `interf-query` skill available in this workspace.",
203
- "If you need the editable method source, read `workflow/use/query/SKILL.md`.",
204
- "Answer the benchmark question the same way you would answer a real user inside this compiled workspace.",
203
+ "Answer the truth-check question the same way you would answer a real user inside this compiled workspace.",
205
204
  "Prefer `home.md`, `knowledge/`, and `summaries/` before raw fallback.",
206
- "This sandbox is self-contained: the copied workspace has its own sanitized `raw/` fallback via `interf.json` `source.path`.",
205
+ "This sandbox is self-contained: the copied workspace has its own sanitized `raw/` fallback via `.interf/interf.json` `source.path`.",
207
206
  "The source-folder control plane is intentionally absent from this sandbox. Work only from this sandboxed workspace and its embedded raw files.",
208
207
  ]
209
208
  : [
210
- "You are running an Interf baseline test inside an isolated sandbox of the raw files for this folder.",
209
+ "You are running an Interf baseline test inside an isolated raw test shell.",
210
+ "Read `AGENTS.md` first.",
211
+ "Use the local native `interf-query` skill available in this shell.",
211
212
  "There is no compiled workspace in this sandbox.",
212
- "Answer only from the sandboxed raw files you can read here.",
213
+ "Answer only from `raw/` inside this shell.",
213
214
  "The source-folder control plane is intentionally absent from this sandbox.",
214
215
  ];
215
216
  return [
@@ -218,18 +219,18 @@ function buildBenchmarkQueryPrompt(target, benchmarkCase, answerPath, tracePath)
218
219
  "Do not ask follow-up questions.",
219
220
  `Write the answer to ${JSON.stringify(answerPath)}.`,
220
221
  `Write the trace to ${JSON.stringify(tracePath)} with keys: case_id, target, artifacts_consulted, raw_paths_read, used_raw_fallback, answer_summary.`,
221
- `Set \`case_id\` to ${JSON.stringify(benchmarkCase.id)}.`,
222
+ `Set \`case_id\` to ${JSON.stringify(testCase.id)}.`,
222
223
  `Set \`target\` to ${JSON.stringify(target.type)}.`,
223
- `Question: ${benchmarkCase.question}`,
224
+ `Question: ${testCase.question}`,
224
225
  ].join("\n");
225
226
  }
226
- async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
227
- const tempDir = mkdtempSync(join(tmpdir(), "interf-benchmark-live-"));
227
+ async function runLiveTestCase(target, testCase, executor) {
228
+ const tempDir = mkdtempSync(join(tmpdir(), "interf-test-live-"));
228
229
  const answerPath = join(tempDir, "answer.md");
229
230
  const tracePath = join(tempDir, "trace.json");
230
231
  const statusPath = join(tempDir, "status.log");
231
232
  const eventPath = join(tempDir, "events.ndjson");
232
- const prompt = buildBenchmarkQueryPrompt(target, benchmarkCase, answerPath, tracePath);
233
+ const prompt = buildTestQueryPrompt(target, testCase, answerPath, tracePath);
233
234
  let executionError = null;
234
235
  let code = -1;
235
236
  try {
@@ -250,8 +251,8 @@ async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
250
251
  detail: executionError ? `missing answer file (${executionError})` : "missing answer file",
251
252
  });
252
253
  return {
253
- caseId: benchmarkCase.id,
254
- question: benchmarkCase.question,
254
+ caseId: testCase.id,
255
+ question: testCase.question,
255
256
  ok: false,
256
257
  wordCount: 0,
257
258
  passedChecks: 0,
@@ -260,7 +261,7 @@ async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
260
261
  };
261
262
  }
262
263
  const answer = readFileSync(answerPath, "utf8");
263
- const evaluated = evaluateTextExpect(benchmarkCase, answer);
264
+ const evaluated = evaluateTextExpect(testCase, answer);
264
265
  checks.push({
265
266
  label: "answer exists",
266
267
  ok: true,
@@ -292,8 +293,8 @@ async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
292
293
  detail: "missing trace file",
293
294
  });
294
295
  }
295
- if (benchmarkCase.answer) {
296
- const judged = await runBenchmarkJudge(benchmarkCase, executor, `generated answer for ${benchmarkCase.id}`, answer);
296
+ if (testCase.answer) {
297
+ const judged = await runTargetTestsJudge(testCase, executor, `generated answer for ${testCase.id}`, answer);
297
298
  checks.push({
298
299
  label: "judge verdict",
299
300
  ok: judged.verdict?.pass === true,
@@ -304,8 +305,8 @@ async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
304
305
  }
305
306
  const passedChecks = checks.filter((check) => check.ok).length;
306
307
  return {
307
- caseId: benchmarkCase.id,
308
- question: benchmarkCase.question,
308
+ caseId: testCase.id,
309
+ question: testCase.question,
309
310
  ok: code === 0 && passedChecks === checks.length,
310
311
  wordCount: evaluated.wordCount,
311
312
  passedChecks,
@@ -319,22 +320,22 @@ async function runLiveBenchmarkCase(target, benchmarkCase, executor) {
319
320
  rmSync(tempDir, { recursive: true, force: true });
320
321
  }
321
322
  }
322
- async function runBenchmarkCaseWithJudge(target, benchmarkCase, executor) {
323
- if (!benchmarkCase.file) {
324
- return runLiveBenchmarkCase(target, benchmarkCase, executor);
323
+ async function runTestCaseWithJudge(target, testCase, executor) {
324
+ if (!testCase.file) {
325
+ return runLiveTestCase(target, testCase, executor);
325
326
  }
326
- const outputPath = join(target.path, benchmarkCase.file);
327
+ const outputPath = join(target.path, testCase.file);
327
328
  const checks = [];
328
329
  if (!existsSync(outputPath)) {
329
330
  checks.push({
330
331
  label: "file exists",
331
332
  ok: false,
332
- detail: `Missing ${benchmarkCase.file}`,
333
+ detail: `Missing ${testCase.file}`,
333
334
  });
334
335
  return {
335
- caseId: benchmarkCase.id,
336
- question: benchmarkCase.question,
337
- ...(benchmarkCase.file ? { file: benchmarkCase.file } : {}),
336
+ caseId: testCase.id,
337
+ question: testCase.question,
338
+ ...(testCase.file ? { file: testCase.file } : {}),
338
339
  ok: false,
339
340
  wordCount: 0,
340
341
  passedChecks: 0,
@@ -343,16 +344,16 @@ async function runBenchmarkCaseWithJudge(target, benchmarkCase, executor) {
343
344
  };
344
345
  }
345
346
  const content = readFileSync(outputPath, "utf8");
346
- const evaluated = evaluateTextExpect(benchmarkCase, content);
347
+ const evaluated = evaluateTextExpect(testCase, content);
347
348
  const wordCount = evaluated.wordCount;
348
349
  checks.push({
349
350
  label: "file exists",
350
351
  ok: true,
351
- detail: `Found ${benchmarkCase.file}`,
352
+ detail: `Found ${testCase.file}`,
352
353
  });
353
354
  checks.push(...evaluated.checks);
354
- if (benchmarkCase.answer) {
355
- const judged = await runBenchmarkJudge(benchmarkCase, executor, `compiled file ${outputPath}`, content);
355
+ if (testCase.answer) {
356
+ const judged = await runTargetTestsJudge(testCase, executor, `compiled file ${outputPath}`, content);
356
357
  checks.push({
357
358
  label: "judge verdict",
358
359
  ok: judged.verdict?.pass === true,
@@ -363,9 +364,9 @@ async function runBenchmarkCaseWithJudge(target, benchmarkCase, executor) {
363
364
  }
364
365
  const passedChecks = checks.filter((check) => check.ok).length;
365
366
  return {
366
- caseId: benchmarkCase.id,
367
- question: benchmarkCase.question,
368
- ...(benchmarkCase.file ? { file: benchmarkCase.file } : {}),
367
+ caseId: testCase.id,
368
+ question: testCase.question,
369
+ ...(testCase.file ? { file: testCase.file } : {}),
369
370
  ok: passedChecks === checks.length,
370
371
  wordCount,
371
372
  passedChecks,
@@ -373,7 +374,7 @@ async function runBenchmarkCaseWithJudge(target, benchmarkCase, executor) {
373
374
  checks,
374
375
  };
375
376
  }
376
- function buildBenchmarkTargetResult(target, caseResults, options = {}) {
377
+ function buildTestTargetResult(target, caseResults, options = {}) {
377
378
  const passedCases = caseResults.filter((result) => result.ok).length;
378
379
  const passedChecks = caseResults.reduce((total, result) => total + result.passedChecks, 0);
379
380
  const totalChecks = caseResults.reduce((total, result) => total + result.totalChecks, 0);
@@ -393,25 +394,25 @@ function buildBenchmarkTargetResult(target, caseResults, options = {}) {
393
394
  caseResults,
394
395
  };
395
396
  }
396
- function buildBenchmarkRunResult(sourcePath, spec, results, executor, generatedAt) {
397
+ function buildTestTargetRun(sourcePath, spec, results, executor, generatedAt) {
397
398
  return {
398
- kind: "interf-benchmark-run",
399
+ kind: "interf-test-target-run",
399
400
  version: 1,
400
401
  generated_at: generatedAt ?? new Date().toISOString(),
401
- benchmark: {
402
+ spec: {
402
403
  id: spec.id,
403
404
  name: spec.name,
404
405
  type: spec.type,
405
406
  file: spec.filePath,
406
407
  ...(spec.description ? { description: spec.description } : {}),
407
408
  case_count: spec.cases.length,
408
- cases: spec.cases.map((benchmarkCase) => ({
409
- id: benchmarkCase.id,
410
- question: benchmarkCase.question,
411
- ...(benchmarkCase.file ? { file: benchmarkCase.file } : {}),
412
- ...(benchmarkCase.answer ? { answer: benchmarkCase.answer } : {}),
413
- ...(benchmarkCase.strictness ? { strictness: benchmarkCase.strictness } : {}),
414
- ...(benchmarkCase.expect ? { expect: benchmarkCase.expect } : {}),
409
+ cases: spec.cases.map((testCase) => ({
410
+ id: testCase.id,
411
+ question: testCase.question,
412
+ ...(testCase.file ? { file: testCase.file } : {}),
413
+ ...(testCase.answer ? { answer: testCase.answer } : {}),
414
+ ...(testCase.strictness ? { strictness: testCase.strictness } : {}),
415
+ ...(testCase.expect ? { expect: testCase.expect } : {}),
415
416
  })),
416
417
  },
417
418
  source_path: sourcePath,
@@ -420,97 +421,99 @@ function buildBenchmarkRunResult(sourcePath, spec, results, executor, generatedA
420
421
  results,
421
422
  };
422
423
  }
423
- export function runBenchmark(sourcePath, spec, targets) {
424
- if (spec.cases.some((benchmarkCase) => benchmarkCaseNeedsExecutor(benchmarkCase))) {
425
- throw new Error("This benchmark needs a live executor. Use runBenchmarkWithJudge instead.");
424
+ export function runTargetTests(sourcePath, spec, targets) {
425
+ if (spec.cases.some((testCase) => testCaseNeedsExecutor(testCase))) {
426
+ throw new Error("This test needs a live executor. Use runTargetTestsWithJudge instead.");
426
427
  }
427
428
  for (const target of targets) {
428
429
  if (target.type !== spec.type) {
429
- throw new Error(`Benchmark target type mismatch: expected ${spec.type}, got ${target.type}`);
430
+ throw new Error(`Test target type mismatch: expected ${spec.type}, got ${target.type}`);
430
431
  }
431
432
  }
432
- const results = targets.map((target) => buildBenchmarkTargetResult(target, spec.cases.map((benchmarkCase) => runBenchmarkCase(target, benchmarkCase))));
433
- return buildBenchmarkRunResult(sourcePath, spec, results);
433
+ const results = targets.map((target) => buildTestTargetResult(target, spec.cases.map((testCase) => runTestCase(target, testCase))));
434
+ return buildTestTargetRun(sourcePath, spec, results);
434
435
  }
435
- export async function runBenchmarkWithJudge(sourcePath, spec, targets, executor, options = {}) {
436
+ export async function runTargetTestsWithJudge(sourcePath, spec, targets, executor, options = {}) {
436
437
  const preserveMode = options.preserveSandboxes ?? "on-failure";
438
+ const artifactRootPath = options.artifactRootPath ?? sourcePath;
437
439
  const generatedAt = new Date().toISOString();
438
440
  const sandboxRunId = `${generatedAt.replace(/[:.]/g, "-")}-${spec.id}`;
439
441
  for (const target of targets) {
440
442
  if (target.type !== spec.type) {
441
- throw new Error(`Benchmark target type mismatch: expected ${spec.type}, got ${target.type}`);
443
+ throw new Error(`Test target type mismatch: expected ${spec.type}, got ${target.type}`);
442
444
  }
443
445
  }
444
446
  const results = [];
445
447
  for (const [index, target] of targets.entries()) {
446
- const sandbox = createBenchmarkSandbox(target);
448
+ const sandbox = createTestSandbox(target);
447
449
  try {
448
450
  const sandboxTarget = {
449
451
  ...target,
450
452
  path: sandbox.targetPath,
451
453
  };
452
454
  const caseResults = [];
453
- for (const benchmarkCase of spec.cases) {
454
- if (benchmarkCaseNeedsExecutor(benchmarkCase)) {
455
- caseResults.push(await runLiveBenchmarkCase(sandboxTarget, benchmarkCase, executor));
455
+ for (const testCase of spec.cases) {
456
+ if (testCaseNeedsExecutor(testCase)) {
457
+ caseResults.push(await runLiveTestCase(sandboxTarget, testCase, executor));
456
458
  }
457
459
  else {
458
- caseResults.push(await runBenchmarkCaseWithJudge(sandboxTarget, benchmarkCase, executor));
460
+ caseResults.push(await runTestCaseWithJudge(sandboxTarget, testCase, executor));
459
461
  }
460
462
  }
461
463
  let sandboxPath;
462
- const targetResult = buildBenchmarkTargetResult(target, caseResults);
464
+ const targetResult = buildTestTargetResult(target, caseResults);
463
465
  const shouldPreserveSandbox = preserveMode === "always" || !targetResult.ok;
464
466
  if (shouldPreserveSandbox) {
465
- const sandboxRoot = benchmarkSandboxesPath(sourcePath, target.type);
467
+ const sandboxRoot = targetTestSandboxesPath(artifactRootPath, target.type);
466
468
  mkdirSync(sandboxRoot, { recursive: true });
467
- const gitignorePath = benchmarkSandboxGitignorePath(sourcePath, target.type);
469
+ const gitignorePath = targetTestSandboxGitignorePath(artifactRootPath, target.type);
468
470
  if (!existsSync(gitignorePath)) {
469
471
  writeFileSync(gitignorePath, "*\n!.gitignore\n");
470
472
  }
471
- const sandboxPathName = `${String(index + 1).padStart(2, "0")}-${normalizeBenchmarkId(target.name) || target.type}`;
473
+ const sandboxPathName = `${String(index + 1).padStart(2, "0")}-${normalizeTestId(target.name) || target.type}`;
472
474
  sandbox.preserve(join(sandboxRoot, sandboxRunId, sandboxPathName));
473
475
  sandboxPath = sandbox.targetPath;
474
476
  }
475
- results.push(buildBenchmarkTargetResult(target, caseResults, { sandboxPath }));
477
+ results.push(buildTestTargetResult(target, caseResults, { sandboxPath }));
476
478
  }
477
479
  finally {
478
480
  sandbox.cleanup();
479
481
  }
480
482
  }
481
- return buildBenchmarkRunResult(sourcePath, spec, results, executor, generatedAt);
483
+ return buildTestTargetRun(sourcePath, spec, results, executor, generatedAt);
482
484
  }
483
- export async function runBenchmarkAuto(sourcePath, spec, targets, options) {
484
- if (spec.cases.some((benchmarkCase) => benchmarkCaseNeedsExecutor(benchmarkCase))) {
485
+ export async function runTargetTestsAuto(sourcePath, spec, targets, options) {
486
+ if (spec.cases.some((testCase) => testCaseNeedsExecutor(testCase))) {
485
487
  if (!options?.executor) {
486
- throw new Error("This benchmark needs a live local executor, but no executor was provided.");
488
+ throw new Error("This test needs a live local executor, but no executor was provided.");
487
489
  }
488
- return runBenchmarkWithJudge(sourcePath, spec, targets, options.executor, {
490
+ return runTargetTestsWithJudge(sourcePath, spec, targets, options.executor, {
489
491
  preserveSandboxes: options.preserveSandboxes,
492
+ artifactRootPath: options.artifactRootPath,
490
493
  });
491
494
  }
492
- return runBenchmark(sourcePath, spec, targets);
495
+ return runTargetTests(sourcePath, spec, targets);
493
496
  }
494
- export function saveBenchmarkRun(sourcePath, result) {
495
- const dirPath = benchmarkRunsPath(sourcePath, result.benchmark.type);
497
+ export function saveTargetTestRun(artifactRootPath, result) {
498
+ const dirPath = targetTestRunsPath(artifactRootPath, result.spec.type);
496
499
  mkdirSync(dirPath, { recursive: true });
497
- const gitignorePath = benchmarkRunGitignorePath(sourcePath, result.benchmark.type);
500
+ const gitignorePath = targetTestRunGitignorePath(artifactRootPath, result.spec.type);
498
501
  if (!existsSync(gitignorePath)) {
499
502
  writeFileSync(gitignorePath, "*\n!.gitignore\n");
500
503
  }
501
504
  const timestamp = result.generated_at.replace(/[:.]/g, "-");
502
- const runDirPath = join(dirPath, `${timestamp}-${result.benchmark.id}`);
505
+ const runDirPath = join(dirPath, `${timestamp}-${result.spec.id}`);
503
506
  mkdirSync(runDirPath, { recursive: true });
504
507
  const manifestPath = join(runDirPath, "manifest.json");
505
508
  writeFileSync(manifestPath, `${JSON.stringify({
506
- kind: "interf-benchmark-run-manifest",
509
+ kind: "interf-test-target-run-manifest",
507
510
  version: 1,
508
511
  generated_at: result.generated_at,
509
- benchmark: {
510
- id: result.benchmark.id,
511
- name: result.benchmark.name,
512
- type: result.benchmark.type,
513
- case_count: result.benchmark.case_count,
512
+ spec: {
513
+ id: result.spec.id,
514
+ name: result.spec.name,
515
+ type: result.spec.type,
516
+ case_count: result.spec.case_count,
514
517
  },
515
518
  result_file: "run.json",
516
519
  target_count: result.target_count,
@@ -520,4 +523,3 @@ export function saveBenchmarkRun(sourcePath, result) {
520
523
  writeFileSync(runPath, `${JSON.stringify(result, null, 2)}\n`);
521
524
  return runPath;
522
525
  }
523
- //# sourceMappingURL=benchmark-execution.js.map
@@ -0,0 +1,90 @@
1
+ import { z } from "zod";
2
+ import type { WorkflowExecutionProfile } from "./executors.js";
3
+ export declare const TestMatrixAgentSchema: z.ZodEnum<{
4
+ "claude-code": "claude-code";
5
+ codex: "codex";
6
+ }>;
7
+ export declare const TestMatrixProfileSchema: z.ZodPipe<z.ZodObject<{
8
+ id: z.ZodString;
9
+ preset: z.ZodOptional<z.ZodEnum<{
10
+ "release-claude-high": "release-claude-high";
11
+ "release-codex-high": "release-codex-high";
12
+ "tier1-max-claude": "tier1-max-claude";
13
+ "tier1-max-codex": "tier1-max-codex";
14
+ }>>;
15
+ agent: z.ZodOptional<z.ZodEnum<{
16
+ "claude-code": "claude-code";
17
+ codex: "codex";
18
+ }>>;
19
+ model: z.ZodOptional<z.ZodString>;
20
+ profile: z.ZodOptional<z.ZodString>;
21
+ effort: z.ZodOptional<z.ZodString>;
22
+ timeout_ms: z.ZodOptional<z.ZodNumber>;
23
+ }, z.core.$strip>, z.ZodTransform<{
24
+ agent: "claude-code" | "codex";
25
+ id: string;
26
+ preset?: "release-claude-high" | "release-codex-high" | "tier1-max-claude" | "tier1-max-codex" | undefined;
27
+ model?: string | undefined;
28
+ profile?: string | undefined;
29
+ effort?: string | undefined;
30
+ timeout_ms?: number | undefined;
31
+ }, {
32
+ id: string;
33
+ preset?: "release-claude-high" | "release-codex-high" | "tier1-max-claude" | "tier1-max-codex" | undefined;
34
+ agent?: "claude-code" | "codex" | undefined;
35
+ model?: string | undefined;
36
+ profile?: string | undefined;
37
+ effort?: string | undefined;
38
+ timeout_ms?: number | undefined;
39
+ }>>;
40
+ export declare const TestMatrixRetryPolicySchema: z.ZodObject<{
41
+ max_attempts_per_profile: z.ZodOptional<z.ZodNumber>;
42
+ }, z.core.$strip>;
43
+ export declare const TestMatrixWorkspaceSchema: z.ZodString;
44
+ export declare const TestMatrixSchema: z.ZodObject<{
45
+ id: z.ZodString;
46
+ name: z.ZodString;
47
+ source_path: z.ZodString;
48
+ compile_profiles: z.ZodArray<z.ZodPipe<z.ZodObject<{
49
+ id: z.ZodString;
50
+ preset: z.ZodOptional<z.ZodEnum<{
51
+ "release-claude-high": "release-claude-high";
52
+ "release-codex-high": "release-codex-high";
53
+ "tier1-max-claude": "tier1-max-claude";
54
+ "tier1-max-codex": "tier1-max-codex";
55
+ }>>;
56
+ agent: z.ZodOptional<z.ZodEnum<{
57
+ "claude-code": "claude-code";
58
+ codex: "codex";
59
+ }>>;
60
+ model: z.ZodOptional<z.ZodString>;
61
+ profile: z.ZodOptional<z.ZodString>;
62
+ effort: z.ZodOptional<z.ZodString>;
63
+ timeout_ms: z.ZodOptional<z.ZodNumber>;
64
+ }, z.core.$strip>, z.ZodTransform<{
65
+ agent: "claude-code" | "codex";
66
+ id: string;
67
+ preset?: "release-claude-high" | "release-codex-high" | "tier1-max-claude" | "tier1-max-codex" | undefined;
68
+ model?: string | undefined;
69
+ profile?: string | undefined;
70
+ effort?: string | undefined;
71
+ timeout_ms?: number | undefined;
72
+ }, {
73
+ id: string;
74
+ preset?: "release-claude-high" | "release-codex-high" | "tier1-max-claude" | "tier1-max-codex" | undefined;
75
+ agent?: "claude-code" | "codex" | undefined;
76
+ model?: string | undefined;
77
+ profile?: string | undefined;
78
+ effort?: string | undefined;
79
+ timeout_ms?: number | undefined;
80
+ }>>>;
81
+ retry_policy: z.ZodOptional<z.ZodObject<{
82
+ max_attempts_per_profile: z.ZodOptional<z.ZodNumber>;
83
+ }, z.core.$strip>>;
84
+ workspaces: z.ZodArray<z.ZodString>;
85
+ }, z.core.$strip>;
86
+ export type TestMatrixProfile = z.infer<typeof TestMatrixProfileSchema>;
87
+ export type TestMatrixRetryPolicy = z.infer<typeof TestMatrixRetryPolicySchema>;
88
+ export type TestMatrixWorkspace = z.infer<typeof TestMatrixWorkspaceSchema>;
89
+ export type TestMatrix = z.infer<typeof TestMatrixSchema>;
90
+ export declare function testMatrixProfileToExecutionProfile(profile: TestMatrixProfile): WorkflowExecutionProfile;