@interf/compiler 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +126 -188
  2. package/builtin-workflows/interf/README.md +22 -10
  3. package/builtin-workflows/interf/compile/stages/shape/SKILL.md +6 -3
  4. package/builtin-workflows/interf/compile/stages/structure/SKILL.md +3 -0
  5. package/builtin-workflows/interf/compile/stages/summarize/SKILL.md +18 -2
  6. package/builtin-workflows/interf/improve/SKILL.md +2 -2
  7. package/builtin-workflows/interf/workflow.json +18 -4
  8. package/builtin-workflows/interf/{compiled.schema.json → workflow.schema.json} +9 -2
  9. package/dist/commands/check-draft.js +3 -3
  10. package/dist/commands/compile-controller.js +9 -16
  11. package/dist/commands/compile.d.ts +19 -1
  12. package/dist/commands/compile.js +98 -28
  13. package/dist/commands/create-workflow-wizard.d.ts +20 -2
  14. package/dist/commands/create-workflow-wizard.js +163 -27
  15. package/dist/commands/create.d.ts +1 -1
  16. package/dist/commands/create.js +67 -60
  17. package/dist/commands/dataset-selection.d.ts +6 -0
  18. package/dist/commands/dataset-selection.js +11 -0
  19. package/dist/commands/default.js +3 -3
  20. package/dist/commands/doctor.js +8 -8
  21. package/dist/commands/executor-flow.d.ts +1 -1
  22. package/dist/commands/executor-flow.js +5 -2
  23. package/dist/commands/init.d.ts +5 -0
  24. package/dist/commands/init.js +56 -48
  25. package/dist/commands/list.js +6 -3
  26. package/dist/commands/reset.js +1 -1
  27. package/dist/commands/source-config-wizard.d.ts +2 -2
  28. package/dist/commands/source-config-wizard.js +50 -17
  29. package/dist/commands/test-flow.js +5 -16
  30. package/dist/commands/test.d.ts +0 -6
  31. package/dist/commands/test.js +9 -17
  32. package/dist/index.d.ts +1 -1
  33. package/dist/index.js +1 -1
  34. package/dist/lib/agent-args.d.ts +1 -0
  35. package/dist/lib/agent-args.js +10 -0
  36. package/dist/lib/agent-execution.js +2 -1
  37. package/dist/lib/agent-preflight.js +2 -1
  38. package/dist/lib/agent-shells.d.ts +26 -1
  39. package/dist/lib/agent-shells.js +214 -40
  40. package/dist/lib/agents.d.ts +1 -1
  41. package/dist/lib/agents.js +1 -1
  42. package/dist/lib/builtin-compiled-workflow.d.ts +38 -0
  43. package/dist/lib/builtin-compiled-workflow.js +94 -0
  44. package/dist/lib/compiled-compile.d.ts +0 -4
  45. package/dist/lib/compiled-compile.js +11 -30
  46. package/dist/lib/compiled-paths.d.ts +1 -2
  47. package/dist/lib/compiled-paths.js +8 -13
  48. package/dist/lib/compiled-raw.d.ts +2 -2
  49. package/dist/lib/compiled-reset.d.ts +1 -0
  50. package/dist/lib/compiled-reset.js +42 -14
  51. package/dist/lib/compiled-schema.d.ts +11 -7
  52. package/dist/lib/compiled-schema.js +47 -16
  53. package/dist/lib/discovery.d.ts +1 -1
  54. package/dist/lib/discovery.js +2 -2
  55. package/dist/lib/executors.d.ts +1 -1
  56. package/dist/lib/executors.js +2 -2
  57. package/dist/lib/interf-detect.d.ts +0 -1
  58. package/dist/lib/interf-detect.js +7 -18
  59. package/dist/lib/interf-scaffold.js +4 -11
  60. package/dist/lib/interf-workflow-package.d.ts +8 -3
  61. package/dist/lib/interf-workflow-package.js +128 -62
  62. package/dist/lib/interf.d.ts +1 -1
  63. package/dist/lib/interf.js +1 -1
  64. package/dist/lib/local-workflows.d.ts +4 -3
  65. package/dist/lib/local-workflows.js +127 -104
  66. package/dist/lib/project-paths.d.ts +2 -4
  67. package/dist/lib/project-paths.js +13 -10
  68. package/dist/lib/runtime-acceptance.js +15 -3
  69. package/dist/lib/runtime-contracts.js +3 -2
  70. package/dist/lib/runtime-paths.d.ts +1 -0
  71. package/dist/lib/runtime-paths.js +4 -1
  72. package/dist/lib/runtime-prompt.js +4 -4
  73. package/dist/lib/runtime-reconcile.js +90 -64
  74. package/dist/lib/runtime-runs.js +29 -102
  75. package/dist/lib/runtime.d.ts +1 -1
  76. package/dist/lib/runtime.js +1 -1
  77. package/dist/lib/schema.d.ts +104 -54
  78. package/dist/lib/schema.js +32 -116
  79. package/dist/lib/source-config.js +21 -22
  80. package/dist/lib/state-health.js +4 -2
  81. package/dist/lib/state-io.js +2 -110
  82. package/dist/lib/state-view.js +8 -8
  83. package/dist/lib/state.d.ts +1 -0
  84. package/dist/lib/state.js +7 -0
  85. package/dist/lib/test-execution.js +2 -2
  86. package/dist/lib/test-paths.js +12 -3
  87. package/dist/lib/test-sandbox.js +4 -17
  88. package/dist/lib/test-specs.js +1 -1
  89. package/dist/lib/validate-compiled.js +13 -8
  90. package/dist/lib/validate.d.ts +5 -1
  91. package/dist/lib/validate.js +30 -22
  92. package/dist/lib/workflow-authoring.d.ts +26 -0
  93. package/dist/lib/workflow-authoring.js +119 -0
  94. package/dist/lib/workflow-definitions.d.ts +14 -3
  95. package/dist/lib/workflow-definitions.js +21 -17
  96. package/dist/lib/workflow-edit-session.d.ts +16 -0
  97. package/dist/lib/workflow-edit-session.js +57 -0
  98. package/dist/lib/workflow-edit-utils.d.ts +10 -0
  99. package/dist/lib/workflow-edit-utils.js +39 -0
  100. package/dist/lib/workflow-improvement.js +30 -217
  101. package/dist/lib/workflow-primitives.d.ts +2 -0
  102. package/dist/lib/workflow-primitives.js +5 -0
  103. package/dist/lib/workflow-stage-policy.d.ts +5 -0
  104. package/dist/lib/workflow-stage-policy.js +31 -0
  105. package/package.json +7 -8
  106. package/dist/lib/compiled-layout.d.ts +0 -2
  107. package/dist/lib/compiled-layout.js +0 -60
  108. package/dist/lib/obsidian.d.ts +0 -1
  109. package/dist/lib/obsidian.js +0 -15
  110. package/dist/lib/summarize-plan.d.ts +0 -17
  111. package/dist/lib/summarize-plan.js +0 -124
  112. package/dist/lib/workflow-abi.d.ts +0 -129
  113. package/dist/lib/workflow-abi.js +0 -156
@@ -1,13 +1,13 @@
1
1
  import chalk from "chalk";
2
2
  import * as p from "@clack/prompts";
3
+ import { resolve } from "node:path";
3
4
  import { detectInterf, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
4
- import { SOURCE_FOLDER_CONFIG_FILE, resolveDatasetCompileMaxAttempts, resolveDatasetCompileMaxLoops, syncCompiledInterfConfigFromSourceDatasetConfig, upsertSourceDatasetConfig, } from "../lib/source-config.js";
5
+ import { SOURCE_FOLDER_CONFIG_FILE, syncCompiledInterfConfigFromSourceDatasetConfig, upsertSourceDatasetConfig, } from "../lib/source-config.js";
5
6
  import { DEFAULT_COMPILED_NAME, describeCompileLoopSelection, promptSingleCompiledConfig, } from "./source-config-wizard.js";
6
7
  import { buildCompiledWorkflowOptions, chooseCompiledWorkflow, createWorkflowWizard, } from "./create-workflow-wizard.js";
7
- import { findBuiltCompiledPath, findSavedCompiledConfig, listSavedCompiledEntries, ensureCompiledFromConfig, } from "./compiled-flow.js";
8
+ import { findBuiltCompiledPath, findSavedCompiledConfig, listSavedCompiledEntries, } from "./compiled-flow.js";
8
9
  import { readSavedTestComparison } from "./test-flow.js";
9
- import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
10
- import { runConfiguredCompiledCompile } from "./compile.js";
10
+ import { runCompileCommand } from "./compile.js";
11
11
  import { runTestCommand } from "./test.js";
12
12
  function describeSavedQuestions(dataset) {
13
13
  const count = dataset.checks.length;
@@ -15,19 +15,13 @@ function describeSavedQuestions(dataset) {
15
15
  return "No saved truth checks yet";
16
16
  return `${count} saved truth check${count === 1 ? "" : "s"}`;
17
17
  }
18
- function compileModeAlreadyRanSavedTests(dataset) {
19
- if (dataset.checks.length === 0)
20
- return false;
21
- return (resolveDatasetCompileMaxAttempts(dataset) != null ||
22
- resolveDatasetCompileMaxLoops(dataset) != null);
23
- }
24
18
  function printDatasetSummary(options) {
25
19
  const compiledConfig = options.builtCompiledPath
26
20
  ? readInterfConfig(options.builtCompiledPath)
27
21
  : null;
28
22
  const workflowLabel = `${options.dataset.workflow ?? "interf"}${compiledConfig?.workflow_origin?.local_draft === true ? " (local draft)" : ""}`;
29
23
  p.log.info(`Dataset: ${options.dataset.name}`);
30
- p.log.info(`Path: ${options.dataset.path === "." ? "project root" : options.dataset.path}`);
24
+ p.log.info(`Path: ${options.dataset.path === "." ? "source folder" : options.dataset.path}`);
31
25
  if (options.dataset.about) {
32
26
  p.log.info(`About: ${options.dataset.about}`);
33
27
  }
@@ -60,12 +54,12 @@ async function promptDatasetAction(dataset, built, latestComparison) {
60
54
  options.push({
61
55
  value: "test",
62
56
  label: built
63
- ? "Measure files-as-is and compiled accuracy (Recommended)"
57
+ ? "Compare files-as-is and compiled dataset (Recommended)"
64
58
  : hasSavedRawBaseline
65
59
  ? "Rerun the files-as-is baseline"
66
60
  : "Measure the files-as-is baseline (Recommended)",
67
61
  hint: built
68
- ? "Compare whether the compiled dataset is actually better on the saved checks"
62
+ ? "See whether preparation helps on the saved checks"
69
63
  : hasSavedRawBaseline
70
64
  ? "Refresh the saved raw baseline on the current checks"
71
65
  : "See whether the raw dataset is already good enough before compiling",
@@ -162,6 +156,8 @@ async function chooseCompiledForWizard(options) {
162
156
  }
163
157
  async function promptCompiledSetup(options) {
164
158
  let workflowId = options.initial?.workflow ?? "interf";
159
+ let workflowLabel = buildCompiledWorkflowOptions(options.sourcePath)
160
+ .find((option) => option.value === workflowId)?.label ?? workflowId;
165
161
  if (options.introStyle === "edit") {
166
162
  const workflowChoice = await chooseCompiledWorkflow(options.sourcePath, {
167
163
  currentWorkflowId: workflowId,
@@ -170,9 +166,9 @@ async function promptCompiledSetup(options) {
170
166
  if (p.isCancel(workflowChoice))
171
167
  return null;
172
168
  workflowId = workflowChoice;
169
+ workflowLabel = buildCompiledWorkflowOptions(options.sourcePath)
170
+ .find((option) => option.value === workflowId)?.label ?? workflowId;
173
171
  }
174
- const workflowLabel = buildCompiledWorkflowOptions(options.sourcePath)
175
- .find((option) => option.value === workflowId)?.label ?? workflowId;
176
172
  const compiledConfig = await promptSingleCompiledConfig({
177
173
  projectPath: options.sourcePath,
178
174
  initial: options.initial,
@@ -206,31 +202,14 @@ async function promptCompiledSetup(options) {
206
202
  })}`));
207
203
  return compiledConfigWithWorkflow;
208
204
  }
209
- async function compileSelectedCompiled(sourcePath, compiledConfig) {
210
- const { executor, error } = await resolveOrConfigureLocalExecutor({
211
- purpose: "compile",
212
- });
213
- if (!executor && !error) {
214
- return null;
215
- }
216
- if (!executor) {
217
- process.exitCode = 1;
218
- console.log(chalk.red(error ?? "No coding agent detected."));
219
- return null;
220
- }
221
- const compiledPath = ensureCompiledFromConfig(sourcePath, compiledConfig);
222
- const compiled = await runConfiguredCompiledCompile({
223
- executor,
224
- compiledPath,
205
+ export async function compileSelectedCompiled(sourcePath, compiledConfig, deps = {}) {
206
+ return (deps.runCompileCommand ?? runCompileCommand)({
225
207
  sourcePath,
226
- compiledConfig,
227
- maxAttemptsOverride: null,
228
- maxLoopsOverride: null,
208
+ dataset: compiledConfig.name,
209
+ datasetConfig: compiledConfig,
210
+ skipConfirm: true,
211
+ skipDatasetBanner: true,
229
212
  });
230
- if (!compiled) {
231
- return null;
232
- }
233
- return compiledPath;
234
213
  }
235
214
  async function runCompiledActionMenu(sourcePath, compiledConfig, options = {}) {
236
215
  const builtCompiledPath = findBuiltCompiledPath(sourcePath, compiledConfig.name);
@@ -251,7 +230,30 @@ async function runCompiledActionMenu(sourcePath, compiledConfig, options = {}) {
251
230
  return;
252
231
  }
253
232
  if (action === "workflow") {
254
- await createWorkflowWizard({ sourcePath });
233
+ const workflowId = await createWorkflowWizard({
234
+ sourcePath,
235
+ datasetContext: {
236
+ config: compiledConfig,
237
+ datasetPath: compiledConfig.path === "."
238
+ ? sourcePath
239
+ : resolve(sourcePath, compiledConfig.path),
240
+ },
241
+ });
242
+ if (typeof workflowId === "string") {
243
+ const nextConfig = {
244
+ ...compiledConfig,
245
+ workflow: workflowId,
246
+ };
247
+ upsertSourceDatasetConfig(sourcePath, nextConfig, {
248
+ matchName: compiledConfig.name,
249
+ });
250
+ const builtCompiledPath = findBuiltCompiledPath(sourcePath, compiledConfig.name);
251
+ if (builtCompiledPath) {
252
+ syncCompiledInterfConfigFromSourceDatasetConfig(builtCompiledPath, nextConfig);
253
+ }
254
+ p.log.info(`Assigned workflow "${workflowId}" to dataset "${compiledConfig.name}".`);
255
+ p.log.info("Next: run `interf compile`, then `interf test`.");
256
+ }
255
257
  return;
256
258
  }
257
259
  if (action === "dataset") {
@@ -316,8 +318,13 @@ async function runCompiledActionMenu(sourcePath, compiledConfig, options = {}) {
316
318
  return;
317
319
  }
318
320
  }
319
- const compiledPath = await compileSelectedCompiled(sourcePath, compiledConfig);
320
- if (!compiledPath) {
321
+ const compileResult = await compileSelectedCompiled(sourcePath, compiledConfig);
322
+ if (!compileResult) {
323
+ return;
324
+ }
325
+ if (compileResult.testedDuringCompile) {
326
+ p.log.info("This compile run already tested the compiled dataset on the saved checks.");
327
+ p.log.info("Run `interf test` later if you want a fresh side-by-side comparison summary.");
321
328
  return;
322
329
  }
323
330
  await runTestCommand({
@@ -329,12 +336,13 @@ async function runCompiledActionMenu(sourcePath, compiledConfig, options = {}) {
329
336
  return;
330
337
  }
331
338
  if (action === "compile") {
332
- if (!await compileSelectedCompiled(sourcePath, compiledConfig))
339
+ const compileResult = await compileSelectedCompiled(sourcePath, compiledConfig);
340
+ if (!compileResult)
333
341
  return;
334
342
  if (compiledConfig.checks.length === 0)
335
343
  return;
336
- if (compileModeAlreadyRanSavedTests(compiledConfig)) {
337
- p.log.info("Saved compile mode already ran the compiled-dataset test.");
344
+ if (compileResult.testedDuringCompile) {
345
+ p.log.info("This compile run already ran the compiled-dataset test.");
338
346
  return;
339
347
  }
340
348
  const runCompiledTest = await p.confirm({
@@ -356,19 +364,19 @@ async function runCompiledActionMenu(sourcePath, compiledConfig, options = {}) {
356
364
  }
357
365
  export const initCommand = {
358
366
  command: "init",
359
- describe: "Open the root-folder wizard for this folder",
367
+ describe: "Open the dataset wizard for this folder",
360
368
  handler: async () => {
361
369
  await runInitCommand();
362
370
  },
363
371
  };
364
372
  export async function runInitCommand() {
365
- p.intro(chalk.bold("Interf Compiler"));
366
- p.log.info("Measure how accurately your local agents answer from the dataset in this folder, then compile only if it helps.");
373
+ p.intro(chalk.bold("Interf"));
374
+ p.log.info("Measure whether your local agents can answer the questions your task depends on from the data in this folder, then prepare the dataset only if the raw files are not good enough.");
367
375
  const cwd = process.cwd();
368
376
  const detected = detectInterf(cwd);
369
377
  const sourcePath = detected ? resolveSourceControlPath(detected.path) : cwd;
370
378
  if (detected) {
371
- p.log.info(`Working from the dataset control plane: ${sourcePath}`);
379
+ p.log.info(`Working from the source folder: ${sourcePath}`);
372
380
  }
373
381
  const savedEntries = listSavedCompiledEntries(sourcePath);
374
382
  if (savedEntries.length === 0) {
@@ -9,7 +9,7 @@ export const listCommand = {
9
9
  const sourcePath = detected ? resolveSourceControlPath(detected.path) : process.cwd();
10
10
  const datasets = listSavedCompiledEntries(sourcePath);
11
11
  if (datasets.length === 0) {
12
- console.log(chalk.dim(" Nothing found. Start with `interf`, run `interf test`, then compile a dataset when needed."));
12
+ console.log(chalk.dim(" Nothing found. Start with `interf` or `interf init` to define a dataset and save truth checks."));
13
13
  return;
14
14
  }
15
15
  console.log();
@@ -17,8 +17,11 @@ export const listCommand = {
17
17
  console.log();
18
18
  for (const dataset of datasets) {
19
19
  console.log(` ${dataset.config.name}`);
20
- console.log(chalk.dim(` ${dataset.path ?? "not built yet"}`));
21
- console.log(chalk.dim(` workflow: ${dataset.config.workflow ?? "interf"}`));
20
+ if (dataset.config.about) {
21
+ console.log(chalk.dim(` task: ${dataset.config.about}`));
22
+ }
23
+ console.log(chalk.dim(` ${dataset.path ? "built" : "not built yet"} · path: ${dataset.config.path}`));
24
+ console.log(chalk.dim(` workflow: ${dataset.config.workflow ?? "interf"}${dataset.localDraft ? " (local draft)" : ""}`));
22
25
  console.log(chalk.dim(` checks: ${dataset.config.checks.length}`));
23
26
  }
24
27
  },
@@ -3,7 +3,7 @@ import { detectInterf } from "../lib/interf.js";
3
3
  import { resetCompiledGeneratedState } from "../lib/compiled-reset.js";
4
4
  export const resetCommand = {
5
5
  command: "reset <scope>",
6
- describe: "Reset generated compiled state while keeping source files",
6
+ describe: "Reset generated compiled state while keeping `raw/` and the local workflow package",
7
7
  builder: (yargs) => yargs.positional("scope", {
8
8
  type: "string",
9
9
  choices: ["compile", "all"],
@@ -2,9 +2,9 @@ import type { SourceTruthCheck, SourceDatasetConfig } from "../lib/schema.js";
2
2
  export declare const DEFAULT_COMPILED_NAME = "dataset1";
3
3
  export declare const DEFAULT_COMPILED_CHECK_QUESTION_PLACEHOLDER = "A question you can already verify from this dataset";
4
4
  export declare const DEFAULT_COMPILED_CHECK_ANSWER_PLACEHOLDER = "The expected answer in plain English";
5
- export declare const DEFAULT_COMPILED_ABOUT_PLACEHOLDER = "Example: forward-demand metrics, board-prep questions, or chart reads from this dataset.";
5
+ export declare const DEFAULT_COMPILED_ABOUT_PLACEHOLDER = "Example: board-prep questions from these files, chart reads from this report, or tax review from these exports.";
6
6
  export declare const DEFAULT_DATASET_PATH_PLACEHOLDER = "./dataset1";
7
- export declare const DEFAULT_COMPILE_RETRY_ATTEMPTS = 1;
7
+ export declare const DEFAULT_COMPILE_RETRY_ATTEMPTS = 3;
8
8
  export declare const DEFAULT_SELF_IMPROVING_LOOPS = 3;
9
9
  export type CompileLoopMode = "once" | "retry" | "self-improving";
10
10
  export interface CompileLoopSelection {
@@ -8,10 +8,11 @@ import { draftTruthChecks } from "./check-draft.js";
8
8
  export const DEFAULT_COMPILED_NAME = "dataset1";
9
9
  export const DEFAULT_COMPILED_CHECK_QUESTION_PLACEHOLDER = "A question you can already verify from this dataset";
10
10
  export const DEFAULT_COMPILED_CHECK_ANSWER_PLACEHOLDER = "The expected answer in plain English";
11
- export const DEFAULT_COMPILED_ABOUT_PLACEHOLDER = "Example: forward-demand metrics, board-prep questions, or chart reads from this dataset.";
11
+ export const DEFAULT_COMPILED_ABOUT_PLACEHOLDER = "Example: board-prep questions from these files, chart reads from this report, or tax review from these exports.";
12
12
  export const DEFAULT_DATASET_PATH_PLACEHOLDER = "./dataset1";
13
- export const DEFAULT_COMPILE_RETRY_ATTEMPTS = 1;
13
+ export const DEFAULT_COMPILE_RETRY_ATTEMPTS = 3;
14
14
  export const DEFAULT_SELF_IMPROVING_LOOPS = 3;
15
+ const DEFAULT_SELF_IMPROVING_ATTEMPTS = 1;
15
16
  function normalizeOptionalPromptText(value) {
16
17
  if (typeof value !== "string")
17
18
  return undefined;
@@ -75,9 +76,9 @@ export function defaultDatasetPathForPrompt(name, introStyle, cwd = process.cwd(
75
76
  }
76
77
  export function datasetAboutPromptMessage(introStyle) {
77
78
  if (introStyle === "edit") {
78
- return "What should this dataset stay accurate about?";
79
+ return "What task should this dataset stay focused on?";
79
80
  }
80
- return "What should this dataset be accurate about?";
81
+ return "What task should this dataset help with?";
81
82
  }
82
83
  function formatAttemptLabel(count, noun) {
83
84
  return `${count} ${noun}${count === 1 ? "" : "s"}`;
@@ -95,10 +96,29 @@ export function describeCompileLoopSelection(options) {
95
96
  return "Compile once.";
96
97
  }
97
98
  if (mode === "retry") {
98
- return "Compile retries are enabled for this dataset.";
99
+ const attempts = options.maxAttempts ?? DEFAULT_COMPILE_RETRY_ATTEMPTS;
100
+ return `Compile retries are enabled for this dataset (${formatAttemptLabel(attempts, "total attempt")}).`;
99
101
  }
100
102
  const loops = options.maxLoops ?? DEFAULT_SELF_IMPROVING_LOOPS;
101
- return `Self-improving loops are enabled for this dataset (${formatAttemptLabel(loops, "workflow revision")}).`;
103
+ const attempts = options.maxAttempts ?? DEFAULT_SELF_IMPROVING_ATTEMPTS;
104
+ return `Self-improving loops are enabled for this dataset (${formatAttemptLabel(attempts, "attempt")} per variation, ${formatAttemptLabel(loops, "workflow revision")}).`;
105
+ }
106
+ async function promptCompileRetryAttempts(options) {
107
+ const maxAttempts = await p.text({
108
+ message: options.message,
109
+ placeholder: String(DEFAULT_COMPILE_RETRY_ATTEMPTS),
110
+ initialValue: String(options.initialMaxAttempts ?? DEFAULT_COMPILE_RETRY_ATTEMPTS),
111
+ validate: (value) => {
112
+ const parsed = Number.parseInt(value.trim(), 10);
113
+ if (!Number.isInteger(parsed) || parsed < 2 || parsed > 5) {
114
+ return "Enter a whole number from 2 to 5";
115
+ }
116
+ return undefined;
117
+ },
118
+ });
119
+ if (p.isCancel(maxAttempts))
120
+ return null;
121
+ return compiledMaxAttempts(Number.parseInt(String(maxAttempts).trim(), 10)) ?? DEFAULT_COMPILE_RETRY_ATTEMPTS;
102
122
  }
103
123
  export async function promptCheckCases(options) {
104
124
  p.log.info(options.heading);
@@ -263,7 +283,7 @@ async function promptCompiledChecks(options) {
263
283
  if (options.initialAbout) {
264
284
  p.log.info(`About: ${options.initialAbout}`);
265
285
  }
266
- p.log.info("Truth checks are the questions this dataset should already answer correctly.");
286
+ p.log.info("Truth checks are the questions an agent should be able to answer from the data behind this task.");
267
287
  const mode = await p.select({
268
288
  message: "How do you want to create them?",
269
289
  options: [
@@ -420,8 +440,8 @@ export async function promptCompileLoopSelection(options) {
420
440
  return {};
421
441
  }
422
442
  const currentMode = resolveCompileLoopMode(options.initialMaxAttempts, options.initialMaxLoops);
423
- const currentPromptMode = currentMode === "self-improving" ? "self-improving" : "once";
424
- const recommendedMode = options.recommendedMode ?? "self-improving";
443
+ const currentPromptMode = currentMode;
444
+ const recommendedMode = options.recommendedMode ?? "once";
425
445
  const includeCompileOnce = options.includeCompileOnce !== false;
426
446
  const hasSavedPolicy = typeof options.initialMaxAttempts === "number" ||
427
447
  typeof options.initialMaxLoops === "number";
@@ -447,18 +467,20 @@ export async function promptCompileLoopSelection(options) {
447
467
  },
448
468
  retry: {
449
469
  value: "retry",
450
- label: "Compile once",
451
- hint: "Build the compiled dataset once with the selected workflow",
470
+ label: "Retry same workflow",
471
+ hint: `Retry compile and compiled-side truth checks up to ${options.initialMaxAttempts ?? DEFAULT_COMPILE_RETRY_ATTEMPTS} total time${(options.initialMaxAttempts ?? DEFAULT_COMPILE_RETRY_ATTEMPTS) === 1 ? "" : "s"} without editing the workflow`,
452
472
  },
453
473
  };
454
474
  const orderedModes = (hasSavedPolicy
455
475
  ? [
456
476
  currentPromptMode,
457
477
  ...(recommendedMode !== currentPromptMode ? [recommendedMode] : []),
478
+ "retry",
458
479
  "once",
459
480
  ]
460
481
  : [
461
482
  recommendedMode,
483
+ "retry",
462
484
  "once",
463
485
  ]).filter((mode, index, list) => list.indexOf(mode) === index)
464
486
  .filter((mode) => includeCompileOnce || mode !== "once");
@@ -471,6 +493,17 @@ export async function promptCompileLoopSelection(options) {
471
493
  if (selectedMode === "once") {
472
494
  return {};
473
495
  }
496
+ if (selectedMode === "retry") {
497
+ const maxAttempts = await promptCompileRetryAttempts({
498
+ initialMaxAttempts: options.initialMaxAttempts ?? DEFAULT_COMPILE_RETRY_ATTEMPTS,
499
+ message: "How many total attempts should Interf try before stopping?",
500
+ });
501
+ if (maxAttempts === null)
502
+ return null;
503
+ return {
504
+ max_attempts: maxAttempts,
505
+ };
506
+ }
474
507
  const maxLoops = await promptSelfImprovingLoops({
475
508
  initialMaxLoops: options.initialMaxLoops ?? DEFAULT_SELF_IMPROVING_LOOPS,
476
509
  message: "How many workflow revisions should Interf try?",
@@ -478,7 +511,7 @@ export async function promptCompileLoopSelection(options) {
478
511
  if (maxLoops === null)
479
512
  return null;
480
513
  return {
481
- max_attempts: compiledMaxAttempts(DEFAULT_COMPILE_RETRY_ATTEMPTS) ?? DEFAULT_COMPILE_RETRY_ATTEMPTS,
514
+ max_attempts: DEFAULT_SELF_IMPROVING_ATTEMPTS,
482
515
  max_loops: maxLoops,
483
516
  };
484
517
  }
@@ -487,7 +520,7 @@ export async function promptSingleCompiledConfig(options = {}) {
487
520
  const introStyle = options.introStyle ?? "first";
488
521
  if (introStyle === "first") {
489
522
  p.log.info("Pick the dataset folder you want to measure.");
490
- p.log.info("Then say what this dataset should be accurate about and save a few truth checks you can verify.");
523
+ p.log.info("Then say what task this dataset should help with and save a few truth checks you can verify.");
491
524
  }
492
525
  else if (introStyle === "additional") {
493
526
  p.log.info("Add another dataset only if this project needs a separate folder or set of truth checks.");
@@ -544,6 +577,9 @@ export async function promptSingleCompiledConfig(options = {}) {
544
577
  if (p.isCancel(datasetPath))
545
578
  return null;
546
579
  const normalizedDatasetPath = String(datasetPath).trim();
580
+ if (options.selectedWorkflowLabel) {
581
+ p.log.info(`Workflow: ${options.selectedWorkflowLabel}`);
582
+ }
547
583
  let compileLoopSelection = {
548
584
  ...(typeof initial?.max_attempts === "number" ? { max_attempts: initial.max_attempts } : {}),
549
585
  ...(typeof initial?.max_loops === "number" ? { max_loops: initial.max_loops } : {}),
@@ -573,11 +609,8 @@ export async function promptSingleCompiledConfig(options = {}) {
573
609
  initialMaxLoops: initial?.max_loops,
574
610
  hasChecks: checks.length > 0,
575
611
  intro: [
576
- ...(options.selectedWorkflowLabel
577
- ? [`Workflow: ${options.selectedWorkflowLabel}`]
578
- : []),
579
612
  "Compile mode",
580
- "Choose whether this dataset should compile once or keep revising the workflow until it passes the same truth checks.",
613
+ "Choose whether this dataset should compile once, retry the same workflow, or try self-improving workflow variations on the same truth checks.",
581
614
  ],
582
615
  });
583
616
  if (compileLoopSelection === null)
@@ -3,7 +3,7 @@ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
3
3
  import { dirname, join } from "node:path";
4
4
  import { createRawTestTarget, createCompiledTestTarget, runTargetTestsAuto, saveTargetTestRun, } from "../lib/test.js";
5
5
  import { buildTestSpecFromSourceFolderConfig, buildTestSpecFromCompiledDatasetConfig, resolveSourceDatasetPath, } from "../lib/source-config.js";
6
- import { datasetArtifactRoot, datasetLatestTestStatePath, datasetLatestTestSummaryPath, datasetTestRunsRoot, } from "../lib/project-paths.js";
6
+ import { datasetLatestTestStatePath, datasetLatestTestSummaryPath, normalizeDatasetTestRunId, datasetTestRunPath, datasetTestRunsRoot, datasetTestsRoot, } from "../lib/project-paths.js";
7
7
  import { testRootForCompiled } from "../lib/compiled-paths.js";
8
8
  import { readJsonFileWithSchema } from "../lib/parse.js";
9
9
  import { TestRunComparisonSchema } from "../lib/schema.js";
@@ -29,21 +29,10 @@ function summarizeSavedTestOutcome(label, outcome) {
29
29
  target: outcome.target,
30
30
  };
31
31
  }
32
- function normalizeTestRunId(input) {
33
- return input
34
- .toLowerCase()
35
- .trim()
36
- .replace(/[^a-z0-9]+/g, "-")
37
- .replace(/^-+|-+$/g, "")
38
- .slice(0, 80);
39
- }
40
- function datasetRunPathForTarget(projectPath, datasetName, target, generatedAt, runId, runSuffix) {
41
- return join(datasetTestRunsRoot(projectPath, datasetName, target), `${generatedAt.replace(/[:.]/g, "-")}-${runId}${runSuffix ? `-${normalizeTestRunId(runSuffix)}` : ""}.json`);
42
- }
43
32
  function writeDatasetTargetRun(options) {
44
33
  const dirPath = datasetTestRunsRoot(options.projectPath, options.datasetName, options.target);
45
34
  mkdirSync(dirPath, { recursive: true });
46
- const runPath = datasetRunPathForTarget(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
35
+ const runPath = datasetTestRunPath(options.projectPath, options.datasetName, options.target, options.generatedAt, options.runId, options.runSuffix);
47
36
  writeFileSync(runPath, `${JSON.stringify(options.payload, null, 2)}\n`);
48
37
  return runPath;
49
38
  }
@@ -354,7 +343,7 @@ export async function runSavedRawTest(options) {
354
343
  const run = await runTargetTestsAuto(datasetSourcePath, spec, [target], {
355
344
  executor,
356
345
  preserveSandboxes: options.preserveSandboxes ?? "on-failure",
357
- artifactRootPath: datasetArtifactRoot(options.sourcePath, options.datasetConfig.name),
346
+ artifactRootPath: datasetTestsRoot(options.sourcePath, options.datasetConfig.name),
358
347
  });
359
348
  const result = run.results[0];
360
349
  if (!result)
@@ -364,7 +353,7 @@ export async function runSavedRawTest(options) {
364
353
  datasetName: options.datasetConfig.name,
365
354
  target: "file-as-is",
366
355
  generatedAt: run.generated_at,
367
- runId: normalizeTestRunId(spec.id),
356
+ runId: normalizeDatasetTestRunId(spec.id),
368
357
  runSuffix: options.runSuffix,
369
358
  payload: run,
370
359
  });
@@ -413,7 +402,7 @@ export async function runSavedCompiledTest(options) {
413
402
  datasetName: options.datasetConfig.name,
414
403
  target: "compiled",
415
404
  generatedAt: run.generated_at,
416
- runId: normalizeTestRunId(spec.id),
405
+ runId: normalizeDatasetTestRunId(spec.id),
417
406
  runSuffix: options.runSuffix,
418
407
  payload: run,
419
408
  });
@@ -1,9 +1,3 @@
1
1
  import type { CommandModule } from "yargs";
2
- import type { SourceDatasetConfig } from "../lib/schema.js";
3
- export declare function resolveConfiguredDatasetSelection(options: {
4
- sourcePath: string;
5
- requestedDatasetName?: string | null;
6
- hintedDatasetConfig?: SourceDatasetConfig | null;
7
- }): SourceDatasetConfig | null;
8
2
  export declare const testCommand: CommandModule;
9
3
  export declare function runTestCommand(argv?: Record<string, unknown>): Promise<boolean>;
@@ -3,19 +3,10 @@ import * as p from "@clack/prompts";
3
3
  import { detectInterf, resolveSourceControlPath, } from "../lib/interf.js";
4
4
  import { fingerprintTruthChecks, sourceDatasetConfigFromInterfConfig, } from "../lib/source-config.js";
5
5
  import { addExecutionProfileOptions, executionProfileFromArgv, } from "../lib/execution-profile.js";
6
- import { chooseCompiledConfigToBuild, findBuiltCompiledPath, findSavedCompiledConfig, } from "./compiled-flow.js";
6
+ import { chooseCompiledConfigToBuild, findBuiltCompiledPath, } from "./compiled-flow.js";
7
+ import { resolveConfiguredDatasetSelection } from "./dataset-selection.js";
7
8
  import { printAgentTestFailures, printAgentTestMatrix, printSavedTestComparisonState, readSavedTestComparison, runSavedRawTest, runSavedCompiledTest, saveTestComparisonRun, } from "./test-flow.js";
8
9
  import { listRunAgentOptions, promptForTestAgents, resolveNamedLocalExecutor, resolveOrConfigureLocalExecutor, } from "./executor-flow.js";
9
- export function resolveConfiguredDatasetSelection(options) {
10
- if (options.hintedDatasetConfig &&
11
- (!options.requestedDatasetName || options.hintedDatasetConfig.name === options.requestedDatasetName)) {
12
- return options.hintedDatasetConfig;
13
- }
14
- if (!options.requestedDatasetName) {
15
- return null;
16
- }
17
- return findSavedCompiledConfig(options.sourcePath, options.requestedDatasetName);
18
- }
19
10
  export const testCommand = {
20
11
  command: "test",
21
12
  describe: "Compare files as-is and a compiled dataset on saved truth checks",
@@ -156,17 +147,17 @@ async function resolveSelectedTestAgents(options) {
156
147
  ];
157
148
  }
158
149
  async function runModeForAgent(options) {
159
- const rawOutcome = options.mode === "raw" || options.mode === "both"
160
- ? await runSavedRawTest({
150
+ const rawPromise = options.mode === "raw" || options.mode === "both"
151
+ ? runSavedRawTest({
161
152
  sourcePath: options.sourcePath,
162
153
  datasetConfig: options.datasetConfig,
163
154
  executor: options.executor,
164
155
  preserveSandboxes: options.preserveSandboxes,
165
156
  runSuffix: options.executor.name,
166
157
  })
167
- : null;
168
- const compiledOutcome = options.mode === "compiled" || options.mode === "both"
169
- ? await runSavedCompiledTest({
158
+ : Promise.resolve(null);
159
+ const compiledPromise = options.mode === "compiled" || options.mode === "both"
160
+ ? runSavedCompiledTest({
170
161
  sourcePath: options.sourcePath,
171
162
  datasetConfig: options.datasetConfig,
172
163
  compiledPath: options.builtCompiledPath,
@@ -174,7 +165,8 @@ async function runModeForAgent(options) {
174
165
  preserveSandboxes: options.preserveSandboxes,
175
166
  runSuffix: options.executor.name,
176
167
  })
177
- : null;
168
+ : Promise.resolve(null);
169
+ const [rawOutcome, compiledOutcome] = await Promise.all([rawPromise, compiledPromise]);
178
170
  return {
179
171
  agentLabel: options.executor.displayName,
180
172
  rawOutcome,
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export { createCompiled, compileCompiled, runCompiledSummarize, runCompiledCompile, } from "./lib/workflows.js";
2
- export { createRawTestTarget, listTestSpecs, loadTestSpec, loadTestSpecFromFile, writeTestSpec, listTestTargets, runTargetTests, runTargetTestsWithJudge, runTargetTestsAuto, saveTargetTestRun, } from "./lib/test.js";
2
+ export { createRawTestTarget, createCompiledTestTarget, listTestSpecs, loadTestSpec, loadTestSpecFromFile, writeTestSpec, listTestTargets, runTargetTests, runTargetTestsWithJudge, runTargetTestsAuto, saveTargetTestRun, } from "./lib/test.js";
3
3
  export { computeCompiledHealth, } from "./lib/state.js";
4
4
  export { SOURCE_FOLDER_CONFIG_FILE, loadSourceFolderConfig, buildTestSpecFromSourceFolderConfig, } from "./lib/source-config.js";
5
5
  export { validateCompiledSummarize, validateCompiledCompile, } from "./lib/validate.js";
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export { createCompiled, compileCompiled, runCompiledSummarize, runCompiledCompile, } from "./lib/workflows.js";
2
- export { createRawTestTarget, listTestSpecs, loadTestSpec, loadTestSpecFromFile, writeTestSpec, listTestTargets, runTargetTests, runTargetTestsWithJudge, runTargetTestsAuto, saveTargetTestRun, } from "./lib/test.js";
2
+ export { createRawTestTarget, createCompiledTestTarget, listTestSpecs, loadTestSpec, loadTestSpecFromFile, writeTestSpec, listTestTargets, runTargetTests, runTargetTestsWithJudge, runTargetTestsAuto, saveTargetTestRun, } from "./lib/test.js";
3
3
  export { computeCompiledHealth, } from "./lib/state.js";
4
4
  export { SOURCE_FOLDER_CONFIG_FILE, loadSourceFolderConfig, buildTestSpecFromSourceFolderConfig, } from "./lib/source-config.js";
5
5
  export { validateCompiledSummarize, validateCompiledCompile, } from "./lib/validate.js";
@@ -1,3 +1,4 @@
1
1
  import type { WorkflowExecutionProfile } from "./executors.js";
2
2
  import type { Agent } from "./agent-types.js";
3
+ export declare function buildAgentEnv(agent: Agent, baseEnv?: NodeJS.ProcessEnv): NodeJS.ProcessEnv;
3
4
  export declare function buildAgentArgs(agent: Agent, prompt: string, executionProfile?: WorkflowExecutionProfile): string[];
@@ -1,4 +1,14 @@
1
1
  const CODEX_SANDBOX_MODE = `work${"space-write"}`;
2
+ export function buildAgentEnv(agent, baseEnv = process.env) {
3
+ const env = { ...baseEnv };
4
+ if (agent.name === "codex") {
5
+ // Codex executes Bash-tool commands inside its own sandbox. Let Codex
6
+ // choose a sandbox-safe default shell instead of inheriting a host-only
7
+ // login shell path such as /bin/zsh.
8
+ delete env.SHELL;
9
+ }
10
+ return env;
11
+ }
2
12
  export function buildAgentArgs(agent, prompt, executionProfile = {}) {
3
13
  if (agent.name === "claude-code") {
4
14
  const args = [
@@ -1,7 +1,7 @@
1
1
  import { spawn } from "node:child_process";
2
2
  import chalk from "chalk";
3
3
  import { CODEX_NOISE_PATTERNS } from "./agent-constants.js";
4
- import { buildAgentArgs } from "./agent-args.js";
4
+ import { buildAgentArgs, buildAgentEnv } from "./agent-args.js";
5
5
  import { appendAgentEventLog, appendAgentStatusLog } from "./agent-logs.js";
6
6
  import { displayAgentEvent, emitVisibleAgentText, summarizeAgentToolActivity, } from "./agent-render.js";
7
7
  import { classifyTerminalVisibleStatus, extractAgentFailureStatus, hasAgentStalled, } from "./agent-status.js";
@@ -20,6 +20,7 @@ export function spawnAgent(agent, dirPath, prompt, options = {}) {
20
20
  appendAgentStatusLog(options.statusLogPath, launchLine);
21
21
  const proc = spawn(agent.command, args, {
22
22
  cwd: dirPath,
23
+ env: buildAgentEnv(agent),
23
24
  stdio: ["ignore", "pipe", "pipe"],
24
25
  });
25
26
  const timeoutMs = options.executionProfile?.timeoutMs ?? null;
@@ -3,7 +3,7 @@ import { spawnSync } from "node:child_process";
3
3
  import { tmpdir } from "node:os";
4
4
  import { join } from "node:path";
5
5
  import { CODEX_NOISE_PATTERNS } from "./agent-constants.js";
6
- import { buildAgentArgs } from "./agent-args.js";
6
+ import { buildAgentArgs, buildAgentEnv } from "./agent-args.js";
7
7
  const successfulPreflightAgents = new Set();
8
8
  export function buildAgentPreflightPrompt() {
9
9
  return [
@@ -22,6 +22,7 @@ export function runAgentPreflight(agent, options = {}) {
22
22
  const result = spawnSyncImpl(agent.command, buildAgentArgs(agent, buildAgentPreflightPrompt()), {
23
23
  cwd: dirPath,
24
24
  encoding: "utf8",
25
+ env: buildAgentEnv(agent),
25
26
  timeout: timeoutMs,
26
27
  });
27
28
  const stdout = typeof result.stdout === "string" ? result.stdout.trim() : "";
@@ -1,4 +1,4 @@
1
- import type { RuntimeContractType, WorkflowImprovementContext, WorkflowZoneId } from "./schema.js";
1
+ import type { RuntimeContractType, SourceTruthCheck, WorkflowImprovementContext, WorkflowZoneId } from "./schema.js";
2
2
  export interface NativeStageDefinition {
3
3
  id: string;
4
4
  label: string;
@@ -8,6 +8,7 @@ export interface NativeStageDefinition {
8
8
  reads: WorkflowZoneId[];
9
9
  writes: WorkflowZoneId[];
10
10
  }
11
+ export declare function writeNativeAgentSurface(rootPath: string, agentsContent: string, skillName: string, skillContent: string): boolean;
11
12
  export declare function renderCompiledAgents(compiledPath: string, name: string, workflowId: string, about?: string, options?: {
12
13
  workflowOriginSelected?: string | null;
13
14
  workflowLocalDraft?: boolean;
@@ -15,6 +16,30 @@ export declare function renderCompiledAgents(compiledPath: string, name: string,
15
16
  export declare function renderCompiledQuerySkill(): string;
16
17
  export declare function syncStageExecutionShellWrites(compiledPath: string, shellRoot: string, stage: NativeStageDefinition, writeArtifacts?: readonly string[]): void;
17
18
  export declare function renderClaudeBootstrap(content: string): string;
19
+ export interface WorkflowAuthoringPreviewInfo {
20
+ compiledPath: string;
21
+ compileResult: {
22
+ ok: boolean;
23
+ failedStage: string | null;
24
+ };
25
+ }
26
+ export declare function createWorkflowAuthoringShell(options: {
27
+ workflowPath: string;
28
+ workflowId: string;
29
+ label: string;
30
+ baseWorkflowId: string;
31
+ datasetPath: string;
32
+ taskPrompt: string;
33
+ checks: SourceTruthCheck[];
34
+ preview?: WorkflowAuthoringPreviewInfo | null;
35
+ }): {
36
+ rootPath: string;
37
+ workflowBeforePath: string;
38
+ workflowAfterPath: string;
39
+ promptLogPath: string;
40
+ eventLogPath: string;
41
+ statusLogPath: string;
42
+ };
18
43
  export declare function pruneStageExecutionShells(compiledPath: string): void;
19
44
  export declare function projectCompiledQueryShell(compiledPath: string, compiledName: string, workflowId: string, about?: string, options?: {
20
45
  workflowOriginSelected?: string | null;