npm - @interf/compiler - Versions diffs - 0.3.2 → 0.3.4 - Mend

@interf/compiler 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139)

package/README.md +124 -173
package/dist/commands/compile.d.ts +2 -0
package/dist/commands/compile.d.ts.map +1 -1
package/dist/commands/compile.js +42 -10
package/dist/commands/compile.js.map +1 -1
package/dist/commands/create.d.ts.map +1 -1
package/dist/commands/create.js +5 -5
package/dist/commands/create.js.map +1 -1
package/dist/commands/default.js +2 -2
package/dist/commands/default.js.map +1 -1
package/dist/commands/doctor.js +7 -7
package/dist/commands/doctor.js.map +1 -1
package/dist/commands/init.js +19 -23
package/dist/commands/init.js.map +1 -1
package/dist/commands/source-config-wizard.d.ts +2 -1
package/dist/commands/source-config-wizard.d.ts.map +1 -1
package/dist/commands/source-config-wizard.js +29 -27
package/dist/commands/source-config-wizard.js.map +1 -1
package/dist/commands/test-flow.d.ts +4 -0
package/dist/commands/test-flow.d.ts.map +1 -1
package/dist/commands/test-flow.js +24 -13
package/dist/commands/test-flow.js.map +1 -1
package/dist/commands/test.d.ts.map +1 -1
package/dist/commands/test.js +16 -5
package/dist/commands/test.js.map +1 -1
package/dist/commands/workspace-flow.d.ts +2 -0
package/dist/commands/workspace-flow.d.ts.map +1 -1
package/dist/commands/workspace-flow.js +3 -2
package/dist/commands/workspace-flow.js.map +1 -1
package/dist/lib/agent-shells.d.ts +17 -0
package/dist/lib/agent-shells.d.ts.map +1 -0
package/dist/lib/agent-shells.js +295 -0
package/dist/lib/agent-shells.js.map +1 -0
package/dist/lib/benchmark-execution.d.ts +5 -1
package/dist/lib/benchmark-execution.d.ts.map +1 -1
package/dist/lib/benchmark-execution.js +34 -12
package/dist/lib/benchmark-execution.js.map +1 -1
package/dist/lib/benchmark-paths.d.ts +2 -0
package/dist/lib/benchmark-paths.d.ts.map +1 -1
package/dist/lib/benchmark-paths.js +6 -0
package/dist/lib/benchmark-paths.js.map +1 -1
package/dist/lib/benchmark-sandbox.d.ts +2 -0
package/dist/lib/benchmark-sandbox.d.ts.map +1 -1
package/dist/lib/benchmark-sandbox.js +68 -37
package/dist/lib/benchmark-sandbox.js.map +1 -1
package/dist/lib/benchmark-targets.js +1 -1
package/dist/lib/benchmark-targets.js.map +1 -1
package/dist/lib/interf-bootstrap.d.ts +2 -13
package/dist/lib/interf-bootstrap.d.ts.map +1 -1
package/dist/lib/interf-bootstrap.js +7 -164
package/dist/lib/interf-bootstrap.js.map +1 -1
package/dist/lib/interf-detect.d.ts +1 -0
package/dist/lib/interf-detect.d.ts.map +1 -1
package/dist/lib/interf-detect.js +5 -18
package/dist/lib/interf-detect.js.map +1 -1
package/dist/lib/interf-scaffold.d.ts.map +1 -1
package/dist/lib/interf-scaffold.js +7 -71
package/dist/lib/interf-scaffold.js.map +1 -1
package/dist/lib/interf-workflow-package.d.ts.map +1 -1
package/dist/lib/interf-workflow-package.js +21 -26
package/dist/lib/interf-workflow-package.js.map +1 -1
package/dist/lib/interf.d.ts +3 -2
package/dist/lib/interf.d.ts.map +1 -1
package/dist/lib/interf.js +3 -2
package/dist/lib/interf.js.map +1 -1
package/dist/lib/local-workflows.d.ts +6 -1
package/dist/lib/local-workflows.d.ts.map +1 -1
package/dist/lib/local-workflows.js +143 -2
package/dist/lib/local-workflows.js.map +1 -1
package/dist/lib/runtime-contracts.d.ts.map +1 -1
package/dist/lib/runtime-contracts.js +10 -4
package/dist/lib/runtime-contracts.js.map +1 -1
package/dist/lib/runtime-prompt.d.ts.map +1 -1
package/dist/lib/runtime-prompt.js +1 -0
package/dist/lib/runtime-prompt.js.map +1 -1
package/dist/lib/runtime-runs.d.ts.map +1 -1
package/dist/lib/runtime-runs.js +6 -2
package/dist/lib/runtime-runs.js.map +1 -1
package/dist/lib/runtime-types.d.ts +1 -0
package/dist/lib/runtime-types.d.ts.map +1 -1
package/dist/lib/schema.d.ts +88 -23
package/dist/lib/schema.d.ts.map +1 -1
package/dist/lib/schema.js +66 -37
package/dist/lib/schema.js.map +1 -1
package/dist/lib/source-config.d.ts +3 -3
package/dist/lib/source-config.d.ts.map +1 -1
package/dist/lib/source-config.js +8 -6
package/dist/lib/source-config.js.map +1 -1
package/dist/lib/state-artifacts.d.ts +2 -2
package/dist/lib/state-artifacts.d.ts.map +1 -1
package/dist/lib/state-artifacts.js +3 -3
package/dist/lib/state-artifacts.js.map +1 -1
package/dist/lib/state-io.d.ts +2 -2
package/dist/lib/state-io.d.ts.map +1 -1
package/dist/lib/state-io.js +5 -5
package/dist/lib/state-io.js.map +1 -1
package/dist/lib/state-paths.d.ts +1 -1
package/dist/lib/state-paths.d.ts.map +1 -1
package/dist/lib/state-paths.js +3 -3
package/dist/lib/state-paths.js.map +1 -1
package/dist/lib/state-view.d.ts +2 -2
package/dist/lib/state-view.d.ts.map +1 -1
package/dist/lib/state-view.js +6 -7
package/dist/lib/state-view.js.map +1 -1
package/dist/lib/state.d.ts +4 -4
package/dist/lib/state.d.ts.map +1 -1
package/dist/lib/state.js +3 -3
package/dist/lib/state.js.map +1 -1
package/dist/lib/workflow-definitions.d.ts +4 -1
package/dist/lib/workflow-definitions.d.ts.map +1 -1
package/dist/lib/workflow-definitions.js +41 -6
package/dist/lib/workflow-definitions.js.map +1 -1
package/dist/lib/workflow-stage-runner.d.ts +1 -0
package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
package/dist/lib/workflow-stage-runner.js +2 -0
package/dist/lib/workflow-stage-runner.js.map +1 -1
package/dist/lib/workflows.d.ts +1 -1
package/dist/lib/workflows.d.ts.map +1 -1
package/dist/lib/workspace-compile.d.ts +4 -0
package/dist/lib/workspace-compile.d.ts.map +1 -1
package/dist/lib/workspace-compile.js +108 -66
package/dist/lib/workspace-compile.js.map +1 -1
package/dist/lib/workspace-docs.d.ts +3 -0
package/dist/lib/workspace-docs.d.ts.map +1 -0
package/dist/lib/workspace-docs.js +82 -0
package/dist/lib/workspace-docs.js.map +1 -0
package/dist/lib/workspace-raw.d.ts +30 -0
package/dist/lib/workspace-raw.d.ts.map +1 -0
package/dist/lib/workspace-raw.js +102 -0
package/dist/lib/workspace-raw.js.map +1 -0
package/dist/lib/workspace-schema.d.ts +26 -0
package/dist/lib/workspace-schema.d.ts.map +1 -0
package/dist/lib/workspace-schema.js +132 -0
package/dist/lib/workspace-schema.js.map +1 -0
package/package.json +2 -2
package/skills/benchmark/SKILL.md +4 -4
package/skills/workflow/create/SKILL.md +23 -4
package/skills/workspace/shape/SKILL.md +1 -1
package/templates/workspace/README.md +4 -3

package/README.md CHANGED Viewed

@@ -1,28 +1,17 @@
-# Interf
+# Interf Compiler
-Open-source knowledge compiler for local agents.
+Prepare local datasets for accurate agent use.
-Interf measures and improves how accurately local agents answer questions from your files.
+Interf Compiler runs local data-processing workflows over your dataset to build a compiled workspace: a folder of agent-readable files that helps agents navigate evidence, verify facts, and answer accurately.
-If you use Claude Code, Codex, OpenClaw, Hermes, or your own local agent setup on folders full of PDFs, docs, spreadsheets, and notes, the failure often shows up late: missed evidence, shallow analysis, bad comparisons, or answers that sound confident but are wrong.
-Interf lets you define a few checks over your files, measure the raw baseline first if you want it, compile a workspace on top of those files, and see whether the result actually passes.
-- your files stay on your machine
-- you choose the local agent
-- your raw files stay the source of truth
-- Interf adds a file-based layer on top
-`interf compile` runs a local data-processing pipeline with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
-In the advanced looped mode, Interf can keep rerunning that pipeline, testing the result, and trying improved preparation attempts until it finds the best-performing workspace within the attempt budget.
+Use truth checks to test the raw dataset, compile the workspace, and compare the result on the same task.
 ## Quick Start
 Requirements:
 - Node.js 20+
-- a local coding agent: Claude Code or Codex
+- a local coding agent such as Claude Code or Codex
 Install:
@@ -30,24 +19,78 @@ Install:
 npm install -g @interf/compiler
 ```
-The quickest start is the wizard:
+Start from the folder that already contains your dataset:
 ```bash
-cd ~/my-folder
+cd ~/my-dataset
 interf
+interf compile
+interf test
 ```
-If you want to see the config shape first, this is what Interf writes:
+The first run can:
+- save a few truth checks for the dataset
+- test the raw dataset as a baseline
+- build the compiled workspace
+- test the compiled workspace on the same truth checks
+## What Interf Compiler Creates
+Interf Compiler adds three things beside your dataset:
+- `interf.config.json` with your saved truth checks and workspace setup
+- `interf/workspaces/<name>/` with the compiled workspace
+- `interf/benchmarks/runs/...` with saved test runs
+A compiled workspace is a folder on top of your dataset. It includes:
+- a workspace-local `raw/` snapshot for direct evidence and verification
+- agent-readable summaries and cross-file notes
+- `AGENTS.md`, `CLAUDE.md`, and generated local query skills
+- runtime state under `.interf/`
+The compiled workspace is the folder your agent should work from.
+## Why Use It
+Raw dataset folders are hard for agents.
+Common failure modes:
+- missed evidence
+- weak cross-file understanding
+- bad comparisons
+- answers that sound confident but are wrong
+Interf Compiler keeps the raw dataset as the source of truth, builds a compiled workspace on top of it, and tests whether that workspace actually helps.
+## The Loop
-```json
+1. Define truth checks for the dataset.
+2. Build the compiled workspace.
+3. Test raw vs compiled on the same truth checks.
+Truth checks are simple:
+- one question
+- one expected answer
+Good first truth checks are small and practical:
+- one exact number from a chart, table, or filing
+- one short statement that should be true or false
+- one simple comparison across years, files, or sections
+If you want to see the config shape first, this is what Interf Compiler writes:
+```jsonc
 {
   "workspaces": [
     {
-      "name": "default",
+      "name": "my-workspace",
       "about": "General compiled workspace for the quarterly results folder.",
-      "retry_policy": {
-        "max_attempts": 3
-      },
+      "max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
       "checks": [
         {
           "question": "What full-year revenue range did the company maintain?",
@@ -63,63 +106,13 @@ If you want to see the config shape first, this is what Interf writes:
 }
 ```
-The root-level flow is:
-```bash
-interf
-interf compile
-interf test
-```
-The first guided run can:
-- save a few questions and expected answers for this folder
-- run a baseline test on the raw files
-- compile the workspace
-- optionally keep compiling and retesting until it passes or reaches the attempt limit
-- run the same test against the compiled workspace
-That gives you three concrete things:
-- `interf/workspaces/default/` with the compiled workspace for your files
-- `interf/benchmarks/runs/...` with the saved test result
-- a pass/fail score on the same questions and expected answers you wrote
-Saved test runs keep the details you need later:
-- whether the run tested `raw`, `workspace`, or both
-- per-question pass/fail results
-- the saved run path under `interf/benchmarks/runs/...`
-- executor metadata such as agent, command, model, effort, and profile when available
-If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
+If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If the compiler cannot find your local agent or compile setup, run:
 ```bash
 interf doctor
 ```
-The first flow is:
-- write down a few questions your agent should be able to answer from your files
-- let `interf` or `interf init` save those checks in `interf.config.json`
-- optionally run a baseline test on the raw files
-- run `interf compile` to build the compiled workspace
-- run `interf test` to test the raw files, the compiled workspace, or both
-- only create another workspace if you want a separate compiled setup with its own checks
-- if needed, rerun compile or use the advanced retry path until it is good enough
-## Why This Approach
-Interf is built around a few simple design principles:
-- `Explicit`: the output is visible and inspectable, not hidden memory
-- `Local`: your files stay on your machine
-- `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
-- `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
-Interf does not replace your data with an opaque store. It keeps the raw files in place and adds a file-based layer on top for agents.
-Sample flow:
+Sample run:
 ```bash
 cp -r examples/benchmark-demo /tmp/interf-demo
@@ -129,68 +122,44 @@ interf compile
 interf test
 ```
-## Start With Your Own Checks
-Start with your own checks over the files: questions where you already know the correct answer from the dataset.
-`interf.config.json` is where you save those checks for a folder.
-That file uses one `workspaces` array:
-- most folders only need one workspace
-- add another workspace only if you want a separate compiled setup with different checks
-- each workspace carries its own `checks`
-- each workspace can optionally carry `retry_policy.max_attempts` for the self-improving compile loop
-If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
-Good first checks are small and practical:
-- one exact number from a chart, table, or filing
-- one short statement that should be true or false
-- one simple comparison across years, files, or sections
+## What `interf test` Does
-Then run:
+`interf test` scores either the raw files, a compiled workspace, or both on the same saved truth checks.
-```bash
-interf compile
-interf test
-```
-## What `interf test` Does
+It answers a simple question:
-`interf test` scores either the raw files, a compiled workspace, or both on the same saved checks.
+- does the compiled workspace help on this dataset or not?
-It lets you answer a simple question:
+By default it loads truth checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
-- what is the current baseline on the raw files?
-- does this compiled workspace improve on that baseline?
-- which compiled workspace or workflow performs better on the same folder?
-- does a separate workspace with different checks work better for that job?
+For live runs:
-By default it loads checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
+- raw tests execute from a sanitized raw-only sandbox
+- compiled-workspace tests execute from a copied workspace sandbox with embedded sanitized `raw/`
+- neither sandbox includes `interf.config.json` or the source-folder `interf/` control plane
+- failed test sandboxes are kept automatically for review
+- `interf test --keep-sandboxes` keeps every sandbox, even successful ones
 Each saved run includes:
-- the benchmark target and mode
+- whether the run tested `raw`, `workspace`, or both
 - per-question results and traces
+- the preserved sandbox path when one was kept
 - the executor metadata for that run
-If you run `interf test` from inside a workspace, it uses that workspace's checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
-Live test runs use an isolated sandbox. For raw baselines, Interf gives the agent sanitized raw files only. For compiled-workspace tests, it gives the agent a copied workspace plus sanitized raw files. The source-folder control plane, `interf.config.json`, and saved test runs are not part of those sandboxes.
+If you run `interf test` from inside a workspace, it uses that workspace's truth checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
 If you need repeated isolated experiments across workflows or models, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
 ## What `interf compile` Does
-`interf compile` runs the Interf data-processing pipeline over your files.
+`interf compile` runs the selected workflow over a dataset.
-By default, that means:
+The built-in workflow:
 - summarize the source files into per-file evidence notes
 - structure the cross-file knowledge layer into entities, claims, and indexes
-- shape the final workspace around its saved focus and questions
+- shape the final workspace around its saved focus and truth checks
 In other words, the built-in workflow is:
@@ -198,47 +167,30 @@ In other words, the built-in workflow is:
 2. `structure`
 3. `shape`
-In public docs, `pipeline` is the thing Interf runs. `workflow` is the saved method that defines or customizes that pipeline.
-The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
-If a workspace has `retry_policy.max_attempts`, or if you run `interf compile --max-attempts <n>`, Interf can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, Interf keeps the best-performing compiled workspace from that run.
-## What Gets Created
-After compile, Interf writes into `./interf/` beside your source files.
-- `interf/workspaces/<name>/` is a compiled workspace over the folder
-- `interf/benchmarks/runs/...` stores saved test runs
-Inside those workspaces you will see things like:
-- summaries of source files
-- navigation notes and entrypoints for agents
-- cross-file knowledge notes
-- workspace-specific outputs when you define a separate job-focused workspace
+If you want a different method, you can define your own workflow and test it on the same dataset.
-The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
+Under the hood, each workflow defines:
-If you use Obsidian, open `interf/workspaces/<name>/` as the vault for the compiled workspace.
+- `workflow.json` for stage order, compiler API target, and deterministic contract mapping
+- `workspace.schema.json` for the deterministic output shape of the compiled workspace
+- stage `reads` / `writes` declarations that reference schema-defined zone ids
+- local `SKILL.md` files as the authoring source for query and stage-execution behavior
-## Terminology
+The compiler then projects that workflow into the native agent surfaces it actually runs:
-Public terms:
+- the compiled workspace gets a generated native query shell
+- each compile stage gets a generated native execution shell
+- that shell keeps its own `AGENTS.md`, `CLAUDE.md`, and native local skills
+- schema-declared workspace zones are mounted both at their workflow-relative paths and as shell-local `inputs/<zone-id>` / `outputs/<zone-id>` aliases
+- the workspace root itself is not linked into the shell
-- `your files` = the source folder Interf reads from
-- `questions and expected answers` = the checks you want your agent to pass
-- `checks` = the pass/fail questions each workspace should satisfy
-- `test` = run the saved questions and get a score
-- `compiled workspace` = the output Interf produces on top of a folder
-- `workspace` = one compiled setup with its own checks
+If a workspace has `max_attempts`, or if you run `interf compile --max-attempts <n>`, the compiler can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, it keeps the best-performing compiled workspace from that run.
-Technical terms:
+For stage-level review:
-- `source folder` = the raw files Interf reads from
-- `benchmark` = the technical alias and saved-run layer behind `interf test`
-- `workflow` = the saved method that defines or customizes the pipeline
-- `.interf/` = runtime state, proofs, and health artifacts
+- successful stage shells are pruned by default
+- failed stage shells stay under `.interf/execution-shells/`
+- `interf compile --keep-stage-shells` keeps every stage shell so you can inspect the exact native instruction surface, mounted inputs, and mounted outputs for each stage
 ## Advanced: Separate Workspaces
@@ -253,31 +205,32 @@ Create another only when you want a different compiled setup with different chec
 Why create another one:
-- it keeps a separate set of questions and expected answers
+- it keeps a separate set of truth checks
 - it gives that job its own compiled output under `interf/workspaces/<name>/`
 - it lets you test that job separately
 ## Advanced: Keep Improving Until It Passes
-Interf also supports a deeper loop above the normal compile + test flow.
+Interf Compiler also supports a deeper loop above the normal compile + test flow.
-The normal workspace flow already supports `retry_policy.max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
+The normal workspace flow already supports `max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
-Give it the same folder and the same checks. Interf can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
+Give it the same dataset and the same truth checks. The compiler can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
-That loop is the self-improving part of the product:
+That loop is the self-improving part:
-- it reruns the local data-processing pipeline over the same files
-- it keeps the checks fixed, so the target does not move
+- it reruns the same workflow over the same dataset
+- it keeps the truth checks fixed, so the target does not move
+- it keeps the measurement fixed, so attempts stay comparable
 - it can vary the compile profile and follow-up diagnostics
 - it records which attempt performed best on the same saved test
 In practice:
-- `retry_policy.max_attempts` controls how many total attempts a normal workspace compile gets
+- `max_attempts` controls how many total attempts a normal workspace compile gets
 - `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets in eval packs
 - stronger diagnostic profiles can be used only after the default ones fail
-- the checks stay the same across every attempt
+- the truth checks stay the same across every attempt
 - each attempt records what changed and which attempt performed best
 Example eval-pack shape:
@@ -286,10 +239,8 @@ Example eval-pack shape:
 {
   "workspaces": [
     {
-      "name": "default",
-      "retry_policy": {
-        "max_attempts": 3
-      },
+      "name": "my-workspace",
+      "max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
       "checks": [
         {
           "question": "What full-year revenue range did the company maintain?",
@@ -304,38 +255,38 @@ Example eval-pack shape:
 }
 ```
-Use the normal workspace retry loop first. Use the eval-pack path when you want Interf to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
+Use the normal workspace retry loop first. Use the eval-pack path when you want Interf Compiler to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
 ## Use It With Your Agent
-If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this process for you.
+If you already work through a local coding agent, it can run this process for you.
 Paste something like this into your agent:
 ```text
 Install @interf/compiler, run `interf` in this folder, and use the local agent executor.
-If `interf.config.json` is missing, draft one workspace with a few checks this agent should be able to answer from these files and add the expected answers for me to confirm.
+If `interf.config.json` is missing, draft one workspace with a few truth checks this agent should be able to answer from this dataset and add the expected answers for me to confirm.
 Then run a raw baseline if helpful, compile the workspace, and run `interf test`.
-Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
+Tell me whether the compiled workspace passes the truth checks, and only recommend it if it does.
 ```
 ## Custom Workflows
-Interf ships with a default workflow.
+Interf Compiler ships with a default workflow.
-If you want to change how the data-processing pipeline runs on your files, this is the part you customize:
+If you want to change how the workflow runs on your dataset, this is the part you customize:
 ```bash
 interf create workflow
 interf verify workflow --path <path>
 ```
-Then benchmark that workflow on the same folder and the same checks.
+Then test that workflow on the same dataset and the same truth checks.
-Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
+Workflow docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
 ## Core Commands
@@ -344,7 +295,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
 - `interf create workspace` = create another compiled workspace when you need one
 - `interf create workflow` = create a reusable local workflow package
 - `interf compile` = build a selected workspace for the current folder
-- `interf test` = test the raw files, a compiled workspace, or both on saved checks
+- `interf test` = test the raw files, a compiled workspace, or both on saved truth checks
 - `interf benchmark` = alias for `interf test`
 - `interf doctor` = check local executor setup
 - `interf verify <check>` = run deterministic checks on major workflow steps

package/dist/commands/compile.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
 import type { SourceWorkspaceConfig } from "../lib/schema.js";
 import type { CommandModule } from "yargs";
+import type { StageShellRetentionMode } from "../lib/workflows.js";
 export declare const compileCommand: CommandModule;
 export declare function runCompileCommand(argv?: Record<string, unknown>): Promise<void>;
 export declare function runConfiguredWorkspaceCompile(options: {
@@ -10,5 +11,6 @@ export declare function runConfiguredWorkspaceCompile(options: {
     workspaceConfig: SourceWorkspaceConfig | null;
     executionProfile?: WorkflowExecutionProfile;
     maxAttemptsOverride: number | null;
+    preserveStageShells?: StageShellRetentionMode;
 }): Promise<boolean>;
 //# sourceMappingURL=compile.d.ts.map

package/dist/commands/compile.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;~~AAa3C~~,eAAO,MAAM,cAAc,EAAE,~~aAa5B~~,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,~~CA+EzF~~;~~AA6CD~~,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;~~CACpC~~,GACA,OAAO,CAAC,OAAO,CAAC,~~CA+GlB~~"}
1	+ {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAY3C,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAEnE,eAAO,MAAM,cAAc,EAAE,aAkB5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgFzF;AAuDD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,mBAAmB,CAAC,EAAE,uBAAuB,CAAC;CAC/C,GACA,OAAO,CAAC,OAAO,CAAC,CA4HlB"}

package/dist/commands/compile.js CHANGED Viewed

@@ -3,21 +3,25 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import chalk from "chalk";
 import * as p from "@clack/prompts";
-import { detectInterf, readInterfConfig, resolveSourceFolderPath, } from "../lib/interf.js";
+import { detectInterf, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
 import { findSourceWorkspaceConfig, loadSourceFolderConfig, resolveWorkspaceCompileMaxAttempts, } from "../lib/source-config.js";
 import { resetWorkspaceGeneratedState } from "../lib/workspace-reset.js";
 import { formatWorkspaceWorkflowStageStep, resolveWorkspaceWorkflowFromConfig, } from "../lib/workflow-definitions.js";
 import { addExecutionProfileOptions, executionProfileFromArgv, } from "../lib/execution-profile.js";
 import { chooseWorkspaceConfigToBuild, compileWorkspaceWithReporter, ensureWorkspaceFromConfig, } from "./workspace-flow.js";
 import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
-import { printSavedTestOutcome, runSavedWorkspaceTest } from "./test-flow.js";
+import { printSavedTestOutcome, questionPassRate, runSavedWorkspaceTest } from "./test-flow.js";
 export const compileCommand = {
     command: "compile",
-    describe: "Build a workspace for this folder",
+    describe: "Build a workspace for this dataset",
     builder: (yargs) => addExecutionProfileOptions(yargs).option("max-attempts", {
         alias: "max-retries",
         type: "number",
         describe: "Compile, test, and retry until the workspace passes or reaches this total attempt limit",
+    }).option("keep-stage-shells", {
+        type: "boolean",
+        default: false,
+        describe: "Keep every executed stage shell under .interf/execution-shells for review instead of pruning successful shells",
     }),
     handler: async (argv) => {
         await runCompileCommand(argv);
@@ -30,7 +34,7 @@ export async function runCompileCommand(argv = {}) {
     const detected = detectInterf(process.cwd());
     if (detected) {
         workspacePath = detected.path;
-        sourcePath = resolveSourceFolderPath(detected.path, detected.config);
+        sourcePath = resolveSourceControlPath(detected.path);
         workspaceConfig = findSourceWorkspaceConfig(loadSourceFolderConfig(sourcePath), detected.config.name) ?? {
             name: detected.config.name,
             ...(detected.config.about ? { about: detected.config.about } : {}),
@@ -99,6 +103,7 @@ export async function runCompileCommand(argv = {}) {
         workspaceConfig,
         executionProfile,
         maxAttemptsOverride,
+        preserveStageShells: readStageShellRetentionMode(argv),
     });
 }
 function readCompileMaxAttemptsOverride(argv) {
@@ -114,15 +119,21 @@ function readCompileMaxAttemptsOverride(argv) {
     }
     return parsed;
 }
+function readStageShellRetentionMode(argv) {
+    const enabled = argv["keep-stage-shells"] ??
+        argv.keepStageShells ??
+        false;
+    return enabled ? "always" : "on-failure";
+}
 function printCompileFailure(workspacePath, failedStage) {
     const workflowId = resolveWorkspaceWorkflowFromConfig(readInterfConfig(workspacePath));
     const failedStageLabel = formatWorkspaceWorkflowStageStep(workflowId, failedStage ?? "compile", {
-        sourcePath: resolveSourceFolderPath(workspacePath),
+        sourcePath: resolveSourceControlPath(workspacePath),
     });
     console.log(chalk.red(`  ${failedStageLabel} failed.`));
 }
 function testScore(outcome) {
-    return (outcome.result.passedChecks * 1000) + outcome.result.passedCases;
+    return (outcome.result.passedCases * 1000) + outcome.result.passedChecks;
 }
 function snapshotWorkspace(workspacePath) {
     const snapshotRoot = mkdtempSync(join(tmpdir(), "interf-compile-attempt-"));
@@ -135,23 +146,28 @@ function restoreWorkspaceSnapshot(snapshotPath, workspacePath) {
     cpSync(snapshotPath, workspacePath, { recursive: true });
 }
 export async function runConfiguredWorkspaceCompile(options) {
-    const maxAttempts = resolveWorkspaceCompileMaxAttempts(options.workspaceConfig ?? { retry_policy: undefined }, options.maxAttemptsOverride);
+    const preserveStageShells = options.preserveStageShells ?? "on-failure";
+    const maxAttempts = resolveWorkspaceCompileMaxAttempts(options.workspaceConfig ?? { max_attempts: undefined }, options.maxAttemptsOverride);
     const loopEnabled = maxAttempts != null;
     const checks = options.workspaceConfig?.checks ?? [];
     if (loopEnabled && checks.length === 0) {
-        console.log(chalk.yellow("  Self-improving mode needs saved questions. Building once without the retry loop."));
+        console.log(chalk.yellow("  Self-improving mode needs saved truth checks. Building once without the retry loop."));
     }
     else if (loopEnabled) {
         console.log(chalk.dim(`  Self-improving mode: up to ${maxAttempts} compile attempts.`));
-        console.log(chalk.dim("  Interf will compile, test the workspace, and retry until it passes or reaches the limit."));
+        console.log(chalk.dim("  Interf Compiler will run the workflow, test the workspace, and retry until it passes or reaches the limit."));
     }
     if (!loopEnabled || checks.length === 0) {
-        const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath);
+        const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath, {
+            preserveStageShells,
+        });
         if (!result.ok) {
             process.exitCode = 1;
             printCompileFailure(options.workspacePath, result.failedStage);
+            printStageShellReviewHint(options.workspacePath, preserveStageShells);
             return false;
         }
+        printStageShellReviewHint(options.workspacePath, preserveStageShells);
         return true;
     }
     let bestOutcome = null;
@@ -171,9 +187,11 @@ export async function runConfiguredWorkspaceCompile(options) {
                 successMessage: maxAttempts > 1
                     ? `Compiled workspace ready for attempt ${attempt}.`
                     : "Compiled workspace ready.",
+                preserveStageShells,
             });
             if (!result.ok) {
                 printCompileFailure(options.workspacePath, result.failedStage);
+                printStageShellReviewHint(options.workspacePath, preserveStageShells);
                 if (attempt < maxAttempts) {
                     console.log(chalk.yellow(`  Attempt ${attempt}/${maxAttempts} failed. Retrying with a fresh compile.`));
                     continue;
@@ -189,6 +207,7 @@ export async function runConfiguredWorkspaceCompile(options) {
                 },
                 executionProfile: options.executionProfile,
                 workspacePath: options.workspacePath,
+                preserveSandboxes: preserveStageShells === "always" ? "always" : "on-failure",
             });
             if (!outcome) {
                 process.exitCode = 1;
@@ -208,6 +227,7 @@ export async function runConfiguredWorkspaceCompile(options) {
             if (outcome.result.ok) {
                 console.log();
                 console.log(chalk.green(`  Workspace passed on attempt ${attempt}/${maxAttempts}.`));
+                printStageShellReviewHint(options.workspacePath, preserveStageShells);
                 return true;
             }
             if (attempt < maxAttempts) {
@@ -223,6 +243,10 @@ export async function runConfiguredWorkspaceCompile(options) {
         process.exitCode = 1;
         console.log();
         console.log(chalk.red(`  Workspace did not pass within ${maxAttempts} attempts.`));
+        if (bestOutcome) {
+            console.log(chalk.dim(`  Best attempt truth-check pass rate: ${questionPassRate(bestOutcome)}%.`));
+        }
+        printStageShellReviewHint(options.workspacePath, preserveStageShells);
         return false;
     }
     finally {
@@ -231,4 +255,12 @@ export async function runConfiguredWorkspaceCompile(options) {
         }
     }
 }
+function printStageShellReviewHint(workspacePath, preserveStageShells) {
+    const reviewRoot = join(workspacePath, ".interf", "execution-shells");
+    if (preserveStageShells === "always") {
+        console.log(chalk.dim(`  Preserved stage shells: ${reviewRoot}`));
+        return;
+    }
+    console.log(chalk.dim(`  Failed stage shells remain under: ${reviewRoot}`));
+}
 //# sourceMappingURL=compile.js.map