@interf/compiler 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. package/README.md +124 -173
  2. package/dist/commands/compile.d.ts +2 -0
  3. package/dist/commands/compile.d.ts.map +1 -1
  4. package/dist/commands/compile.js +42 -10
  5. package/dist/commands/compile.js.map +1 -1
  6. package/dist/commands/create.d.ts.map +1 -1
  7. package/dist/commands/create.js +5 -5
  8. package/dist/commands/create.js.map +1 -1
  9. package/dist/commands/default.js +2 -2
  10. package/dist/commands/default.js.map +1 -1
  11. package/dist/commands/doctor.js +7 -7
  12. package/dist/commands/doctor.js.map +1 -1
  13. package/dist/commands/init.js +19 -23
  14. package/dist/commands/init.js.map +1 -1
  15. package/dist/commands/source-config-wizard.d.ts +2 -1
  16. package/dist/commands/source-config-wizard.d.ts.map +1 -1
  17. package/dist/commands/source-config-wizard.js +29 -27
  18. package/dist/commands/source-config-wizard.js.map +1 -1
  19. package/dist/commands/test-flow.d.ts +4 -0
  20. package/dist/commands/test-flow.d.ts.map +1 -1
  21. package/dist/commands/test-flow.js +24 -13
  22. package/dist/commands/test-flow.js.map +1 -1
  23. package/dist/commands/test.d.ts.map +1 -1
  24. package/dist/commands/test.js +16 -5
  25. package/dist/commands/test.js.map +1 -1
  26. package/dist/commands/workspace-flow.d.ts +2 -0
  27. package/dist/commands/workspace-flow.d.ts.map +1 -1
  28. package/dist/commands/workspace-flow.js +3 -2
  29. package/dist/commands/workspace-flow.js.map +1 -1
  30. package/dist/lib/agent-shells.d.ts +17 -0
  31. package/dist/lib/agent-shells.d.ts.map +1 -0
  32. package/dist/lib/agent-shells.js +295 -0
  33. package/dist/lib/agent-shells.js.map +1 -0
  34. package/dist/lib/benchmark-execution.d.ts +5 -1
  35. package/dist/lib/benchmark-execution.d.ts.map +1 -1
  36. package/dist/lib/benchmark-execution.js +34 -12
  37. package/dist/lib/benchmark-execution.js.map +1 -1
  38. package/dist/lib/benchmark-paths.d.ts +2 -0
  39. package/dist/lib/benchmark-paths.d.ts.map +1 -1
  40. package/dist/lib/benchmark-paths.js +6 -0
  41. package/dist/lib/benchmark-paths.js.map +1 -1
  42. package/dist/lib/benchmark-sandbox.d.ts +2 -0
  43. package/dist/lib/benchmark-sandbox.d.ts.map +1 -1
  44. package/dist/lib/benchmark-sandbox.js +68 -37
  45. package/dist/lib/benchmark-sandbox.js.map +1 -1
  46. package/dist/lib/benchmark-targets.js +1 -1
  47. package/dist/lib/benchmark-targets.js.map +1 -1
  48. package/dist/lib/interf-bootstrap.d.ts +2 -13
  49. package/dist/lib/interf-bootstrap.d.ts.map +1 -1
  50. package/dist/lib/interf-bootstrap.js +7 -164
  51. package/dist/lib/interf-bootstrap.js.map +1 -1
  52. package/dist/lib/interf-detect.d.ts +1 -0
  53. package/dist/lib/interf-detect.d.ts.map +1 -1
  54. package/dist/lib/interf-detect.js +5 -18
  55. package/dist/lib/interf-detect.js.map +1 -1
  56. package/dist/lib/interf-scaffold.d.ts.map +1 -1
  57. package/dist/lib/interf-scaffold.js +7 -71
  58. package/dist/lib/interf-scaffold.js.map +1 -1
  59. package/dist/lib/interf-workflow-package.d.ts.map +1 -1
  60. package/dist/lib/interf-workflow-package.js +21 -26
  61. package/dist/lib/interf-workflow-package.js.map +1 -1
  62. package/dist/lib/interf.d.ts +3 -2
  63. package/dist/lib/interf.d.ts.map +1 -1
  64. package/dist/lib/interf.js +3 -2
  65. package/dist/lib/interf.js.map +1 -1
  66. package/dist/lib/local-workflows.d.ts +6 -1
  67. package/dist/lib/local-workflows.d.ts.map +1 -1
  68. package/dist/lib/local-workflows.js +143 -2
  69. package/dist/lib/local-workflows.js.map +1 -1
  70. package/dist/lib/runtime-contracts.d.ts.map +1 -1
  71. package/dist/lib/runtime-contracts.js +10 -4
  72. package/dist/lib/runtime-contracts.js.map +1 -1
  73. package/dist/lib/runtime-prompt.d.ts.map +1 -1
  74. package/dist/lib/runtime-prompt.js +1 -0
  75. package/dist/lib/runtime-prompt.js.map +1 -1
  76. package/dist/lib/runtime-runs.d.ts.map +1 -1
  77. package/dist/lib/runtime-runs.js +6 -2
  78. package/dist/lib/runtime-runs.js.map +1 -1
  79. package/dist/lib/runtime-types.d.ts +1 -0
  80. package/dist/lib/runtime-types.d.ts.map +1 -1
  81. package/dist/lib/schema.d.ts +88 -23
  82. package/dist/lib/schema.d.ts.map +1 -1
  83. package/dist/lib/schema.js +66 -37
  84. package/dist/lib/schema.js.map +1 -1
  85. package/dist/lib/source-config.d.ts +3 -3
  86. package/dist/lib/source-config.d.ts.map +1 -1
  87. package/dist/lib/source-config.js +8 -6
  88. package/dist/lib/source-config.js.map +1 -1
  89. package/dist/lib/state-artifacts.d.ts +2 -2
  90. package/dist/lib/state-artifacts.d.ts.map +1 -1
  91. package/dist/lib/state-artifacts.js +3 -3
  92. package/dist/lib/state-artifacts.js.map +1 -1
  93. package/dist/lib/state-io.d.ts +2 -2
  94. package/dist/lib/state-io.d.ts.map +1 -1
  95. package/dist/lib/state-io.js +5 -5
  96. package/dist/lib/state-io.js.map +1 -1
  97. package/dist/lib/state-paths.d.ts +1 -1
  98. package/dist/lib/state-paths.d.ts.map +1 -1
  99. package/dist/lib/state-paths.js +3 -3
  100. package/dist/lib/state-paths.js.map +1 -1
  101. package/dist/lib/state-view.d.ts +2 -2
  102. package/dist/lib/state-view.d.ts.map +1 -1
  103. package/dist/lib/state-view.js +6 -7
  104. package/dist/lib/state-view.js.map +1 -1
  105. package/dist/lib/state.d.ts +4 -4
  106. package/dist/lib/state.d.ts.map +1 -1
  107. package/dist/lib/state.js +3 -3
  108. package/dist/lib/state.js.map +1 -1
  109. package/dist/lib/workflow-definitions.d.ts +4 -1
  110. package/dist/lib/workflow-definitions.d.ts.map +1 -1
  111. package/dist/lib/workflow-definitions.js +41 -6
  112. package/dist/lib/workflow-definitions.js.map +1 -1
  113. package/dist/lib/workflow-stage-runner.d.ts +1 -0
  114. package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
  115. package/dist/lib/workflow-stage-runner.js +2 -0
  116. package/dist/lib/workflow-stage-runner.js.map +1 -1
  117. package/dist/lib/workflows.d.ts +1 -1
  118. package/dist/lib/workflows.d.ts.map +1 -1
  119. package/dist/lib/workspace-compile.d.ts +4 -0
  120. package/dist/lib/workspace-compile.d.ts.map +1 -1
  121. package/dist/lib/workspace-compile.js +108 -66
  122. package/dist/lib/workspace-compile.js.map +1 -1
  123. package/dist/lib/workspace-docs.d.ts +3 -0
  124. package/dist/lib/workspace-docs.d.ts.map +1 -0
  125. package/dist/lib/workspace-docs.js +82 -0
  126. package/dist/lib/workspace-docs.js.map +1 -0
  127. package/dist/lib/workspace-raw.d.ts +30 -0
  128. package/dist/lib/workspace-raw.d.ts.map +1 -0
  129. package/dist/lib/workspace-raw.js +102 -0
  130. package/dist/lib/workspace-raw.js.map +1 -0
  131. package/dist/lib/workspace-schema.d.ts +26 -0
  132. package/dist/lib/workspace-schema.d.ts.map +1 -0
  133. package/dist/lib/workspace-schema.js +132 -0
  134. package/dist/lib/workspace-schema.js.map +1 -0
  135. package/package.json +2 -2
  136. package/skills/benchmark/SKILL.md +4 -4
  137. package/skills/workflow/create/SKILL.md +23 -4
  138. package/skills/workspace/shape/SKILL.md +1 -1
  139. package/templates/workspace/README.md +4 -3
package/README.md CHANGED
@@ -1,28 +1,17 @@
1
- # Interf
1
+ # Interf Compiler
2
2
 
3
- Open-source knowledge compiler for local agents.
3
+ Prepare local datasets for accurate agent use.
4
4
 
5
- Interf measures and improves how accurately local agents answer questions from your files.
5
+ Interf Compiler runs local data-processing workflows over your dataset to build a compiled workspace: a folder of agent-readable files that helps agents navigate evidence, verify facts, and answer accurately.
6
6
 
7
- If you use Claude Code, Codex, OpenClaw, Hermes, or your own local agent setup on folders full of PDFs, docs, spreadsheets, and notes, the failure often shows up late: missed evidence, shallow analysis, bad comparisons, or answers that sound confident but are wrong.
8
-
9
- Interf lets you define a few checks over your files, measure the raw baseline first if you want it, compile a workspace on top of those files, and see whether the result actually passes.
10
-
11
- - your files stay on your machine
12
- - you choose the local agent
13
- - your raw files stay the source of truth
14
- - Interf adds a file-based layer on top
15
-
16
- `interf compile` runs a local data-processing pipeline with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
17
-
18
- In the advanced looped mode, Interf can keep rerunning that pipeline, testing the result, and trying improved preparation attempts until it finds the best-performing workspace within the attempt budget.
7
+ Use truth checks to test the raw dataset, compile the workspace, and compare the result on the same task.
19
8
 
20
9
  ## Quick Start
21
10
 
22
11
  Requirements:
23
12
 
24
13
  - Node.js 20+
25
- - a local coding agent: Claude Code or Codex
14
+ - a local coding agent such as Claude Code or Codex
26
15
 
27
16
  Install:
28
17
 
@@ -30,24 +19,78 @@ Install:
30
19
  npm install -g @interf/compiler
31
20
  ```
32
21
 
33
- The quickest start is the wizard:
22
+ Start from the folder that already contains your dataset:
34
23
 
35
24
  ```bash
36
- cd ~/my-folder
25
+ cd ~/my-dataset
37
26
  interf
27
+ interf compile
28
+ interf test
38
29
  ```
39
30
 
40
- If you want to see the config shape first, this is what Interf writes:
31
+ The first run can:
32
+
33
+ - save a few truth checks for the dataset
34
+ - test the raw dataset as a baseline
35
+ - build the compiled workspace
36
+ - test the compiled workspace on the same truth checks
37
+
38
+ ## What Interf Compiler Creates
39
+
40
+ Interf Compiler adds three things beside your dataset:
41
+
42
+ - `interf.config.json` with your saved truth checks and workspace setup
43
+ - `interf/workspaces/<name>/` with the compiled workspace
44
+ - `interf/benchmarks/runs/...` with saved test runs
45
+
46
+ A compiled workspace is a folder on top of your dataset. It includes:
47
+
48
+ - a workspace-local `raw/` snapshot for direct evidence and verification
49
+ - agent-readable summaries and cross-file notes
50
+ - `AGENTS.md`, `CLAUDE.md`, and generated local query skills
51
+ - runtime state under `.interf/`
52
+
53
+ The compiled workspace is the folder your agent should work from.
54
+
55
+ ## Why Use It
56
+
57
+ Raw dataset folders are hard for agents.
58
+
59
+ Common failure modes:
60
+
61
+ - missed evidence
62
+ - weak cross-file understanding
63
+ - bad comparisons
64
+ - answers that sound confident but are wrong
65
+
66
+ Interf Compiler keeps the raw dataset as the source of truth, builds a compiled workspace on top of it, and tests whether that workspace actually helps.
67
+
68
+ ## The Loop
41
69
 
42
- ```json
70
+ 1. Define truth checks for the dataset.
71
+ 2. Build the compiled workspace.
72
+ 3. Test raw vs compiled on the same truth checks.
73
+
74
+ Truth checks are simple:
75
+
76
+ - one question
77
+ - one expected answer
78
+
79
+ Good first truth checks are small and practical:
80
+
81
+ - one exact number from a chart, table, or filing
82
+ - one short statement that should be true or false
83
+ - one simple comparison across years, files, or sections
84
+
85
+ If you want to see the config shape first, this is what Interf Compiler writes:
86
+
87
+ ```jsonc
43
88
  {
44
89
  "workspaces": [
45
90
  {
46
- "name": "default",
91
+ "name": "my-workspace",
47
92
  "about": "General compiled workspace for the quarterly results folder.",
48
- "retry_policy": {
49
- "max_attempts": 3
50
- },
93
+ "max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
51
94
  "checks": [
52
95
  {
53
96
  "question": "What full-year revenue range did the company maintain?",
@@ -63,63 +106,13 @@ If you want to see the config shape first, this is what Interf writes:
63
106
  }
64
107
  ```
65
108
 
66
- The root-level flow is:
67
-
68
- ```bash
69
- interf
70
- interf compile
71
- interf test
72
- ```
73
-
74
- The first guided run can:
75
-
76
- - save a few questions and expected answers for this folder
77
- - run a baseline test on the raw files
78
- - compile the workspace
79
- - optionally keep compiling and retesting until it passes or reaches the attempt limit
80
- - run the same test against the compiled workspace
81
-
82
- That gives you three concrete things:
83
-
84
- - `interf/workspaces/default/` with the compiled workspace for your files
85
- - `interf/benchmarks/runs/...` with the saved test result
86
- - a pass/fail score on the same questions and expected answers you wrote
87
-
88
- Saved test runs keep the details you need later:
89
-
90
- - whether the run tested `raw`, `workspace`, or both
91
- - per-question pass/fail results
92
- - the saved run path under `interf/benchmarks/runs/...`
93
- - executor metadata such as agent, command, model, effort, and profile when available
94
-
95
- If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
109
+ If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If the compiler cannot find your local agent or compile setup, run:
96
110
 
97
111
  ```bash
98
112
  interf doctor
99
113
  ```
100
114
 
101
- The first flow is:
102
-
103
- - write down a few questions your agent should be able to answer from your files
104
- - let `interf` or `interf init` save those checks in `interf.config.json`
105
- - optionally run a baseline test on the raw files
106
- - run `interf compile` to build the compiled workspace
107
- - run `interf test` to test the raw files, the compiled workspace, or both
108
- - only create another workspace if you want a separate compiled setup with its own checks
109
- - if needed, rerun compile or use the advanced retry path until it is good enough
110
-
111
- ## Why This Approach
112
-
113
- Interf is built around a few simple design principles:
114
-
115
- - `Explicit`: the output is visible and inspectable, not hidden memory
116
- - `Local`: your files stay on your machine
117
- - `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
118
- - `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
119
-
120
- Interf does not replace your data with an opaque store. It keeps the raw files in place and adds a file-based layer on top for agents.
121
-
122
- Sample flow:
115
+ Sample run:
123
116
 
124
117
  ```bash
125
118
  cp -r examples/benchmark-demo /tmp/interf-demo
@@ -129,68 +122,44 @@ interf compile
129
122
  interf test
130
123
  ```
131
124
 
132
- ## Start With Your Own Checks
133
-
134
- Start with your own checks over the files: questions where you already know the correct answer from the dataset.
135
-
136
- `interf.config.json` is where you save those checks for a folder.
137
-
138
- That file uses one `workspaces` array:
139
-
140
- - most folders only need one workspace
141
- - add another workspace only if you want a separate compiled setup with different checks
142
- - each workspace carries its own `checks`
143
- - each workspace can optionally carry `retry_policy.max_attempts` for the self-improving compile loop
144
-
145
- If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
146
-
147
- Good first checks are small and practical:
148
-
149
- - one exact number from a chart, table, or filing
150
- - one short statement that should be true or false
151
- - one simple comparison across years, files, or sections
125
+ ## What `interf test` Does
152
126
 
153
- Then run:
127
+ `interf test` scores either the raw files, a compiled workspace, or both on the same saved truth checks.
154
128
 
155
- ```bash
156
- interf compile
157
- interf test
158
- ```
159
-
160
- ## What `interf test` Does
129
+ It answers a simple question:
161
130
 
162
- `interf test` scores either the raw files, a compiled workspace, or both on the same saved checks.
131
+ - does the compiled workspace help on this dataset or not?
163
132
 
164
- It lets you answer a simple question:
133
+ By default it loads truth checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
165
134
 
166
- - what is the current baseline on the raw files?
167
- - does this compiled workspace improve on that baseline?
168
- - which compiled workspace or workflow performs better on the same folder?
169
- - does a separate workspace with different checks work better for that job?
135
+ For live runs:
170
136
 
171
- By default it loads checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
137
+ - raw tests execute from a sanitized raw-only sandbox
138
+ - compiled-workspace tests execute from a copied workspace sandbox with embedded sanitized `raw/`
139
+ - neither sandbox includes `interf.config.json` or the source-folder `interf/` control plane
140
+ - failed test sandboxes are kept automatically for review
141
+ - `interf test --keep-sandboxes` keeps every sandbox, even successful ones
172
142
 
173
143
  Each saved run includes:
174
144
 
175
- - the benchmark target and mode
145
+ - whether the run tested `raw`, `workspace`, or both
176
146
  - per-question results and traces
147
+ - the preserved sandbox path when one was kept
177
148
  - the executor metadata for that run
178
149
 
179
- If you run `interf test` from inside a workspace, it uses that workspace's checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
180
-
181
- Live test runs use an isolated sandbox. For raw baselines, Interf gives the agent sanitized raw files only. For compiled-workspace tests, it gives the agent a copied workspace plus sanitized raw files. The source-folder control plane, `interf.config.json`, and saved test runs are not part of those sandboxes.
150
+ If you run `interf test` from inside a workspace, it uses that workspace's truth checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
182
151
 
183
152
  If you need repeated isolated experiments across workflows or models, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
184
153
 
185
154
  ## What `interf compile` Does
186
155
 
187
- `interf compile` runs the Interf data-processing pipeline over your files.
156
+ `interf compile` runs the selected workflow over a dataset.
188
157
 
189
- By default, that means:
158
+ The built-in workflow:
190
159
 
191
160
  - summarize the source files into per-file evidence notes
192
161
  - structure the cross-file knowledge layer into entities, claims, and indexes
193
- - shape the final workspace around its saved focus and questions
162
+ - shape the final workspace around its saved focus and truth checks
194
163
 
195
164
  In other words, the built-in workflow is:
196
165
 
@@ -198,47 +167,30 @@ In other words, the built-in workflow is:
198
167
  2. `structure`
199
168
  3. `shape`
200
169
 
201
- In public docs, `pipeline` is the thing Interf runs. `workflow` is the saved method that defines or customizes that pipeline.
202
-
203
- The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
204
-
205
- If a workspace has `retry_policy.max_attempts`, or if you run `interf compile --max-attempts <n>`, Interf can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, Interf keeps the best-performing compiled workspace from that run.
206
-
207
- ## What Gets Created
208
-
209
- After compile, Interf writes into `./interf/` beside your source files.
210
-
211
- - `interf/workspaces/<name>/` is a compiled workspace over the folder
212
- - `interf/benchmarks/runs/...` stores saved test runs
213
-
214
- Inside those workspaces you will see things like:
215
-
216
- - summaries of source files
217
- - navigation notes and entrypoints for agents
218
- - cross-file knowledge notes
219
- - workspace-specific outputs when you define a separate job-focused workspace
170
+ If you want a different method, you can define your own workflow and test it on the same dataset.
220
171
 
221
- The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
172
+ Under the hood, each workflow defines:
222
173
 
223
- If you use Obsidian, open `interf/workspaces/<name>/` as the vault for the compiled workspace.
174
+ - `workflow.json` for stage order, compiler API target, and deterministic contract mapping
175
+ - `workspace.schema.json` for the deterministic output shape of the compiled workspace
176
+ - stage `reads` / `writes` declarations that reference schema-defined zone ids
177
+ - local `SKILL.md` files as the authoring source for query and stage-execution behavior
224
178
 
225
- ## Terminology
179
+ The compiler then projects that workflow into the native agent surfaces it actually runs:
226
180
 
227
- Public terms:
181
+ - the compiled workspace gets a generated native query shell
182
+ - each compile stage gets a generated native execution shell
183
+ - that shell keeps its own `AGENTS.md`, `CLAUDE.md`, and native local skills
184
+ - schema-declared workspace zones are mounted both at their workflow-relative paths and as shell-local `inputs/<zone-id>` / `outputs/<zone-id>` aliases
185
+ - the workspace root itself is not linked into the shell
228
186
 
229
- - `your files` = the source folder Interf reads from
230
- - `questions and expected answers` = the checks you want your agent to pass
231
- - `checks` = the pass/fail questions each workspace should satisfy
232
- - `test` = run the saved questions and get a score
233
- - `compiled workspace` = the output Interf produces on top of a folder
234
- - `workspace` = one compiled setup with its own checks
187
+ If a workspace has `max_attempts`, or if you run `interf compile --max-attempts <n>`, the compiler can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, it keeps the best-performing compiled workspace from that run.
235
188
 
236
- Technical terms:
189
+ For stage-level review:
237
190
 
238
- - `source folder` = the raw files Interf reads from
239
- - `benchmark` = the technical alias and saved-run layer behind `interf test`
240
- - `workflow` = the saved method that defines or customizes the pipeline
241
- - `.interf/` = runtime state, proofs, and health artifacts
191
+ - successful stage shells are pruned by default
192
+ - failed stage shells stay under `.interf/execution-shells/`
193
+ - `interf compile --keep-stage-shells` keeps every stage shell so you can inspect the exact native instruction surface, mounted inputs, and mounted outputs for each stage
242
194
 
243
195
  ## Advanced: Separate Workspaces
244
196
 
@@ -253,31 +205,32 @@ Create another only when you want a different compiled setup with different chec
253
205
 
254
206
  Why create another one:
255
207
 
256
- - it keeps a separate set of questions and expected answers
208
+ - it keeps a separate set of truth checks
257
209
  - it gives that job its own compiled output under `interf/workspaces/<name>/`
258
210
  - it lets you test that job separately
259
211
 
260
212
  ## Advanced: Keep Improving Until It Passes
261
213
 
262
- Interf also supports a deeper loop above the normal compile + test flow.
214
+ Interf Compiler also supports a deeper loop above the normal compile + test flow.
263
215
 
264
- The normal workspace flow already supports `retry_policy.max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
216
+ The normal workspace flow already supports `max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
265
217
 
266
- Give it the same folder and the same checks. Interf can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
218
+ Give it the same dataset and the same truth checks. The compiler can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
267
219
 
268
- That loop is the self-improving part of the product:
220
+ That loop is the self-improving part:
269
221
 
270
- - it reruns the local data-processing pipeline over the same files
271
- - it keeps the checks fixed, so the target does not move
222
+ - it reruns the same workflow over the same dataset
223
+ - it keeps the truth checks fixed, so the target does not move
224
+ - it keeps the measurement fixed, so attempts stay comparable
272
225
  - it can vary the compile profile and follow-up diagnostics
273
226
  - it records which attempt performed best on the same saved test
274
227
 
275
228
  In practice:
276
229
 
277
- - `retry_policy.max_attempts` controls how many total attempts a normal workspace compile gets
230
+ - `max_attempts` controls how many total attempts a normal workspace compile gets
278
231
  - `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets in eval packs
279
232
  - stronger diagnostic profiles can be used only after the default ones fail
280
- - the checks stay the same across every attempt
233
+ - the truth checks stay the same across every attempt
281
234
  - each attempt records what changed and which attempt performed best
282
235
 
283
236
  Example eval-pack shape:
@@ -286,10 +239,8 @@ Example eval-pack shape:
286
239
  {
287
240
  "workspaces": [
288
241
  {
289
- "name": "default",
290
- "retry_policy": {
291
- "max_attempts": 3
292
- },
242
+ "name": "my-workspace",
243
+ "max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
293
244
  "checks": [
294
245
  {
295
246
  "question": "What full-year revenue range did the company maintain?",
@@ -304,38 +255,38 @@ Example eval-pack shape:
304
255
  }
305
256
  ```
306
257
 
307
- Use the normal workspace retry loop first. Use the eval-pack path when you want Interf to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
258
+ Use the normal workspace retry loop first. Use the eval-pack path when you want Interf Compiler to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
308
259
 
309
260
  ## Use It With Your Agent
310
261
 
311
- If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this process for you.
262
+ If you already work through a local coding agent, it can run this process for you.
312
263
 
313
264
  Paste something like this into your agent:
314
265
 
315
266
  ```text
316
267
  Install @interf/compiler, run `interf` in this folder, and use the local agent executor.
317
268
 
318
- If `interf.config.json` is missing, draft one workspace with a few checks this agent should be able to answer from these files and add the expected answers for me to confirm.
269
+ If `interf.config.json` is missing, draft one workspace with a few truth checks this agent should be able to answer from this dataset and add the expected answers for me to confirm.
319
270
 
320
271
  Then run a raw baseline if helpful, compile the workspace, and run `interf test`.
321
272
 
322
- Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
273
+ Tell me whether the compiled workspace passes the truth checks, and only recommend it if it does.
323
274
  ```
324
275
 
325
276
  ## Custom Workflows
326
277
 
327
- Interf ships with a default workflow.
278
+ Interf Compiler ships with a default workflow.
328
279
 
329
- If you want to change how the data-processing pipeline runs on your files, this is the part you customize:
280
+ If you want to change how the workflow runs on your dataset, this is the part you customize:
330
281
 
331
282
  ```bash
332
283
  interf create workflow
333
284
  interf verify workflow --path <path>
334
285
  ```
335
286
 
336
- Then benchmark that workflow on the same folder and the same checks.
287
+ Then test that workflow on the same dataset and the same truth checks.
337
288
 
338
- Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
289
+ Workflow docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
339
290
 
340
291
  ## Core Commands
341
292
 
@@ -344,7 +295,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
344
295
  - `interf create workspace` = create another compiled workspace when you need one
345
296
  - `interf create workflow` = create a reusable local workflow package
346
297
  - `interf compile` = build a selected workspace for the current folder
347
- - `interf test` = test the raw files, a compiled workspace, or both on saved checks
298
+ - `interf test` = test the raw files, a compiled workspace, or both on saved truth checks
348
299
  - `interf benchmark` = alias for `interf test`
349
300
  - `interf doctor` = check local executor setup
350
301
  - `interf verify <check>` = run deterministic checks on major workflow steps
@@ -1,6 +1,7 @@
1
1
  import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
2
2
  import type { SourceWorkspaceConfig } from "../lib/schema.js";
3
3
  import type { CommandModule } from "yargs";
4
+ import type { StageShellRetentionMode } from "../lib/workflows.js";
4
5
  export declare const compileCommand: CommandModule;
5
6
  export declare function runCompileCommand(argv?: Record<string, unknown>): Promise<void>;
6
7
  export declare function runConfiguredWorkspaceCompile(options: {
@@ -10,5 +11,6 @@ export declare function runConfiguredWorkspaceCompile(options: {
10
11
  workspaceConfig: SourceWorkspaceConfig | null;
11
12
  executionProfile?: WorkflowExecutionProfile;
12
13
  maxAttemptsOverride: number | null;
14
+ preserveStageShells?: StageShellRetentionMode;
13
15
  }): Promise<boolean>;
14
16
  //# sourceMappingURL=compile.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAa3C,eAAO,MAAM,cAAc,EAAE,aAa5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CA+EzF;AA6CD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;CACpC,GACA,OAAO,CAAC,OAAO,CAAC,CA+GlB"}
1
+ {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAY3C,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAEnE,eAAO,MAAM,cAAc,EAAE,aAkB5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgFzF;AAuDD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,mBAAmB,CAAC,EAAE,uBAAuB,CAAC;CAC/C,GACA,OAAO,CAAC,OAAO,CAAC,CA4HlB"}
@@ -3,21 +3,25 @@ import { tmpdir } from "node:os";
3
3
  import { join } from "node:path";
4
4
  import chalk from "chalk";
5
5
  import * as p from "@clack/prompts";
6
- import { detectInterf, readInterfConfig, resolveSourceFolderPath, } from "../lib/interf.js";
6
+ import { detectInterf, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
7
7
  import { findSourceWorkspaceConfig, loadSourceFolderConfig, resolveWorkspaceCompileMaxAttempts, } from "../lib/source-config.js";
8
8
  import { resetWorkspaceGeneratedState } from "../lib/workspace-reset.js";
9
9
  import { formatWorkspaceWorkflowStageStep, resolveWorkspaceWorkflowFromConfig, } from "../lib/workflow-definitions.js";
10
10
  import { addExecutionProfileOptions, executionProfileFromArgv, } from "../lib/execution-profile.js";
11
11
  import { chooseWorkspaceConfigToBuild, compileWorkspaceWithReporter, ensureWorkspaceFromConfig, } from "./workspace-flow.js";
12
12
  import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
13
- import { printSavedTestOutcome, runSavedWorkspaceTest } from "./test-flow.js";
13
+ import { printSavedTestOutcome, questionPassRate, runSavedWorkspaceTest } from "./test-flow.js";
14
14
  export const compileCommand = {
15
15
  command: "compile",
16
- describe: "Build a workspace for this folder",
16
+ describe: "Build a workspace for this dataset",
17
17
  builder: (yargs) => addExecutionProfileOptions(yargs).option("max-attempts", {
18
18
  alias: "max-retries",
19
19
  type: "number",
20
20
  describe: "Compile, test, and retry until the workspace passes or reaches this total attempt limit",
21
+ }).option("keep-stage-shells", {
22
+ type: "boolean",
23
+ default: false,
24
+ describe: "Keep every executed stage shell under .interf/execution-shells for review instead of pruning successful shells",
21
25
  }),
22
26
  handler: async (argv) => {
23
27
  await runCompileCommand(argv);
@@ -30,7 +34,7 @@ export async function runCompileCommand(argv = {}) {
30
34
  const detected = detectInterf(process.cwd());
31
35
  if (detected) {
32
36
  workspacePath = detected.path;
33
- sourcePath = resolveSourceFolderPath(detected.path, detected.config);
37
+ sourcePath = resolveSourceControlPath(detected.path);
34
38
  workspaceConfig = findSourceWorkspaceConfig(loadSourceFolderConfig(sourcePath), detected.config.name) ?? {
35
39
  name: detected.config.name,
36
40
  ...(detected.config.about ? { about: detected.config.about } : {}),
@@ -99,6 +103,7 @@ export async function runCompileCommand(argv = {}) {
99
103
  workspaceConfig,
100
104
  executionProfile,
101
105
  maxAttemptsOverride,
106
+ preserveStageShells: readStageShellRetentionMode(argv),
102
107
  });
103
108
  }
104
109
  function readCompileMaxAttemptsOverride(argv) {
@@ -114,15 +119,21 @@ function readCompileMaxAttemptsOverride(argv) {
114
119
  }
115
120
  return parsed;
116
121
  }
122
+ function readStageShellRetentionMode(argv) {
123
+ const enabled = argv["keep-stage-shells"] ??
124
+ argv.keepStageShells ??
125
+ false;
126
+ return enabled ? "always" : "on-failure";
127
+ }
117
128
  function printCompileFailure(workspacePath, failedStage) {
118
129
  const workflowId = resolveWorkspaceWorkflowFromConfig(readInterfConfig(workspacePath));
119
130
  const failedStageLabel = formatWorkspaceWorkflowStageStep(workflowId, failedStage ?? "compile", {
120
- sourcePath: resolveSourceFolderPath(workspacePath),
131
+ sourcePath: resolveSourceControlPath(workspacePath),
121
132
  });
122
133
  console.log(chalk.red(` ${failedStageLabel} failed.`));
123
134
  }
124
135
  function testScore(outcome) {
125
- return (outcome.result.passedChecks * 1000) + outcome.result.passedCases;
136
+ return (outcome.result.passedCases * 1000) + outcome.result.passedChecks;
126
137
  }
127
138
  function snapshotWorkspace(workspacePath) {
128
139
  const snapshotRoot = mkdtempSync(join(tmpdir(), "interf-compile-attempt-"));
@@ -135,23 +146,28 @@ function restoreWorkspaceSnapshot(snapshotPath, workspacePath) {
135
146
  cpSync(snapshotPath, workspacePath, { recursive: true });
136
147
  }
137
148
  export async function runConfiguredWorkspaceCompile(options) {
138
- const maxAttempts = resolveWorkspaceCompileMaxAttempts(options.workspaceConfig ?? { retry_policy: undefined }, options.maxAttemptsOverride);
149
+ const preserveStageShells = options.preserveStageShells ?? "on-failure";
150
+ const maxAttempts = resolveWorkspaceCompileMaxAttempts(options.workspaceConfig ?? { max_attempts: undefined }, options.maxAttemptsOverride);
139
151
  const loopEnabled = maxAttempts != null;
140
152
  const checks = options.workspaceConfig?.checks ?? [];
141
153
  if (loopEnabled && checks.length === 0) {
142
- console.log(chalk.yellow(" Self-improving mode needs saved questions. Building once without the retry loop."));
154
+ console.log(chalk.yellow(" Self-improving mode needs saved truth checks. Building once without the retry loop."));
143
155
  }
144
156
  else if (loopEnabled) {
145
157
  console.log(chalk.dim(` Self-improving mode: up to ${maxAttempts} compile attempts.`));
146
- console.log(chalk.dim(" Interf will compile, test the workspace, and retry until it passes or reaches the limit."));
158
+ console.log(chalk.dim(" Interf Compiler will run the workflow, test the workspace, and retry until it passes or reaches the limit."));
147
159
  }
148
160
  if (!loopEnabled || checks.length === 0) {
149
- const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath);
161
+ const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath, {
162
+ preserveStageShells,
163
+ });
150
164
  if (!result.ok) {
151
165
  process.exitCode = 1;
152
166
  printCompileFailure(options.workspacePath, result.failedStage);
167
+ printStageShellReviewHint(options.workspacePath, preserveStageShells);
153
168
  return false;
154
169
  }
170
+ printStageShellReviewHint(options.workspacePath, preserveStageShells);
155
171
  return true;
156
172
  }
157
173
  let bestOutcome = null;
@@ -171,9 +187,11 @@ export async function runConfiguredWorkspaceCompile(options) {
171
187
  successMessage: maxAttempts > 1
172
188
  ? `Compiled workspace ready for attempt ${attempt}.`
173
189
  : "Compiled workspace ready.",
190
+ preserveStageShells,
174
191
  });
175
192
  if (!result.ok) {
176
193
  printCompileFailure(options.workspacePath, result.failedStage);
194
+ printStageShellReviewHint(options.workspacePath, preserveStageShells);
177
195
  if (attempt < maxAttempts) {
178
196
  console.log(chalk.yellow(` Attempt ${attempt}/${maxAttempts} failed. Retrying with a fresh compile.`));
179
197
  continue;
@@ -189,6 +207,7 @@ export async function runConfiguredWorkspaceCompile(options) {
189
207
  },
190
208
  executionProfile: options.executionProfile,
191
209
  workspacePath: options.workspacePath,
210
+ preserveSandboxes: preserveStageShells === "always" ? "always" : "on-failure",
192
211
  });
193
212
  if (!outcome) {
194
213
  process.exitCode = 1;
@@ -208,6 +227,7 @@ export async function runConfiguredWorkspaceCompile(options) {
208
227
  if (outcome.result.ok) {
209
228
  console.log();
210
229
  console.log(chalk.green(` Workspace passed on attempt ${attempt}/${maxAttempts}.`));
230
+ printStageShellReviewHint(options.workspacePath, preserveStageShells);
211
231
  return true;
212
232
  }
213
233
  if (attempt < maxAttempts) {
@@ -223,6 +243,10 @@ export async function runConfiguredWorkspaceCompile(options) {
223
243
  process.exitCode = 1;
224
244
  console.log();
225
245
  console.log(chalk.red(` Workspace did not pass within ${maxAttempts} attempts.`));
246
+ if (bestOutcome) {
247
+ console.log(chalk.dim(` Best attempt truth-check pass rate: ${questionPassRate(bestOutcome)}%.`));
248
+ }
249
+ printStageShellReviewHint(options.workspacePath, preserveStageShells);
226
250
  return false;
227
251
  }
228
252
  finally {
@@ -231,4 +255,12 @@ export async function runConfiguredWorkspaceCompile(options) {
231
255
  }
232
256
  }
233
257
  }
258
+ function printStageShellReviewHint(workspacePath, preserveStageShells) {
259
+ const reviewRoot = join(workspacePath, ".interf", "execution-shells");
260
+ if (preserveStageShells === "always") {
261
+ console.log(chalk.dim(` Preserved stage shells: ${reviewRoot}`));
262
+ return;
263
+ }
264
+ console.log(chalk.dim(` Failed stage shells remain under: ${reviewRoot}`));
265
+ }
234
266
  //# sourceMappingURL=compile.js.map