@interf/compiler 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -173
- package/dist/commands/compile.d.ts +2 -0
- package/dist/commands/compile.d.ts.map +1 -1
- package/dist/commands/compile.js +42 -10
- package/dist/commands/compile.js.map +1 -1
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +5 -5
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/default.js +2 -2
- package/dist/commands/default.js.map +1 -1
- package/dist/commands/doctor.js +7 -7
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/init.js +19 -23
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/source-config-wizard.d.ts +2 -1
- package/dist/commands/source-config-wizard.d.ts.map +1 -1
- package/dist/commands/source-config-wizard.js +29 -27
- package/dist/commands/source-config-wizard.js.map +1 -1
- package/dist/commands/test-flow.d.ts +4 -0
- package/dist/commands/test-flow.d.ts.map +1 -1
- package/dist/commands/test-flow.js +24 -13
- package/dist/commands/test-flow.js.map +1 -1
- package/dist/commands/test.d.ts.map +1 -1
- package/dist/commands/test.js +16 -5
- package/dist/commands/test.js.map +1 -1
- package/dist/commands/workspace-flow.d.ts +2 -0
- package/dist/commands/workspace-flow.d.ts.map +1 -1
- package/dist/commands/workspace-flow.js +3 -2
- package/dist/commands/workspace-flow.js.map +1 -1
- package/dist/lib/agent-shells.d.ts +17 -0
- package/dist/lib/agent-shells.d.ts.map +1 -0
- package/dist/lib/agent-shells.js +295 -0
- package/dist/lib/agent-shells.js.map +1 -0
- package/dist/lib/benchmark-execution.d.ts +5 -1
- package/dist/lib/benchmark-execution.d.ts.map +1 -1
- package/dist/lib/benchmark-execution.js +34 -12
- package/dist/lib/benchmark-execution.js.map +1 -1
- package/dist/lib/benchmark-paths.d.ts +2 -0
- package/dist/lib/benchmark-paths.d.ts.map +1 -1
- package/dist/lib/benchmark-paths.js +6 -0
- package/dist/lib/benchmark-paths.js.map +1 -1
- package/dist/lib/benchmark-sandbox.d.ts +2 -0
- package/dist/lib/benchmark-sandbox.d.ts.map +1 -1
- package/dist/lib/benchmark-sandbox.js +68 -37
- package/dist/lib/benchmark-sandbox.js.map +1 -1
- package/dist/lib/benchmark-targets.js +1 -1
- package/dist/lib/benchmark-targets.js.map +1 -1
- package/dist/lib/interf-bootstrap.d.ts +2 -13
- package/dist/lib/interf-bootstrap.d.ts.map +1 -1
- package/dist/lib/interf-bootstrap.js +7 -164
- package/dist/lib/interf-bootstrap.js.map +1 -1
- package/dist/lib/interf-detect.d.ts +1 -0
- package/dist/lib/interf-detect.d.ts.map +1 -1
- package/dist/lib/interf-detect.js +5 -18
- package/dist/lib/interf-detect.js.map +1 -1
- package/dist/lib/interf-scaffold.d.ts.map +1 -1
- package/dist/lib/interf-scaffold.js +7 -71
- package/dist/lib/interf-scaffold.js.map +1 -1
- package/dist/lib/interf-workflow-package.d.ts.map +1 -1
- package/dist/lib/interf-workflow-package.js +21 -26
- package/dist/lib/interf-workflow-package.js.map +1 -1
- package/dist/lib/interf.d.ts +3 -2
- package/dist/lib/interf.d.ts.map +1 -1
- package/dist/lib/interf.js +3 -2
- package/dist/lib/interf.js.map +1 -1
- package/dist/lib/local-workflows.d.ts +6 -1
- package/dist/lib/local-workflows.d.ts.map +1 -1
- package/dist/lib/local-workflows.js +143 -2
- package/dist/lib/local-workflows.js.map +1 -1
- package/dist/lib/runtime-contracts.d.ts.map +1 -1
- package/dist/lib/runtime-contracts.js +10 -4
- package/dist/lib/runtime-contracts.js.map +1 -1
- package/dist/lib/runtime-prompt.d.ts.map +1 -1
- package/dist/lib/runtime-prompt.js +1 -0
- package/dist/lib/runtime-prompt.js.map +1 -1
- package/dist/lib/runtime-runs.d.ts.map +1 -1
- package/dist/lib/runtime-runs.js +6 -2
- package/dist/lib/runtime-runs.js.map +1 -1
- package/dist/lib/runtime-types.d.ts +1 -0
- package/dist/lib/runtime-types.d.ts.map +1 -1
- package/dist/lib/schema.d.ts +88 -23
- package/dist/lib/schema.d.ts.map +1 -1
- package/dist/lib/schema.js +66 -37
- package/dist/lib/schema.js.map +1 -1
- package/dist/lib/source-config.d.ts +3 -3
- package/dist/lib/source-config.d.ts.map +1 -1
- package/dist/lib/source-config.js +8 -6
- package/dist/lib/source-config.js.map +1 -1
- package/dist/lib/state-artifacts.d.ts +2 -2
- package/dist/lib/state-artifacts.d.ts.map +1 -1
- package/dist/lib/state-artifacts.js +3 -3
- package/dist/lib/state-artifacts.js.map +1 -1
- package/dist/lib/state-io.d.ts +2 -2
- package/dist/lib/state-io.d.ts.map +1 -1
- package/dist/lib/state-io.js +5 -5
- package/dist/lib/state-io.js.map +1 -1
- package/dist/lib/state-paths.d.ts +1 -1
- package/dist/lib/state-paths.d.ts.map +1 -1
- package/dist/lib/state-paths.js +3 -3
- package/dist/lib/state-paths.js.map +1 -1
- package/dist/lib/state-view.d.ts +2 -2
- package/dist/lib/state-view.d.ts.map +1 -1
- package/dist/lib/state-view.js +6 -7
- package/dist/lib/state-view.js.map +1 -1
- package/dist/lib/state.d.ts +4 -4
- package/dist/lib/state.d.ts.map +1 -1
- package/dist/lib/state.js +3 -3
- package/dist/lib/state.js.map +1 -1
- package/dist/lib/workflow-definitions.d.ts +4 -1
- package/dist/lib/workflow-definitions.d.ts.map +1 -1
- package/dist/lib/workflow-definitions.js +41 -6
- package/dist/lib/workflow-definitions.js.map +1 -1
- package/dist/lib/workflow-stage-runner.d.ts +1 -0
- package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
- package/dist/lib/workflow-stage-runner.js +2 -0
- package/dist/lib/workflow-stage-runner.js.map +1 -1
- package/dist/lib/workflows.d.ts +1 -1
- package/dist/lib/workflows.d.ts.map +1 -1
- package/dist/lib/workspace-compile.d.ts +4 -0
- package/dist/lib/workspace-compile.d.ts.map +1 -1
- package/dist/lib/workspace-compile.js +108 -66
- package/dist/lib/workspace-compile.js.map +1 -1
- package/dist/lib/workspace-docs.d.ts +3 -0
- package/dist/lib/workspace-docs.d.ts.map +1 -0
- package/dist/lib/workspace-docs.js +82 -0
- package/dist/lib/workspace-docs.js.map +1 -0
- package/dist/lib/workspace-raw.d.ts +30 -0
- package/dist/lib/workspace-raw.d.ts.map +1 -0
- package/dist/lib/workspace-raw.js +102 -0
- package/dist/lib/workspace-raw.js.map +1 -0
- package/dist/lib/workspace-schema.d.ts +26 -0
- package/dist/lib/workspace-schema.d.ts.map +1 -0
- package/dist/lib/workspace-schema.js +132 -0
- package/dist/lib/workspace-schema.js.map +1 -0
- package/package.json +2 -2
- package/skills/benchmark/SKILL.md +4 -4
- package/skills/workflow/create/SKILL.md +23 -4
- package/skills/workspace/shape/SKILL.md +1 -1
- package/templates/workspace/README.md +4 -3
package/README.md
CHANGED
|
@@ -1,28 +1,17 @@
|
|
|
1
|
-
# Interf
|
|
1
|
+
# Interf Compiler
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Prepare local datasets for accurate agent use.
|
|
4
4
|
|
|
5
|
-
Interf
|
|
5
|
+
Interf Compiler runs local data-processing workflows over your dataset to build a compiled workspace: a folder of agent-readable files that helps agents navigate evidence, verify facts, and answer accurately.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Interf lets you define a few checks over your files, measure the raw baseline first if you want it, compile a workspace on top of those files, and see whether the result actually passes.
|
|
10
|
-
|
|
11
|
-
- your files stay on your machine
|
|
12
|
-
- you choose the local agent
|
|
13
|
-
- your raw files stay the source of truth
|
|
14
|
-
- Interf adds a file-based layer on top
|
|
15
|
-
|
|
16
|
-
`interf compile` runs a local data-processing pipeline with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
|
|
17
|
-
|
|
18
|
-
In the advanced looped mode, Interf can keep rerunning that pipeline, testing the result, and trying improved preparation attempts until it finds the best-performing workspace within the attempt budget.
|
|
7
|
+
Use truth checks to test the raw dataset, compile the workspace, and compare the result on the same task.
|
|
19
8
|
|
|
20
9
|
## Quick Start
|
|
21
10
|
|
|
22
11
|
Requirements:
|
|
23
12
|
|
|
24
13
|
- Node.js 20+
|
|
25
|
-
- a local coding agent
|
|
14
|
+
- a local coding agent such as Claude Code or Codex
|
|
26
15
|
|
|
27
16
|
Install:
|
|
28
17
|
|
|
@@ -30,24 +19,78 @@ Install:
|
|
|
30
19
|
npm install -g @interf/compiler
|
|
31
20
|
```
|
|
32
21
|
|
|
33
|
-
|
|
22
|
+
Start from the folder that already contains your dataset:
|
|
34
23
|
|
|
35
24
|
```bash
|
|
36
|
-
cd ~/my-
|
|
25
|
+
cd ~/my-dataset
|
|
37
26
|
interf
|
|
27
|
+
interf compile
|
|
28
|
+
interf test
|
|
38
29
|
```
|
|
39
30
|
|
|
40
|
-
|
|
31
|
+
The first run can:
|
|
32
|
+
|
|
33
|
+
- save a few truth checks for the dataset
|
|
34
|
+
- test the raw dataset as a baseline
|
|
35
|
+
- build the compiled workspace
|
|
36
|
+
- test the compiled workspace on the same truth checks
|
|
37
|
+
|
|
38
|
+
## What Interf Compiler Creates
|
|
39
|
+
|
|
40
|
+
Interf Compiler adds three things beside your dataset:
|
|
41
|
+
|
|
42
|
+
- `interf.config.json` with your saved truth checks and workspace setup
|
|
43
|
+
- `interf/workspaces/<name>/` with the compiled workspace
|
|
44
|
+
- `interf/benchmarks/runs/...` with saved test runs
|
|
45
|
+
|
|
46
|
+
A compiled workspace is a folder on top of your dataset. It includes:
|
|
47
|
+
|
|
48
|
+
- a workspace-local `raw/` snapshot for direct evidence and verification
|
|
49
|
+
- agent-readable summaries and cross-file notes
|
|
50
|
+
- `AGENTS.md`, `CLAUDE.md`, and generated local query skills
|
|
51
|
+
- runtime state under `.interf/`
|
|
52
|
+
|
|
53
|
+
The compiled workspace is the folder your agent should work from.
|
|
54
|
+
|
|
55
|
+
## Why Use It
|
|
56
|
+
|
|
57
|
+
Raw dataset folders are hard for agents.
|
|
58
|
+
|
|
59
|
+
Common failure modes:
|
|
60
|
+
|
|
61
|
+
- missed evidence
|
|
62
|
+
- weak cross-file understanding
|
|
63
|
+
- bad comparisons
|
|
64
|
+
- answers that sound confident but are wrong
|
|
65
|
+
|
|
66
|
+
Interf Compiler keeps the raw dataset as the source of truth, builds a compiled workspace on top of it, and tests whether that workspace actually helps.
|
|
67
|
+
|
|
68
|
+
## The Loop
|
|
41
69
|
|
|
42
|
-
|
|
70
|
+
1. Define truth checks for the dataset.
|
|
71
|
+
2. Build the compiled workspace.
|
|
72
|
+
3. Test raw vs compiled on the same truth checks.
|
|
73
|
+
|
|
74
|
+
Truth checks are simple:
|
|
75
|
+
|
|
76
|
+
- one question
|
|
77
|
+
- one expected answer
|
|
78
|
+
|
|
79
|
+
Good first truth checks are small and practical:
|
|
80
|
+
|
|
81
|
+
- one exact number from a chart, table, or filing
|
|
82
|
+
- one short statement that should be true or false
|
|
83
|
+
- one simple comparison across years, files, or sections
|
|
84
|
+
|
|
85
|
+
If you want to see the config shape first, this is what Interf Compiler writes:
|
|
86
|
+
|
|
87
|
+
```jsonc
|
|
43
88
|
{
|
|
44
89
|
"workspaces": [
|
|
45
90
|
{
|
|
46
|
-
"name": "
|
|
91
|
+
"name": "my-workspace",
|
|
47
92
|
"about": "General compiled workspace for the quarterly results folder.",
|
|
48
|
-
"
|
|
49
|
-
"max_attempts": 3
|
|
50
|
-
},
|
|
93
|
+
"max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
|
|
51
94
|
"checks": [
|
|
52
95
|
{
|
|
53
96
|
"question": "What full-year revenue range did the company maintain?",
|
|
@@ -63,63 +106,13 @@ If you want to see the config shape first, this is what Interf writes:
|
|
|
63
106
|
}
|
|
64
107
|
```
|
|
65
108
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
interf
|
|
70
|
-
interf compile
|
|
71
|
-
interf test
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
The first guided run can:
|
|
75
|
-
|
|
76
|
-
- save a few questions and expected answers for this folder
|
|
77
|
-
- run a baseline test on the raw files
|
|
78
|
-
- compile the workspace
|
|
79
|
-
- optionally keep compiling and retesting until it passes or reaches the attempt limit
|
|
80
|
-
- run the same test against the compiled workspace
|
|
81
|
-
|
|
82
|
-
That gives you three concrete things:
|
|
83
|
-
|
|
84
|
-
- `interf/workspaces/default/` with the compiled workspace for your files
|
|
85
|
-
- `interf/benchmarks/runs/...` with the saved test result
|
|
86
|
-
- a pass/fail score on the same questions and expected answers you wrote
|
|
87
|
-
|
|
88
|
-
Saved test runs keep the details you need later:
|
|
89
|
-
|
|
90
|
-
- whether the run tested `raw`, `workspace`, or both
|
|
91
|
-
- per-question pass/fail results
|
|
92
|
-
- the saved run path under `interf/benchmarks/runs/...`
|
|
93
|
-
- executor metadata such as agent, command, model, effort, and profile when available
|
|
94
|
-
|
|
95
|
-
If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
|
|
109
|
+
If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If the compiler cannot find your local agent or compile setup, run:
|
|
96
110
|
|
|
97
111
|
```bash
|
|
98
112
|
interf doctor
|
|
99
113
|
```
|
|
100
114
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
- write down a few questions your agent should be able to answer from your files
|
|
104
|
-
- let `interf` or `interf init` save those checks in `interf.config.json`
|
|
105
|
-
- optionally run a baseline test on the raw files
|
|
106
|
-
- run `interf compile` to build the compiled workspace
|
|
107
|
-
- run `interf test` to test the raw files, the compiled workspace, or both
|
|
108
|
-
- only create another workspace if you want a separate compiled setup with its own checks
|
|
109
|
-
- if needed, rerun compile or use the advanced retry path until it is good enough
|
|
110
|
-
|
|
111
|
-
## Why This Approach
|
|
112
|
-
|
|
113
|
-
Interf is built around a few simple design principles:
|
|
114
|
-
|
|
115
|
-
- `Explicit`: the output is visible and inspectable, not hidden memory
|
|
116
|
-
- `Local`: your files stay on your machine
|
|
117
|
-
- `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
|
|
118
|
-
- `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
|
|
119
|
-
|
|
120
|
-
Interf does not replace your data with an opaque store. It keeps the raw files in place and adds a file-based layer on top for agents.
|
|
121
|
-
|
|
122
|
-
Sample flow:
|
|
115
|
+
Sample run:
|
|
123
116
|
|
|
124
117
|
```bash
|
|
125
118
|
cp -r examples/benchmark-demo /tmp/interf-demo
|
|
@@ -129,68 +122,44 @@ interf compile
|
|
|
129
122
|
interf test
|
|
130
123
|
```
|
|
131
124
|
|
|
132
|
-
##
|
|
133
|
-
|
|
134
|
-
Start with your own checks over the files: questions where you already know the correct answer from the dataset.
|
|
135
|
-
|
|
136
|
-
`interf.config.json` is where you save those checks for a folder.
|
|
137
|
-
|
|
138
|
-
That file uses one `workspaces` array:
|
|
139
|
-
|
|
140
|
-
- most folders only need one workspace
|
|
141
|
-
- add another workspace only if you want a separate compiled setup with different checks
|
|
142
|
-
- each workspace carries its own `checks`
|
|
143
|
-
- each workspace can optionally carry `retry_policy.max_attempts` for the self-improving compile loop
|
|
144
|
-
|
|
145
|
-
If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
|
|
146
|
-
|
|
147
|
-
Good first checks are small and practical:
|
|
148
|
-
|
|
149
|
-
- one exact number from a chart, table, or filing
|
|
150
|
-
- one short statement that should be true or false
|
|
151
|
-
- one simple comparison across years, files, or sections
|
|
125
|
+
## What `interf test` Does
|
|
152
126
|
|
|
153
|
-
|
|
127
|
+
`interf test` scores the raw files, a compiled workspace, or both on the same saved truth checks.
|
|
154
128
|
|
|
155
|
-
|
|
156
|
-
interf compile
|
|
157
|
-
interf test
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
## What `interf test` Does
|
|
129
|
+
It answers a simple question:
|
|
161
130
|
|
|
162
|
-
|
|
131
|
+
- does the compiled workspace help on this dataset or not?
|
|
163
132
|
|
|
164
|
-
|
|
133
|
+
By default it loads truth checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
|
|
165
134
|
|
|
166
|
-
|
|
167
|
-
- does this compiled workspace improve on that baseline?
|
|
168
|
-
- which compiled workspace or workflow performs better on the same folder?
|
|
169
|
-
- does a separate workspace with different checks work better for that job?
|
|
135
|
+
For live runs:
|
|
170
136
|
|
|
171
|
-
|
|
137
|
+
- raw tests execute from a sanitized raw-only sandbox
|
|
138
|
+
- compiled-workspace tests execute from a copied workspace sandbox with embedded sanitized `raw/`
|
|
139
|
+
- neither sandbox includes `interf.config.json` or the source-folder `interf/` control plane
|
|
140
|
+
- failed test sandboxes are kept automatically for review
|
|
141
|
+
- `interf test --keep-sandboxes` keeps every sandbox, even successful ones
|
|
172
142
|
|
|
173
143
|
Each saved run includes:
|
|
174
144
|
|
|
175
|
-
- the
|
|
145
|
+
- whether the run tested `raw`, `workspace`, or both
|
|
176
146
|
- per-question results and traces
|
|
147
|
+
- the preserved sandbox path when one was kept
|
|
177
148
|
- the executor metadata for that run
|
|
178
149
|
|
|
179
|
-
If you run `interf test` from inside a workspace, it uses that workspace's checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
|
|
180
|
-
|
|
181
|
-
Live test runs use an isolated sandbox. For raw baselines, Interf gives the agent sanitized raw files only. For compiled-workspace tests, it gives the agent a copied workspace plus sanitized raw files. The source-folder control plane, `interf.config.json`, and saved test runs are not part of those sandboxes.
|
|
150
|
+
If you run `interf test` from inside a workspace, it uses that workspace's truth checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
|
|
182
151
|
|
|
183
152
|
If you need repeated isolated experiments across workflows or models, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
|
|
184
153
|
|
|
185
154
|
## What `interf compile` Does
|
|
186
155
|
|
|
187
|
-
`interf compile` runs the
|
|
156
|
+
`interf compile` runs the selected workflow over a dataset.
|
|
188
157
|
|
|
189
|
-
|
|
158
|
+
The built-in workflow:
|
|
190
159
|
|
|
191
160
|
- summarize the source files into per-file evidence notes
|
|
192
161
|
- structure the cross-file knowledge layer into entities, claims, and indexes
|
|
193
|
-
- shape the final workspace around its saved focus and
|
|
162
|
+
- shape the final workspace around its saved focus and truth checks
|
|
194
163
|
|
|
195
164
|
In other words, the built-in workflow is:
|
|
196
165
|
|
|
@@ -198,47 +167,30 @@ In other words, the built-in workflow is:
|
|
|
198
167
|
2. `structure`
|
|
199
168
|
3. `shape`
|
|
200
169
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
|
|
204
|
-
|
|
205
|
-
If a workspace has `retry_policy.max_attempts`, or if you run `interf compile --max-attempts <n>`, Interf can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, Interf keeps the best-performing compiled workspace from that run.
|
|
206
|
-
|
|
207
|
-
## What Gets Created
|
|
208
|
-
|
|
209
|
-
After compile, Interf writes into `./interf/` beside your source files.
|
|
210
|
-
|
|
211
|
-
- `interf/workspaces/<name>/` is a compiled workspace over the folder
|
|
212
|
-
- `interf/benchmarks/runs/...` stores saved test runs
|
|
213
|
-
|
|
214
|
-
Inside those workspaces you will see things like:
|
|
215
|
-
|
|
216
|
-
- summaries of source files
|
|
217
|
-
- navigation notes and entrypoints for agents
|
|
218
|
-
- cross-file knowledge notes
|
|
219
|
-
- workspace-specific outputs when you define a separate job-focused workspace
|
|
170
|
+
If you want a different method, you can define your own workflow and test it on the same dataset.
|
|
220
171
|
|
|
221
|
-
|
|
172
|
+
Under the hood, each workflow defines:
|
|
222
173
|
|
|
223
|
-
|
|
174
|
+
- `workflow.json` for stage order, compiler API target, and deterministic contract mapping
|
|
175
|
+
- `workspace.schema.json` for the deterministic output shape of the compiled workspace
|
|
176
|
+
- stage `reads` / `writes` declarations that reference schema-defined zone ids
|
|
177
|
+
- local `SKILL.md` files as the authoring source for query and stage-execution behavior
|
|
224
178
|
|
|
225
|
-
|
|
179
|
+
The compiler then projects that workflow into the native agent surfaces it actually runs:
|
|
226
180
|
|
|
227
|
-
|
|
181
|
+
- the compiled workspace gets a generated native query shell
|
|
182
|
+
- each compile stage gets a generated native execution shell
|
|
183
|
+
- that shell keeps its own `AGENTS.md`, `CLAUDE.md`, and native local skills
|
|
184
|
+
- schema-declared workspace zones are mounted both at their workflow-relative paths and as shell-local `inputs/<zone-id>` / `outputs/<zone-id>` aliases
|
|
185
|
+
- the workspace root itself is not linked into the shell
|
|
228
186
|
|
|
229
|
-
|
|
230
|
-
- `questions and expected answers` = the checks you want your agent to pass
|
|
231
|
-
- `checks` = the pass/fail questions each workspace should satisfy
|
|
232
|
-
- `test` = run the saved questions and get a score
|
|
233
|
-
- `compiled workspace` = the output Interf produces on top of a folder
|
|
234
|
-
- `workspace` = one compiled setup with its own checks
|
|
187
|
+
If a workspace sets `max_attempts` in `interf.config.json`, or if you run `interf compile --max-attempts <n>`, the compiler can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If several attempts fail, it keeps the best-performing compiled workspace from that run.
|
|
235
188
|
|
|
236
|
-
|
|
189
|
+
For stage-level review:
|
|
237
190
|
|
|
238
|
-
-
|
|
239
|
-
-
|
|
240
|
-
- `
|
|
241
|
-
- `.interf/` = runtime state, proofs, and health artifacts
|
|
191
|
+
- successful stage shells are pruned by default
|
|
192
|
+
- failed stage shells stay under `.interf/execution-shells/`
|
|
193
|
+
- `interf compile --keep-stage-shells` keeps every stage shell so you can inspect the exact native instruction surface, mounted inputs, and mounted outputs for each stage
|
|
242
194
|
|
|
243
195
|
## Advanced: Separate Workspaces
|
|
244
196
|
|
|
@@ -253,31 +205,32 @@ Create another only when you want a different compiled setup with different chec
|
|
|
253
205
|
|
|
254
206
|
Why create another one:
|
|
255
207
|
|
|
256
|
-
- it keeps a separate set of
|
|
208
|
+
- it keeps a separate set of truth checks
|
|
257
209
|
- it gives that job its own compiled output under `interf/workspaces/<name>/`
|
|
258
210
|
- it lets you test that job separately
|
|
259
211
|
|
|
260
212
|
## Advanced: Keep Improving Until It Passes
|
|
261
213
|
|
|
262
|
-
Interf also supports a deeper loop above the normal compile + test flow.
|
|
214
|
+
Interf Compiler also supports a deeper loop above the normal compile + test flow.
|
|
263
215
|
|
|
264
|
-
The normal workspace flow already supports `
|
|
216
|
+
The normal workspace flow already supports `max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
|
|
265
217
|
|
|
266
|
-
Give it the same
|
|
218
|
+
Give it the same dataset and the same truth checks. The compiler can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
|
|
267
219
|
|
|
268
|
-
That loop is the self-improving part
|
|
220
|
+
That loop is the self-improving part:
|
|
269
221
|
|
|
270
|
-
- it reruns the
|
|
271
|
-
- it keeps the checks fixed, so the target does not move
|
|
222
|
+
- it reruns the same workflow over the same dataset
|
|
223
|
+
- it keeps the truth checks fixed, so the target does not move
|
|
224
|
+
- it keeps the measurement fixed, so attempts stay comparable
|
|
272
225
|
- it can vary the compile profile and follow-up diagnostics
|
|
273
226
|
- it records which attempt performed best on the same saved test
|
|
274
227
|
|
|
275
228
|
In practice:
|
|
276
229
|
|
|
277
|
-
- `
|
|
230
|
+
- `max_attempts` controls how many total attempts a normal workspace compile gets
|
|
278
231
|
- `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets in eval packs
|
|
279
232
|
- stronger diagnostic profiles can be used only after the default ones fail
|
|
280
|
-
- the checks stay the same across every attempt
|
|
233
|
+
- the truth checks stay the same across every attempt
|
|
281
234
|
- each attempt records what changed and which attempt performed best
|
|
282
235
|
|
|
283
236
|
Example eval-pack shape:
|
|
@@ -286,10 +239,8 @@ Example eval-pack shape:
|
|
|
286
239
|
{
|
|
287
240
|
"workspaces": [
|
|
288
241
|
{
|
|
289
|
-
"name": "
|
|
290
|
-
"
|
|
291
|
-
"max_attempts": 3
|
|
292
|
-
},
|
|
242
|
+
"name": "my-workspace",
|
|
243
|
+
"max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
|
|
293
244
|
"checks": [
|
|
294
245
|
{
|
|
295
246
|
"question": "What full-year revenue range did the company maintain?",
|
|
@@ -304,38 +255,38 @@ Example eval-pack shape:
|
|
|
304
255
|
}
|
|
305
256
|
```
|
|
306
257
|
|
|
307
|
-
Use the normal workspace retry loop first. Use the eval-pack path when you want Interf to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
|
|
258
|
+
Use the normal workspace retry loop first. Use the eval-pack path when you want Interf Compiler to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
|
|
308
259
|
|
|
309
260
|
## Use It With Your Agent
|
|
310
261
|
|
|
311
|
-
If you already work through
|
|
262
|
+
If you already work through a local coding agent, it can run this process for you.
|
|
312
263
|
|
|
313
264
|
Paste something like this into your agent:
|
|
314
265
|
|
|
315
266
|
```text
|
|
316
267
|
Install @interf/compiler, run `interf` in this folder, and use the local agent executor.
|
|
317
268
|
|
|
318
|
-
If `interf.config.json` is missing, draft one workspace with a few checks this agent should be able to answer from
|
|
269
|
+
If `interf.config.json` is missing, draft one workspace with a few truth checks this agent should be able to answer from this dataset and add the expected answers for me to confirm.
|
|
319
270
|
|
|
320
271
|
Then run a raw baseline if helpful, compile the workspace, and run `interf test`.
|
|
321
272
|
|
|
322
|
-
Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
|
|
273
|
+
Tell me whether the compiled workspace passes the truth checks, and only recommend it if it does.
|
|
323
274
|
```
|
|
324
275
|
|
|
325
276
|
## Custom Workflows
|
|
326
277
|
|
|
327
|
-
Interf ships with a default workflow.
|
|
278
|
+
Interf Compiler ships with a default workflow.
|
|
328
279
|
|
|
329
|
-
If you want to change how the
|
|
280
|
+
If you want to change how the workflow runs on your dataset, this is the part you customize:
|
|
330
281
|
|
|
331
282
|
```bash
|
|
332
283
|
interf create workflow
|
|
333
284
|
interf verify workflow --path <path>
|
|
334
285
|
```
|
|
335
286
|
|
|
336
|
-
Then
|
|
287
|
+
Then test that workflow on the same dataset and the same truth checks.
|
|
337
288
|
|
|
338
|
-
Workflow
|
|
289
|
+
Workflow docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
|
|
339
290
|
|
|
340
291
|
## Core Commands
|
|
341
292
|
|
|
@@ -344,7 +295,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
|
|
|
344
295
|
- `interf create workspace` = create another compiled workspace when you need one
|
|
345
296
|
- `interf create workflow` = create a reusable local workflow package
|
|
346
297
|
- `interf compile` = build a selected workspace for the current folder
|
|
347
|
-
- `interf test` = test the raw files, a compiled workspace, or both on saved checks
|
|
298
|
+
- `interf test` = test the raw files, a compiled workspace, or both on saved truth checks
|
|
348
299
|
- `interf benchmark` = alias for `interf test`
|
|
349
300
|
- `interf doctor` = check local executor setup
|
|
350
301
|
- `interf verify <check>` = run deterministic checks on major workflow steps
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
|
|
2
2
|
import type { SourceWorkspaceConfig } from "../lib/schema.js";
|
|
3
3
|
import type { CommandModule } from "yargs";
|
|
4
|
+
import type { StageShellRetentionMode } from "../lib/workflows.js";
|
|
4
5
|
export declare const compileCommand: CommandModule;
|
|
5
6
|
export declare function runCompileCommand(argv?: Record<string, unknown>): Promise<void>;
|
|
6
7
|
export declare function runConfiguredWorkspaceCompile(options: {
|
|
@@ -10,5 +11,6 @@ export declare function runConfiguredWorkspaceCompile(options: {
|
|
|
10
11
|
workspaceConfig: SourceWorkspaceConfig | null;
|
|
11
12
|
executionProfile?: WorkflowExecutionProfile;
|
|
12
13
|
maxAttemptsOverride: number | null;
|
|
14
|
+
preserveStageShells?: StageShellRetentionMode;
|
|
13
15
|
}): Promise<boolean>;
|
|
14
16
|
//# sourceMappingURL=compile.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAY3C,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAEnE,eAAO,MAAM,cAAc,EAAE,aAkB5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgFzF;AAuDD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,mBAAmB,CAAC,EAAE,uBAAuB,CAAC;CAC/C,GACA,OAAO,CAAC,OAAO,CAAC,CA4HlB"}
|
package/dist/commands/compile.js
CHANGED
|
@@ -3,21 +3,25 @@ import { tmpdir } from "node:os";
|
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import chalk from "chalk";
|
|
5
5
|
import * as p from "@clack/prompts";
|
|
6
|
-
import { detectInterf, readInterfConfig,
|
|
6
|
+
import { detectInterf, readInterfConfig, resolveSourceControlPath, } from "../lib/interf.js";
|
|
7
7
|
import { findSourceWorkspaceConfig, loadSourceFolderConfig, resolveWorkspaceCompileMaxAttempts, } from "../lib/source-config.js";
|
|
8
8
|
import { resetWorkspaceGeneratedState } from "../lib/workspace-reset.js";
|
|
9
9
|
import { formatWorkspaceWorkflowStageStep, resolveWorkspaceWorkflowFromConfig, } from "../lib/workflow-definitions.js";
|
|
10
10
|
import { addExecutionProfileOptions, executionProfileFromArgv, } from "../lib/execution-profile.js";
|
|
11
11
|
import { chooseWorkspaceConfigToBuild, compileWorkspaceWithReporter, ensureWorkspaceFromConfig, } from "./workspace-flow.js";
|
|
12
12
|
import { resolveOrConfigureLocalExecutor } from "./executor-flow.js";
|
|
13
|
-
import { printSavedTestOutcome, runSavedWorkspaceTest } from "./test-flow.js";
|
|
13
|
+
import { printSavedTestOutcome, questionPassRate, runSavedWorkspaceTest } from "./test-flow.js";
|
|
14
14
|
export const compileCommand = {
|
|
15
15
|
command: "compile",
|
|
16
|
-
describe: "Build a workspace for this
|
|
16
|
+
describe: "Build a workspace for this dataset",
|
|
17
17
|
builder: (yargs) => addExecutionProfileOptions(yargs).option("max-attempts", {
|
|
18
18
|
alias: "max-retries",
|
|
19
19
|
type: "number",
|
|
20
20
|
describe: "Compile, test, and retry until the workspace passes or reaches this total attempt limit",
|
|
21
|
+
}).option("keep-stage-shells", {
|
|
22
|
+
type: "boolean",
|
|
23
|
+
default: false,
|
|
24
|
+
describe: "Keep every executed stage shell under .interf/execution-shells for review instead of pruning successful shells",
|
|
21
25
|
}),
|
|
22
26
|
handler: async (argv) => {
|
|
23
27
|
await runCompileCommand(argv);
|
|
@@ -30,7 +34,7 @@ export async function runCompileCommand(argv = {}) {
|
|
|
30
34
|
const detected = detectInterf(process.cwd());
|
|
31
35
|
if (detected) {
|
|
32
36
|
workspacePath = detected.path;
|
|
33
|
-
sourcePath =
|
|
37
|
+
sourcePath = resolveSourceControlPath(detected.path);
|
|
34
38
|
workspaceConfig = findSourceWorkspaceConfig(loadSourceFolderConfig(sourcePath), detected.config.name) ?? {
|
|
35
39
|
name: detected.config.name,
|
|
36
40
|
...(detected.config.about ? { about: detected.config.about } : {}),
|
|
@@ -99,6 +103,7 @@ export async function runCompileCommand(argv = {}) {
|
|
|
99
103
|
workspaceConfig,
|
|
100
104
|
executionProfile,
|
|
101
105
|
maxAttemptsOverride,
|
|
106
|
+
preserveStageShells: readStageShellRetentionMode(argv),
|
|
102
107
|
});
|
|
103
108
|
}
|
|
104
109
|
function readCompileMaxAttemptsOverride(argv) {
|
|
@@ -114,15 +119,21 @@ function readCompileMaxAttemptsOverride(argv) {
|
|
|
114
119
|
}
|
|
115
120
|
return parsed;
|
|
116
121
|
}
|
|
122
|
+
function readStageShellRetentionMode(argv) {
|
|
123
|
+
const enabled = argv["keep-stage-shells"] ??
|
|
124
|
+
argv.keepStageShells ??
|
|
125
|
+
false;
|
|
126
|
+
return enabled ? "always" : "on-failure";
|
|
127
|
+
}
|
|
117
128
|
function printCompileFailure(workspacePath, failedStage) {
|
|
118
129
|
const workflowId = resolveWorkspaceWorkflowFromConfig(readInterfConfig(workspacePath));
|
|
119
130
|
const failedStageLabel = formatWorkspaceWorkflowStageStep(workflowId, failedStage ?? "compile", {
|
|
120
|
-
sourcePath:
|
|
131
|
+
sourcePath: resolveSourceControlPath(workspacePath),
|
|
121
132
|
});
|
|
122
133
|
console.log(chalk.red(` ${failedStageLabel} failed.`));
|
|
123
134
|
}
|
|
124
135
|
function testScore(outcome) {
|
|
125
|
-
return (outcome.result.
|
|
136
|
+
return (outcome.result.passedCases * 1000) + outcome.result.passedChecks;
|
|
126
137
|
}
|
|
127
138
|
function snapshotWorkspace(workspacePath) {
|
|
128
139
|
const snapshotRoot = mkdtempSync(join(tmpdir(), "interf-compile-attempt-"));
|
|
@@ -135,23 +146,28 @@ function restoreWorkspaceSnapshot(snapshotPath, workspacePath) {
|
|
|
135
146
|
cpSync(snapshotPath, workspacePath, { recursive: true });
|
|
136
147
|
}
|
|
137
148
|
export async function runConfiguredWorkspaceCompile(options) {
|
|
138
|
-
const
|
|
149
|
+
const preserveStageShells = options.preserveStageShells ?? "on-failure";
|
|
150
|
+
const maxAttempts = resolveWorkspaceCompileMaxAttempts(options.workspaceConfig ?? { max_attempts: undefined }, options.maxAttemptsOverride);
|
|
139
151
|
const loopEnabled = maxAttempts != null;
|
|
140
152
|
const checks = options.workspaceConfig?.checks ?? [];
|
|
141
153
|
if (loopEnabled && checks.length === 0) {
|
|
142
|
-
console.log(chalk.yellow(" Self-improving mode needs saved
|
|
154
|
+
console.log(chalk.yellow(" Self-improving mode needs saved truth checks. Building once without the retry loop."));
|
|
143
155
|
}
|
|
144
156
|
else if (loopEnabled) {
|
|
145
157
|
console.log(chalk.dim(` Self-improving mode: up to ${maxAttempts} compile attempts.`));
|
|
146
|
-
console.log(chalk.dim(" Interf will
|
|
158
|
+
console.log(chalk.dim(" Interf Compiler will run the workflow, test the workspace, and retry until it passes or reaches the limit."));
|
|
147
159
|
}
|
|
148
160
|
if (!loopEnabled || checks.length === 0) {
|
|
149
|
-
const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath
|
|
161
|
+
const result = await compileWorkspaceWithReporter(options.executor, options.workspacePath, {
|
|
162
|
+
preserveStageShells,
|
|
163
|
+
});
|
|
150
164
|
if (!result.ok) {
|
|
151
165
|
process.exitCode = 1;
|
|
152
166
|
printCompileFailure(options.workspacePath, result.failedStage);
|
|
167
|
+
printStageShellReviewHint(options.workspacePath, preserveStageShells);
|
|
153
168
|
return false;
|
|
154
169
|
}
|
|
170
|
+
printStageShellReviewHint(options.workspacePath, preserveStageShells);
|
|
155
171
|
return true;
|
|
156
172
|
}
|
|
157
173
|
let bestOutcome = null;
|
|
@@ -171,9 +187,11 @@ export async function runConfiguredWorkspaceCompile(options) {
|
|
|
171
187
|
successMessage: maxAttempts > 1
|
|
172
188
|
? `Compiled workspace ready for attempt ${attempt}.`
|
|
173
189
|
: "Compiled workspace ready.",
|
|
190
|
+
preserveStageShells,
|
|
174
191
|
});
|
|
175
192
|
if (!result.ok) {
|
|
176
193
|
printCompileFailure(options.workspacePath, result.failedStage);
|
|
194
|
+
printStageShellReviewHint(options.workspacePath, preserveStageShells);
|
|
177
195
|
if (attempt < maxAttempts) {
|
|
178
196
|
console.log(chalk.yellow(` Attempt ${attempt}/${maxAttempts} failed. Retrying with a fresh compile.`));
|
|
179
197
|
continue;
|
|
@@ -189,6 +207,7 @@ export async function runConfiguredWorkspaceCompile(options) {
|
|
|
189
207
|
},
|
|
190
208
|
executionProfile: options.executionProfile,
|
|
191
209
|
workspacePath: options.workspacePath,
|
|
210
|
+
preserveSandboxes: preserveStageShells === "always" ? "always" : "on-failure",
|
|
192
211
|
});
|
|
193
212
|
if (!outcome) {
|
|
194
213
|
process.exitCode = 1;
|
|
@@ -208,6 +227,7 @@ export async function runConfiguredWorkspaceCompile(options) {
|
|
|
208
227
|
if (outcome.result.ok) {
|
|
209
228
|
console.log();
|
|
210
229
|
console.log(chalk.green(` Workspace passed on attempt ${attempt}/${maxAttempts}.`));
|
|
230
|
+
printStageShellReviewHint(options.workspacePath, preserveStageShells);
|
|
211
231
|
return true;
|
|
212
232
|
}
|
|
213
233
|
if (attempt < maxAttempts) {
|
|
@@ -223,6 +243,10 @@ export async function runConfiguredWorkspaceCompile(options) {
|
|
|
223
243
|
process.exitCode = 1;
|
|
224
244
|
console.log();
|
|
225
245
|
console.log(chalk.red(` Workspace did not pass within ${maxAttempts} attempts.`));
|
|
246
|
+
if (bestOutcome) {
|
|
247
|
+
console.log(chalk.dim(` Best attempt truth-check pass rate: ${questionPassRate(bestOutcome)}%.`));
|
|
248
|
+
}
|
|
249
|
+
printStageShellReviewHint(options.workspacePath, preserveStageShells);
|
|
226
250
|
return false;
|
|
227
251
|
}
|
|
228
252
|
finally {
|
|
@@ -231,4 +255,12 @@ export async function runConfiguredWorkspaceCompile(options) {
|
|
|
231
255
|
}
|
|
232
256
|
}
|
|
233
257
|
}
|
|
258
|
+
function printStageShellReviewHint(workspacePath, preserveStageShells) {
|
|
259
|
+
const reviewRoot = join(workspacePath, ".interf", "execution-shells");
|
|
260
|
+
if (preserveStageShells === "always") {
|
|
261
|
+
console.log(chalk.dim(` Preserved stage shells: ${reviewRoot}`));
|
|
262
|
+
return;
|
|
263
|
+
}
|
|
264
|
+
console.log(chalk.dim(` Failed stage shells remain under: ${reviewRoot}`));
|
|
265
|
+
}
|
|
234
266
|
//# sourceMappingURL=compile.js.map
|