@interf/compiler 0.3.1 → 0.3.3
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -54
- package/dist/commands/compile.d.ts +12 -0
- package/dist/commands/compile.d.ts.map +1 -1
- package/dist/commands/compile.js +233 -29
- package/dist/commands/compile.js.map +1 -1
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +22 -13
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/default.js +1 -1
- package/dist/commands/default.js.map +1 -1
- package/dist/commands/executor-flow.d.ts +2 -0
- package/dist/commands/executor-flow.d.ts.map +1 -1
- package/dist/commands/executor-flow.js +71 -22
- package/dist/commands/executor-flow.js.map +1 -1
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +60 -22
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/reset.d.ts.map +1 -1
- package/dist/commands/reset.js +2 -18
- package/dist/commands/reset.js.map +1 -1
- package/dist/commands/source-config-wizard.d.ts +2 -1
- package/dist/commands/source-config-wizard.d.ts.map +1 -1
- package/dist/commands/source-config-wizard.js +87 -16
- package/dist/commands/source-config-wizard.js.map +1 -1
- package/dist/commands/test-flow.d.ts +5 -0
- package/dist/commands/test-flow.d.ts.map +1 -1
- package/dist/commands/test-flow.js +48 -14
- package/dist/commands/test-flow.js.map +1 -1
- package/dist/commands/test.d.ts.map +1 -1
- package/dist/commands/test.js +18 -5
- package/dist/commands/test.js.map +1 -1
- package/dist/commands/workspace-flow.d.ts +3 -1
- package/dist/commands/workspace-flow.d.ts.map +1 -1
- package/dist/commands/workspace-flow.js +4 -3
- package/dist/commands/workspace-flow.js.map +1 -1
- package/dist/lib/agent-shells.d.ts +17 -0
- package/dist/lib/agent-shells.d.ts.map +1 -0
- package/dist/lib/agent-shells.js +294 -0
- package/dist/lib/agent-shells.js.map +1 -0
- package/dist/lib/benchmark-execution.d.ts +5 -1
- package/dist/lib/benchmark-execution.d.ts.map +1 -1
- package/dist/lib/benchmark-execution.js +34 -12
- package/dist/lib/benchmark-execution.js.map +1 -1
- package/dist/lib/benchmark-paths.d.ts +2 -0
- package/dist/lib/benchmark-paths.d.ts.map +1 -1
- package/dist/lib/benchmark-paths.js +6 -0
- package/dist/lib/benchmark-paths.js.map +1 -1
- package/dist/lib/benchmark-sandbox.d.ts +2 -0
- package/dist/lib/benchmark-sandbox.d.ts.map +1 -1
- package/dist/lib/benchmark-sandbox.js +68 -37
- package/dist/lib/benchmark-sandbox.js.map +1 -1
- package/dist/lib/benchmark-targets.js +1 -1
- package/dist/lib/benchmark-targets.js.map +1 -1
- package/dist/lib/interf-bootstrap.d.ts +2 -13
- package/dist/lib/interf-bootstrap.d.ts.map +1 -1
- package/dist/lib/interf-bootstrap.js +7 -164
- package/dist/lib/interf-bootstrap.js.map +1 -1
- package/dist/lib/interf-detect.d.ts +1 -0
- package/dist/lib/interf-detect.d.ts.map +1 -1
- package/dist/lib/interf-detect.js +5 -18
- package/dist/lib/interf-detect.js.map +1 -1
- package/dist/lib/interf-scaffold.d.ts.map +1 -1
- package/dist/lib/interf-scaffold.js +7 -71
- package/dist/lib/interf-scaffold.js.map +1 -1
- package/dist/lib/interf-workflow-package.d.ts.map +1 -1
- package/dist/lib/interf-workflow-package.js +20 -25
- package/dist/lib/interf-workflow-package.js.map +1 -1
- package/dist/lib/interf.d.ts +3 -2
- package/dist/lib/interf.d.ts.map +1 -1
- package/dist/lib/interf.js +3 -2
- package/dist/lib/interf.js.map +1 -1
- package/dist/lib/local-workflows.d.ts +6 -1
- package/dist/lib/local-workflows.d.ts.map +1 -1
- package/dist/lib/local-workflows.js +143 -2
- package/dist/lib/local-workflows.js.map +1 -1
- package/dist/lib/runtime-contracts.d.ts.map +1 -1
- package/dist/lib/runtime-contracts.js +10 -4
- package/dist/lib/runtime-contracts.js.map +1 -1
- package/dist/lib/runtime-prompt.d.ts.map +1 -1
- package/dist/lib/runtime-prompt.js +1 -0
- package/dist/lib/runtime-prompt.js.map +1 -1
- package/dist/lib/runtime-runs.d.ts.map +1 -1
- package/dist/lib/runtime-runs.js +6 -2
- package/dist/lib/runtime-runs.js.map +1 -1
- package/dist/lib/runtime-types.d.ts +1 -0
- package/dist/lib/runtime-types.d.ts.map +1 -1
- package/dist/lib/schema.d.ts +88 -13
- package/dist/lib/schema.d.ts.map +1 -1
- package/dist/lib/schema.js +66 -21
- package/dist/lib/schema.js.map +1 -1
- package/dist/lib/source-config.d.ts +2 -0
- package/dist/lib/source-config.d.ts.map +1 -1
- package/dist/lib/source-config.js +19 -1
- package/dist/lib/source-config.js.map +1 -1
- package/dist/lib/state-artifacts.d.ts +2 -2
- package/dist/lib/state-artifacts.d.ts.map +1 -1
- package/dist/lib/state-artifacts.js +3 -3
- package/dist/lib/state-artifacts.js.map +1 -1
- package/dist/lib/state-io.d.ts +2 -2
- package/dist/lib/state-io.d.ts.map +1 -1
- package/dist/lib/state-io.js +5 -5
- package/dist/lib/state-io.js.map +1 -1
- package/dist/lib/state-paths.d.ts +1 -1
- package/dist/lib/state-paths.d.ts.map +1 -1
- package/dist/lib/state-paths.js +3 -3
- package/dist/lib/state-paths.js.map +1 -1
- package/dist/lib/state-view.d.ts +2 -2
- package/dist/lib/state-view.d.ts.map +1 -1
- package/dist/lib/state-view.js +6 -7
- package/dist/lib/state-view.js.map +1 -1
- package/dist/lib/state.d.ts +4 -4
- package/dist/lib/state.d.ts.map +1 -1
- package/dist/lib/state.js +3 -3
- package/dist/lib/state.js.map +1 -1
- package/dist/lib/workflow-definitions.d.ts +4 -1
- package/dist/lib/workflow-definitions.d.ts.map +1 -1
- package/dist/lib/workflow-definitions.js +38 -3
- package/dist/lib/workflow-definitions.js.map +1 -1
- package/dist/lib/workflow-stage-runner.d.ts +1 -0
- package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
- package/dist/lib/workflow-stage-runner.js +2 -0
- package/dist/lib/workflow-stage-runner.js.map +1 -1
- package/dist/lib/workflows.d.ts +1 -1
- package/dist/lib/workflows.d.ts.map +1 -1
- package/dist/lib/workspace-compile.d.ts +4 -0
- package/dist/lib/workspace-compile.d.ts.map +1 -1
- package/dist/lib/workspace-compile.js +108 -66
- package/dist/lib/workspace-compile.js.map +1 -1
- package/dist/lib/workspace-docs.d.ts +3 -0
- package/dist/lib/workspace-docs.d.ts.map +1 -0
- package/dist/lib/workspace-docs.js +82 -0
- package/dist/lib/workspace-docs.js.map +1 -0
- package/dist/lib/workspace-raw.d.ts +30 -0
- package/dist/lib/workspace-raw.d.ts.map +1 -0
- package/dist/lib/workspace-raw.js +102 -0
- package/dist/lib/workspace-raw.js.map +1 -0
- package/dist/lib/workspace-reset.d.ts +2 -0
- package/dist/lib/workspace-reset.d.ts.map +1 -0
- package/dist/lib/workspace-reset.js +21 -0
- package/dist/lib/workspace-reset.js.map +1 -0
- package/dist/lib/workspace-schema.d.ts +26 -0
- package/dist/lib/workspace-schema.d.ts.map +1 -0
- package/dist/lib/workspace-schema.js +132 -0
- package/dist/lib/workspace-schema.js.map +1 -0
- package/package.json +1 -1
- package/skills/workflow/create/SKILL.md +19 -0
- package/skills/workspace/shape/SKILL.md +1 -1
- package/templates/workspace/README.md +2 -1
package/README.md
CHANGED
|
@@ -1,19 +1,29 @@
|
|
|
1
1
|
# Interf
|
|
2
2
|
|
|
3
|
-
Open-source
|
|
3
|
+
Open-source toolkit for preparing local files for agents.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Turn PDFs, docs, spreadsheets, and notes into a local workspace your agent can navigate, verify, and answer from.
|
|
6
6
|
|
|
7
7
|
If you use Claude Code, Codex, OpenClaw, Hermes, or your own local agent setup on folders full of PDFs, docs, spreadsheets, and notes, the failure often shows up late: missed evidence, shallow analysis, bad comparisons, or answers that sound confident but are wrong.
|
|
8
8
|
|
|
9
|
-
Interf lets you
|
|
9
|
+
Interf lets you set a few truth checks on a dataset, measure the raw baseline first if you want it, compile a workspace on top of that dataset, and see whether the result actually passes.
|
|
10
10
|
|
|
11
|
-
- your
|
|
12
|
-
-
|
|
11
|
+
- your dataset stays on your machine
|
|
12
|
+
- BYOAI: use Claude Code, Codex, OpenClaw, Hermes, or your own local setup
|
|
13
13
|
- your raw files stay the source of truth
|
|
14
|
-
-
|
|
14
|
+
- the workflow package is the reusable method
|
|
15
|
+
- the compiler runtime executes that workflow package and builds a file-based layer on top
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
`interf compile` runs a workflow package with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
|
|
18
|
+
|
|
19
|
+
Each compiled workspace carries its own `raw/` snapshot, so agents can work from one self-contained folder instead of reaching back into the source-folder control plane.
|
|
20
|
+
|
|
21
|
+
Interf also projects native agent shells from that workspace:
|
|
22
|
+
|
|
23
|
+
- the compiled workspace itself is the folder your agent works from
|
|
24
|
+
- each compile stage runs inside its own ephemeral execution shell with stage-specific instructions and mounted zone aliases under `inputs/` and `outputs/`
|
|
25
|
+
|
|
26
|
+
The main reusable artifact is the workflow package. In the advanced looped mode, Interf can keep rerunning that workflow package against the same dataset and truth checks until it either passes or exhausts the attempt budget.
|
|
17
27
|
|
|
18
28
|
## Quick Start
|
|
19
29
|
|
|
@@ -37,12 +47,13 @@ interf
|
|
|
37
47
|
|
|
38
48
|
If you want to see the config shape first, this is what Interf writes:
|
|
39
49
|
|
|
40
|
-
```
|
|
50
|
+
```jsonc
|
|
41
51
|
{
|
|
42
52
|
"workspaces": [
|
|
43
53
|
{
|
|
44
|
-
"name": "
|
|
54
|
+
"name": "my-workspace",
|
|
45
55
|
"about": "General compiled workspace for the quarterly results folder.",
|
|
56
|
+
"max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
|
|
46
57
|
"checks": [
|
|
47
58
|
{
|
|
48
59
|
"question": "What full-year revenue range did the company maintain?",
|
|
@@ -68,16 +79,25 @@ interf test
|
|
|
68
79
|
|
|
69
80
|
The first guided run can:
|
|
70
81
|
|
|
71
|
-
- save a few
|
|
82
|
+
- save a few truth checks for the dataset in this folder
|
|
72
83
|
- run a baseline test on the raw files
|
|
73
84
|
- compile the workspace
|
|
85
|
+
- optionally keep compiling and retesting until it passes or reaches the attempt limit
|
|
74
86
|
- run the same test against the compiled workspace
|
|
75
87
|
|
|
76
88
|
That gives you three concrete things:
|
|
77
89
|
|
|
78
|
-
- `interf/workspaces/
|
|
90
|
+
- `interf/workspaces/my-workspace/` with the compiled workspace for your dataset
|
|
79
91
|
- `interf/benchmarks/runs/...` with the saved test result
|
|
80
|
-
- a pass/fail score on the same
|
|
92
|
+
- a pass/fail score on the same truth checks you wrote
|
|
93
|
+
|
|
94
|
+
Saved test runs keep the details you need later:
|
|
95
|
+
|
|
96
|
+
- whether the run tested `raw`, `workspace`, or both
|
|
97
|
+
- per-question pass/fail results
|
|
98
|
+
- the saved run path under `interf/benchmarks/runs/...`
|
|
99
|
+
- the preserved sandbox path when a failed run is kept for review or you use `interf test --keep-sandboxes`
|
|
100
|
+
- executor metadata such as agent, command, model, effort, and profile when available
|
|
81
101
|
|
|
82
102
|
If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
|
|
83
103
|
|
|
@@ -87,7 +107,7 @@ interf doctor
|
|
|
87
107
|
|
|
88
108
|
The first flow is:
|
|
89
109
|
|
|
90
|
-
- write down a few
|
|
110
|
+
- write down a few truth checks your agent should be able to pass on the dataset
|
|
91
111
|
- let `interf` or `interf init` save those checks in `interf.config.json`
|
|
92
112
|
- optionally run a baseline test on the raw files
|
|
93
113
|
- run `interf compile` to build the compiled workspace
|
|
@@ -100,7 +120,7 @@ The first flow is:
|
|
|
100
120
|
Interf is built around a few simple design principles:
|
|
101
121
|
|
|
102
122
|
- `Explicit`: the output is visible and inspectable, not hidden memory
|
|
103
|
-
- `Local`: your
|
|
123
|
+
- `Local`: your dataset stays on your machine
|
|
104
124
|
- `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
|
|
105
125
|
- `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
|
|
106
126
|
|
|
@@ -116,22 +136,22 @@ interf compile
|
|
|
116
136
|
interf test
|
|
117
137
|
```
|
|
118
138
|
|
|
119
|
-
## Start With
|
|
139
|
+
## Start With Your Own Truth Checks
|
|
140
|
+
|
|
141
|
+
Start with your own truth checks: questions where you already know the correct answer from the dataset.
|
|
120
142
|
|
|
121
|
-
`interf.config.json` is where you
|
|
143
|
+
`interf.config.json` is where you save those truth checks for a dataset folder.
|
|
122
144
|
|
|
123
145
|
That file uses one `workspaces` array:
|
|
124
146
|
|
|
125
147
|
- most folders only need one workspace
|
|
126
|
-
- add another workspace only if you want a separate compiled setup with different checks
|
|
148
|
+
- add another workspace only if you want a separate compiled setup with different truth checks
|
|
127
149
|
- each workspace carries its own `checks`
|
|
150
|
+
- each workspace can optionally carry `max_attempts` for the self-improving compile loop
|
|
128
151
|
|
|
129
152
|
If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
|
|
130
153
|
|
|
131
|
-
|
|
132
|
-
Advanced retry settings do not live there.
|
|
133
|
-
|
|
134
|
-
Good first checks are small and practical:
|
|
154
|
+
Good first truth checks are small and practical:
|
|
135
155
|
|
|
136
156
|
- one exact number from a chart, table, or filing
|
|
137
157
|
- one short statement that should be true or false
|
|
@@ -144,34 +164,49 @@ interf compile
|
|
|
144
164
|
interf test
|
|
145
165
|
```
|
|
146
166
|
|
|
147
|
-
## What `interf test`
|
|
167
|
+
## What `interf test` Does
|
|
148
168
|
|
|
149
|
-
`interf test` scores either the raw files, a compiled workspace, or both on the same saved
|
|
169
|
+
`interf test` scores either the raw files, a compiled workspace, or both on the same saved truth checks.
|
|
150
170
|
|
|
151
171
|
It lets you answer a simple question:
|
|
152
172
|
|
|
153
173
|
- what is the current baseline on the raw files?
|
|
154
174
|
- does this compiled workspace improve on that baseline?
|
|
155
|
-
- which compiled workspace or workflow performs better on the same
|
|
156
|
-
- does a separate workspace with different checks work better for that job?
|
|
175
|
+
- which compiled workspace or workflow performs better on the same dataset?
|
|
176
|
+
- does a separate workspace with different truth checks work better for that job?
|
|
177
|
+
|
|
178
|
+
By default it loads truth checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
|
|
179
|
+
|
|
180
|
+
For live runs:
|
|
181
|
+
|
|
182
|
+
- raw tests execute from a sanitized raw-only sandbox
|
|
183
|
+
- compiled-workspace tests execute from a copied workspace sandbox with embedded sanitized `raw/`
|
|
184
|
+
- neither sandbox includes `interf.config.json` or the source-folder `interf/` control plane
|
|
185
|
+
- failed test sandboxes are kept automatically for review
|
|
186
|
+
- `interf test --keep-sandboxes` keeps every sandbox, even successful ones
|
|
157
187
|
|
|
158
|
-
|
|
188
|
+
Each saved run includes:
|
|
159
189
|
|
|
160
|
-
|
|
190
|
+
- the benchmark target and mode
|
|
191
|
+
- per-question results and traces
|
|
192
|
+
- the preserved sandbox path when one was kept
|
|
193
|
+
- the executor metadata for that run
|
|
161
194
|
|
|
162
|
-
|
|
195
|
+
If you run `interf test` from inside a workspace, it uses that workspace's truth checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
|
|
196
|
+
|
|
197
|
+
Live test runs use isolated sandboxes. For raw baselines, Interf gives the agent sanitized raw files only. For compiled-workspace tests, it gives the agent a copied workspace with its own embedded, sanitized `raw/` fallback, reached via `source.path`. The source-folder control plane, `interf.config.json`, and saved test runs are not part of those sandboxes.
|
|
163
198
|
|
|
164
199
|
If you need repeated isolated experiments across workflows or models, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
|
|
165
200
|
|
|
166
201
|
## What `interf compile` Does
|
|
167
202
|
|
|
168
|
-
`interf compile` runs the
|
|
203
|
+
`interf compile` runs the selected workflow package over a dataset.
|
|
169
204
|
|
|
170
205
|
By default, that means:
|
|
171
206
|
|
|
172
207
|
- summarize the source files into per-file evidence notes
|
|
173
208
|
- structure the cross-file knowledge layer into entities, claims, and indexes
|
|
174
|
-
- shape the final workspace around its saved focus and
|
|
209
|
+
- shape the final workspace around its saved focus and truth checks
|
|
175
210
|
|
|
176
211
|
In other words, the built-in workflow is:
|
|
177
212
|
|
|
@@ -179,9 +214,30 @@ In other words, the built-in workflow is:
|
|
|
179
214
|
2. `structure`
|
|
180
215
|
3. `shape`
|
|
181
216
|
|
|
182
|
-
|
|
217
|
+
The default workflow is built in. If you want a different method, you can define your own workflow package and test it on the same dataset.
|
|
218
|
+
|
|
219
|
+
Each workflow package combines:
|
|
220
|
+
|
|
221
|
+
- `workflow.json` for stage order, compiler API target, and deterministic contract mapping
|
|
222
|
+
- `workspace.schema.json` for the deterministic output shape of the compiled workspace
|
|
223
|
+
- stage `reads` / `writes` declarations that reference schema-defined zone ids
|
|
224
|
+
- local `SKILL.md` files as the authoring source for query and stage-execution behavior
|
|
225
|
+
|
|
226
|
+
Interf then projects that package into the native agent surfaces it actually runs:
|
|
227
|
+
|
|
228
|
+
- the compiled workspace gets a generated native query shell
|
|
229
|
+
- each compile stage gets a generated native execution shell
|
|
230
|
+
- that shell keeps its own `AGENTS.md`, `CLAUDE.md`, and native local skills
|
|
231
|
+
- schema-declared workspace zones are mounted both at their workflow-relative paths and as shell-local `inputs/<zone-id>` / `outputs/<zone-id>` aliases
|
|
232
|
+
- the workspace root itself is not linked into the shell
|
|
233
|
+
|
|
234
|
+
If a workspace has `max_attempts`, or if you run `interf compile --max-attempts <n>`, Interf can keep compiling, testing, and retrying until that workspace passes or reaches the attempt limit. If the limit is reached without a passing attempt, Interf keeps the best-performing compiled workspace from that run.
|
|
183
235
|
|
|
184
|
-
|
|
236
|
+
For stage-level review:
|
|
237
|
+
|
|
238
|
+
- successful stage shells are pruned by default
|
|
239
|
+
- failed stage shells stay under `.interf/execution-shells/`
|
|
240
|
+
- `interf compile --keep-stage-shells` keeps every stage shell so you can inspect the exact native instruction surface, mounted inputs, and mounted outputs for each stage
|
|
185
241
|
|
|
186
242
|
## What Gets Created
|
|
187
243
|
|
|
@@ -192,6 +248,9 @@ After compile, Interf writes into `./interf/` beside your source files.
|
|
|
192
248
|
|
|
193
249
|
Inside those workspaces you will see things like:
|
|
194
250
|
|
|
251
|
+
- a workspace-local `raw/` snapshot for direct evidence and verification
|
|
252
|
+
- `workflow/workspace.schema.json` describing the deterministic output shape
|
|
253
|
+
- `AGENTS.md`, `CLAUDE.md`, and generated local query skills for manual agent use
|
|
195
254
|
- summaries of source files
|
|
196
255
|
- navigation notes and entrypoints for agents
|
|
197
256
|
- cross-file knowledge notes
|
|
@@ -199,24 +258,26 @@ Inside those workspaces you will see things like:
|
|
|
199
258
|
|
|
200
259
|
The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
|
|
201
260
|
|
|
261
|
+
For manual use, the workspace is the native agent shell. The editable authoring source for that shell lives under `workflow/use/query/`, and Interf generates native local query skills from it inside the workspace.
|
|
262
|
+
|
|
202
263
|
If you use Obsidian, open `interf/workspaces/<name>/` as the vault for the compiled workspace.
|
|
203
264
|
|
|
204
265
|
## Terminology
|
|
205
266
|
|
|
206
267
|
Public terms:
|
|
207
268
|
|
|
208
|
-
- `
|
|
209
|
-
- `
|
|
210
|
-
- `checks` = the
|
|
211
|
-
- `test` = run the saved
|
|
269
|
+
- `dataset` = the collection of files Interf prepares
|
|
270
|
+
- `truth check` = one question plus the expected correct answer
|
|
271
|
+
- `checks` = the config field that stores those truth checks
|
|
272
|
+
- `test` = run the saved truth checks and get a score
|
|
212
273
|
- `compiled workspace` = the output Interf produces on top of a folder
|
|
213
274
|
- `workspace` = one compiled setup with its own checks
|
|
214
275
|
|
|
215
276
|
Technical terms:
|
|
216
277
|
|
|
217
|
-
- `source folder` = the
|
|
278
|
+
- `source folder` = the dataset root Interf reads from
|
|
218
279
|
- `benchmark` = the technical alias and saved-run layer behind `interf test`
|
|
219
|
-
- `workflow` = the saved method that defines or customizes
|
|
280
|
+
- `workflow package` = the saved method that defines or customizes how compile runs
|
|
220
281
|
- `.interf/` = runtime state, proofs, and health artifacts
|
|
221
282
|
|
|
222
283
|
## Advanced: Separate Workspaces
|
|
@@ -232,22 +293,33 @@ Create another only when you want a different compiled setup with different chec
|
|
|
232
293
|
|
|
233
294
|
Why create another one:
|
|
234
295
|
|
|
235
|
-
- it keeps a separate set of
|
|
296
|
+
- it keeps a separate set of truth checks
|
|
236
297
|
- it gives that job its own compiled output under `interf/workspaces/<name>/`
|
|
237
298
|
- it lets you test that job separately
|
|
238
299
|
|
|
239
300
|
## Advanced: Keep Improving Until It Passes
|
|
240
301
|
|
|
241
|
-
Interf also supports
|
|
302
|
+
Interf also supports a deeper loop above the normal compile + test flow.
|
|
303
|
+
|
|
304
|
+
The normal workspace flow already supports `max_attempts` inside `interf.config.json` or `interf compile --max-attempts <n>`.
|
|
242
305
|
|
|
243
|
-
Give it the same
|
|
306
|
+
Give Interf the same dataset and the same truth checks, and it can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
|
|
307
|
+
|
|
308
|
+
That loop is the self-improving part of the product:
|
|
309
|
+
|
|
310
|
+
- it reruns the same workflow package over the same dataset
|
|
311
|
+
- it keeps the truth checks fixed, so the target does not move
|
|
312
|
+
- it keeps the measurement fixed, so attempts stay comparable
|
|
313
|
+
- it can vary the compile profile and follow-up diagnostics
|
|
314
|
+
- it records which attempt performed best on the same saved test
|
|
244
315
|
|
|
245
316
|
In practice:
|
|
246
317
|
|
|
247
|
-
- `
|
|
318
|
+
- `max_attempts` controls how many total attempts a normal workspace compile gets
|
|
319
|
+
- `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets in eval packs
|
|
248
320
|
- stronger diagnostic profiles can be used only after the default ones fail
|
|
249
|
-
- the checks stay the same across every attempt
|
|
250
|
-
- each attempt records what changed
|
|
321
|
+
- the truth checks stay the same across every attempt
|
|
322
|
+
- each attempt records what changed and which attempt performed best
|
|
251
323
|
|
|
252
324
|
Example eval-pack shape:
|
|
253
325
|
|
|
@@ -255,7 +327,8 @@ Example eval-pack shape:
|
|
|
255
327
|
{
|
|
256
328
|
"workspaces": [
|
|
257
329
|
{
|
|
258
|
-
"name": "
|
|
330
|
+
"name": "my-workspace",
|
|
331
|
+
"max_attempts": 3, // rerun compile + test until this workspace passes the saved truth checks or hits this limit
|
|
259
332
|
"checks": [
|
|
260
333
|
{
|
|
261
334
|
"question": "What full-year revenue range did the company maintain?",
|
|
@@ -264,16 +337,13 @@ Example eval-pack shape:
|
|
|
264
337
|
]
|
|
265
338
|
}
|
|
266
339
|
],
|
|
267
|
-
// Advanced only: retry settings live in eval packs, not in interf.config.json.
|
|
268
340
|
"retry_policy": {
|
|
269
341
|
"max_attempts_per_profile": 3
|
|
270
342
|
}
|
|
271
343
|
}
|
|
272
344
|
```
|
|
273
345
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
Use the normal test flow first. Use this advanced path when you want Interf to keep improving the local preparation workflow until the workspace is good enough for your task or the attempt budget runs out. It spends more tokens, so use it when that extra spend is worth the accuracy target.
|
|
346
|
+
Use the normal workspace retry loop first. Use the eval-pack path when you want Interf to compare multiple compile profiles, add diagnostics, or keep iterating in a more controlled experiment loop. It spends more tokens, so use it when that extra spend is worth the accuracy target.
|
|
277
347
|
|
|
278
348
|
## Use It With Your Agent
|
|
279
349
|
|
|
@@ -284,25 +354,25 @@ Paste something like this into your agent:
|
|
|
284
354
|
```text
|
|
285
355
|
Install @interf/compiler, run `interf` in this folder, and use the local agent executor.
|
|
286
356
|
|
|
287
|
-
If `interf.config.json` is missing, draft one workspace with a few checks this agent should be able to answer from
|
|
357
|
+
If `interf.config.json` is missing, draft one workspace with a few truth checks this agent should be able to answer from this dataset and add the expected answers for me to confirm.
|
|
288
358
|
|
|
289
359
|
Then run a raw baseline if helpful, compile the workspace, and run `interf test`.
|
|
290
360
|
|
|
291
|
-
Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
|
|
361
|
+
Tell me whether the compiled workspace passes the truth checks, and only recommend it if it does.
|
|
292
362
|
```
|
|
293
363
|
|
|
294
364
|
## Custom Workflows
|
|
295
365
|
|
|
296
366
|
Interf ships with a default workflow.
|
|
297
367
|
|
|
298
|
-
If you want to change how the
|
|
368
|
+
If you want to change how the workflow package runs on your dataset, this is the part you customize:
|
|
299
369
|
|
|
300
370
|
```bash
|
|
301
371
|
interf create workflow
|
|
302
372
|
interf verify workflow --path <path>
|
|
303
373
|
```
|
|
304
374
|
|
|
305
|
-
Then
|
|
375
|
+
Then test that workflow on the same dataset and the same truth checks.
|
|
306
376
|
|
|
307
377
|
Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
|
|
308
378
|
|
|
@@ -313,7 +383,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
|
|
|
313
383
|
- `interf create workspace` = create another compiled workspace when you need one
|
|
314
384
|
- `interf create workflow` = create a reusable local workflow package
|
|
315
385
|
- `interf compile` = build a selected workspace for the current folder
|
|
316
|
-
- `interf test` = test the raw files, a compiled workspace, or both on saved checks
|
|
386
|
+
- `interf test` = test the raw files, a compiled workspace, or both on saved truth checks
|
|
317
387
|
- `interf benchmark` = alias for `interf test`
|
|
318
388
|
- `interf doctor` = check local executor setup
|
|
319
389
|
- `interf verify <check>` = run deterministic checks on major workflow steps
|
|
@@ -1,4 +1,16 @@
|
|
|
1
|
+
import type { WorkflowExecutionProfile, WorkflowExecutor } from "../lib/executors.js";
|
|
2
|
+
import type { SourceWorkspaceConfig } from "../lib/schema.js";
|
|
1
3
|
import type { CommandModule } from "yargs";
|
|
4
|
+
import type { StageShellRetentionMode } from "../lib/workflows.js";
|
|
2
5
|
export declare const compileCommand: CommandModule;
|
|
3
6
|
export declare function runCompileCommand(argv?: Record<string, unknown>): Promise<void>;
|
|
7
|
+
export declare function runConfiguredWorkspaceCompile(options: {
|
|
8
|
+
executor: WorkflowExecutor;
|
|
9
|
+
workspacePath: string;
|
|
10
|
+
sourcePath: string;
|
|
11
|
+
workspaceConfig: SourceWorkspaceConfig | null;
|
|
12
|
+
executionProfile?: WorkflowExecutionProfile;
|
|
13
|
+
maxAttemptsOverride: number | null;
|
|
14
|
+
preserveStageShells?: StageShellRetentionMode;
|
|
15
|
+
}): Promise<boolean>;
|
|
4
16
|
//# sourceMappingURL=compile.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAMtF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAM9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAY3C,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAEnE,eAAO,MAAM,cAAc,EAAE,aAkB5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAgFzF;AAuDD,wBAAsB,6BAA6B,CACjD,OAAO,EAAE;IACP,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,qBAAqB,GAAG,IAAI,CAAC;IAC9C,gBAAgB,CAAC,EAAE,wBAAwB,CAAC;IAC5C,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,mBAAmB,CAAC,EAAE,uBAAuB,CAAC;CAC/C,GACA,OAAO,CAAC,OAAO,CAAC,CA4HlB"}
|