@interf/compiler 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. package/README.md +182 -183
  2. package/dist/commands/benchmark.d.ts.map +1 -1
  3. package/dist/commands/benchmark.js +60 -351
  4. package/dist/commands/benchmark.js.map +1 -1
  5. package/dist/commands/compile.d.ts.map +1 -1
  6. package/dist/commands/compile.js +43 -110
  7. package/dist/commands/compile.js.map +1 -1
  8. package/dist/commands/create-workflow-wizard.d.ts +4 -25
  9. package/dist/commands/create-workflow-wizard.d.ts.map +1 -1
  10. package/dist/commands/create-workflow-wizard.js +29 -214
  11. package/dist/commands/create-workflow-wizard.js.map +1 -1
  12. package/dist/commands/create.d.ts +2 -11
  13. package/dist/commands/create.d.ts.map +1 -1
  14. package/dist/commands/create.js +72 -461
  15. package/dist/commands/create.js.map +1 -1
  16. package/dist/commands/default.d.ts.map +1 -1
  17. package/dist/commands/default.js +16 -28
  18. package/dist/commands/default.js.map +1 -1
  19. package/dist/commands/init.d.ts.map +1 -1
  20. package/dist/commands/init.js +71 -340
  21. package/dist/commands/init.js.map +1 -1
  22. package/dist/commands/list.d.ts.map +1 -1
  23. package/dist/commands/list.js +12 -22
  24. package/dist/commands/list.js.map +1 -1
  25. package/dist/commands/reset.d.ts.map +1 -1
  26. package/dist/commands/reset.js +27 -124
  27. package/dist/commands/reset.js.map +1 -1
  28. package/dist/commands/source-config-wizard.d.ts +11 -11
  29. package/dist/commands/source-config-wizard.d.ts.map +1 -1
  30. package/dist/commands/source-config-wizard.js +68 -69
  31. package/dist/commands/source-config-wizard.js.map +1 -1
  32. package/dist/commands/status.d.ts.map +1 -1
  33. package/dist/commands/status.js +60 -56
  34. package/dist/commands/status.js.map +1 -1
  35. package/dist/commands/verify.d.ts.map +1 -1
  36. package/dist/commands/verify.js +59 -98
  37. package/dist/commands/verify.js.map +1 -1
  38. package/dist/index.d.ts +7 -7
  39. package/dist/index.d.ts.map +1 -1
  40. package/dist/index.js +4 -6
  41. package/dist/index.js.map +1 -1
  42. package/dist/lib/agent-constants.js +1 -1
  43. package/dist/lib/agent-constants.js.map +1 -1
  44. package/dist/lib/benchmark-execution.d.ts.map +1 -1
  45. package/dist/lib/benchmark-execution.js +7 -16
  46. package/dist/lib/benchmark-execution.js.map +1 -1
  47. package/dist/lib/benchmark-targets.d.ts +3 -4
  48. package/dist/lib/benchmark-targets.d.ts.map +1 -1
  49. package/dist/lib/benchmark-targets.js +9 -55
  50. package/dist/lib/benchmark-targets.js.map +1 -1
  51. package/dist/lib/benchmark-types.d.ts +2 -3
  52. package/dist/lib/benchmark-types.d.ts.map +1 -1
  53. package/dist/lib/benchmark.d.ts +1 -1
  54. package/dist/lib/benchmark.d.ts.map +1 -1
  55. package/dist/lib/benchmark.js +1 -1
  56. package/dist/lib/benchmark.js.map +1 -1
  57. package/dist/lib/config.d.ts +1 -2
  58. package/dist/lib/config.d.ts.map +1 -1
  59. package/dist/lib/config.js +2 -4
  60. package/dist/lib/config.js.map +1 -1
  61. package/dist/lib/discovery.d.ts +1 -1
  62. package/dist/lib/discovery.d.ts.map +1 -1
  63. package/dist/lib/discovery.js +7 -2
  64. package/dist/lib/discovery.js.map +1 -1
  65. package/dist/lib/eval-packs.d.ts +6 -52
  66. package/dist/lib/eval-packs.d.ts.map +1 -1
  67. package/dist/lib/eval-packs.js +11 -39
  68. package/dist/lib/eval-packs.js.map +1 -1
  69. package/dist/lib/interf-bootstrap.d.ts +3 -5
  70. package/dist/lib/interf-bootstrap.d.ts.map +1 -1
  71. package/dist/lib/interf-bootstrap.js +10 -57
  72. package/dist/lib/interf-bootstrap.js.map +1 -1
  73. package/dist/lib/interf-detect.d.ts +13 -11
  74. package/dist/lib/interf-detect.d.ts.map +1 -1
  75. package/dist/lib/interf-detect.js +59 -45
  76. package/dist/lib/interf-detect.js.map +1 -1
  77. package/dist/lib/interf-scaffold.d.ts +2 -5
  78. package/dist/lib/interf-scaffold.d.ts.map +1 -1
  79. package/dist/lib/interf-scaffold.js +98 -235
  80. package/dist/lib/interf-scaffold.js.map +1 -1
  81. package/dist/lib/interf-workflow-package.d.ts +1 -2
  82. package/dist/lib/interf-workflow-package.d.ts.map +1 -1
  83. package/dist/lib/interf-workflow-package.js +94 -90
  84. package/dist/lib/interf-workflow-package.js.map +1 -1
  85. package/dist/lib/interf.d.ts +4 -5
  86. package/dist/lib/interf.d.ts.map +1 -1
  87. package/dist/lib/interf.js +3 -6
  88. package/dist/lib/interf.js.map +1 -1
  89. package/dist/lib/local-workflows.d.ts +9 -8
  90. package/dist/lib/local-workflows.d.ts.map +1 -1
  91. package/dist/lib/local-workflows.js +42 -94
  92. package/dist/lib/local-workflows.js.map +1 -1
  93. package/dist/lib/obsidian.d.ts +1 -3
  94. package/dist/lib/obsidian.d.ts.map +1 -1
  95. package/dist/lib/obsidian.js +10 -81
  96. package/dist/lib/obsidian.js.map +1 -1
  97. package/dist/lib/registry.d.ts +6 -17
  98. package/dist/lib/registry.d.ts.map +1 -1
  99. package/dist/lib/registry.js +36 -50
  100. package/dist/lib/registry.js.map +1 -1
  101. package/dist/lib/runtime-contracts.d.ts +2 -3
  102. package/dist/lib/runtime-contracts.d.ts.map +1 -1
  103. package/dist/lib/runtime-contracts.js +10 -9
  104. package/dist/lib/runtime-contracts.js.map +1 -1
  105. package/dist/lib/runtime-reconcile.d.ts +2 -5
  106. package/dist/lib/runtime-reconcile.d.ts.map +1 -1
  107. package/dist/lib/runtime-reconcile.js +23 -176
  108. package/dist/lib/runtime-reconcile.js.map +1 -1
  109. package/dist/lib/runtime-runs.d.ts.map +1 -1
  110. package/dist/lib/runtime-runs.js +52 -57
  111. package/dist/lib/runtime-runs.js.map +1 -1
  112. package/dist/lib/runtime-types.d.ts +5 -6
  113. package/dist/lib/runtime-types.d.ts.map +1 -1
  114. package/dist/lib/runtime.d.ts +2 -2
  115. package/dist/lib/runtime.d.ts.map +1 -1
  116. package/dist/lib/runtime.js +1 -1
  117. package/dist/lib/runtime.js.map +1 -1
  118. package/dist/lib/schema.d.ts +53 -312
  119. package/dist/lib/schema.d.ts.map +1 -1
  120. package/dist/lib/schema.js +39 -206
  121. package/dist/lib/schema.js.map +1 -1
  122. package/dist/lib/source-config.d.ts +7 -7
  123. package/dist/lib/source-config.d.ts.map +1 -1
  124. package/dist/lib/source-config.js +55 -62
  125. package/dist/lib/source-config.js.map +1 -1
  126. package/dist/lib/state-artifacts.d.ts +5 -11
  127. package/dist/lib/state-artifacts.d.ts.map +1 -1
  128. package/dist/lib/state-artifacts.js +8 -18
  129. package/dist/lib/state-artifacts.js.map +1 -1
  130. package/dist/lib/state-health.d.ts +4 -8
  131. package/dist/lib/state-health.d.ts.map +1 -1
  132. package/dist/lib/state-health.js +27 -223
  133. package/dist/lib/state-health.js.map +1 -1
  134. package/dist/lib/state-io.d.ts +7 -12
  135. package/dist/lib/state-io.d.ts.map +1 -1
  136. package/dist/lib/state-io.js +26 -93
  137. package/dist/lib/state-io.js.map +1 -1
  138. package/dist/lib/state-view.d.ts +4 -6
  139. package/dist/lib/state-view.d.ts.map +1 -1
  140. package/dist/lib/state-view.js +62 -101
  141. package/dist/lib/state-view.js.map +1 -1
  142. package/dist/lib/state.d.ts +5 -5
  143. package/dist/lib/state.d.ts.map +1 -1
  144. package/dist/lib/state.js +4 -4
  145. package/dist/lib/state.js.map +1 -1
  146. package/dist/lib/summarize-plan.d.ts +2 -2
  147. package/dist/lib/summarize-plan.d.ts.map +1 -1
  148. package/dist/lib/summarize-plan.js +13 -13
  149. package/dist/lib/summarize-plan.js.map +1 -1
  150. package/dist/lib/{validate-kb.d.ts → validate-workspace.d.ts} +8 -8
  151. package/dist/lib/validate-workspace.d.ts.map +1 -0
  152. package/dist/lib/{validate-kb.js → validate-workspace.js} +44 -46
  153. package/dist/lib/validate-workspace.js.map +1 -0
  154. package/dist/lib/validate.d.ts +5 -7
  155. package/dist/lib/validate.d.ts.map +1 -1
  156. package/dist/lib/validate.js +6 -19
  157. package/dist/lib/validate.js.map +1 -1
  158. package/dist/lib/workflow-definitions.d.ts +14 -50
  159. package/dist/lib/workflow-definitions.d.ts.map +1 -1
  160. package/dist/lib/workflow-definitions.js +74 -349
  161. package/dist/lib/workflow-definitions.js.map +1 -1
  162. package/dist/lib/workflow-helpers.d.ts +3 -4
  163. package/dist/lib/workflow-helpers.d.ts.map +1 -1
  164. package/dist/lib/workflow-helpers.js +15 -49
  165. package/dist/lib/workflow-helpers.js.map +1 -1
  166. package/dist/lib/workflow-stage-runner.d.ts +1 -2
  167. package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
  168. package/dist/lib/workflow-stage-runner.js +4 -6
  169. package/dist/lib/workflow-stage-runner.js.map +1 -1
  170. package/dist/lib/workflow-starter-docs.d.ts +3 -5
  171. package/dist/lib/workflow-starter-docs.d.ts.map +1 -1
  172. package/dist/lib/workflow-starter-docs.js +2 -17
  173. package/dist/lib/workflow-starter-docs.js.map +1 -1
  174. package/dist/lib/workflows.d.ts +9 -14
  175. package/dist/lib/workflows.d.ts.map +1 -1
  176. package/dist/lib/workflows.js +13 -30
  177. package/dist/lib/workflows.js.map +1 -1
  178. package/dist/lib/workspace-compile.d.ts +50 -0
  179. package/dist/lib/workspace-compile.d.ts.map +1 -0
  180. package/dist/lib/{workflows-kb.js → workspace-compile.js} +81 -89
  181. package/dist/lib/workspace-compile.js.map +1 -0
  182. package/package.json +9 -9
  183. package/skills/benchmark/SKILL.md +16 -24
  184. package/skills/workflow/create/SKILL.md +7 -14
  185. package/templates/workspace/README.md +23 -0
  186. package/templates/workspace/interfignore +2 -0
  187. package/dist/lib/bundled-templates.d.ts +0 -5
  188. package/dist/lib/bundled-templates.d.ts.map +0 -1
  189. package/dist/lib/bundled-templates.js +0 -23
  190. package/dist/lib/bundled-templates.js.map +0 -1
  191. package/dist/lib/interf-compile-plan.d.ts +0 -12
  192. package/dist/lib/interf-compile-plan.d.ts.map +0 -1
  193. package/dist/lib/interf-compile-plan.js +0 -143
  194. package/dist/lib/interf-compile-plan.js.map +0 -1
  195. package/dist/lib/validate-interface.d.ts +0 -79
  196. package/dist/lib/validate-interface.d.ts.map +0 -1
  197. package/dist/lib/validate-interface.js +0 -535
  198. package/dist/lib/validate-interface.js.map +0 -1
  199. package/dist/lib/validate-kb.d.ts.map +0 -1
  200. package/dist/lib/validate-kb.js.map +0 -1
  201. package/dist/lib/workflows-interface-contracts.d.ts +0 -24
  202. package/dist/lib/workflows-interface-contracts.d.ts.map +0 -1
  203. package/dist/lib/workflows-interface-contracts.js +0 -304
  204. package/dist/lib/workflows-interface-contracts.js.map +0 -1
  205. package/dist/lib/workflows-interface.d.ts +0 -72
  206. package/dist/lib/workflows-interface.d.ts.map +0 -1
  207. package/dist/lib/workflows-interface.js +0 -377
  208. package/dist/lib/workflows-interface.js.map +0 -1
  209. package/dist/lib/workflows-kb.d.ts +0 -50
  210. package/dist/lib/workflows-kb.d.ts.map +0 -1
  211. package/dist/lib/workflows-kb.js.map +0 -1
  212. package/skills/interface/analyze/SKILL.md +0 -191
  213. package/skills/interface/compile/SKILL.md +0 -152
  214. package/skills/interface/compile/references/output-format.md +0 -48
  215. package/skills/interface/create/SKILL.md +0 -87
  216. package/skills/interface/create/references/compile-plan-format.md +0 -109
  217. package/skills/interface/create/references/workflows.md +0 -35
  218. package/skills/interface/query/SKILL.md +0 -48
  219. package/skills/interface/retrieve/SKILL.md +0 -133
  220. package/skills/knowledge-base/compile/SKILL.md +0 -196
  221. package/skills/knowledge-base/compile/references/output-format.md +0 -48
  222. package/skills/knowledge-base/compile/references/stage-claims.md +0 -60
  223. package/skills/knowledge-base/compile/references/stage-entities.md +0 -46
  224. package/skills/knowledge-base/query/SKILL.md +0 -45
  225. package/skills/knowledge-base/summarize/SKILL.md +0 -152
  226. package/templates/interface/README.md +0 -159
  227. package/templates/interface/interfaces.md +0 -102
  228. package/templates/knowledge-base/README.md +0 -137
  229. package/templates/knowledge-base/interfignore +0 -19
  230. package/templates/knowledge-base/registry.md +0 -118
  231. package/templates/workflow-package/README.md +0 -16
  232. package/templates/workflow-package/create/SKILL.md +0 -8
  233. package/templates/workflow-package/interface-query/SKILL.md +0 -29
  234. package/templates/workflow-package/interface-stage/SKILL.md +0 -13
  235. package/templates/workflow-package/knowledge-base-query/SKILL.md +0 -36
  236. package/templates/workflow-package/knowledge-base-stage/SKILL.md +0 -13
  237. package/templates/workflow-starters/interface/interf/README.md +0 -13
  238. package/templates/workflow-starters/interface/interf/create/SKILL.md +0 -15
  239. package/templates/workflow-starters/knowledge-base/interf/README.md +0 -13
  240. package/templates/workflow-starters/knowledge-base/karpathy/README.md +0 -13
package/README.md CHANGED
@@ -1,39 +1,19 @@
1
1
  # Interf
2
2
 
3
- Interf is an eval-first knowledge compiler for agents such as Claude Code and Codex.
3
+ Open-source knowledge compiler for your files.
4
4
 
5
- If you use OpenClaw, Hermes, or your own local retrieval workflow, the real problem is not opening a folder. It is getting the agent to work correctly on raw filesystem data without missing evidence, doing shallow analysis, or hallucinating once the task spans several files.
5
+ Interf measures and improves how accurately local agents answer questions from your files.
6
6
 
7
- - your files stay on your machine
8
- - you choose the local agent
9
- - you decide what must be true
10
-
11
- Agents start missing things when a task spans PDFs, charts, and several files in one folder. That usually shows up when the job depends on:
12
-
13
- - reading reports and filings
14
- - extracting a number from a chart
15
- - understanding what is inside a folder before doing work
16
- - pulling context together across several files
17
- - checking the raw source when the answer has to be exact
18
-
19
- Raw filesystem data often looks fine until it is too late. The failure shows up as a missed number, a bad comparison, or an answer that sounds confident but is wrong.
20
-
21
- That is why compilation matters. Before the agent does the real job, the folder needs preparation.
22
-
23
- Interf Knowledge Compiler runs a local data pipeline with your agent as the executor. It produces a compiled workspace beside the raw files, with distilled notes and cross-file structure so the agent can understand what is in the folder, navigate it faster, and retrieve the right content without rediscovering everything from scratch.
7
+ If you use Claude Code, Codex, OpenClaw, Hermes, or your own local agent setup on folders full of PDFs, docs, spreadsheets, and notes, the failure often shows up late: missed evidence, shallow analysis, bad comparisons, or answers that sound confident but are wrong.
24
8
 
25
- Interf gives you a simple loop:
9
+ Interf lets you write a few questions and expected answers about your files, build a compiled workspace on top of those files, and see whether the result actually passes.
26
10
 
27
- - point Interf at a folder
28
- - say what must be true in `interf.config.json`
29
- - run `interf benchmark` to see how your agent does on the raw folder
30
- - compile a workspace beside the raw files
31
- - run the same evals again
32
- - keep the compiled workspace only if it performs better on your evals
33
-
34
- The first output is a shared compiled workspace for the whole folder.
11
+ - your files stay on your machine
12
+ - you choose the local agent
13
+ - your raw files stay the source of truth
14
+ - Interf adds a file-based layer on top
35
15
 
36
- Create an interface only when you want a second compiled workspace for one recurring job, with narrower retrieval, job-specific outputs, and extra evals for that job.
16
+ It runs local data-processing pipelines with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
37
17
 
38
18
  ## Quick Start
39
19
 
@@ -48,108 +28,98 @@ Install:
48
28
  npm install -g @interf/compiler
49
29
  ```
50
30
 
51
- Then run Interf in any folder:
31
+ Start by writing a few questions and expected answers in `interf.config.json`:
32
+
33
+ ```json
34
+ {
35
+ "workspaces": [
36
+ {
37
+ "name": "default",
38
+ "about": "General compiled workspace for the quarterly results folder.",
39
+ "checks": [
40
+ {
41
+ "question": "What full-year revenue range did the company maintain?",
42
+ "answer": "$4.8B to $5.0B in revenue."
43
+ },
44
+ {
45
+ "question": "Did gross margin improve or decline year over year?",
46
+ "answer": "Gross margin declined year over year."
47
+ }
48
+ ]
49
+ }
50
+ ]
51
+ }
52
+ ```
53
+
54
+ Then run Interf in that folder:
52
55
 
53
56
  ```bash
54
57
  cd ~/my-folder
55
58
  interf init
56
- interf benchmark
59
+ interf create workspace
57
60
  interf compile
58
61
  interf benchmark
59
62
  ```
60
63
 
61
- That is the whole first loop:
62
-
63
- - point Interf at a folder you already have
64
- - let `interf init` write the first evals in `interf.config.json`
65
- - run `interf benchmark` on the raw folder first
66
- - compile the workspace
67
- - run `interf benchmark` again to see pass/fail on raw vs compiled
68
-
69
- `interf init` chooses your local agent, can draft `interf.config.json` if it is missing, and can attach the current folder right away. It does not move or replace your files.
64
+ That first run gives you three concrete things:
70
65
 
71
- The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
66
+ - `interf/workspaces/default/` with the compiled workspace for your files
67
+ - `interf/benchmarks/runs/...` with the saved benchmark result
68
+ - a pass/fail score on the same questions and expected answers you wrote
72
69
 
73
- If Interf cannot find your local agent or compile setup, run:
70
+ If `interf.config.json` is missing, `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
74
71
 
75
72
  ```bash
76
73
  interf doctor
77
74
  ```
78
75
 
79
- Fastest sample loop:
76
+ The first flow is:
77
+
78
+ - write down a few questions your agent should be able to answer from your files
79
+ - let `interf init` save those checks in `interf.config.json`
80
+ - run `interf create workspace` and `interf compile` to build the compiled workspace
81
+ - run `interf benchmark` to see whether that compiled workspace passes the checks
82
+ - add another workspace only when one recurring context needs a narrower setup
83
+ - if needed, rerun compile or use the advanced retry path until the compiled workspace passes the checks
84
+
85
+ ## Why This Approach
86
+
87
+ Interf is built around a few simple design principles:
88
+
89
+ - `Explicit`: the output is visible and inspectable, not hidden memory
90
+ - `Local`: your files stay on your machine
91
+ - `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
92
+ - `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
93
+
94
+ Interf does not replace your data with an opaque store. It keeps the raw files in place and adds a file-based layer on top for agents.
95
+
96
+ Sample flow:
80
97
 
81
98
  ```bash
82
99
  cp -r examples/benchmark-demo /tmp/interf-demo
83
100
  cd /tmp/interf-demo
84
101
  interf init
85
- interf benchmark
86
- interf compile
87
- interf benchmark
88
- ```
89
-
90
- If you want a second compiled workspace shaped for one recurring job, add an interface:
91
-
92
- ```bash
93
- interf create interface
102
+ interf create workspace
94
103
  interf compile
95
104
  interf benchmark
96
105
  ```
97
106
 
98
- An interface is useful when the shared workspace is still too broad and you want:
107
+ ## Start With A Few Questions
99
108
 
100
- - narrower retrieval for one job
101
- - job-specific outputs
102
- - extra evals on top of the shared baseline
109
+ `interf.config.json` is where you write the questions and expected answers for a folder.
103
110
 
104
- ## Start With One Small Eval
111
+ That file uses one `workspaces` array:
105
112
 
106
- `interf.config.json` is where you write what must be true.
113
+ - the first workspace is the default compiled workspace for the folder
114
+ - later workspaces are optional and only exist when one recurring context needs a narrower setup
115
+ - each workspace carries its own `checks`
107
116
 
108
117
  If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
109
118
 
110
- Use it for:
111
-
112
- - top-level `evals` for shared baseline checks
113
- - `interfaces[].evals` for task-specific additional checks
114
-
115
- Both live in the same root `interf.config.json`.
116
-
117
- Example shape:
118
-
119
- Top-level `evals` are shared baseline checks for the workspace and every interface. Each entry in `interfaces` adds extra checks for one dedicated job.
120
-
121
- ```json
122
- {
123
- "evals": [
124
- {
125
- "question": "What changed in full-year guidance?",
126
- "answer": "Full-year guidance was maintained at $4.8B to $5.0B in revenue."
127
- },
128
- {
129
- "question": "Did gross margin improve or decline year over year?",
130
- "answer": "Gross margin declined year over year."
131
- }
132
- ],
133
- "interfaces": [
134
- {
135
- "name": "operator-briefing",
136
- "about": "Prepare tomorrow's operator briefing from the quarterly results folder.",
137
- "evals": [
138
- {
139
- "question": "What revenue range did the company maintain for full-year guidance?",
140
- "answer": "$4.8B to $5.0B in revenue."
141
- },
142
- {
143
- "question": "What should the operator pay attention to next quarter?",
144
- "answer": "Watch guidance, gross margin, and any demand changes mentioned in the report."
145
- }
146
- ]
147
- }
148
- ]
149
- }
150
- ```
119
+ The example in Quick Start above is a complete `interf.config.json`.
120
+ Advanced retry settings do not live there.
151
121
 
152
- Good first evals are small and practical:
122
+ Good first checks are small and practical:
153
123
 
154
124
  - one exact number from a chart, table, or filing
155
125
  - one short statement that should be true or false
@@ -158,158 +128,187 @@ Good first evals are small and practical:
158
128
  Then run:
159
129
 
160
130
  ```bash
161
- interf benchmark
131
+ interf create workspace
162
132
  interf compile
163
133
  interf benchmark
164
134
  ```
165
135
 
166
- If the benchmark does not show an improvement over raw files, keep iterating on evals or workflow choice first. Use the experiment loop below only when you want the advanced automated path.
136
+ ## What `interf benchmark` Compares
167
137
 
168
- ## Compare Three Things
138
+ `interf benchmark` compares compiled workspaces, not raw chat sessions.
169
139
 
170
- Compare:
140
+ It lets you answer a few simple questions:
171
141
 
172
- 1. the raw folder
173
- 2. the workspace
174
- 3. an interface for one specific job
142
+ - does this compiled workspace pass the checks?
143
+ - which compiled workspace or workflow performs better on the same folder?
144
+ - does another workspace help more on a narrower recurring context?
175
145
 
176
- `interf benchmark` runs the same evals against each one and saves a pass/fail report.
146
+ By default it loads checks from `interf.config.json`, discovers eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
177
147
 
178
- That gives you one clear question:
148
+ If you run `interf benchmark` from inside a workspace, it uses that workspace's checks. If you run it from the source folder, it uses the default workspace checks.
179
149
 
180
- - is the raw folder enough?
181
- - does the workspace retrieve better?
182
- - does a dedicated interface do better than both?
150
+ If you need raw-file probes too, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
183
151
 
184
- ## What `interf compile` Actually Does
152
+ ## What `interf compile` Does
185
153
 
186
- `interf compile` runs a workflow over your folder.
154
+ `interf compile` runs the Interf data-processing pipeline over your files.
187
155
 
188
- That workflow is the compilation pipeline:
156
+ By default, that means:
189
157
 
190
158
  - read the files
191
- - write processed notes and navigation files
192
- - build the workspace your agent can use
193
- - optionally build an interface for one specific job
159
+ - write summaries and navigation docs
160
+ - organize cross-file knowledge
161
+ - build the compiled workspace for the folder
162
+ - optionally build additional workspaces for recurring contexts you care about
163
+
164
+ In public docs, `pipeline` is the thing Interf runs. `workflow` is the saved method that defines or customizes that pipeline.
194
165
 
195
166
  The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
196
167
 
197
- ## Experiment Loop
168
+ ## What Gets Created
198
169
 
199
- Interf Knowledge Compiler also supports an advanced experiment loop above compile + benchmark.
170
+ After compile, Interf writes into `./interf/` beside your source files.
200
171
 
201
- This is the eval-first part of the product. You give Interf the folder and the evals that must pass. Interf keeps running controlled compile + benchmark attempts against that same truth surface until it either gets a working result or runs out of attempts.
172
+ - `interf/workspaces/<name>/` is a compiled workspace over the folder
173
+ - `interf/benchmarks/runs/...` stores saved benchmark runs
202
174
 
203
- Each attempt reruns the compilation workflow, reruns the benchmark, and records what changed. It stops when:
175
+ Inside those workspaces you will see things like:
204
176
 
205
- - the evals pass
206
- - or the experiment budget is exhausted
177
+ - summaries of source files
178
+ - navigation notes and entrypoints for agents
179
+ - cross-file knowledge notes
180
+ - workspace-specific outputs for one recurring context
181
+ - benchmark artifacts you can inspect later
207
182
 
208
- In practice, that means:
183
+ The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
209
184
 
210
- - `retry_policy.max_attempts_per_profile` controls how many experiment attempts each compile profile gets
211
- - stronger diagnostic profiles can be used only after the default ones fail
212
- - the loop is still judged on the same eval truth from your folder
213
- - failure summaries can be captured between attempts for diagnosis
185
+ If you use Obsidian, open `interf/workspaces/<name>/` as the vault for the compiled workspace.
214
186
 
215
- Today that advanced path is configured through eval packs and explained in the deeper docs. The workflow is the part that changes. The experiment loop is the controller that keeps trying workflows and profiles against the same evals with a fixed attempt budget.
187
+ ## Terminology
216
188
 
217
- Use the simple loop first. Use the experiment loop when you want Interf to keep improving the local compilation workflow until the workspace is ready for your task or the attempt budget runs out.
189
+ Public terms:
218
190
 
219
- ## Use It With Your Agent
191
+ - `your files` = the source folder Interf reads from
192
+ - `questions and expected answers` = the checks you want your agent to pass
193
+ - `checks` = the pass/fail questions each workspace should satisfy
194
+ - `benchmark` = score compiled workspaces on the same checks
195
+ - `compiled workspace` = the output Interf produces on top of a folder
196
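+ - `workspace` = one entry in the `workspaces` array: the first is the default compiled workspace, and any later one is an optional additional compiled workspace for one recurring context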
220
197
 
221
- If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this loop for you.
198
+ Technical terms:
222
199
 
223
- Paste something like this into Claude Code, Codex, OpenClaw, or Hermes:
200
+ - `source folder` = the raw files Interf reads from
201
+ - `workflow` = the saved method that defines or customizes the pipeline
202
+ - `.interf/` = runtime state, proofs, and health artifacts
224
203
 
225
- ```text
226
- Install @interf/compiler, run `interf init` in this folder, and use the local agent executor.
204
+ ## Advanced: Additional Workspaces
227
205
 
228
- If `interf.config.json` is missing, draft evals for what must be true for this task and ask me to confirm them.
206
+ Start with the default compiled workspace first.
229
207
 
230
- Then run `interf benchmark`, `interf compile`, and `interf benchmark` again.
208
+ Most folders only need one compiled workspace.
231
209
 
232
- Tell me whether the processed workspace beat raw files, and only recommend it if it did.
233
- ```
210
+ Create another workspace only when one recurring context needs a narrower setup.
234
211
 
235
- That is the basic loop:
212
+ Use the default compiled workspace for broad questions such as:
236
213
 
237
- - the user or agent defines what must be true
238
- - benchmark the raw folder first
239
- - Interf prepares the compiled workspace
240
- - benchmark again and keep it only if it helped
214
+ - what is in this folder?
215
+ - what changed?
216
+ - where is the source evidence?
241
217
 
242
- ## What Gets Created
218
+ Create another workspace when that broad layer is no longer enough and the work becomes a repeatable context, for example:
243
219
 
244
- After compile, Interf writes into `./interf/` beside your source files.
220
+ - operator briefings
221
+ - board prep
222
+ - finance reporting
223
+ - diligence review
224
+ - chart extraction for a report set
225
+ - one recurring research set
245
226
 
246
- - `interf/<name>/` is the shared workspace over the folder
247
- - `interf/<name>/interfaces/<name>/` is a task-specific workspace for one job
248
- - `interf/benchmarks/runs/...` stores saved benchmark runs
227
+ Why create one:
249
228
 
250
- Inside those workspaces you will see things like:
229
+ - it narrows what the agent should retrieve for that context
230
+ - it prepares guidance and structure on top of the raw folder for that recurring context
231
+ - it lets you benchmark that context separately
251
232
 
252
- - summaries of source files
253
- - navigation notes and entrypoints for agents
254
- - task-specific outputs for one interface
255
- - benchmark artifacts you can inspect later
233
+ If the default compiled workspace is already enough, do not create another workspace yet.
234
+
235
+ ## Advanced: Keep Improving Until It Passes
256
236
 
257
- In the CLI, the main Interf workspace is called a **knowledge base**. A task-specific workspace inside it is called an **interface**.
237
+ Interf also supports an advanced experiment path above the normal build + benchmark flow.
258
238
 
259
- Conceptually, the compiled workspace is a knowledge representation of the folder for agents. In the product and CLI, the concrete objects are the compiled workspace, the knowledge base, and the interface.
239
+ Give it the same folder and the same checks. Interf can keep rerunning compile + benchmark attempts until the benchmark passes or the attempt budget runs out.
260
240
 
261
- If you use Obsidian, open `interf/<name>/` as the vault for the main compiled workspace. If you are working with interfaces, open the parent knowledge-base folder so links across summaries, knowledge notes, and interfaces keep resolving.
241
+ In practice:
262
242
 
263
- ## What An Interface Is
243
+ - `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets
244
+ - stronger diagnostic profiles can be held back so they run only after the default ones fail
245
+ - the checks stay the same across every attempt
246
+ - each attempt records what changed
247
+
248
+ Example eval-pack shape:
249
+
250
+ ```jsonc
251
+ {
252
+ "workspaces": [
253
+ {
254
+ "name": "default",
255
+ "checks": [
256
+ {
257
+ "question": "What full-year revenue range did the company maintain?",
258
+ "answer": "$4.8B to $5.0B in revenue."
259
+ }
260
+ ]
261
+ }
262
+ ],
263
+ // Advanced only: retry settings live in eval packs, not in interf.config.json.
264
+ "retry_policy": {
265
+ "max_attempts_per_profile": 3
266
+ }
267
+ }
268
+ ```
264
269
 
265
- Start with one workspace.
270
+ Today this lives in the advanced eval-pack runner. It is not configured in `interf.config.json`, and it is not exposed as a top-level `interf compile --max-retries` flag.
266
271
 
267
- An interface is a second compiled workspace for one recurring job.
272
+ Use the normal benchmark flow first. Use this advanced path when you want Interf to keep improving the local preparation workflow until the workspace is good enough for your task or the attempt budget runs out. This path spends more tokens, so use it only when reaching the accuracy target justifies the extra spend.
268
273
 
269
- The main compiled workspace is the shared layer for the whole folder. Use it for broad questions such as:
274
+ ## Use It With Your Agent
270
275
 
271
- - what is in this folder?
272
- - what changed?
273
- - where is the source evidence?
276
+ If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this process for you.
274
277
 
275
- Create an interface when that broad layer is no longer enough and the work becomes a repeatable job, for example:
278
+ Paste something like this into your agent:
276
279
 
277
- - prepare tomorrow's operator briefing
278
- - run diligence on this deal room
279
- - extract chart values from this report set
280
- - answer one recurring research question set
280
+ ```text
281
+ Install @interf/compiler, run `interf init` in this folder, and use the local agent executor.
281
282
 
282
- Why create one:
283
+ If `interf.config.json` is missing, draft a default workspace with a few checks this agent should be able to answer from these files, and add the expected answers for me to confirm.
283
284
 
284
- - it narrows what the agent should retrieve for that job
285
- - it writes job-specific outputs on top of the shared workspace
286
- - it lets you add extra task-specific evals on top of the shared evals
285
+ Then run `interf create workspace`, `interf compile`, and `interf benchmark`.
287
286
 
288
- If the shared workspace is already enough for the job, do not create an interface yet.
287
+ Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
288
+ ```
289
289
 
290
290
  ## Custom Workflows
291
291
 
292
292
  Interf ships with a default workflow.
293
293
 
294
- If you want to change how compilation happens on your data, this is the part you customize:
294
+ If you want to change how the data-processing pipeline runs on your files, this is the part you customize:
295
295
 
296
296
  ```bash
297
297
  interf create workflow
298
298
  interf verify workflow --path <path>
299
299
  ```
300
300
 
301
- Then benchmark that workflow on the same folder and the same evals.
301
+ Then benchmark that workflow on the same folder and the same checks.
302
302
 
303
303
  Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
304
304
 
305
305
  ## Core Commands
306
306
 
307
- - `interf init` = choose your local executor and optionally attach the current folder
308
- - `interf create knowledge-base` = create the shared processed workspace for this folder
309
- - `interf create interface` = create a task-specific workspace on top
307
+ - `interf init` = choose your local executor and draft checks
308
+ - `interf create workspace` = create a compiled workspace for this folder
310
309
  - `interf create workflow` = create a reusable local workflow package
311
- - `interf compile` = build the current workspace
312
- - `interf benchmark` = compare raw files vs processed workspaces on your evals
310
+ - `interf compile` = build the current compiled workspace
311
+ - `interf benchmark` = score compiled workspaces on your checks
313
312
  - `interf doctor` = check local executor setup
314
313
  - `interf verify <check>` = run deterministic checks on major workflow steps
315
314
  - `interf reset <scope>` = remove generated state while keeping source files
@@ -319,7 +318,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
319
318
  - [docs/workflow-spec.md](./docs/workflow-spec.md) for custom workflow packages
320
319
  - [docs/runtime-contract.md](./docs/runtime-contract.md) for the exact on-disk contract
321
320
  - [docs/architecture.md](./docs/architecture.md) for the deeper system model
322
- - [docs/eval-loop.md](./docs/eval-loop.md) for the advanced benchmark and experiment loop
321
+ - [docs/eval-loop.md](./docs/eval-loop.md) for advanced eval-pack experiments across workflows and models
323
322
 
324
323
  Maintainers should use [CONTRIBUTING.md](./CONTRIBUTING.md) for test and release gates.
325
324
 
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AA0BA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AA2W3C,eAAO,MAAM,gBAAgB,EAAE,aA8D9B,CAAC"}
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AA6B3C,eAAO,MAAM,gBAAgB,EAAE,aA2E9B,CAAC"}