@interf/compiler 0.2.5 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. package/README.md +192 -187
  2. package/dist/bin.js +3 -3
  3. package/dist/bin.js.map +1 -1
  4. package/dist/commands/compile.d.ts +1 -0
  5. package/dist/commands/compile.d.ts.map +1 -1
  6. package/dist/commands/compile.js +45 -138
  7. package/dist/commands/compile.js.map +1 -1
  8. package/dist/commands/create-workflow-wizard.d.ts +4 -25
  9. package/dist/commands/create-workflow-wizard.d.ts.map +1 -1
  10. package/dist/commands/create-workflow-wizard.js +46 -222
  11. package/dist/commands/create-workflow-wizard.js.map +1 -1
  12. package/dist/commands/create.d.ts +2 -11
  13. package/dist/commands/create.d.ts.map +1 -1
  14. package/dist/commands/create.js +78 -477
  15. package/dist/commands/create.js.map +1 -1
  16. package/dist/commands/default.d.ts.map +1 -1
  17. package/dist/commands/default.js +27 -43
  18. package/dist/commands/default.js.map +1 -1
  19. package/dist/commands/doctor.js +2 -2
  20. package/dist/commands/doctor.js.map +1 -1
  21. package/dist/commands/executor-flow.d.ts +9 -0
  22. package/dist/commands/executor-flow.d.ts.map +1 -0
  23. package/dist/commands/executor-flow.js +55 -0
  24. package/dist/commands/executor-flow.js.map +1 -0
  25. package/dist/commands/init.d.ts +1 -0
  26. package/dist/commands/init.d.ts.map +1 -1
  27. package/dist/commands/init.js +320 -321
  28. package/dist/commands/init.js.map +1 -1
  29. package/dist/commands/list.d.ts.map +1 -1
  30. package/dist/commands/list.js +12 -22
  31. package/dist/commands/list.js.map +1 -1
  32. package/dist/commands/reset.d.ts.map +1 -1
  33. package/dist/commands/reset.js +27 -124
  34. package/dist/commands/reset.js.map +1 -1
  35. package/dist/commands/source-config-wizard.d.ts +10 -11
  36. package/dist/commands/source-config-wizard.d.ts.map +1 -1
  37. package/dist/commands/source-config-wizard.js +100 -97
  38. package/dist/commands/source-config-wizard.js.map +1 -1
  39. package/dist/commands/status.d.ts.map +1 -1
  40. package/dist/commands/status.js +60 -56
  41. package/dist/commands/status.js.map +1 -1
  42. package/dist/commands/test-flow.d.ts +21 -0
  43. package/dist/commands/test-flow.d.ts.map +1 -0
  44. package/dist/commands/test-flow.js +106 -0
  45. package/dist/commands/test-flow.js.map +1 -0
  46. package/dist/commands/test.d.ts +4 -0
  47. package/dist/commands/test.d.ts.map +1 -0
  48. package/dist/commands/test.js +131 -0
  49. package/dist/commands/test.js.map +1 -0
  50. package/dist/commands/verify.d.ts.map +1 -1
  51. package/dist/commands/verify.js +63 -98
  52. package/dist/commands/verify.js.map +1 -1
  53. package/dist/commands/workspace-flow.d.ts +21 -0
  54. package/dist/commands/workspace-flow.d.ts.map +1 -0
  55. package/dist/commands/workspace-flow.js +90 -0
  56. package/dist/commands/workspace-flow.js.map +1 -0
  57. package/dist/index.d.ts +8 -8
  58. package/dist/index.d.ts.map +1 -1
  59. package/dist/index.js +5 -7
  60. package/dist/index.js.map +1 -1
  61. package/dist/lib/agent-constants.js +1 -1
  62. package/dist/lib/agent-constants.js.map +1 -1
  63. package/dist/lib/agent-detection.js +4 -4
  64. package/dist/lib/agent-detection.js.map +1 -1
  65. package/dist/lib/agent-skills.js +6 -6
  66. package/dist/lib/agent-skills.js.map +1 -1
  67. package/dist/lib/benchmark-execution.d.ts.map +1 -1
  68. package/dist/lib/benchmark-execution.js +32 -19
  69. package/dist/lib/benchmark-execution.js.map +1 -1
  70. package/dist/lib/benchmark-sandbox.d.ts +10 -0
  71. package/dist/lib/benchmark-sandbox.d.ts.map +1 -0
  72. package/dist/lib/benchmark-sandbox.js +75 -0
  73. package/dist/lib/benchmark-sandbox.js.map +1 -0
  74. package/dist/lib/benchmark-targets.d.ts +4 -4
  75. package/dist/lib/benchmark-targets.d.ts.map +1 -1
  76. package/dist/lib/benchmark-targets.js +20 -54
  77. package/dist/lib/benchmark-targets.js.map +1 -1
  78. package/dist/lib/benchmark-types.d.ts +2 -3
  79. package/dist/lib/benchmark-types.d.ts.map +1 -1
  80. package/dist/lib/benchmark.d.ts +1 -1
  81. package/dist/lib/benchmark.d.ts.map +1 -1
  82. package/dist/lib/benchmark.js +1 -1
  83. package/dist/lib/benchmark.js.map +1 -1
  84. package/dist/lib/config.d.ts +1 -2
  85. package/dist/lib/config.d.ts.map +1 -1
  86. package/dist/lib/config.js +2 -4
  87. package/dist/lib/config.js.map +1 -1
  88. package/dist/lib/discovery.d.ts +1 -1
  89. package/dist/lib/discovery.d.ts.map +1 -1
  90. package/dist/lib/discovery.js +7 -2
  91. package/dist/lib/discovery.js.map +1 -1
  92. package/dist/lib/eval-packs.d.ts +6 -52
  93. package/dist/lib/eval-packs.d.ts.map +1 -1
  94. package/dist/lib/eval-packs.js +11 -39
  95. package/dist/lib/eval-packs.js.map +1 -1
  96. package/dist/lib/interf-bootstrap.d.ts +3 -5
  97. package/dist/lib/interf-bootstrap.d.ts.map +1 -1
  98. package/dist/lib/interf-bootstrap.js +10 -57
  99. package/dist/lib/interf-bootstrap.js.map +1 -1
  100. package/dist/lib/interf-detect.d.ts +13 -11
  101. package/dist/lib/interf-detect.d.ts.map +1 -1
  102. package/dist/lib/interf-detect.js +59 -45
  103. package/dist/lib/interf-detect.js.map +1 -1
  104. package/dist/lib/interf-scaffold.d.ts +2 -5
  105. package/dist/lib/interf-scaffold.d.ts.map +1 -1
  106. package/dist/lib/interf-scaffold.js +99 -235
  107. package/dist/lib/interf-scaffold.js.map +1 -1
  108. package/dist/lib/interf-workflow-package.d.ts +1 -2
  109. package/dist/lib/interf-workflow-package.d.ts.map +1 -1
  110. package/dist/lib/interf-workflow-package.js +99 -90
  111. package/dist/lib/interf-workflow-package.js.map +1 -1
  112. package/dist/lib/interf.d.ts +4 -5
  113. package/dist/lib/interf.d.ts.map +1 -1
  114. package/dist/lib/interf.js +3 -6
  115. package/dist/lib/interf.js.map +1 -1
  116. package/dist/lib/local-workflows.d.ts +9 -8
  117. package/dist/lib/local-workflows.d.ts.map +1 -1
  118. package/dist/lib/local-workflows.js +56 -92
  119. package/dist/lib/local-workflows.js.map +1 -1
  120. package/dist/lib/obsidian.d.ts +1 -3
  121. package/dist/lib/obsidian.d.ts.map +1 -1
  122. package/dist/lib/obsidian.js +10 -81
  123. package/dist/lib/obsidian.js.map +1 -1
  124. package/dist/lib/registry.d.ts +6 -17
  125. package/dist/lib/registry.d.ts.map +1 -1
  126. package/dist/lib/registry.js +36 -50
  127. package/dist/lib/registry.js.map +1 -1
  128. package/dist/lib/runtime-contracts.d.ts +4 -3
  129. package/dist/lib/runtime-contracts.d.ts.map +1 -1
  130. package/dist/lib/runtime-contracts.js +125 -9
  131. package/dist/lib/runtime-contracts.js.map +1 -1
  132. package/dist/lib/runtime-reconcile.d.ts +3 -5
  133. package/dist/lib/runtime-reconcile.d.ts.map +1 -1
  134. package/dist/lib/runtime-reconcile.js +70 -167
  135. package/dist/lib/runtime-reconcile.js.map +1 -1
  136. package/dist/lib/runtime-runs.d.ts.map +1 -1
  137. package/dist/lib/runtime-runs.js +61 -57
  138. package/dist/lib/runtime-runs.js.map +1 -1
  139. package/dist/lib/runtime-types.d.ts +16 -6
  140. package/dist/lib/runtime-types.d.ts.map +1 -1
  141. package/dist/lib/runtime.d.ts +2 -2
  142. package/dist/lib/runtime.d.ts.map +1 -1
  143. package/dist/lib/runtime.js +1 -1
  144. package/dist/lib/runtime.js.map +1 -1
  145. package/dist/lib/schema.d.ts +69 -311
  146. package/dist/lib/schema.d.ts.map +1 -1
  147. package/dist/lib/schema.js +49 -210
  148. package/dist/lib/schema.js.map +1 -1
  149. package/dist/lib/source-config.d.ts +8 -7
  150. package/dist/lib/source-config.d.ts.map +1 -1
  151. package/dist/lib/source-config.js +59 -63
  152. package/dist/lib/source-config.js.map +1 -1
  153. package/dist/lib/state-artifacts.d.ts +5 -11
  154. package/dist/lib/state-artifacts.d.ts.map +1 -1
  155. package/dist/lib/state-artifacts.js +8 -18
  156. package/dist/lib/state-artifacts.js.map +1 -1
  157. package/dist/lib/state-health.d.ts +4 -8
  158. package/dist/lib/state-health.d.ts.map +1 -1
  159. package/dist/lib/state-health.js +52 -233
  160. package/dist/lib/state-health.js.map +1 -1
  161. package/dist/lib/state-io.d.ts +7 -12
  162. package/dist/lib/state-io.d.ts.map +1 -1
  163. package/dist/lib/state-io.js +32 -93
  164. package/dist/lib/state-io.js.map +1 -1
  165. package/dist/lib/state-view.d.ts +4 -6
  166. package/dist/lib/state-view.d.ts.map +1 -1
  167. package/dist/lib/state-view.js +62 -101
  168. package/dist/lib/state-view.js.map +1 -1
  169. package/dist/lib/state.d.ts +5 -5
  170. package/dist/lib/state.d.ts.map +1 -1
  171. package/dist/lib/state.js +4 -4
  172. package/dist/lib/state.js.map +1 -1
  173. package/dist/lib/summarize-plan.d.ts +2 -2
  174. package/dist/lib/summarize-plan.d.ts.map +1 -1
  175. package/dist/lib/summarize-plan.js +13 -13
  176. package/dist/lib/summarize-plan.js.map +1 -1
  177. package/dist/lib/{validate-kb.d.ts → validate-workspace.d.ts} +49 -8
  178. package/dist/lib/validate-workspace.d.ts.map +1 -0
  179. package/dist/lib/validate-workspace.js +398 -0
  180. package/dist/lib/validate-workspace.js.map +1 -0
  181. package/dist/lib/validate.d.ts +5 -7
  182. package/dist/lib/validate.d.ts.map +1 -1
  183. package/dist/lib/validate.js +22 -20
  184. package/dist/lib/validate.js.map +1 -1
  185. package/dist/lib/workflow-definitions.d.ts +14 -50
  186. package/dist/lib/workflow-definitions.d.ts.map +1 -1
  187. package/dist/lib/workflow-definitions.js +100 -353
  188. package/dist/lib/workflow-definitions.js.map +1 -1
  189. package/dist/lib/workflow-helpers.d.ts +3 -4
  190. package/dist/lib/workflow-helpers.d.ts.map +1 -1
  191. package/dist/lib/workflow-helpers.js +17 -49
  192. package/dist/lib/workflow-helpers.js.map +1 -1
  193. package/dist/lib/workflow-stage-runner.d.ts +1 -2
  194. package/dist/lib/workflow-stage-runner.d.ts.map +1 -1
  195. package/dist/lib/workflow-stage-runner.js +4 -6
  196. package/dist/lib/workflow-stage-runner.js.map +1 -1
  197. package/dist/lib/workflow-starter-docs.d.ts +3 -5
  198. package/dist/lib/workflow-starter-docs.d.ts.map +1 -1
  199. package/dist/lib/workflow-starter-docs.js +2 -17
  200. package/dist/lib/workflow-starter-docs.js.map +1 -1
  201. package/dist/lib/workflows.d.ts +9 -14
  202. package/dist/lib/workflows.d.ts.map +1 -1
  203. package/dist/lib/workflows.js +15 -30
  204. package/dist/lib/workflows.js.map +1 -1
  205. package/dist/lib/workspace-compile.d.ts +51 -0
  206. package/dist/lib/workspace-compile.d.ts.map +1 -0
  207. package/dist/lib/workspace-compile.js +397 -0
  208. package/dist/lib/workspace-compile.js.map +1 -0
  209. package/package.json +9 -9
  210. package/skills/benchmark/SKILL.md +20 -27
  211. package/skills/workflow/create/SKILL.md +10 -14
  212. package/skills/workspace/shape/SKILL.md +15 -0
  213. package/skills/workspace/structure/SKILL.md +15 -0
  214. package/skills/workspace/summarize/SKILL.md +15 -0
  215. package/templates/workspace/README.md +23 -0
  216. package/templates/workspace/interfignore +2 -0
  217. package/dist/commands/benchmark.d.ts +0 -3
  218. package/dist/commands/benchmark.d.ts.map +0 -1
  219. package/dist/commands/benchmark.js +0 -374
  220. package/dist/commands/benchmark.js.map +0 -1
  221. package/dist/lib/bundled-templates.d.ts +0 -5
  222. package/dist/lib/bundled-templates.d.ts.map +0 -1
  223. package/dist/lib/bundled-templates.js +0 -23
  224. package/dist/lib/bundled-templates.js.map +0 -1
  225. package/dist/lib/interf-compile-plan.d.ts +0 -12
  226. package/dist/lib/interf-compile-plan.d.ts.map +0 -1
  227. package/dist/lib/interf-compile-plan.js +0 -143
  228. package/dist/lib/interf-compile-plan.js.map +0 -1
  229. package/dist/lib/validate-interface.d.ts +0 -79
  230. package/dist/lib/validate-interface.d.ts.map +0 -1
  231. package/dist/lib/validate-interface.js +0 -535
  232. package/dist/lib/validate-interface.js.map +0 -1
  233. package/dist/lib/validate-kb.d.ts.map +0 -1
  234. package/dist/lib/validate-kb.js +0 -252
  235. package/dist/lib/validate-kb.js.map +0 -1
  236. package/dist/lib/workflows-interface-contracts.d.ts +0 -24
  237. package/dist/lib/workflows-interface-contracts.d.ts.map +0 -1
  238. package/dist/lib/workflows-interface-contracts.js +0 -304
  239. package/dist/lib/workflows-interface-contracts.js.map +0 -1
  240. package/dist/lib/workflows-interface.d.ts +0 -72
  241. package/dist/lib/workflows-interface.d.ts.map +0 -1
  242. package/dist/lib/workflows-interface.js +0 -377
  243. package/dist/lib/workflows-interface.js.map +0 -1
  244. package/dist/lib/workflows-kb.d.ts +0 -50
  245. package/dist/lib/workflows-kb.d.ts.map +0 -1
  246. package/dist/lib/workflows-kb.js +0 -306
  247. package/dist/lib/workflows-kb.js.map +0 -1
  248. package/skills/interface/analyze/SKILL.md +0 -191
  249. package/skills/interface/compile/SKILL.md +0 -152
  250. package/skills/interface/compile/references/output-format.md +0 -48
  251. package/skills/interface/create/SKILL.md +0 -87
  252. package/skills/interface/create/references/compile-plan-format.md +0 -109
  253. package/skills/interface/create/references/workflows.md +0 -35
  254. package/skills/interface/query/SKILL.md +0 -48
  255. package/skills/interface/retrieve/SKILL.md +0 -133
  256. package/skills/knowledge-base/compile/SKILL.md +0 -196
  257. package/skills/knowledge-base/compile/references/output-format.md +0 -48
  258. package/skills/knowledge-base/compile/references/stage-claims.md +0 -60
  259. package/skills/knowledge-base/compile/references/stage-entities.md +0 -46
  260. package/skills/knowledge-base/query/SKILL.md +0 -45
  261. package/skills/knowledge-base/summarize/SKILL.md +0 -152
  262. package/templates/interface/README.md +0 -159
  263. package/templates/interface/interfaces.md +0 -102
  264. package/templates/knowledge-base/README.md +0 -137
  265. package/templates/knowledge-base/interfignore +0 -19
  266. package/templates/knowledge-base/registry.md +0 -118
  267. package/templates/workflow-package/README.md +0 -16
  268. package/templates/workflow-package/create/SKILL.md +0 -8
  269. package/templates/workflow-package/interface-query/SKILL.md +0 -29
  270. package/templates/workflow-package/interface-stage/SKILL.md +0 -13
  271. package/templates/workflow-package/knowledge-base-query/SKILL.md +0 -36
  272. package/templates/workflow-package/knowledge-base-stage/SKILL.md +0 -13
  273. package/templates/workflow-starters/interface/interf/README.md +0 -13
  274. package/templates/workflow-starters/interface/interf/create/SKILL.md +0 -15
  275. package/templates/workflow-starters/knowledge-base/interf/README.md +0 -13
  276. package/templates/workflow-starters/knowledge-base/karpathy/README.md +0 -13
package/README.md CHANGED
@@ -1,39 +1,19 @@
1
1
  # Interf
2
2
 
3
- Interf is an eval-first knowledge compiler for agents such as Claude Code and Codex.
3
+ Open-source knowledge compiler for your files.
4
4
 
5
- If you use OpenClaw, Hermes, or your own local retrieval workflow, the real problem is not opening a folder. It is getting the agent to work correctly on raw filesystem data without missing evidence, doing shallow analysis, or hallucinating once the task spans several files.
5
+ Interf measures and improves how accurately local agents answer questions from your files.
6
6
 
7
- - your files stay on your machine
8
- - you choose the local agent
9
- - you decide what must be true
10
-
11
- Agents start missing things when a task spans PDFs, charts, and several files in one folder. That usually shows up when the job depends on:
12
-
13
- - reading reports and filings
14
- - extracting a number from a chart
15
- - understanding what is inside a folder before doing work
16
- - pulling context together across several files
17
- - checking the raw source when the answer has to be exact
18
-
19
- Raw filesystem data often looks fine until it is too late. The failure shows up as a missed number, a bad comparison, or an answer that sounds confident but is wrong.
20
-
21
- That is why compilation matters. Before the agent does the real job, the folder needs preparation.
7
+ If you use Claude Code, Codex, OpenClaw, Hermes, or your own local agent setup on folders full of PDFs, docs, spreadsheets, and notes, the failure often shows up late: missed evidence, shallow analysis, bad comparisons, or answers that sound confident but are wrong.
22
8
 
23
- Interf Knowledge Compiler runs a local data pipeline with your agent as the executor. It produces a compiled workspace beside the raw files, with distilled notes and cross-file structure so the agent can understand what is in the folder, navigate it faster, and retrieve the right content without rediscovering everything from scratch.
9
+ Interf lets you write a few questions and expected answers about your files, test the raw files first if you want a baseline, build a compiled workspace on top of those files, and see whether the result actually passes.
24
10
 
25
- Interf gives you a simple loop:
26
-
27
- - point Interf at a folder
28
- - say what must be true in `interf.config.json`
29
- - run `interf benchmark` to see how your agent does on the raw folder
30
- - compile a workspace beside the raw files
31
- - run the same evals again
32
- - keep the compiled workspace only if it performs better on your evals
33
-
34
- The first output is a shared compiled workspace for the whole folder.
11
+ - your files stay on your machine
12
+ - you choose the local agent
13
+ - your raw files stay the source of truth
14
+ - Interf adds a file-based layer on top
35
15
 
36
- Create an interface only when you want a second compiled workspace for one recurring job, with narrower retrieval, job-specific outputs, and extra evals for that job.
16
+ It runs local data-processing pipelines with your agents as executors and produces a compiled workspace: a file-based layer on top of your raw files that agents can navigate, inspect, and work from.
37
17
 
38
18
  ## Quick Start
39
19
 
@@ -48,108 +28,110 @@ Install:
48
28
  npm install -g @interf/compiler
49
29
  ```
50
30
 
51
- Then run Interf in any folder:
31
+ The quickest start is the wizard:
52
32
 
53
33
  ```bash
54
34
  cd ~/my-folder
55
- interf init
56
- interf benchmark
35
+ interf
36
+ ```
37
+
38
+ If you want to see the config shape first, this is what Interf writes:
39
+
40
+ ```json
41
+ {
42
+ "workspaces": [
43
+ {
44
+ "name": "default",
45
+ "about": "General compiled workspace for the quarterly results folder.",
46
+ "checks": [
47
+ {
48
+ "question": "What full-year revenue range did the company maintain?",
49
+ "answer": "$4.8B to $5.0B in revenue."
50
+ },
51
+ {
52
+ "question": "Did gross margin improve or decline year over year?",
53
+ "answer": "Gross margin declined year over year."
54
+ }
55
+ ]
56
+ }
57
+ ]
58
+ }
59
+ ```
60
+
61
+ The root-level flow is:
62
+
63
+ ```bash
64
+ interf
57
65
  interf compile
58
- interf benchmark
66
+ interf test
59
67
  ```
60
68
 
61
- That is the whole first loop:
69
+ The first guided run can:
62
70
 
63
- - point Interf at a folder you already have
64
- - let `interf init` write the first evals in `interf.config.json`
65
- - run `interf benchmark` on the raw folder first
71
+ - save a few questions and expected answers for this folder
72
+ - run a baseline test on the raw files
66
73
  - compile the workspace
67
- - run `interf benchmark` again to see pass/fail on raw vs compiled
74
+ - run the same test against the compiled workspace
68
75
 
69
- `interf init` chooses your local agent, can draft `interf.config.json` if it is missing, and can attach the current folder right away. It does not move or replace your files.
76
+ That gives you three concrete things:
70
77
 
71
- The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
78
+ - `interf/workspaces/default/` with the compiled workspace for your files
79
+ - `interf/benchmarks/runs/...` with the saved test result
80
+ - a pass/fail score on the same questions and expected answers you wrote
72
81
 
73
- If Interf cannot find your local agent or compile setup, run:
82
+ If `interf.config.json` is missing, `interf` or `interf init` can draft it with you before the first compile. If Interf cannot find your local agent or compile setup, run:
74
83
 
75
84
  ```bash
76
85
  interf doctor
77
86
  ```
78
87
 
79
- Fastest sample loop:
80
-
81
- ```bash
82
- cp -r examples/benchmark-demo /tmp/interf-demo
83
- cd /tmp/interf-demo
84
- interf init
85
- interf benchmark
86
- interf compile
87
- interf benchmark
88
- ```
88
+ The first flow is:
89
89
 
90
- If you want a second compiled workspace shaped for one recurring job, add an interface:
90
+ - write down a few questions your agent should be able to answer from your files
91
+ - let `interf` or `interf init` save those checks in `interf.config.json`
92
+ - optionally run a baseline test on the raw files
93
+ - run `interf compile` to build the compiled workspace
94
+ - run `interf test` to test the raw files, the compiled workspace, or both
95
+ - only create another workspace if you want a separate compiled setup with its own checks
96
+ - if needed, rerun compile or use the advanced retry path until it is good enough
91
97
 
92
- ```bash
93
- interf create interface
94
- interf compile
95
- interf benchmark
96
- ```
98
+ ## Why This Approach
97
99
 
98
- An interface is useful when the shared workspace is still too broad and you want:
100
+ Interf is built around a few simple design principles:
99
101
 
100
- - narrower retrieval for one job
101
- - job-specific outputs
102
- - extra evals on top of the shared baseline
102
+ - `Explicit`: the output is visible and inspectable, not hidden memory
103
+ - `Local`: your files stay on your machine
104
+ - `File over app`: the output is just files, so you can use your editor, Unix tools, Obsidian, or your own software on top
105
+ - `BYOAI`: use Claude Code, Codex, OpenClaw, Hermes, or your own model
103
106
 
104
- ## Start With One Small Eval
107
+ Interf does not replace your data with an opaque store. It keeps the raw files in place and adds a file-based layer on top for agents.
105
108
 
106
- `interf.config.json` is where you write what must be true.
109
+ Sample flow:
107
110
 
108
- If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
111
+ ```bash
112
+ cp -r examples/benchmark-demo /tmp/interf-demo
113
+ cd /tmp/interf-demo
114
+ interf
115
+ interf compile
116
+ interf test
117
+ ```
109
118
 
110
- Use it for:
119
+ ## Start With A Few Questions
111
120
 
112
- - top-level `evals` for shared baseline checks
113
- - `interfaces[].evals` for task-specific additional checks
121
+ `interf.config.json` is where you write the questions and expected answers for a folder.
114
122
 
115
- Both live in the same root `interf.config.json`.
123
+ That file uses one `workspaces` array:
116
124
 
117
- Example shape:
125
+ - most folders only need one workspace
126
+ - add another workspace only if you want a separate compiled setup with different checks
127
+ - each workspace carries its own `checks`
118
128
 
119
- Top-level `evals` are shared baseline checks for the workspace and every interface. Each entry in `interfaces` adds extra checks for one dedicated job.
129
+ If the file is missing, `interf init` can draft it with you before the first compile. You can edit it any time.
120
130
 
121
- ```json
122
- {
123
- "evals": [
124
- {
125
- "question": "What changed in full-year guidance?",
126
- "answer": "Full-year guidance was maintained at $4.8B to $5.0B in revenue."
127
- },
128
- {
129
- "question": "Did gross margin improve or decline year over year?",
130
- "answer": "Gross margin declined year over year."
131
- }
132
- ],
133
- "interfaces": [
134
- {
135
- "name": "operator-briefing",
136
- "about": "Prepare tomorrow's operator briefing from the quarterly results folder.",
137
- "evals": [
138
- {
139
- "question": "What revenue range did the company maintain for full-year guidance?",
140
- "answer": "$4.8B to $5.0B in revenue."
141
- },
142
- {
143
- "question": "What should the operator pay attention to next quarter?",
144
- "answer": "Watch guidance, gross margin, and any demand changes mentioned in the report."
145
- }
146
- ]
147
- }
148
- ]
149
- }
150
- ```
131
+ The example config shown in Quick Start is just `interf.config.json`.
132
+ Advanced retry settings do not live there.
151
133
 
152
- Good first evals are small and practical:
134
+ Good first checks are small and practical:
153
135
 
154
136
  - one exact number from a chart, table, or filing
155
137
  - one short statement that should be true or false
@@ -158,158 +140,181 @@ Good first evals are small and practical:
158
140
  Then run:
159
141
 
160
142
  ```bash
161
- interf benchmark
162
143
  interf compile
163
- interf benchmark
144
+ interf test
164
145
  ```
165
146
 
166
- If the benchmark does not show an improvement over raw files, keep iterating on evals or workflow choice first. Use the experiment loop below only when you want the advanced automated path.
147
+ ## What `interf test` Compares
167
148
 
168
- ## Compare Three Things
149
+ `interf test` scores either the raw files, a compiled workspace, or both on the same saved questions and expected answers.
169
150
 
170
- Compare:
151
+ It lets you answer a few simple questions:
171
152
 
172
- 1. the raw folder
173
- 2. the workspace
174
- 3. an interface for one specific job
153
+ - what is the current baseline on the raw files?
154
+ - does this compiled workspace improve on that baseline?
155
+ - which compiled workspace or workflow performs better on the same folder?
156
+ - does a separate workspace with different checks work better for that job?
175
157
 
176
- `interf benchmark` runs the same evals against each one and saves a pass/fail report.
158
+ By default it loads checks from `interf.config.json`, can run a raw baseline in an isolated raw-files sandbox, can test eligible compiled workspaces under `interf/workspaces/`, and saves the run under `interf/benchmarks/runs/`.
177
159
 
178
- That gives you one clear question:
160
+ If you run `interf test` from inside a workspace, it uses that workspace's checks and tests that workspace. If you run it from the source folder, it lets you choose a saved workspace and then choose raw files, the compiled workspace, or both.
179
161
 
180
- - is the raw folder enough?
181
- - does the workspace retrieve better?
182
- - does a dedicated interface do better than both?
162
+ Live test runs use an isolated sandbox. For raw baselines, Interf gives the agent sanitized raw files only. For compiled-workspace tests, it gives the agent a copied workspace plus sanitized raw files. The source-folder control plane, `interf.config.json`, and saved test runs are not part of those sandboxes.
183
163
 
184
- ## What `interf compile` Actually Does
164
+ If you need repeated isolated experiments across workflows or models, use the advanced eval-pack runner in [docs/eval-loop.md](./docs/eval-loop.md).
185
165
 
186
- `interf compile` runs a workflow over your folder.
166
+ ## What `interf compile` Does
187
167
 
188
- That workflow is the compilation pipeline:
168
+ `interf compile` runs the Interf data-processing pipeline over your files.
189
169
 
190
- - read the files
191
- - write processed notes and navigation files
192
- - build the workspace your agent can use
193
- - optionally build an interface for one specific job
170
+ By default, that means:
194
171
 
195
- The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
172
+ - summarize the source files into per-file evidence notes
173
+ - structure the cross-file knowledge layer into entities, claims, and indexes
174
+ - shape the final workspace around its saved focus and questions
196
175
 
197
- ## Experiment Loop
176
+ In other words, the built-in workflow is:
198
177
 
199
- Interf Knowledge Compiler also supports an advanced experiment loop above compile + benchmark.
178
+ 1. `summarize`
179
+ 2. `structure`
180
+ 3. `shape`
200
181
 
201
- This is the eval-first part of the product. You give Interf the folder and the evals that must pass. Interf keeps running controlled compile + benchmark attempts against that same truth surface until it either gets a working result or runs out of attempts.
182
+ In public docs, `pipeline` is the thing Interf runs. `workflow` is the saved method that defines or customizes that pipeline.
202
183
 
203
- Each attempt reruns the compilation workflow, reruns the benchmark, and records what changed. It stops when:
184
+ The default workflow is built in. If you want a different method, you can define your own workflow package and benchmark it on the same folder.
204
185
 
205
- - the evals pass
206
- - or the experiment budget is exhausted
186
+ ## What Gets Created
207
187
 
208
- In practice, that means:
188
+ After compile, Interf writes into `./interf/` beside your source files.
209
189
 
210
- - `retry_policy.max_attempts_per_profile` controls how many experiment attempts each compile profile gets
211
- - stronger diagnostic profiles can be used only after the default ones fail
212
- - the loop is still judged on the same eval truth from your folder
213
- - failure summaries can be captured between attempts for diagnosis
190
+ - `interf/workspaces/<name>/` is a compiled workspace over the folder
191
+ - `interf/benchmarks/runs/...` stores saved test runs
214
192
 
215
- Today that advanced path is configured through eval packs and explained in the deeper docs. The workflow is the part that changes. The experiment loop is the controller that keeps trying workflows and profiles against the same evals with a fixed attempt budget.
193
+ Inside those workspaces you will see things like:
216
194
 
217
- Use the simple loop first. Use the experiment loop when you want Interf to keep improving the local compilation workflow until the workspace is ready for your task or the attempt budget runs out.
195
+ - summaries of source files
196
+ - navigation notes and entrypoints for agents
197
+ - cross-file knowledge notes
198
+ - workspace-specific outputs when you define a separate job-focused workspace
218
199
 
219
- ## Use It With Your Agent
200
+ The compiled workspace is just a normal folder. Open it in your editor, in your agent, or in Obsidian if you want the graph view.
220
201
 
221
- If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this loop for you.
202
+ If you use Obsidian, open `interf/workspaces/<name>/` as the vault for the compiled workspace.
222
203
 
223
- Paste something like this into Claude Code, Codex, OpenClaw, or Hermes:
204
+ ## Terminology
224
205
 
225
- ```text
226
- Install @interf/compiler, run `interf init` in this folder, and use the local agent executor.
206
+ Public terms:
227
207
 
228
- If `interf.config.json` is missing, draft evals for what must be true for this task and ask me to confirm them.
208
+ - `your files` = the source folder Interf reads from
209
+ - `questions and expected answers` = the checks you want your agent to pass
210
+ - `checks` = the pass/fail questions each workspace should satisfy
211
+ - `test` = run the saved questions and get a score
212
+ - `compiled workspace` = the output Interf produces on top of a folder
213
+ - `workspace` = one compiled setup with its own checks
229
214
 
230
- Then run `interf benchmark`, `interf compile`, and `interf benchmark` again.
215
+ Technical terms:
231
216
 
232
- Tell me whether the processed workspace beat raw files, and only recommend it if it did.
233
- ```
217
+ - `source folder` = the raw files Interf reads from
218
+ - `benchmark` = the technical alias and saved-run layer behind `interf test`
219
+ - `workflow` = the saved method that defines or customizes the pipeline
220
+ - `.interf/` = runtime state, proofs, and health artifacts
234
221
 
235
- That is the basic loop:
222
+ ## Advanced: Separate Workspaces
236
223
 
237
- - the user or agent defines what must be true
238
- - benchmark the raw folder first
239
- - Interf prepares the compiled workspace
240
- - benchmark again and keep it only if it helped
224
+ Most folders only need one workspace.
241
225
 
242
- ## What Gets Created
226
+ Create another only when you want a different compiled setup with different checks, for example:
243
227
 
244
- After compile, Interf writes into `./interf/` beside your source files.
228
+ - general folder understanding
229
+ - finance reporting
230
+ - board prep
231
+ - diligence review
245
232
 
246
- - `interf/<name>/` is the shared workspace over the folder
247
- - `interf/<name>/interfaces/<name>/` is a task-specific workspace for one job
248
- - `interf/benchmarks/runs/...` stores saved benchmark runs
233
+ Why create another one:
249
234
 
250
- Inside those workspaces you will see things like:
235
+ - it keeps a separate set of questions and expected answers
236
+ - it gives that job its own compiled output under `interf/workspaces/<name>/`
237
+ - it lets you test that job separately
251
238
 
252
- - summaries of source files
253
- - navigation notes and entrypoints for agents
254
- - task-specific outputs for one interface
255
- - benchmark artifacts you can inspect later
239
+ ## Advanced: Keep Improving Until It Passes
240
+
241
+ Interf also supports an advanced experiment path on top of the normal build + test flow.
256
242
 
257
- In the CLI, the main Interf workspace is called a **knowledge base**. A task-specific workspace inside it is called an **interface**.
243
+ Give it the same folder and the same checks. Interf can keep rerunning compile + test attempts until the test passes or the attempt budget runs out.
258
244
 
259
- Conceptually, the compiled workspace is a knowledge representation of the folder for agents. In the product and CLI, the concrete objects are the compiled workspace, the knowledge base, and the interface.
245
+ In practice:
260
246
 
261
- If you use Obsidian, open `interf/<name>/` as the vault for the main compiled workspace. If you are working with interfaces, open the parent knowledge-base folder so links across summaries, knowledge notes, and interfaces keep resolving.
247
+ - `retry_policy.max_attempts_per_profile` controls how many attempts each compile profile gets
248
+ - stronger diagnostic profiles can be held in reserve and tried only after the default ones fail
249
+ - the checks stay the same across every attempt
250
+ - each attempt records what changed
251
+
252
+ Example eval-pack shape:
262
253
 
263
- ## What An Interface Is
254
+ ```jsonc
255
+ {
256
+ "workspaces": [
257
+ {
258
+ "name": "default",
259
+ "checks": [
260
+ {
261
+ "question": "What full-year revenue range did the company maintain?",
262
+ "answer": "$4.8B to $5.0B in revenue."
263
+ }
264
+ ]
265
+ }
266
+ ],
267
+ // Advanced only: retry settings live in eval packs, not in interf.config.json.
268
+ "retry_policy": {
269
+ "max_attempts_per_profile": 3
270
+ }
271
+ }
272
+ ```
264
273
 
265
- Start with one workspace.
274
+ Today this lives in the advanced eval-pack runner. It is not configured in `interf.config.json`, and it is not exposed as a top-level `interf compile --max-retries` flag.
266
275
 
267
- An interface is a second compiled workspace for one recurring job.
276
+ Use the normal test flow first. Use this advanced path when you want Interf to keep improving the local preparation workflow until the workspace is good enough for your task or the attempt budget runs out. It spends more tokens, so use it when that extra spend is worth the accuracy target.
268
277
 
269
- The main compiled workspace is the shared layer for the whole folder. Use it for broad questions such as:
278
+ ## Use It With Your Agent
270
279
 
271
- - what is in this folder?
272
- - what changed?
273
- - where is the source evidence?
280
+ If you already work through Claude Code, Codex, OpenClaw, or Hermes, the agent can run this process for you.
274
281
 
275
- Create an interface when that broad layer is no longer enough and the work becomes a repeatable job, for example:
282
+ Paste something like this into your agent:
276
283
 
277
- - prepare tomorrow's operator briefing
278
- - run diligence on this deal room
279
- - extract chart values from this report set
280
- - answer one recurring research question set
284
+ ```text
285
+ Install @interf/compiler, run `interf` in this folder, and use the local agent executor.
281
286
 
282
- Why create one:
287
+ If `interf.config.json` is missing, draft one workspace with a few checks this agent should be able to answer from these files and add the expected answers for me to confirm.
283
288
 
284
- - it narrows what the agent should retrieve for that job
285
- - it writes job-specific outputs on top of the shared workspace
286
- - it lets you add extra task-specific evals on top of the shared evals
289
+ Then run a raw baseline if helpful, compile the workspace, and run `interf test`.
287
290
 
288
- If the shared workspace is already enough for the job, do not create an interface yet.
291
+ Tell me whether the compiled workspace passes the checks, and only recommend it if it does.
292
+ ```
289
293
 
290
294
  ## Custom Workflows
291
295
 
292
296
  Interf ships with a default workflow.
293
297
 
294
- If you want to change how compilation happens on your data, this is the part you customize:
298
+ If you want to change how the data-processing pipeline runs on your files, this is the part you customize:
295
299
 
296
300
  ```bash
297
301
  interf create workflow
298
302
  interf verify workflow --path <path>
299
303
  ```
300
304
 
301
- Then benchmark that workflow on the same folder and the same evals.
305
+ Then benchmark that workflow on the same folder and the same checks.
302
306
 
303
307
  Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
304
308
 
305
309
  ## Core Commands
306
310
 
307
- - `interf init` = choose your local executor and optionally attach the current folder
308
- - `interf create knowledge-base` = create the shared processed workspace for this folder
309
- - `interf create interface` = create a task-specific workspace on top
311
+ - `interf` = open the root-folder wizard
312
+ - `interf init` = alias for the root-folder wizard
313
+ - `interf create workspace` = create another compiled workspace when you need one
310
314
  - `interf create workflow` = create a reusable local workflow package
311
- - `interf compile` = build the current workspace
312
- - `interf benchmark` = compare raw files vs processed workspaces on your evals
315
+ - `interf compile` = build a selected workspace for the current folder
316
+ - `interf test` = test the raw files, a compiled workspace, or both on saved checks
317
+ - `interf benchmark` = alias for `interf test`
313
318
  - `interf doctor` = check local executor setup
314
319
  - `interf verify <check>` = run deterministic checks on major workflow steps
315
320
  - `interf reset <scope>` = remove generated state while keeping source files
@@ -319,7 +324,7 @@ Workflow package docs live in [docs/workflow-spec.md](./docs/workflow-spec.md).
319
324
  - [docs/workflow-spec.md](./docs/workflow-spec.md) for custom workflow packages
320
325
  - [docs/runtime-contract.md](./docs/runtime-contract.md) for the exact on-disk contract
321
326
  - [docs/architecture.md](./docs/architecture.md) for the deeper system model
322
- - [docs/eval-loop.md](./docs/eval-loop.md) for the advanced benchmark and experiment loop
327
+ - [docs/eval-loop.md](./docs/eval-loop.md) for advanced eval-pack experiments across workflows and models
323
328
 
324
329
  Maintainers should use [CONTRIBUTING.md](./CONTRIBUTING.md) for test and release gates.
325
330
 
package/dist/bin.js CHANGED
@@ -4,7 +4,7 @@ import { hideBin } from "yargs/helpers";
4
4
  import { initCommand } from "./commands/init.js";
5
5
  import { createCommand } from "./commands/create.js";
6
6
  import { compileCommand } from "./commands/compile.js";
7
- import { benchmarkCommand } from "./commands/benchmark.js";
7
+ import { testCommand } from "./commands/test.js";
8
8
  import { doctorCommand } from "./commands/doctor.js";
9
9
  import { listCommand } from "./commands/list.js";
10
10
  import { statusCommand } from "./commands/status.js";
@@ -15,9 +15,9 @@ yargs(hideBin(process.argv))
15
15
  .scriptName("interf")
16
16
  .command(defaultCommand)
17
17
  .command(initCommand)
18
- .command(createCommand)
19
18
  .command(compileCommand)
20
- .command(benchmarkCommand)
19
+ .command(testCommand)
20
+ .command(createCommand)
21
21
  .command(doctorCommand)
22
22
  .command(listCommand)
23
23
  .command(statusCommand)
package/dist/bin.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"bin.js","sourceRoot":"","sources":["../src/bin.ts"],"names":[],"mappings":";AACA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACzB,UAAU,CAAC,QAAQ,CAAC;KACpB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,gBAAgB,CAAC;KACzB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,YAAY,CAAC;KACrB,MAAM,EAAE;KACR,IAAI,EAAE;KACN,OAAO,EAAE;KACT,KAAK,EAAE,CAAC"}
1
+ {"version":3,"file":"bin.js","sourceRoot":"","sources":["../src/bin.ts"],"names":[],"mappings":";AACA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AACxC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;KACzB,UAAU,CAAC,QAAQ,CAAC;KACpB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,YAAY,CAAC;KACrB,MAAM,EAAE;KACR,IAAI,EAAE;KACN,OAAO,EAAE;KACT,KAAK,EAAE,CAAC"}
@@ -1,3 +1,4 @@
1
1
  import type { CommandModule } from "yargs";
2
2
  export declare const compileCommand: CommandModule;
3
+ export declare function runCompileCommand(argv?: Record<string, unknown>): Promise<void>;
3
4
  //# sourceMappingURL=compile.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAoBA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAO3C,eAAO,MAAM,cAAc,EAAE,aAiG5B,CAAC"}
1
+ {"version":3,"file":"compile.d.ts","sourceRoot":"","sources":["../../src/commands/compile.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAY3C,eAAO,MAAM,cAAc,EAAE,aAO5B,CAAC;AAEF,wBAAsB,iBAAiB,CAAC,IAAI,GAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAwBzF"}