@interf/compiler 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283)
  1. package/README.md +254 -136
  2. package/dist/commands/benchmark.d.ts.map +1 -1
  3. package/dist/commands/benchmark.js +65 -84
  4. package/dist/commands/benchmark.js.map +1 -1
  5. package/dist/commands/compile.d.ts.map +1 -1
  6. package/dist/commands/compile.js +19 -3
  7. package/dist/commands/compile.js.map +1 -1
  8. package/dist/commands/create.d.ts +3 -0
  9. package/dist/commands/create.d.ts.map +1 -1
  10. package/dist/commands/create.js +34 -9
  11. package/dist/commands/create.js.map +1 -1
  12. package/dist/commands/default.d.ts.map +1 -1
  13. package/dist/commands/default.js +2 -0
  14. package/dist/commands/default.js.map +1 -1
  15. package/dist/commands/init.d.ts.map +1 -1
  16. package/dist/commands/init.js +3 -2
  17. package/dist/commands/init.js.map +1 -1
  18. package/dist/index.d.ts +11 -29
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +7 -16
  21. package/dist/index.js.map +1 -1
  22. package/dist/lib/agent-args.d.ts +4 -0
  23. package/dist/lib/agent-args.d.ts.map +1 -0
  24. package/dist/lib/agent-args.js +42 -0
  25. package/dist/lib/agent-args.js.map +1 -0
  26. package/dist/lib/agent-constants.d.ts +6 -0
  27. package/dist/lib/agent-constants.d.ts.map +1 -0
  28. package/dist/lib/agent-constants.js +29 -0
  29. package/dist/lib/agent-constants.js.map +1 -0
  30. package/dist/lib/agent-detection.d.ts +8 -0
  31. package/dist/lib/agent-detection.d.ts.map +1 -0
  32. package/dist/lib/agent-detection.js +66 -0
  33. package/dist/lib/agent-detection.js.map +1 -0
  34. package/dist/lib/agent-execution.d.ts +3 -0
  35. package/dist/lib/agent-execution.d.ts.map +1 -0
  36. package/dist/lib/agent-execution.js +207 -0
  37. package/dist/lib/agent-execution.js.map +1 -0
  38. package/dist/lib/agent-logs.d.ts +3 -0
  39. package/dist/lib/agent-logs.d.ts.map +1 -0
  40. package/dist/lib/agent-logs.js +18 -0
  41. package/dist/lib/agent-logs.js.map +1 -0
  42. package/dist/lib/agent-preflight.d.ts +8 -0
  43. package/dist/lib/agent-preflight.d.ts.map +1 -0
  44. package/dist/lib/agent-preflight.js +77 -0
  45. package/dist/lib/agent-preflight.js.map +1 -0
  46. package/dist/lib/agent-render.d.ts +9 -0
  47. package/dist/lib/agent-render.d.ts.map +1 -0
  48. package/dist/lib/agent-render.js +219 -0
  49. package/dist/lib/agent-render.js.map +1 -0
  50. package/dist/lib/agent-status.d.ts +4 -0
  51. package/dist/lib/agent-status.d.ts.map +1 -0
  52. package/dist/lib/agent-status.js +59 -0
  53. package/dist/lib/agent-status.js.map +1 -0
  54. package/dist/lib/agent-types.d.ts +31 -0
  55. package/dist/lib/agent-types.d.ts.map +1 -0
  56. package/dist/lib/agent-types.js +2 -0
  57. package/dist/lib/agent-types.js.map +1 -0
  58. package/dist/lib/agents.d.ts +7 -49
  59. package/dist/lib/agents.d.ts.map +1 -1
  60. package/dist/lib/agents.js +8 -554
  61. package/dist/lib/agents.js.map +1 -1
  62. package/dist/lib/benchmark-execution.d.ts +9 -0
  63. package/dist/lib/benchmark-execution.d.ts.map +1 -0
  64. package/dist/lib/benchmark-execution.js +488 -0
  65. package/dist/lib/benchmark-execution.js.map +1 -0
  66. package/dist/lib/benchmark-paths.d.ts +11 -0
  67. package/dist/lib/benchmark-paths.d.ts.map +1 -0
  68. package/dist/lib/benchmark-paths.js +38 -0
  69. package/dist/lib/benchmark-paths.js.map +1 -0
  70. package/dist/lib/benchmark-specs.d.ts +8 -0
  71. package/dist/lib/benchmark-specs.d.ts.map +1 -0
  72. package/dist/lib/benchmark-specs.js +115 -0
  73. package/dist/lib/benchmark-specs.js.map +1 -0
  74. package/dist/lib/benchmark-targets.d.ts +5 -0
  75. package/dist/lib/benchmark-targets.d.ts.map +1 -0
  76. package/dist/lib/benchmark-targets.js +72 -0
  77. package/dist/lib/benchmark-targets.js.map +1 -0
  78. package/dist/lib/benchmark-types.d.ts +19 -0
  79. package/dist/lib/benchmark-types.d.ts.map +1 -0
  80. package/dist/lib/benchmark-types.js +2 -0
  81. package/dist/lib/benchmark-types.js.map +1 -0
  82. package/dist/lib/benchmark.d.ts +4 -29
  83. package/dist/lib/benchmark.d.ts.map +1 -1
  84. package/dist/lib/benchmark.js +3 -324
  85. package/dist/lib/benchmark.js.map +1 -1
  86. package/dist/lib/bundled-templates.d.ts +5 -0
  87. package/dist/lib/bundled-templates.d.ts.map +1 -0
  88. package/dist/lib/bundled-templates.js +23 -0
  89. package/dist/lib/bundled-templates.js.map +1 -0
  90. package/dist/lib/config.d.ts +1 -0
  91. package/dist/lib/config.d.ts.map +1 -1
  92. package/dist/lib/config.js +2 -0
  93. package/dist/lib/config.js.map +1 -1
  94. package/dist/lib/eval-packs.d.ts +204 -0
  95. package/dist/lib/eval-packs.d.ts.map +1 -0
  96. package/dist/lib/eval-packs.js +177 -0
  97. package/dist/lib/eval-packs.js.map +1 -0
  98. package/dist/lib/execution-profile.d.ts +18 -0
  99. package/dist/lib/execution-profile.d.ts.map +1 -0
  100. package/dist/lib/execution-profile.js +85 -0
  101. package/dist/lib/execution-profile.js.map +1 -0
  102. package/dist/lib/interf-bootstrap.d.ts +4 -0
  103. package/dist/lib/interf-bootstrap.d.ts.map +1 -1
  104. package/dist/lib/interf-bootstrap.js +71 -68
  105. package/dist/lib/interf-bootstrap.js.map +1 -1
  106. package/dist/lib/interf-compile-plan.d.ts +12 -0
  107. package/dist/lib/interf-compile-plan.d.ts.map +1 -0
  108. package/dist/lib/interf-compile-plan.js +143 -0
  109. package/dist/lib/interf-compile-plan.js.map +1 -0
  110. package/dist/lib/interf-detect.d.ts.map +1 -1
  111. package/dist/lib/interf-detect.js +11 -10
  112. package/dist/lib/interf-detect.js.map +1 -1
  113. package/dist/lib/interf-scaffold.d.ts +1 -10
  114. package/dist/lib/interf-scaffold.d.ts.map +1 -1
  115. package/dist/lib/interf-scaffold.js +25 -362
  116. package/dist/lib/interf-scaffold.js.map +1 -1
  117. package/dist/lib/interf-workflow-package.d.ts +4 -0
  118. package/dist/lib/interf-workflow-package.d.ts.map +1 -0
  119. package/dist/lib/interf-workflow-package.js +131 -0
  120. package/dist/lib/interf-workflow-package.js.map +1 -0
  121. package/dist/lib/interf.d.ts +2 -1
  122. package/dist/lib/interf.d.ts.map +1 -1
  123. package/dist/lib/interf.js +2 -1
  124. package/dist/lib/interf.js.map +1 -1
  125. package/dist/lib/local-workflows.d.ts.map +1 -1
  126. package/dist/lib/local-workflows.js +8 -12
  127. package/dist/lib/local-workflows.js.map +1 -1
  128. package/dist/lib/logger.d.ts +4 -0
  129. package/dist/lib/logger.d.ts.map +1 -0
  130. package/dist/lib/logger.js +11 -0
  131. package/dist/lib/logger.js.map +1 -0
  132. package/dist/lib/obsidian.d.ts.map +1 -1
  133. package/dist/lib/obsidian.js +7 -3
  134. package/dist/lib/obsidian.js.map +1 -1
  135. package/dist/lib/parse.d.ts +2 -2
  136. package/dist/lib/parse.d.ts.map +1 -1
  137. package/dist/lib/parse.js +11 -7
  138. package/dist/lib/parse.js.map +1 -1
  139. package/dist/lib/registry.js +3 -3
  140. package/dist/lib/registry.js.map +1 -1
  141. package/dist/lib/runtime-acceptance.d.ts +4 -0
  142. package/dist/lib/runtime-acceptance.d.ts.map +1 -0
  143. package/dist/lib/runtime-acceptance.js +123 -0
  144. package/dist/lib/runtime-acceptance.js.map +1 -0
  145. package/dist/lib/runtime-contracts.d.ts +4 -0
  146. package/dist/lib/runtime-contracts.d.ts.map +1 -0
  147. package/dist/lib/runtime-contracts.js +63 -0
  148. package/dist/lib/runtime-contracts.js.map +1 -0
  149. package/dist/lib/runtime-paths.d.ts +8 -0
  150. package/dist/lib/runtime-paths.d.ts.map +1 -0
  151. package/dist/lib/runtime-paths.js +28 -0
  152. package/dist/lib/runtime-paths.js.map +1 -0
  153. package/dist/lib/runtime-prompt.d.ts +3 -0
  154. package/dist/lib/runtime-prompt.d.ts.map +1 -0
  155. package/dist/lib/runtime-prompt.js +59 -0
  156. package/dist/lib/runtime-prompt.js.map +1 -0
  157. package/dist/lib/runtime-reconcile.d.ts +6 -0
  158. package/dist/lib/runtime-reconcile.d.ts.map +1 -0
  159. package/dist/lib/runtime-reconcile.js +339 -0
  160. package/dist/lib/runtime-reconcile.js.map +1 -0
  161. package/dist/lib/runtime-runs.d.ts +12 -0
  162. package/dist/lib/runtime-runs.d.ts.map +1 -0
  163. package/dist/lib/runtime-runs.js +337 -0
  164. package/dist/lib/runtime-runs.js.map +1 -0
  165. package/dist/lib/runtime-types.d.ts +42 -0
  166. package/dist/lib/runtime-types.d.ts.map +1 -0
  167. package/dist/lib/runtime-types.js +2 -0
  168. package/dist/lib/runtime-types.js.map +1 -0
  169. package/dist/lib/runtime.d.ts +6 -58
  170. package/dist/lib/runtime.d.ts.map +1 -1
  171. package/dist/lib/runtime.js +5 -614
  172. package/dist/lib/runtime.js.map +1 -1
  173. package/dist/lib/schema.d.ts +156 -13
  174. package/dist/lib/schema.d.ts.map +1 -1
  175. package/dist/lib/schema.js +113 -4
  176. package/dist/lib/schema.js.map +1 -1
  177. package/dist/lib/source-config.d.ts +13 -0
  178. package/dist/lib/source-config.d.ts.map +1 -0
  179. package/dist/lib/source-config.js +75 -0
  180. package/dist/lib/source-config.js.map +1 -0
  181. package/dist/lib/state-artifacts.d.ts +15 -0
  182. package/dist/lib/state-artifacts.d.ts.map +1 -0
  183. package/dist/lib/state-artifacts.js +24 -0
  184. package/dist/lib/state-artifacts.js.map +1 -0
  185. package/dist/lib/state-health.d.ts +9 -0
  186. package/dist/lib/state-health.d.ts.map +1 -0
  187. package/dist/lib/state-health.js +330 -0
  188. package/dist/lib/state-health.js.map +1 -0
  189. package/dist/lib/state-io.d.ts +15 -0
  190. package/dist/lib/state-io.d.ts.map +1 -0
  191. package/dist/lib/state-io.js +219 -0
  192. package/dist/lib/state-io.js.map +1 -0
  193. package/dist/lib/state-paths.d.ts +5 -0
  194. package/dist/lib/state-paths.d.ts.map +1 -0
  195. package/dist/lib/state-paths.js +19 -0
  196. package/dist/lib/state-paths.js.map +1 -0
  197. package/dist/lib/state-view.d.ts +7 -0
  198. package/dist/lib/state-view.d.ts.map +1 -0
  199. package/dist/lib/state-view.js +147 -0
  200. package/dist/lib/state-view.js.map +1 -0
  201. package/dist/lib/state.d.ts +6 -46
  202. package/dist/lib/state.d.ts.map +1 -1
  203. package/dist/lib/state.js +5 -632
  204. package/dist/lib/state.js.map +1 -1
  205. package/dist/lib/summarize-plan.d.ts +1 -0
  206. package/dist/lib/summarize-plan.d.ts.map +1 -1
  207. package/dist/lib/summarize-plan.js +10 -0
  208. package/dist/lib/summarize-plan.js.map +1 -1
  209. package/dist/lib/user-config.js +2 -2
  210. package/dist/lib/user-config.js.map +1 -1
  211. package/dist/lib/validate-helpers.d.ts +21 -0
  212. package/dist/lib/validate-helpers.d.ts.map +1 -0
  213. package/dist/lib/validate-helpers.js +72 -0
  214. package/dist/lib/validate-helpers.js.map +1 -0
  215. package/dist/lib/validate-interface.d.ts +79 -0
  216. package/dist/lib/validate-interface.d.ts.map +1 -0
  217. package/dist/lib/validate-interface.js +535 -0
  218. package/dist/lib/validate-interface.js.map +1 -0
  219. package/dist/lib/validate-kb.d.ts +81 -0
  220. package/dist/lib/validate-kb.d.ts.map +1 -0
  221. package/dist/lib/validate-kb.js +252 -0
  222. package/dist/lib/validate-kb.js.map +1 -0
  223. package/dist/lib/validate.d.ts +17 -146
  224. package/dist/lib/validate.d.ts.map +1 -1
  225. package/dist/lib/validate.js +33 -709
  226. package/dist/lib/validate.js.map +1 -1
  227. package/dist/lib/workflow-definitions.d.ts +1 -1
  228. package/dist/lib/workflow-definitions.d.ts.map +1 -1
  229. package/dist/lib/workflow-definitions.js +90 -166
  230. package/dist/lib/workflow-definitions.js.map +1 -1
  231. package/dist/lib/workflow-helpers.d.ts.map +1 -1
  232. package/dist/lib/workflow-helpers.js +6 -3
  233. package/dist/lib/workflow-helpers.js.map +1 -1
  234. package/dist/lib/workflow-stage-runner.d.ts +41 -0
  235. package/dist/lib/workflow-stage-runner.d.ts.map +1 -0
  236. package/dist/lib/workflow-stage-runner.js +106 -0
  237. package/dist/lib/workflow-stage-runner.js.map +1 -0
  238. package/dist/lib/workflow-starter-docs.d.ts +9 -0
  239. package/dist/lib/workflow-starter-docs.d.ts.map +1 -0
  240. package/dist/lib/workflow-starter-docs.js +18 -0
  241. package/dist/lib/workflow-starter-docs.js.map +1 -0
  242. package/dist/lib/workflows-interface-contracts.d.ts +24 -0
  243. package/dist/lib/workflows-interface-contracts.d.ts.map +1 -0
  244. package/dist/lib/workflows-interface-contracts.js +304 -0
  245. package/dist/lib/workflows-interface-contracts.js.map +1 -0
  246. package/dist/lib/workflows-interface.d.ts +3 -10
  247. package/dist/lib/workflows-interface.d.ts.map +1 -1
  248. package/dist/lib/workflows-interface.js +117 -365
  249. package/dist/lib/workflows-interface.js.map +1 -1
  250. package/dist/lib/workflows-kb.d.ts.map +1 -1
  251. package/dist/lib/workflows-kb.js +79 -55
  252. package/dist/lib/workflows-kb.js.map +1 -1
  253. package/dist/lib/workflows.d.ts +1 -1
  254. package/dist/lib/workflows.d.ts.map +1 -1
  255. package/dist/lib/workflows.js +1 -1
  256. package/dist/lib/workflows.js.map +1 -1
  257. package/package.json +15 -4
  258. package/skills/interface/analyze/SKILL.md +79 -28
  259. package/skills/interface/compile/SKILL.md +27 -28
  260. package/skills/interface/create/SKILL.md +53 -230
  261. package/skills/interface/create/references/compile-plan-format.md +31 -31
  262. package/skills/interface/create/references/workflows.md +17 -32
  263. package/skills/interface/query/SKILL.md +15 -1
  264. package/skills/interface/retrieve/SKILL.md +32 -65
  265. package/skills/knowledge-base/compile/SKILL.md +59 -83
  266. package/skills/knowledge-base/compile/references/stage-claims.md +1 -1
  267. package/skills/knowledge-base/compile/references/stage-entities.md +2 -2
  268. package/skills/knowledge-base/query/SKILL.md +13 -1
  269. package/skills/knowledge-base/summarize/SKILL.md +54 -24
  270. package/templates/interface/README.md +13 -12
  271. package/templates/interface/interfaces.md +14 -11
  272. package/templates/knowledge-base/README.md +0 -1
  273. package/templates/knowledge-base/registry.md +15 -15
  274. package/templates/workflow-package/README.md +16 -0
  275. package/templates/workflow-package/create/SKILL.md +8 -0
  276. package/templates/workflow-package/interface-query/SKILL.md +29 -0
  277. package/templates/workflow-package/interface-stage/SKILL.md +13 -0
  278. package/templates/workflow-package/knowledge-base-query/SKILL.md +36 -0
  279. package/templates/workflow-package/knowledge-base-stage/SKILL.md +13 -0
  280. package/templates/workflow-starters/interface/interf/README.md +13 -0
  281. package/templates/workflow-starters/interface/interf/create/SKILL.md +15 -0
  282. package/templates/workflow-starters/knowledge-base/interf/README.md +13 -0
  283. package/templates/workflow-starters/knowledge-base/karpathy/README.md +13 -0
package/README.md CHANGED
@@ -1,86 +1,82 @@
1
1
  # Interf
2
2
 
3
- The open-source knowledge compiler.
3
+ The open-source eval-first knowledge compiler.
4
4
 
5
- Interf compiles folders into knowledge bases and task-specific interfaces: agent-ready workspaces with proof, structure, and benchmarks.
5
+ Interf compiles a workspace beside your files for agents: a knowledge representation they can navigate, cross-check against raw source, and prove on your evals.
6
6
 
7
- - compile any folder into a knowledge base
8
- - create focused interfaces for specific tasks
9
- - run evals and benchmarks on your own files
7
+ Your files stay the truth. Interf adds a compiled workspace and benchmark proof.
10
8
 
11
- Most LLM knowledge-base repos optimize for a demo. Interf optimizes for proof. It keeps your files on disk, compiles a visible folder an agent can actually use, and makes workflows compete on your evals instead of on marketing claims.
9
+ - point it at a folder you already have
10
+ - define or confirm what must be true for the task in `interf.config.json`
11
+ - compile a shared knowledge base plus task-specific interfaces
12
+ - benchmark raw files vs compiled workspaces and keep the best result
12
13
 
13
- ## Why Interf
14
+ Most "AI knowledge base" tools optimize for a demo. Interf optimizes for proof. It keeps the raw files on disk, compiles a visible workspace your agent can use, and makes workflows compete on your evals instead of on marketing claims.
14
15
 
15
- Interf is built around three ideas:
16
+ Interf is not a chat shell, a hosted notes app, or a generic agent OS. It is the compile + benchmark loop for turning real folders into better agent workspaces and proving they help on a real task.
16
17
 
17
- - the product surface is the compiled folder, not a hidden service
18
- - the workflow should leave proof of work on disk
19
- - the method should be benchmarkable on your task
18
+ ## What Happens
20
19
 
21
- That gives you a simple loop:
22
-
23
- 1. point Interf at a folder
24
- 2. compile a knowledge base
25
- 3. create an interface for a job
26
- 4. run evals and benchmarks to see what actually works
20
+ ```text
21
+ raw folder
22
+ -> compiled workspace beside the raw files
23
+ -> benchmark proof on your evals
24
+ ```
27
25
 
28
- ## Core concepts
26
+ The compiled workspace is for agents. It gives them:
29
27
 
30
- - **Source folder**: your real files stay where they are
31
- - **Knowledge base**: `interf/{name}/`, the shared compiled layer over that folder
32
- - **Interface**: `interf/{name}/interfaces/{interface-name}/`, a task-specific surface on top of one knowledge base
33
- - **Workflow**: the reusable method package that defines the compile pipeline
34
- - **Benchmark**: running evals across compiled knowledge bases or interfaces on the same folder
28
+ - a clearer map of the data
29
+ - task-specific outputs when broad summarization is not enough
30
+ - better evidence paths back to the raw source
31
+ - proof of whether the compiled workspace actually helped
35
32
 
36
- One source folder can host multiple knowledge bases under `interf/` when you want to compare workflows like `interf` vs `karpathy` on the same data.
33
+ Interf does not replace your agent. It gives your agent a better workspace to use.
37
34
 
38
- ## Interf primitives
35
+ ## Trust Boundary
39
36
 
40
- Interf gives you a few strong primitives instead of a giant abstraction layer:
37
+ Interf keeps one trust boundary:
41
38
 
42
- - **workflow package**: `workflow.json` plus local `workflow/` docs define the method
43
- - **stage contract**: every compile stage gets a deterministic acceptance boundary
44
- - **declarative acceptance**: workflows can declare extra acceptance rules in `workflow.json`
45
- - **CLI enforcement**: the CLI checks whether a stage actually complied instead of trusting the agent's summary
46
- - **benchmark specs**: file-based evals let you compare workflows and interfaces on the same folder
39
+ - raw files in the source folder are the content truth
40
+ - `interf.config.json` is the user-approved task truth
41
+ - the compiled workspace is the generated working surface
42
+ - the benchmark result is the proof of whether that generated surface is good enough
47
43
 
48
- That is the core product promise:
44
+ That means:
49
45
 
50
- - define what the agent should do in plain English
51
- - give the agent local workspace docs and stage contracts
52
- - validate the result deterministically
46
+ - agents may draft evals
47
+ - users approve accepted task truth
48
+ - raw files remain the final source of evidence
49
+ - compiled workspaces earn trust only if they pass the benchmark
53
50
 
54
- For workflow authors, the important surface is:
51
+ ## Who It’s For
55
52
 
56
- - `workflow.json`
57
- - `workflow/create/`
58
- - `workflow/compile/stages/<stage>/`
59
- - `workflow/use/query/`
60
- - [`docs/workflow-spec.md`](./docs/workflow-spec.md)
53
+ Interf is for people already trying to get real work done with agents on real folders:
61
54
 
62
- ## What the agent sees
55
+ - Claude Code and Codex users
56
+ - OpenClaw and Hermes-style local-agent users
57
+ - technical founders, researchers, and operators with messy source folders
58
+ - teams who want to test whether compiled workspaces beat raw files on their own tasks
63
59
 
64
- The compiled folder is the agent-facing product surface: an agent-ready workspace.
60
+ If you want a generic chat UI, this is not that product.
65
61
 
66
- Important files in a KB or interface:
62
+ ## Mental Model
67
63
 
68
- - `interf.json` = what this workspace is
69
- - `AGENTS.md` = where to start and how to navigate
70
- - `workflow/` = the editable local method package
71
- - `home.md` = entry document
72
- - `summaries/`, `knowledge/`, and `briefs/` = compiled outputs
64
+ - **Source folder**: your real files stay where they are
65
+ - **Compiled workspace**: the generated workspace beside those files for agents
66
+ - **Knowledge base**: the shared compiled workspace over the folder
67
+ - **Interface**: the task-specific compiled workspace for one job
68
+ - **Workflow**: the reusable compile method
69
+ - **Eval**: what must be true for the task
70
+ - **Benchmark**: the proof loop that compares raw and compiled results
73
71
 
74
- Manual query/use works like this:
72
+ One source folder can host multiple knowledge bases under `interf/` if you want to compare workflows on the same data.
75
73
 
76
- - open the KB or interface folder
77
- - read `AGENTS.md`
78
- - follow `workflow/use/query/SKILL.md`
79
- - for interfaces, use local interface artifacts first, then the parent KB loop, then raw files if needed
74
+ ## Install
80
75
 
81
- Interf does not require globally installed slash skills for workspace behavior. Local `workflow/.../SKILL.md` files are workspace instruction docs routed by `AGENTS.md` and stage contracts.
76
+ Requirements:
82
77
 
83
- ## Quick start
78
+ - Node.js 20+
79
+ - one local coding agent executor: Claude Code or Codex
84
80
 
85
81
  Install the published package:
86
82
 
@@ -88,29 +84,31 @@ Install the published package:
88
84
  npm install -g @interf/compiler
89
85
  ```
90
86
 
91
- Or install from source while contributing:
87
+ Sanity check the local setup:
92
88
 
93
89
  ```bash
94
- npm install
95
- npm run build
96
- npm install -g .
90
+ interf doctor
97
91
  ```
98
92
 
93
+ If you already use Claude Code or Codex locally, that is the intended path. Interf uses your local agent as the executor for compile and benchmark runs.
94
+
95
+ ## Quick Start
96
+
99
97
  Initialize Interf in any folder:
100
98
 
101
99
  ```bash
102
- cd ~/my-notes
100
+ cd ~/my-folder
103
101
  interf init
104
102
  ```
105
103
 
106
104
  That flow can:
107
105
 
108
- - choose an executor like Claude Code or Codex
109
- - optionally install global helper skills
110
- - attach the current folder as a knowledge base
111
- - compile the knowledge base immediately
106
+ 1. choose an executor like Claude Code or Codex
107
+ 2. optionally install helper skills
108
+ 3. attach the current folder as a knowledge base
109
+ 4. optionally compile the knowledge base immediately
112
110
 
113
- Then you can:
111
+ Then:
114
112
 
115
113
  ```bash
116
114
  interf create interface
@@ -118,11 +116,173 @@ interf compile
118
116
  interf benchmark
119
117
  ```
120
118
 
121
- ## Example layout
119
+ Fastest way to see the full loop:
120
+
121
+ ```bash
122
+ cp -r examples/benchmark-demo /tmp/benchmark-demo
123
+ cd /tmp/benchmark-demo
124
+ interf init
125
+ interf compile
126
+ interf benchmark
127
+ ```
128
+
129
+ What success looks like on disk:
130
+
131
+ - `interf/<kb>/` = shared compiled workspace over the folder
132
+ - `interf/<kb>/interfaces/<name>/` = task-specific compiled workspace
133
+ - `interf/benchmarks/runs/...` = saved benchmark evidence for that folder
134
+
135
+ ## 5-Minute Example
136
+
137
+ Try the full loop on the shipped sample folder:
138
+
139
+ ```bash
140
+ cp -r examples/benchmark-demo /tmp/interf-demo
141
+ cd /tmp/interf-demo
142
+ interf init
143
+ interf compile
144
+ interf benchmark
145
+ ```
146
+
147
+ This sample already includes an `interf.config.json`, so you can see the compile and benchmark loop without writing your own evals first.
148
+
149
+ ## Simple Eval Example
150
+
151
+ The default public eval file is `interf.config.json` at the source-folder root.
152
+
153
+ Minimal example:
154
+
155
+ ```json
156
+ {
157
+ "interfaces": [
158
+ {
159
+ "name": "weekly-briefing",
160
+ "about": "Summarize what changed, why it matters, and what to do next.",
161
+ "evals": [
162
+ {
163
+ "question": "From the compiled interface only, what changed and what should the operator do next?",
164
+ "answer": "A good answer says what changed, why it matters, and the next action.",
165
+ "strictness": "approximate"
166
+ }
167
+ ]
168
+ }
169
+ ]
170
+ }
171
+ ```
172
+
173
+ That is enough to start. You do not need a large benchmark harness to use Interf:
174
+
175
+ 1. write one or two questions that matter
176
+ 2. say what a good answer must preserve
177
+ 3. compile the workspace
178
+ 4. run `interf benchmark`
179
+
180
+ If the compiled workspace does not beat raw files on those evals, do not trust it yet.
181
+
182
+ ## Use It With Your Agent
183
+
184
+ For many users, the agent is the operator.
185
+
186
+ A practical agent-native loop looks like this:
187
+
188
+ 1. the agent gets a real task against a real folder
189
+ 2. it inspects raw files or prior benchmark evidence
190
+ 3. it drafts or updates evals in `interf.config.json`
191
+ 4. it asks the user to confirm the task truth when needed
192
+ 5. it runs compile + benchmark
193
+ 6. it only promotes the compiled workspace for real use once the benchmark says it helped
194
+
195
+ Paste something like this into Claude Code, Codex, OpenClaw, or Hermes:
196
+
197
+ ```text
198
+ Install @interf/compiler, run `interf init` in this folder, choose the local agent executor, and compile the workspace.
199
+
200
+ If `interf.config.json` is missing or incomplete, draft evals for what must be true for this task and ask me to confirm them before benchmarking.
201
+
202
+ Then run `interf benchmark` and tell me whether raw files or the compiled workspace performed better.
203
+ ```
204
+
205
+ ## What The Agent Sees
206
+
207
+ The compiled folder is the agent-facing product surface.
208
+
209
+ Important files in a knowledge base or interface:
210
+
211
+ - `interf.json` = what this workspace is
212
+ - `AGENTS.md` = canonical bootstrap and navigation
213
+ - `CLAUDE.md` = generated compatibility mirror of `AGENTS.md`
214
+ - `workflow/` = the editable local method package
215
+ - `home.md` = entry document
216
+ - `summaries/`, `knowledge/`, and `briefs/` = compiled outputs
217
+
218
+ Interf supports two agent modes:
219
+
220
+ - **executor mode**: the CLI launches a local agent to satisfy one stage contract during create, compile, or benchmark flows
221
+ - **use mode**: a human opens the compiled knowledge base or interface and asks an agent to navigate the finished workspace
222
+
223
+ Manual use looks like this:
224
+
225
+ 1. open the knowledge base or interface folder
226
+ 2. read `AGENTS.md`
227
+ 3. follow `workflow/use/query/SKILL.md`
228
+ 4. for interfaces, use local interface artifacts first, then the parent knowledge-base loop, then raw files if needed
229
+
230
+ Interf does not require globally installed slash skills for workspace behavior. Local `workflow/.../SKILL.md` files are workspace instruction docs routed by `AGENTS.md` and stage contracts.
231
+
232
+ ## Benchmark Proof
233
+
234
+ Interf is benchmark-first.
235
+
236
+ The default eval file lives at the source-folder root:
237
+
238
+ ```text
239
+ source-folder/
240
+ interf.config.json
241
+ ```
242
+
243
+ Saved benchmark runs live under:
244
+
245
+ ```text
246
+ source-folder/
247
+ interf/
248
+ benchmarks/
249
+ runs/
250
+ ```
251
+
252
+ Use benchmarks to answer questions like:
253
+
254
+ - does the compiled workspace beat raw files on this task?
255
+ - which workflow wins on this folder?
256
+ - which interface is best for this job?
257
+ - which model performs best on the same compiled target?
258
+
259
+ `interf benchmark` uses your evals, opens the compiled target like a real user session, asks the questions, and grades the answers. The point is not a hidden score. The point is a benchmark artifact you can inspect, diff, and rerun locally.
260
+
261
+ ## Power Mode
262
+
263
+ Most users do not need to think about improvement loops.
264
+
265
+ The basic story is:
266
+
267
+ 1. compile
268
+ 2. benchmark
269
+ 3. trust the result only if it passes
270
+
271
+ Power users and agent-native setups can go further:
272
+
273
+ - compare workflows on the same folder
274
+ - compare models on the same compiled target
275
+ - draft custom local workflows
276
+ - rerun compile + benchmark until a task-specific interface passes
277
+
278
+ That improvement loop is a real capability, but it is not the main thing users need to understand first.
279
+
280
+ ## Layout On Disk
122
281
 
123
282
  ```text
124
283
  source-folder/
125
284
  ...your files...
285
+ interf.config.json
126
286
  interf/
127
287
  workflows/
128
288
  benchmarks/
@@ -147,19 +307,27 @@ source-folder/
147
307
  summaries/
148
308
  ```
149
309
 
150
- ## Commands
310
+ ## Core Commands
151
311
 
152
312
  - `interf init` = global setup first; if run inside a normal folder, it can also attach and compile a knowledge base there
153
- - `interf create` = chooser when type is omitted
154
- - `interf create knowledge-base` = attach current folder
155
313
  - `interf create interface` = create an interface for the current folder's knowledge base
156
- - `interf create workflow` = create a reusable workflow package
157
314
  - `interf compile` = compile the current knowledge base or interface
158
- - `interf benchmark` = compare compiled knowledge bases or interfaces with file-based evals
159
- - `interf doctor` = preflight local executor setup before a real compile
160
- - `interf status` = show deterministic health
161
- - `interf verify <check>` = internal deterministic referee for major workflow steps
162
- - `interf reset <scope>` = reset generated state while keeping source files
315
+ - `interf benchmark` = compare compiled knowledge bases or interfaces using evals from `interf.config.json` or an explicit spec file
316
+
317
+ Advanced commands still exist for workflow authoring and diagnostics:
318
+
319
+ - `interf create workflow`
320
+ - `interf doctor`
321
+ - `interf status`
322
+ - `interf verify <check>`
323
+ - `interf reset <scope>`
324
+
325
+ Useful run flags:
326
+
327
+ - `--model <name>` = pin the agent model for this run
328
+ - `--profile <name>` = pass an agent-specific profile when supported
329
+ - `--effort <level>` = override model reasoning effort
330
+ - `--timeout-ms <ms>` = interrupt the local executor after this many milliseconds of inactivity
163
331
 
164
332
  ## Workflows
165
333
 
@@ -189,81 +357,31 @@ interf/workflows/knowledge-base/<workflow-id>/
189
357
 
190
358
  Interf keeps the public command surface stable while letting workflows vary the internal stage pipeline. The engine still owns contract kinds, required artifacts, and state flow.
191
359
 
192
- ## Benchmarks and evals
360
+ Current shipped policy:
193
361
 
194
- Interf is benchmark-first.
195
-
196
- You can:
362
+ - built-in knowledge-base workflows: `interf`, `karpathy`
363
+ - built-in interface workflow: `interf`
364
+ - if you need a custom interface method, create a local workflow package and benchmark it before treating it as better than the default
197
365
 
198
- - build multiple knowledge bases over the same folder
199
- - compare workflows on the same source set
200
- - compare interfaces for the same business task
201
- - inspect proofs, outputs, and costs locally
202
-
203
- Reusable benchmark specs and saved runs live under:
204
-
205
- ```text
206
- source-folder/
207
- interf/
208
- benchmarks/
209
- knowledge-base/*.json
210
- interface/*.json
211
- runs/
212
- ```
213
-
214
- This is the trust loop: don't trust a repo because it says its knowledge base is better. Run the benchmark on your folder.
215
-
216
- ## Builder docs
366
+ ## Builder Docs
217
367
 
218
368
  If you want to create your own workflows, start here:
219
369
 
220
370
  1. [`docs/workflow-spec.md`](./docs/workflow-spec.md)
221
371
  2. [`docs/runtime-contract.md`](./docs/runtime-contract.md)
222
372
  3. [`docs/architecture.md`](./docs/architecture.md)
373
+ 4. [`docs/eval-loop.md`](./docs/eval-loop.md)
223
374
 
224
- ## Maintainer test loop
225
-
226
- Smoke suite:
227
-
228
- ```bash
229
- npm test
230
- ```
231
-
232
- Real executor end-to-end:
233
-
234
- ```bash
235
- npm run test:e2e
236
- npm run test:e2e:compare
237
- ```
238
-
239
- Cached quick real-executor loop:
240
-
241
- ```bash
242
- npm run test:e2e:quick
243
- npm run test:full
244
- ```
245
-
246
- Underlying acceptance commands:
247
-
248
- ```bash
249
- npm run test:acceptance-live
250
- npm run test:acceptance-compare
251
- npm run test:acceptance-cache:refresh
252
- npm run test:acceptance-quick:create-interface
253
- npm run test:acceptance-quick:query-interface
254
- ```
255
-
256
- The cached quick fixture lives under `.interf-test-cache/latest-quick/`.
257
- `npm test` is the fast smoke/integration suite. When you want a real agent/executor end-to-end run, use `npm run test:e2e` or `npm run test:e2e:quick`.
258
- `npm run test:full` is the convenient day-to-day command: smoke suite plus cached quick real-executor checks.
375
+ Contributor and release-testing commands live in [`CONTRIBUTING.md`](./CONTRIBUTING.md).
259
376
 
260
- ## Design choices
377
+ ## Design Choices
261
378
 
262
379
  - filesystem-first, not service-first
380
+ - raw files remain the truth
381
+ - compiled workspaces remain visible on disk
263
382
  - workflow packages over hidden orchestration
264
383
  - contract-checked stages instead of prompt-only trust
265
384
  - benchmarkability as a core product feature
266
- - local control: your files stay on disk and run in your environment
267
385
 
268
386
  Interf is not trying to win by hiding complexity. It is trying to make the method visible, enforceable, and comparable.
269
387
 
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAkY3C,eAAO,MAAM,gBAAgB,EAAE,aAmD9B,CAAC"}
1
+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AA0BA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AA2W3C,eAAO,MAAM,gBAAgB,EAAE,aA8D9B,CAAC"}