npm - @interf/compiler - Versions diffs - 0.1.11 → 0.2.0 - Mend

@interf/compiler 0.1.11 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (283)

package/README.md +254 -136
package/dist/commands/benchmark.d.ts.map +1 -1
package/dist/commands/benchmark.js +65 -84
package/dist/commands/benchmark.js.map +1 -1
package/dist/commands/compile.d.ts.map +1 -1
package/dist/commands/compile.js +19 -3
package/dist/commands/compile.js.map +1 -1
package/dist/commands/create.d.ts +3 -0
package/dist/commands/create.d.ts.map +1 -1
package/dist/commands/create.js +34 -9
package/dist/commands/create.js.map +1 -1
package/dist/commands/default.d.ts.map +1 -1
package/dist/commands/default.js +2 -0
package/dist/commands/default.js.map +1 -1
package/dist/commands/init.d.ts.map +1 -1
package/dist/commands/init.js +3 -2
package/dist/commands/init.js.map +1 -1
package/dist/index.d.ts +11 -29
package/dist/index.d.ts.map +1 -1
package/dist/index.js +7 -16
package/dist/index.js.map +1 -1
package/dist/lib/agent-args.d.ts +4 -0
package/dist/lib/agent-args.d.ts.map +1 -0
package/dist/lib/agent-args.js +42 -0
package/dist/lib/agent-args.js.map +1 -0
package/dist/lib/agent-constants.d.ts +6 -0
package/dist/lib/agent-constants.d.ts.map +1 -0
package/dist/lib/agent-constants.js +29 -0
package/dist/lib/agent-constants.js.map +1 -0
package/dist/lib/agent-detection.d.ts +8 -0
package/dist/lib/agent-detection.d.ts.map +1 -0
package/dist/lib/agent-detection.js +66 -0
package/dist/lib/agent-detection.js.map +1 -0
package/dist/lib/agent-execution.d.ts +3 -0
package/dist/lib/agent-execution.d.ts.map +1 -0
package/dist/lib/agent-execution.js +207 -0
package/dist/lib/agent-execution.js.map +1 -0
package/dist/lib/agent-logs.d.ts +3 -0
package/dist/lib/agent-logs.d.ts.map +1 -0
package/dist/lib/agent-logs.js +18 -0
package/dist/lib/agent-logs.js.map +1 -0
package/dist/lib/agent-preflight.d.ts +8 -0
package/dist/lib/agent-preflight.d.ts.map +1 -0
package/dist/lib/agent-preflight.js +77 -0
package/dist/lib/agent-preflight.js.map +1 -0
package/dist/lib/agent-render.d.ts +9 -0
package/dist/lib/agent-render.d.ts.map +1 -0
package/dist/lib/agent-render.js +219 -0
package/dist/lib/agent-render.js.map +1 -0
package/dist/lib/agent-status.d.ts +4 -0
package/dist/lib/agent-status.d.ts.map +1 -0
package/dist/lib/agent-status.js +59 -0
package/dist/lib/agent-status.js.map +1 -0
package/dist/lib/agent-types.d.ts +31 -0
package/dist/lib/agent-types.d.ts.map +1 -0
package/dist/lib/agent-types.js +2 -0
package/dist/lib/agent-types.js.map +1 -0
package/dist/lib/agents.d.ts +7 -49
package/dist/lib/agents.d.ts.map +1 -1
package/dist/lib/agents.js +8 -554
package/dist/lib/agents.js.map +1 -1
package/dist/lib/benchmark-execution.d.ts +9 -0
package/dist/lib/benchmark-execution.d.ts.map +1 -0
package/dist/lib/benchmark-execution.js +488 -0
package/dist/lib/benchmark-execution.js.map +1 -0
package/dist/lib/benchmark-paths.d.ts +11 -0
package/dist/lib/benchmark-paths.d.ts.map +1 -0
package/dist/lib/benchmark-paths.js +38 -0
package/dist/lib/benchmark-paths.js.map +1 -0
package/dist/lib/benchmark-specs.d.ts +8 -0
package/dist/lib/benchmark-specs.d.ts.map +1 -0
package/dist/lib/benchmark-specs.js +115 -0
package/dist/lib/benchmark-specs.js.map +1 -0
package/dist/lib/benchmark-targets.d.ts +5 -0
package/dist/lib/benchmark-targets.d.ts.map +1 -0
package/dist/lib/benchmark-targets.js +72 -0
package/dist/lib/benchmark-targets.js.map +1 -0
package/dist/lib/benchmark-types.d.ts +19 -0
package/dist/lib/benchmark-types.d.ts.map +1 -0
package/dist/lib/benchmark-types.js +2 -0
package/dist/lib/benchmark-types.js.map +1 -0
package/dist/lib/benchmark.d.ts +4 -29
package/dist/lib/benchmark.d.ts.map +1 -1
package/dist/lib/benchmark.js +3 -324
package/dist/lib/benchmark.js.map +1 -1
package/dist/lib/bundled-templates.d.ts +5 -0
package/dist/lib/bundled-templates.d.ts.map +1 -0
package/dist/lib/bundled-templates.js +23 -0
package/dist/lib/bundled-templates.js.map +1 -0
package/dist/lib/config.d.ts +1 -0
package/dist/lib/config.d.ts.map +1 -1
package/dist/lib/config.js +2 -0
package/dist/lib/config.js.map +1 -1
package/dist/lib/eval-packs.d.ts +204 -0
package/dist/lib/eval-packs.d.ts.map +1 -0
package/dist/lib/eval-packs.js +177 -0
package/dist/lib/eval-packs.js.map +1 -0
package/dist/lib/execution-profile.d.ts +18 -0
package/dist/lib/execution-profile.d.ts.map +1 -0
package/dist/lib/execution-profile.js +85 -0
package/dist/lib/execution-profile.js.map +1 -0
package/dist/lib/interf-bootstrap.d.ts +4 -0
package/dist/lib/interf-bootstrap.d.ts.map +1 -1
package/dist/lib/interf-bootstrap.js +71 -68
package/dist/lib/interf-bootstrap.js.map +1 -1
package/dist/lib/interf-compile-plan.d.ts +12 -0
package/dist/lib/interf-compile-plan.d.ts.map +1 -0
package/dist/lib/interf-compile-plan.js +143 -0
package/dist/lib/interf-compile-plan.js.map +1 -0
package/dist/lib/interf-detect.d.ts.map +1 -1
package/dist/lib/interf-detect.js +11 -10
package/dist/lib/interf-detect.js.map +1 -1
package/dist/lib/interf-scaffold.d.ts +1 -10
package/dist/lib/interf-scaffold.d.ts.map +1 -1
package/dist/lib/interf-scaffold.js +25 -362
package/dist/lib/interf-scaffold.js.map +1 -1
package/dist/lib/interf-workflow-package.d.ts +4 -0
package/dist/lib/interf-workflow-package.d.ts.map +1 -0
package/dist/lib/interf-workflow-package.js +131 -0
package/dist/lib/interf-workflow-package.js.map +1 -0
package/dist/lib/interf.d.ts +2 -1
package/dist/lib/interf.d.ts.map +1 -1
package/dist/lib/interf.js +2 -1
package/dist/lib/interf.js.map +1 -1
package/dist/lib/local-workflows.d.ts.map +1 -1
package/dist/lib/local-workflows.js +8 -12
package/dist/lib/local-workflows.js.map +1 -1
package/dist/lib/logger.d.ts +4 -0
package/dist/lib/logger.d.ts.map +1 -0
package/dist/lib/logger.js +11 -0
package/dist/lib/logger.js.map +1 -0
package/dist/lib/obsidian.d.ts.map +1 -1
package/dist/lib/obsidian.js +7 -3
package/dist/lib/obsidian.js.map +1 -1
package/dist/lib/parse.d.ts +2 -2
package/dist/lib/parse.d.ts.map +1 -1
package/dist/lib/parse.js +11 -7
package/dist/lib/parse.js.map +1 -1
package/dist/lib/registry.js +3 -3
package/dist/lib/registry.js.map +1 -1
package/dist/lib/runtime-acceptance.d.ts +4 -0
package/dist/lib/runtime-acceptance.d.ts.map +1 -0
package/dist/lib/runtime-acceptance.js +123 -0
package/dist/lib/runtime-acceptance.js.map +1 -0
package/dist/lib/runtime-contracts.d.ts +4 -0
package/dist/lib/runtime-contracts.d.ts.map +1 -0
package/dist/lib/runtime-contracts.js +63 -0
package/dist/lib/runtime-contracts.js.map +1 -0
package/dist/lib/runtime-paths.d.ts +8 -0
package/dist/lib/runtime-paths.d.ts.map +1 -0
package/dist/lib/runtime-paths.js +28 -0
package/dist/lib/runtime-paths.js.map +1 -0
package/dist/lib/runtime-prompt.d.ts +3 -0
package/dist/lib/runtime-prompt.d.ts.map +1 -0
package/dist/lib/runtime-prompt.js +59 -0
package/dist/lib/runtime-prompt.js.map +1 -0
package/dist/lib/runtime-reconcile.d.ts +6 -0
package/dist/lib/runtime-reconcile.d.ts.map +1 -0
package/dist/lib/runtime-reconcile.js +339 -0
package/dist/lib/runtime-reconcile.js.map +1 -0
package/dist/lib/runtime-runs.d.ts +12 -0
package/dist/lib/runtime-runs.d.ts.map +1 -0
package/dist/lib/runtime-runs.js +337 -0
package/dist/lib/runtime-runs.js.map +1 -0
package/dist/lib/runtime-types.d.ts +42 -0
package/dist/lib/runtime-types.d.ts.map +1 -0
package/dist/lib/runtime-types.js +2 -0
package/dist/lib/runtime-types.js.map +1 -0
package/dist/lib/runtime.d.ts +6 -58
package/dist/lib/runtime.d.ts.map +1 -1
package/dist/lib/runtime.js +5 -614
package/dist/lib/runtime.js.map +1 -1
package/dist/lib/schema.d.ts +156 -13
package/dist/lib/schema.d.ts.map +1 -1
package/dist/lib/schema.js +113 -4
package/dist/lib/schema.js.map +1 -1
package/dist/lib/source-config.d.ts +13 -0
package/dist/lib/source-config.d.ts.map +1 -0
package/dist/lib/source-config.js +75 -0
package/dist/lib/source-config.js.map +1 -0
package/dist/lib/state-artifacts.d.ts +15 -0
package/dist/lib/state-artifacts.d.ts.map +1 -0
package/dist/lib/state-artifacts.js +24 -0
package/dist/lib/state-artifacts.js.map +1 -0
package/dist/lib/state-health.d.ts +9 -0
package/dist/lib/state-health.d.ts.map +1 -0
package/dist/lib/state-health.js +330 -0
package/dist/lib/state-health.js.map +1 -0
package/dist/lib/state-io.d.ts +15 -0
package/dist/lib/state-io.d.ts.map +1 -0
package/dist/lib/state-io.js +219 -0
package/dist/lib/state-io.js.map +1 -0
package/dist/lib/state-paths.d.ts +5 -0
package/dist/lib/state-paths.d.ts.map +1 -0
package/dist/lib/state-paths.js +19 -0
package/dist/lib/state-paths.js.map +1 -0
package/dist/lib/state-view.d.ts +7 -0
package/dist/lib/state-view.d.ts.map +1 -0
package/dist/lib/state-view.js +147 -0
package/dist/lib/state-view.js.map +1 -0
package/dist/lib/state.d.ts +6 -46
package/dist/lib/state.d.ts.map +1 -1
package/dist/lib/state.js +5 -632
package/dist/lib/state.js.map +1 -1
package/dist/lib/summarize-plan.d.ts +1 -0
package/dist/lib/summarize-plan.d.ts.map +1 -1
package/dist/lib/summarize-plan.js +10 -0
package/dist/lib/summarize-plan.js.map +1 -1
package/dist/lib/user-config.js +2 -2
package/dist/lib/user-config.js.map +1 -1
package/dist/lib/validate-helpers.d.ts +21 -0
package/dist/lib/validate-helpers.d.ts.map +1 -0
package/dist/lib/validate-helpers.js +72 -0
package/dist/lib/validate-helpers.js.map +1 -0
package/dist/lib/validate-interface.d.ts +79 -0
package/dist/lib/validate-interface.d.ts.map +1 -0
package/dist/lib/validate-interface.js +535 -0
package/dist/lib/validate-interface.js.map +1 -0
package/dist/lib/validate-kb.d.ts +81 -0
package/dist/lib/validate-kb.d.ts.map +1 -0
package/dist/lib/validate-kb.js +252 -0
package/dist/lib/validate-kb.js.map +1 -0
package/dist/lib/validate.d.ts +17 -146
package/dist/lib/validate.d.ts.map +1 -1
package/dist/lib/validate.js +33 -709
package/dist/lib/validate.js.map +1 -1
package/dist/lib/workflow-definitions.d.ts +1 -1
package/dist/lib/workflow-definitions.d.ts.map +1 -1
package/dist/lib/workflow-definitions.js +90 -166
package/dist/lib/workflow-definitions.js.map +1 -1
package/dist/lib/workflow-helpers.d.ts.map +1 -1
package/dist/lib/workflow-helpers.js +6 -3
package/dist/lib/workflow-helpers.js.map +1 -1
package/dist/lib/workflow-stage-runner.d.ts +41 -0
package/dist/lib/workflow-stage-runner.d.ts.map +1 -0
package/dist/lib/workflow-stage-runner.js +106 -0
package/dist/lib/workflow-stage-runner.js.map +1 -0
package/dist/lib/workflow-starter-docs.d.ts +9 -0
package/dist/lib/workflow-starter-docs.d.ts.map +1 -0
package/dist/lib/workflow-starter-docs.js +18 -0
package/dist/lib/workflow-starter-docs.js.map +1 -0
package/dist/lib/workflows-interface-contracts.d.ts +24 -0
package/dist/lib/workflows-interface-contracts.d.ts.map +1 -0
package/dist/lib/workflows-interface-contracts.js +304 -0
package/dist/lib/workflows-interface-contracts.js.map +1 -0
package/dist/lib/workflows-interface.d.ts +3 -10
package/dist/lib/workflows-interface.d.ts.map +1 -1
package/dist/lib/workflows-interface.js +117 -365
package/dist/lib/workflows-interface.js.map +1 -1
package/dist/lib/workflows-kb.d.ts.map +1 -1
package/dist/lib/workflows-kb.js +79 -55
package/dist/lib/workflows-kb.js.map +1 -1
package/dist/lib/workflows.d.ts +1 -1
package/dist/lib/workflows.d.ts.map +1 -1
package/dist/lib/workflows.js +1 -1
package/dist/lib/workflows.js.map +1 -1
package/package.json +15 -4
package/skills/interface/analyze/SKILL.md +79 -28
package/skills/interface/compile/SKILL.md +27 -28
package/skills/interface/create/SKILL.md +53 -230
package/skills/interface/create/references/compile-plan-format.md +31 -31
package/skills/interface/create/references/workflows.md +17 -32
package/skills/interface/query/SKILL.md +15 -1
package/skills/interface/retrieve/SKILL.md +32 -65
package/skills/knowledge-base/compile/SKILL.md +59 -83
package/skills/knowledge-base/compile/references/stage-claims.md +1 -1
package/skills/knowledge-base/compile/references/stage-entities.md +2 -2
package/skills/knowledge-base/query/SKILL.md +13 -1
package/skills/knowledge-base/summarize/SKILL.md +54 -24
package/templates/interface/README.md +13 -12
package/templates/interface/interfaces.md +14 -11
package/templates/knowledge-base/README.md +0 -1
package/templates/knowledge-base/registry.md +15 -15
package/templates/workflow-package/README.md +16 -0
package/templates/workflow-package/create/SKILL.md +8 -0
package/templates/workflow-package/interface-query/SKILL.md +29 -0
package/templates/workflow-package/interface-stage/SKILL.md +13 -0
package/templates/workflow-package/knowledge-base-query/SKILL.md +36 -0
package/templates/workflow-package/knowledge-base-stage/SKILL.md +13 -0
package/templates/workflow-starters/interface/interf/README.md +13 -0
package/templates/workflow-starters/interface/interf/create/SKILL.md +15 -0
package/templates/workflow-starters/knowledge-base/interf/README.md +13 -0
package/templates/workflow-starters/knowledge-base/karpathy/README.md +13 -0

package/README.md CHANGED Viewed

@@ -1,86 +1,82 @@
 # Interf
-The open-source knowledge compiler.
+The open-source eval-first knowledge compiler.
-Interf compiles folders into knowledge bases and task-specific interfaces: agent-ready workspaces with proof, structure, and benchmarks.
+Interf compiles a workspace beside your files for agents: a knowledge representation they can navigate, cross-check against raw source, and prove on your evals.
-- compile any folder into a knowledge base
-- create focused interfaces for specific tasks
-- run evals and benchmarks on your own files
+Your files stay the truth. Interf adds a compiled workspace and benchmark proof.
-Most LLM knowledge-base repos optimize for a demo. Interf optimizes for proof. It keeps your files on disk, compiles a visible folder an agent can actually use, and makes workflows compete on your evals instead of on marketing claims.
+- point it at a folder you already have
+- define or confirm what must be true for the task in `interf.config.json`
+- compile a shared knowledge base plus task-specific interfaces
+- benchmark raw files vs compiled workspaces and keep the best result
-## Why Interf
+Most "AI knowledge base" tools optimize for a demo. Interf optimizes for proof. It keeps the raw files on disk, compiles a visible workspace your agent can use, and makes workflows compete on your evals instead of on marketing claims.
-Interf is built around three ideas:
+Interf is not a chat shell, a hosted notes app, or a generic agent OS. It is the compile + benchmark loop for turning real folders into better agent workspaces and proving they help on a real task.
-- the product surface is the compiled folder, not a hidden service
-- the workflow should leave proof of work on disk
-- the method should be benchmarkable on your task
+## What Happens
-That gives you a simple loop:
-1. point Interf at a folder
-2. compile a knowledge base
-3. create an interface for a job
-4. run evals and benchmarks to see what actually works
+```text
+raw folder
+  -> compiled workspace beside the raw files
+  -> benchmark proof on your evals
+```
-## Core concepts
+The compiled workspace is for agents. It gives them:
-- **Source folder**: your real files stay where they are
-- **Knowledge base**: `interf/{name}/`, the shared compiled layer over that folder
-- **Interface**: `interf/{name}/interfaces/{interface-name}/`, a task-specific surface on top of one knowledge base
-- **Workflow**: the reusable method package that defines the compile pipeline
-- **Benchmark**: running evals across compiled knowledge bases or interfaces on the same folder
+- a clearer map of the data
+- task-specific outputs when broad summarization is not enough
+- better evidence paths back to the raw source
+- proof of whether the compiled workspace actually helped
-One source folder can host multiple knowledge bases under `interf/` when you want to compare workflows like `interf` vs `karpathy` on the same data.
+Interf does not replace your agent. It gives your agent a better workspace to use.
-## Interf primitives
+## Trust Boundary
-Interf gives you a few strong primitives instead of a giant abstraction layer:
+Interf keeps one trust boundary:
-- **workflow package**: `workflow.json` plus local `workflow/` docs define the method
-- **stage contract**: every compile stage gets a deterministic acceptance boundary
-- **declarative acceptance**: workflows can declare extra acceptance rules in `workflow.json`
-- **CLI enforcement**: the CLI checks whether a stage actually complied instead of trusting the agent's summary
-- **benchmark specs**: file-based evals let you compare workflows and interfaces on the same folder
+- raw files in the source folder are the content truth
+- `interf.config.json` is the user-approved task truth
+- the compiled workspace is the generated working surface
+- the benchmark result is the proof of whether that generated surface is good enough
-That is the core product promise:
+That means:
-- define what the agent should do in plain English
-- give the agent local workspace docs and stage contracts
-- validate the result deterministically
+- agents may draft evals
+- users approve accepted task truth
+- raw files remain the final source of evidence
+- compiled workspaces earn trust only if they pass the benchmark
-For workflow authors, the important surface is:
+## Who It’s For
-- `workflow.json`
-- `workflow/create/`
-- `workflow/compile/stages/<stage>/`
-- `workflow/use/query/`
-- [`docs/workflow-spec.md`](./docs/workflow-spec.md)
+Interf is for people already trying to get real work done with agents on real folders:
-## What the agent sees
+- Claude Code and Codex users
+- OpenClaw and Hermes-style local-agent users
+- technical founders, researchers, and operators with messy source folders
+- teams who want to test whether compiled workspaces beat raw files on their own tasks
-The compiled folder is the agent-facing product surface: an agent-ready workspace.
+If you want a generic chat UI, this is not that product.
-Important files in a KB or interface:
+## Mental Model
-- `interf.json` = what this workspace is
-- `AGENTS.md` = where to start and how to navigate
-- `workflow/` = the editable local method package
-- `home.md` = entry document
-- `summaries/`, `knowledge/`, and `briefs/` = compiled outputs
+- **Source folder**: your real files stay where they are
+- **Compiled workspace**: the generated workspace beside those files for agents
+- **Knowledge base**: the shared compiled workspace over the folder
+- **Interface**: the task-specific compiled workspace for one job
+- **Workflow**: the reusable compile method
+- **Eval**: what must be true for the task
+- **Benchmark**: the proof loop that compares raw and compiled results
-Manual query/use works like this:
+One source folder can host multiple knowledge bases under `interf/` if you want to compare workflows on the same data.
-- open the KB or interface folder
-- read `AGENTS.md`
-- follow `workflow/use/query/SKILL.md`
-- for interfaces, use local interface artifacts first, then the parent KB loop, then raw files if needed
+## Install
-Interf does not require globally installed slash skills for workspace behavior. Local `workflow/.../SKILL.md` files are workspace instruction docs routed by `AGENTS.md` and stage contracts.
+Requirements:
-## Quick start
+- Node.js 20+
+- one local coding agent executor: Claude Code or Codex
 Install the published package:
@@ -88,29 +84,31 @@ Install the published package:
 npm install -g @interf/compiler
 ```
-Or install from source while contributing:
+Sanity check the local setup:
 ```bash
-npm install
-npm run build
-npm install -g .
+interf doctor
 ```
+If you already use Claude Code or Codex locally, that is the intended path. Interf uses your local agent as the executor for compile and benchmark runs.
+## Quick Start
 Initialize Interf in any folder:
 ```bash
-cd ~/my-notes
+cd ~/my-folder
 interf init
 ```
 That flow can:
-- choose an executor like Claude Code or Codex
-- optionally install global helper skills
-- attach the current folder as a knowledge base
-- compile the knowledge base immediately
+1. choose an executor like Claude Code or Codex
+2. optionally install helper skills
+3. attach the current folder as a knowledge base
+4. optionally compile the knowledge base immediately
-Then you can:
+Then:
 ```bash
 interf create interface
@@ -118,11 +116,173 @@ interf compile
 interf benchmark
 ```
-## Example layout
+Fastest way to see the full loop:
+```bash
+cp -r examples/benchmark-demo /tmp/benchmark-demo
+cd /tmp/benchmark-demo
+interf init
+interf compile
+interf benchmark
+```
+What success looks like on disk:
+- `interf/<kb>/` = shared compiled workspace over the folder
+- `interf/<kb>/interfaces/<name>/` = task-specific compiled workspace
+- `interf/benchmarks/runs/...` = saved benchmark evidence for that folder
+## 5-Minute Example
+Try the full loop on the shipped sample folder:
+```bash
+cp -r examples/benchmark-demo /tmp/interf-demo
+cd /tmp/interf-demo
+interf init
+interf compile
+interf benchmark
+```
+This sample already includes an `interf.config.json`, so you can see the compile and benchmark loop without writing your own evals first.
+## Simple Eval Example
+The default public eval file is `interf.config.json` at the source-folder root.
+Minimal example:
+```json
+{
+  "interfaces": [
+    {
+      "name": "weekly-briefing",
+      "about": "Summarize what changed, why it matters, and what to do next.",
+      "evals": [
+        {
+          "question": "From the compiled interface only, what changed and what should the operator do next?",
+          "answer": "A good answer says what changed, why it matters, and the next action.",
+          "strictness": "approximate"
+        }
+      ]
+    }
+  ]
+}
+```
+That is enough to start. You do not need a large benchmark harness to use Interf:
+1. write one or two questions that matter
+2. say what a good answer must preserve
+3. compile the workspace
+4. run `interf benchmark`
+If the compiled workspace does not beat raw files on those evals, do not trust it yet.
+## Use It With Your Agent
+For many users, the agent is the operator.
+A practical agent-native loop looks like this:
+1. the agent gets a real task against a real folder
+2. it inspects raw files or prior benchmark evidence
+3. it drafts or updates evals in `interf.config.json`
+4. it asks the user to confirm the task truth when needed
+5. it runs compile + benchmark
+6. it only promotes the compiled workspace for real use once the benchmark says it helped
+Paste something like this into Claude Code, Codex, OpenClaw, or Hermes:
+```text
+Install @interf/compiler, run `interf init` in this folder, choose the local agent executor, and compile the workspace.
+If `interf.config.json` is missing or incomplete, draft evals for what must be true for this task and ask me to confirm them before benchmarking.
+Then run `interf benchmark` and tell me whether raw files or the compiled workspace performed better.
+```
+## What The Agent Sees
+The compiled folder is the agent-facing product surface.
+Important files in a knowledge base or interface:
+- `interf.json` = what this workspace is
+- `AGENTS.md` = canonical bootstrap and navigation
+- `CLAUDE.md` = generated compatibility mirror of `AGENTS.md`
+- `workflow/` = the editable local method package
+- `home.md` = entry document
+- `summaries/`, `knowledge/`, and `briefs/` = compiled outputs
+Interf supports two agent modes:
+- **executor mode**: the CLI launches a local agent to satisfy one stage contract during create, compile, or benchmark flows
+- **use mode**: a human opens the compiled knowledge base or interface and asks an agent to navigate the finished workspace
+Manual use looks like this:
+1. open the knowledge base or interface folder
+2. read `AGENTS.md`
+3. follow `workflow/use/query/SKILL.md`
+4. for interfaces, use local interface artifacts first, then the parent knowledge-base loop, then raw files if needed
+Interf does not require globally installed slash skills for workspace behavior. Local `workflow/.../SKILL.md` files are workspace instruction docs routed by `AGENTS.md` and stage contracts.
+## Benchmark Proof
+Interf is benchmark-first.
+The default eval file lives at the source-folder root:
+```text
+source-folder/
+  interf.config.json
+```
+Saved benchmark runs live under:
+```text
+source-folder/
+  interf/
+    benchmarks/
+      runs/
+```
+Use benchmarks to answer questions like:
+- does the compiled workspace beat raw files on this task?
+- which workflow wins on this folder?
+- which interface is best for this job?
+- which model performs best on the same compiled target?
+`interf benchmark` uses your evals, opens the compiled target like a real user session, asks the questions, and grades the answers. The point is not a hidden score. The point is a benchmark artifact you can inspect, diff, and rerun locally.
+## Power Mode
+Most users do not need to think about improvement loops.
+The basic story is:
+1. compile
+2. benchmark
+3. trust the result only if it passes
+Power users and agent-native setups can go further:
+- compare workflows on the same folder
+- compare models on the same compiled target
+- draft custom local workflows
+- rerun compile + benchmark until a task-specific interface passes
+That improvement loop is a real capability, but it is not the main thing users need to understand first.
+## Layout On Disk
 ```text
 source-folder/
   ...your files...
+  interf.config.json
   interf/
     workflows/
     benchmarks/
@@ -147,19 +307,27 @@ source-folder/
           summaries/
 ```
-## Commands
+## Core Commands
 - `interf init` = global setup first; if run inside a normal folder, it can also attach and compile a knowledge base there
-- `interf create` = chooser when type is omitted
-- `interf create knowledge-base` = attach current folder
 - `interf create interface` = create an interface for the current folder's knowledge base
-- `interf create workflow` = create a reusable workflow package
 - `interf compile` = compile the current knowledge base or interface
-- `interf benchmark` = compare compiled knowledge bases or interfaces with file-based evals
-- `interf doctor` = preflight local executor setup before a real compile
-- `interf status` = show deterministic health
-- `interf verify <check>` = internal deterministic referee for major workflow steps
-- `interf reset <scope>` = reset generated state while keeping source files
+- `interf benchmark` = compare compiled knowledge bases or interfaces with evals from `interf.config.json` or an explicit spec file
+Advanced commands still exist for workflow authoring and diagnostics:
+- `interf create workflow`
+- `interf doctor`
+- `interf status`
+- `interf verify <check>`
+- `interf reset <scope>`
+Useful run flags:
+- `--model <name>` = pin the agent model for this run
+- `--profile <name>` = pass an agent-specific profile when supported
+- `--effort <level>` = override model reasoning effort
+- `--timeout-ms <ms>` = interrupt the local executor after this much inactivity
 ## Workflows
@@ -189,81 +357,31 @@ interf/workflows/knowledge-base/<workflow-id>/
 Interf keeps the public command surface stable while letting workflows vary the internal stage pipeline. The engine still owns contract kinds, required artifacts, and state flow.
-## Benchmarks and evals
+Current shipped policy:
-Interf is benchmark-first.
-You can:
+- built-in knowledge-base workflows: `interf`, `karpathy`
+- built-in interface workflow: `interf`
+- if you need a custom interface method, create a local workflow package and benchmark it before treating it as better than the default
-- build multiple knowledge bases over the same folder
-- compare workflows on the same source set
-- compare interfaces for the same business task
-- inspect proofs, outputs, and costs locally
-Reusable benchmark specs and saved runs live under:
-```text
-source-folder/
-  interf/
-    benchmarks/
-      knowledge-base/*.json
-      interface/*.json
-      runs/
-```
-This is the trust loop: don't trust a repo because it says its knowledge base is better. Run the benchmark on your folder.
-## Builder docs
+## Builder Docs
 If you want to create your own workflows, start here:
 1. [`docs/workflow-spec.md`](./docs/workflow-spec.md)
 2. [`docs/runtime-contract.md`](./docs/runtime-contract.md)
 3. [`docs/architecture.md`](./docs/architecture.md)
+4. [`docs/eval-loop.md`](./docs/eval-loop.md)
-## Maintainer test loop
-Smoke suite:
-```bash
-npm test
-```
-Real executor end-to-end:
-```bash
-npm run test:e2e
-npm run test:e2e:compare
-```
-Cached quick real-executor loop:
-```bash
-npm run test:e2e:quick
-npm run test:full
-```
-Underlying acceptance commands:
-```bash
-npm run test:acceptance-live
-npm run test:acceptance-compare
-npm run test:acceptance-cache:refresh
-npm run test:acceptance-quick:create-interface
-npm run test:acceptance-quick:query-interface
-```
-The cached quick fixture lives under `.interf-test-cache/latest-quick/`.
-`npm test` is the fast smoke/integration suite. When you want a real agent/executor end-to-end run, use `npm run test:e2e` or `npm run test:e2e:quick`.
-`npm run test:full` is the convenient day-to-day command: smoke suite plus cached quick real-executor checks.
+Contributor and release-testing commands live in [`CONTRIBUTING.md`](./CONTRIBUTING.md).
-## Design choices
+## Design Choices
 - filesystem-first, not service-first
+- raw files remain the truth
+- compiled workspaces remain visible on disk
 - workflow packages over hidden orchestration
 - contract-checked stages instead of prompt-only trust
 - benchmarkability as a core product feature
-- local control: your files stay on disk and run in your environment
 Interf is not trying to win by hiding complexity. It is trying to make the method visible, enforceable, and comparable.

package/dist/commands/benchmark.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAkY3C,eAAO,MAAM,gBAAgB,EAAE,aAAmD9B,CAAC"}
1	+ {"version":3,"file":"benchmark.d.ts","sourceRoot":"","sources":["../../src/commands/benchmark.ts"],"names":[],"mappings":"AA0BA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AA2W3C,eAAO,MAAM,gBAAgB,EAAE,aA8D9B,CAAC"}