npm - @loops-adk/core - Versions diffs - 0.1.0 → 0.2.0 - Mend

@loops-adk/core 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/README.md +120 -13
package/assets/logo.png +0 -0
package/bin/loops.mjs +5 -5
package/dist/{agent-sdk-RF5VJZAT.js → agent-sdk-4QJDWM7N.js} +3 -3
package/dist/{agent-sdk-RF5VJZAT.js.map → agent-sdk-4QJDWM7N.js.map} +1 -1
package/dist/api.d.ts +177 -3
package/dist/api.js +26 -10
package/dist/api.js.map +1 -1
package/dist/{chunk-XC46B4FD.js → chunk-MA6NDQMO.js} +2 -2
package/dist/chunk-MA6NDQMO.js.map +1 -0
package/dist/{chunk-3BPU34DE.js → chunk-WM5QVHM2.js} +789 -46
package/dist/chunk-WM5QVHM2.js.map +1 -0
package/dist/{claude-cli-U7WEVAOL.js → claude-cli-75AOQUKG.js} +3 -3
package/dist/{claude-cli-U7WEVAOL.js.map → claude-cli-75AOQUKG.js.map} +1 -1
package/dist/{codex-6I5UZ2HM.js → codex-LYZF52WL.js} +25 -13
package/dist/codex-LYZF52WL.js.map +1 -0
package/dist/env/command.d.ts +1 -1
package/dist/env/docker.d.ts +1 -1
package/dist/env/sst.d.ts +1 -1
package/dist/index.js +249 -11
package/dist/index.js.map +1 -1
package/dist/{types-B4wGVpqo.d.ts → types-Cv_3ymr9.d.ts} +118 -37
package/package.json +10 -1
package/skills/author-loop/SKILL.md +25 -14
package/skills/design-agent-team/SKILL.md +108 -0
package/skills/supervise-loop-run/SKILL.md +64 -0
package/dist/chunk-3BPU34DE.js.map +0 -1
package/dist/chunk-XC46B4FD.js.map +0 -1
package/dist/codex-6I5UZ2HM.js.map +0 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@loops-adk/core",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "license": "MIT",
   "author": "Jonny Neill",
   "description": "Run an agent in a convergence loop with an honest done-gate. A small, nestable loop and DAG primitive: deterministic plus agent-judge conditions, git as memory, review-restart, budgets, and a live TUI.",
@@ -52,6 +52,7 @@
     "dist",
     "bin",
     "skills",
+    "assets",
     "README.md",
     "LICENSE"
   ],
@@ -64,6 +65,14 @@
     "typecheck": "tsc --noEmit",
     "test": "vitest run",
     "test:watch": "vitest",
+    "bench:ab": "tsx bench/ab.ts",
+    "bench:graph": "tsx bench/graph.ts",
+    "bench:signal": "BENCH_GRAPH_TASK=graph-tasks/stable-store-contract BENCH_OUT=bench/results-signal.json tsx bench/graph.ts",
+    "bench:compare": "tsx bench/compare.ts",
+    "bench:report": "tsx bench/report.ts",
+    "bench:report:sample": "tsx bench/report.ts bench/results.sample.json",
+    "bench:context:dry": "BENCH_DRY=1 BENCH_CB_GROUPS=bench/contextbench/groups.dry.json tsx bench/swecontextbench.ts",
+    "bench:mechanism": "tsx bench/mechanism.ts",
     "example:poll": "tsx src/index.ts run examples/simple-poll.loop.ts --no-tui",
     "example:gate": "tsx src/index.ts run examples/confidence-gate.loop.ts",
     "prepack": "npm run build",

package/skills/author-loop/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: author-loop
-description: Use when writing, running, or validating a loops `.loop.ts` — the mental model, the honest-convergence gate, the git-memory tiers, the loop archetypes, and copy-paste recipes for authoring convergence loops with the `loops` library. Load this before composing a loop.
+description: Use when writing, running, or validating a loops `.loop.ts`: the mental model, the honest-convergence gate, the git-memory tiers, the loop archetypes, and copy-paste recipes for authoring convergence loops with the `loops` library. Load this before composing a loop.
 ---
 # Authoring loops
@@ -11,9 +11,9 @@ description: Use when writing, running, or validating a loops `.loop.ts` — the
 There is one unit of work and two supporting types:
-- `Job = (ctx) => Promise<Outcome>` — a unit of work of any size.
-- `Condition = (ctx, last) => Promise<{ met, reason, confidence? }>` — a yes/no gate.
-- `Engine` — where an agent turn runs (a model backend).
+- `Job = (ctx) => Promise<Outcome>`: a unit of work of any size.
+- `Condition = (ctx, last) => Promise<{ met, reason, confidence? }>`: a yes/no gate.
+- `Engine`: where an agent turn runs (a model backend).
 `loop()` returns a `Job`. `dag()` returns a `Job`. So loops and DAGs **nest both ways**: a DAG node can be a loop, a loop body can be a DAG. Nesting is the absence of a special case. Author with that freedom; do not reach for a node type that only works in one position.
@@ -45,9 +45,9 @@ export default defineJob(
 ## The gate is the whole point
-The trap this library exists to avoid is "ask the model if it is done" — the model grades its own homework and always says yes. Make the gate **honest**:
+The trap this library exists to avoid is "ask the model if it is done": the model grades its own homework and always says yes. Make the gate **honest**:
-- Combine a **deterministic** signal (`commandSucceeds('npm', ['test'])` — the tests really pass) with a **separate judge** (`agentCheck`). Prefer this mixed form over a lone judge.
+- Combine a **deterministic** signal (`commandSucceeds('npm', ['test'])`: the tests really pass) with a **separate judge** (`agentCheck`). Prefer this mixed form over a lone judge.
 - `until`/`start`/`stopOn` take one item or many. Arrays are `all` by default; wrap in `any(...)` for or.
 - Harden the judge: `quorum(2, judgeA, judgeB, judgeC)` is a k-of-n jury. `agentCheck({ dimensions: [...] })` opens on the geometric mean, so one weak dimension drags the verdict down.
 - A missing confidence scores 0 (fail-closed). Never lean on the model's self-report alone.
@@ -67,16 +67,16 @@ until: [
 Progress accumulates on disk, so each iteration starts with a clean context but not a blank one.
 - `ground: true` on an `agentJob` reads the recent commit log + this run's scratch files into the next prompt, so a fresh turn knows what was already tried.
-- `commit: { subject }` (or `commit: true`) writes one structured milestone commit on convergence — the reasoning welded to the diff. Later turns ground on it.
+- `commit: { subject }` (or `commit: true`) writes one structured milestone commit on convergence: the reasoning welded to the diff. Later turns ground on it.
 - For long, noisy histories use `ground: { retrieve: true }` (select relevant commits, not recent-N); for indefinite processes add `consolidateJob` to fold history into a bounded, decision-preserving record.
 ## Three archetypes
 A loop is not one shape. Pick the one that matches the work:
-- **Converge** — one hard target, retried until a gate passes: `loop({ until: gate, max })`.
-- **Sweep** — a known worklist, one fresh task each: a `loop`/`dag` over the list.
-- **Tend** — an unbounded process picking the next unit: `loop({ until: dynamicCondition, max })`, body dispatches to a sub-loop (wrap in `isolated(...)` for its own worktree).
+- **Converge**: one hard target, retried until a gate passes: `loop({ until: gate, max })`.
+- **Sweep**: a known worklist, one fresh task each: a `loop`/`dag` over the list.
+- **Tend**: an unbounded process picking the next unit: `loop({ until: dynamicCondition, max })`, body dispatches to a sub-loop (wrap in `isolated(...)` for its own worktree).
 They nest: triage is Tend ∘ Converge; a research sweep is Sweep ∘ Converge.
@@ -97,20 +97,31 @@ dag({
 `needs` are dependencies; `optional` nodes never block; an unmet `when` skips a node; `isolation: 'worktree'` (on the dag) or `isolate: true` (per node) runs writers in parallel worktrees that land back on pass. `sequence` and `parallel` are sugar over `dag`.
+## Agents and feedback
+A node can be a named specialist instead of an inline prompt. Define it once with `defineAgent` (persona in markdown via `fromFile`, structure in TS) and hand it to `agentJob({ agent })`; `defineSkill` folds a methodology into its system prompt. The contract fields (`tier`, `outputs`, `failureModes`, …) are metadata for `describe` and validation, not scheduling power: the `dag` orchestrates, agents stay workers.
+Review feedback is a structured revision request that flows back to the worker on one channel. In a loop, a failing `review` is threaded into the next turn as `ctx.lastReview`; set `consumeFeedback: true` and `agentJob` folds it into the prompt. Aggregate several reviewers with `reviewPanel`; route a fix back to an earlier dag node with a targeted `revisionRequest({ target, findings })` (or the terse `kickback(to, reason)`) when the dag's `maxKickbacks` allows it.
+Composing a team of specialists, gates, and routed feedback is its own skill: see `skills/design-agent-team/SKILL.md`.
 ## Author → validate → run
 ```bash
-loops validate path/to/feature.loop.ts   # offline pre-flight: loads + prints the shape, no model calls, no spend
-loops describe path/to/feature.loop.ts   # print the loop's shape (gate, body, nodes) without running
-loops run path/to/feature.loop.ts         # live Ink TUI
+loops validate path/to/feature.loop.ts     # offline pre-flight: loads + prints the shape, no model calls, no spend
+loops describe path/to/feature.loop.ts     # print the loop's shape (gate, body, nodes) without running
+loops describe path/to/feature.loop.ts --json # the same shape as JSON (incl. each agent node's contract)
+loops run path/to/feature.loop.ts          # live Ink TUI
 loops run path/to/feature.loop.ts --no-tui # plain streamed logs
-loops run path/to/feature.loop.ts --json   # NDJSON event stream (parse this from an agent)
+loops run path/to/feature.loop.ts --json   # raw NDJSON event firehose (to supervise a run, prefer --supervise + records, below)
 ```
 Always `loops validate` first. It imports and constructs the loop (catching syntax, import, and bad-export errors) without running it, so you fix authoring mistakes for free before spending a single agent turn. It also prints the loop's shape (its gate, body, and dag nodes), so you can confirm you built what you intended. `loops describe` prints that shape on its own.
 `loops run` works from any repo, including one that uses `loops` as a submodule or dependency. The recipe's folder must be an ES module scope (a `package.json` with `{"type":"module"}`); repos that consume `loops` already have this. If a load fails with an ES-module error, that scope is what is missing.
+Add `--supervise` to make a run observable from another process: it registers under `~/.loops/runs/`. From an agent, the primary read API is `loops records <runId>`, the semantic decision stream (dispatch / completion / surfacing / revision), rather than the raw `run --json` firehose; filter it with `--kind`, `--path`, `--since`, and `--last`, and add `--json` for machine-readable output. `loops tail <runId>` streams live events, `loops status <runId>` reports a point-in-time snapshot of the run's live or terminal state, and `loops list` enumerates runs. Watching a long run or supervising several at once is its own skill: see `skills/supervise-loop-run/SKILL.md`.
 ## Gotchas
 - **Test offline first.** Use the `mock` engine, or an engine-free `fnJob`/`predicate` body, to prove the loop's shape with zero network. A change to convergence logic deserves a deterministic check, not a live model call.

package/skills/design-agent-team/SKILL.md ADDED Viewed

@@ -0,0 +1,108 @@
+---
+name: design-agent-team
+description: Use when composing a team of specialist agents in a loops `dag`: defining an `AgentDef`, folding in `defineSkill` methodologies, wiring review feedback (`reviewPanel`/`consumeFeedback`/`revisionRequest`), and gating nodes so the graph orchestrates and the agents stay workers, never dispatchers. Load this before turning a loop into a multi-agent team.
+---
+# Designing an agent team
+A `dag` of specialist agents is a team. The load-bearing rule that keeps it a team and not a swarm:
+**The graph orchestrates; agents do not.** The `dag` is the manager (toposort + dispatch), `Condition`/`quorum` are the gates, `Outcome` is the result channel. An `AgentDef` is only the *contract*: who the agent is, what it may touch, how it works. It carries no scheduling authority. An agent produces an `Outcome`; the graph decides what runs next. Never build an agent whose job is to dispatch other agents; make the graph do it.
+**REQUIRED BACKGROUND:** you compose these agents into a loop/dag, so first read `skills/author-loop/SKILL.md` for the loop mental model, the honest gate, and the git-memory tiers.
+## Two builders: a skill is a method, an agent is a worker
+- `defineSkill({ name, instructions })` is a **methodology** (how to work: TDD, writing-plans). Prose only. A skill never dispatches an agent.
+- `defineAgent({ ... })` is a **worker**: a persona plus its contract. It *composes* skills; the skills' instructions fold into its system prompt.
+Persona and methodology live in editable markdown (`fromFile`); structure and types live in TS. The `.ts` is the typed wrapper around the `.md`.
+```ts
+import { defineAgent, defineSkill, fromFile, agentJob } from '@loops-adk/core';
+const tdd = defineSkill({
+  name: 'tdd',
+  instructions: fromFile(new URL('./skills/tdd.md', import.meta.url)),
+});
+const storeEngineer = defineAgent({
+  name: 'store-engineer',
+  system: fromFile(new URL('./agents/store-engineer.md', import.meta.url)), // persona, as markdown
+  model: 'sonnet',
+  tools: ['edit', 'bash'],        // the permission boundary
+  leaf: true,                     // may not spawn sub-agents; bottoms the branch out here
+  tier: 'worker',                 // contract metadata (no scheduling power)
+  capabilities: ['storage engine', 'id stability'],
+  outputs: [{ name: 'patch' }, { name: 'test-report' }],
+  skills: [tdd],                  // methodologies fold into the system
+  requiresSkills: ['contract-first'], // metadata unless also in `skills`
+  usesSkills: ['small-diff'],
+  humanGates: [{ name: 'prod-approval', when: 'deploying production changes' }],
+  failureModes: [{ mode: 'tests-flaky', recovery: 'isolate the flake, retry once', severity: 'should-fix' }],
+});
+```
+`agentJob({ agent: storeEngineer, prompt, ground: true })` resolves the def into the engine request (`system` = persona + folded skills, plus `model`/`tools`). Inline `system`/`model`/`tools`/`allowedTools` on the `agentJob` still override the def. The contract fields beyond `system`/`model`/`tools` are **optional metadata** for validation, `loops describe`, docs, and future discovery. They change nothing at runtime; they do not grant dispatch authority.
+**`leaf` is the fan-out brake.** A leaf agent cannot spawn sub-agents (the engine withholds the sub-agent tool). Use it to stop a thorough worker from quietly expanding into a slow, expensive swarm. The team's shape stays the graph you drew, not one the agent invents.
+## Wire the team as a graph
+```ts
+import { dag, loop, agentJob, gateJob, quorum, agentCheck, commandSucceeds } from '@loops-adk/core';
+dag({
+  name: 'ship',
+  nodes: {
+    store:  loop({ name: 'store', body: agentJob({ agent: storeEngineer, prompt: 'Build the store to its tests.', ground: true }), until: commandSucceeds('npm', ['test']) }),
+    api:    { needs: ['store'], job: loop({ /* apiEngineer, same shape */ }) },
+    review: { needs: ['api'], job: gateJob('review', quorum(2,
+      agentCheck({ agent: securityReviewer, question: 'Is it safe?' }),
+      agentCheck({ agent: correctnessReviewer, question: 'Is it correct?' }),
+    )) },
+  },
+});
+```
+Each engineer is a Converge loop (build to a `test` gate); reviewers are gates. `quorum(k, ...)` is a k-of-n jury; `gateJob(name, condition)` turns a `Condition` into a `Job` so it can be a node. Because a reviewer is just an agent and `agentCheck` takes an `engine`/`model`, any reviewer runs on any model, so put the adversarial lens on a second model for a genuinely independent signal.
+## Feedback is a loop boundary, not a back-edge
+Review findings are structured, and they flow back to the worker on the same channel whether they come from a loop's `review` slot or a dag kickback.
+**In a loop:** a failing `review` outcome is threaded into the next body turn as `ctx.lastReview`. Set `consumeFeedback: true` so the worker reads it without you hand-writing "address the feedback" into every prompt:
+```ts
+const implement = agentJob({ agent: implementationAgent, prompt: brief, consumeFeedback: true });
+```
+**Aggregate several reviewers** with `reviewPanel`. Every reviewer is a gate: by default the panel passes only when all of them clear; set `pass: N` to pass when at least N of them clear (k-of-n). An empty panel is a construction error. Give each reviewer real evidence with `reviewContext`:
+```ts
+import { reviewPanel, reviewContext, agentCheck } from '@loops-adk/core';
+const review = reviewPanel({
+  pass: 2, // optional: k-of-n instead of all
+  reviewers: [
+    { name: 'security',    review: agentCheck({ question: 'Is it safe?',    context: reviewContext({ diff: true, ledger: true }) }) },
+    { name: 'correctness', review: agentCheck({ question: 'Is it correct?', context: reviewContext({ tests: { command: 'npm', args: ['test'] } }) }) },
+    { name: 'simplicity',  review: agentCheck({ question: 'Is it simple?',  context: reviewContext({ files: ['src/**'] }) }) },
+  ],
+});
+```
+A failing panel emits a `revisionRequest` carrying each failing reviewer's concern as a finding, threaded into the next pass.
+**Route feedback across a DAG** with a targeted revision. When `DagConfig.maxKickbacks > 0`, a `revisionRequest({ target, findings })` (or the terse `kickback(to, reason)`) re-runs the target node and its transitive dependents, threading the reason in as their `lastReview`. Constrain valid targets with `DagNode.acceptsKickbackTo`. Because every cycle is a bounded re-run, not a graph edge, it always terminates.
+To give a worker just enough of a map to act on routed feedback without exposing the whole orchestration, set `graphContext: true`: it appends a small block naming this node, its direct dependencies, and its direct dependents.
+## Verify the contract before spending a turn
+```bash
+loops validate team.loop.ts          # loads + constructs, no model calls
+loops describe team.loop.ts --json   # the shape, incl. each agent node's contract (tier, outputs, failure modes)
+```
+`describe --json` reflects the contract you declared back at you, so you confirm the team you built is the team you meant. To watch or supervise the team once it runs, see `skills/supervise-loop-run/SKILL.md`.

package/skills/supervise-loop-run/SKILL.md ADDED Viewed

@@ -0,0 +1,64 @@
+---
+name: supervise-loop-run
+description: Use when an agent needs to observe, monitor, or supervise a running loops job from another process: discover live runs, read a run's state and shape, stream its events, inspect the decisions it made (dispatch/completion/surfacing/revision), or decide whether to intervene. Load this when watching a long run or supervising several at once. Requires the run to have been started with `--supervise`.
+---
+# Supervising a loop run
+A run started with `loops run <file> --supervise` registers itself under `~/.loops/runs/<runId>/` and writes its live state, raw events (`events.jsonl`), and semantic decisions (`semantic.jsonl`) there as it goes. Another process reads those files with no daemon and no socket: the filesystem is the channel. The supervising commands below (`list`, `status`, `tail`, `records`) are all read-only; supervising never touches the run.
+```bash
+loops run build.loop.ts --supervise   # in one terminal (or backgrounded)
+```
+## The loop: list → status → tail → records → decide
+**`loops list`** (alias `ls`) discovers runs. Each line is the runId, state (`running` / `dead` / a terminal status like `pass`/`fail`/`paused`), current iteration, age, and title. A run whose process is gone is marked `dead`.
+**`loops status <runId>`** prints a point-in-time snapshot: terminal-or-live state, the loop's shape, the last gate verdict (which gate, met, confidence, reason), the last outcome, and token usage. Use this to answer where a run stands and whether it is healthy.
+**`loops tail <runId>`** streams the raw event log live (Ctrl-C to stop). It ends on its own when the run reaches a terminal status or its process disappears. Use this to watch a turn unfold.
+**`loops records <runId>`** is the **primary agent API**: the semantic decision stream, one line per meaningful thing the run decided. This is what an agent reads to reason about a run, not the raw `--json` event firehose. Five kinds:
+| kind | meaning |
+| --- | --- |
+| `dispatch` | a job or dag-node started |
+| `completion` | a job / loop / dag finished (carries the outcome status + summary) |
+| `surfacing` | a review or kickback raised feedback (carries severity + reason) |
+| `revision-emitted` | an outcome asked for another pass |
+| `revision-routed` | that revision was routed to a target (accepted/rejected) |
+Filter it down for a machine-readable slice:
+```bash
+loops records <runId> --json                                  # everything, as JSONL
+loops records <runId> --kind completion                       # just what finished
+loops records <runId> --kind revision                         # both revision kinds (emitted + routed)
+loops records <runId> --path ship/implementation --json       # only this subtree of the loop tree
+loops records <runId> --kind surfacing --since 2026-07-01T09:00:00Z
+loops records <runId> --last 20                               # the most recent 20 matching records
+```
+`--path` is a slash-separated prefix over the record's position in the loop tree. `--kind revision` is the convenience union of `revision-emitted` and `revision-routed`.
+## Deciding what to do next
+Read `records` (and `status` for tokens/gate) to choose an action, since loops does not act for you:
+- **Converged**: a top-level `completion` with `status: pass`. Done; nothing to do.
+- **Stuck in review**: repeated `surfacing` / `revision-routed` on the same node with a `block`/`should-fix` severity and the iteration climbing toward its cap. The gate is doing its job or the worker cannot satisfy it; inspect the reason and decide whether to let it run, abort, or (if you drive the run) route different feedback.
+- **Dead**: `list` shows `dead`, or `status` says the process is gone with no terminal outcome. The run crashed or was killed; investigate its last `completion`/event.
+- **Budget-bound**: `status` shows tokens near the run's budget; expect a `paused` outcome next.
+## Build your own supervisor
+The read side is on the public surface, so an agent supervising a fleet (killing the ones that drift, watching the ones mid-revision) reads the same files programmatically:
+```ts
+import { listRuns, readRunStatus, runEventsPath, runSemanticRecordsPath, semanticRecordsFromEvent } from '@loops-adk/core';
+```
+`listRuns()` and `readRunStatus(runId)` mirror `list`/`status`; `runEventsPath`/`runSemanticRecordsPath` locate the two JSONL streams to read directly. `semanticRecordsFromEvent(event)` derives the semantic records from a raw event if you tail the event stream yourself.
+To author or shape the run you are supervising, see `skills/author-loop/SKILL.md`; to compose the agent team inside it, see `skills/design-agent-team/SKILL.md`.