@loops-adk/core 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -15
- package/assets/logo.png +0 -0
- package/dist/{agent-sdk-RF5VJZAT.js → agent-sdk-4QJDWM7N.js} +3 -3
- package/dist/{agent-sdk-RF5VJZAT.js.map → agent-sdk-4QJDWM7N.js.map} +1 -1
- package/dist/api.d.ts +119 -3
- package/dist/api.js +26 -10
- package/dist/api.js.map +1 -1
- package/dist/{chunk-6BDWTFOS.js → chunk-3PMVII43.js} +784 -37
- package/dist/chunk-3PMVII43.js.map +1 -0
- package/dist/{chunk-XC46B4FD.js → chunk-MA6NDQMO.js} +2 -2
- package/dist/chunk-MA6NDQMO.js.map +1 -0
- package/dist/{claude-cli-U7WEVAOL.js → claude-cli-75AOQUKG.js} +3 -3
- package/dist/{claude-cli-U7WEVAOL.js.map → claude-cli-75AOQUKG.js.map} +1 -1
- package/dist/{codex-6I5UZ2HM.js → codex-LYZF52WL.js} +25 -13
- package/dist/codex-LYZF52WL.js.map +1 -0
- package/dist/env/command.d.ts +1 -1
- package/dist/env/docker.d.ts +1 -1
- package/dist/env/sst.d.ts +1 -1
- package/dist/index.js +155 -14
- package/dist/index.js.map +1 -1
- package/dist/{types-B4wGVpqo.d.ts → types-CpB03Jj4.d.ts} +255 -38
- package/package.json +11 -1
- package/skills/author-loop/SKILL.md +14 -5
- package/skills/design-agent-team/SKILL.md +108 -0
- package/skills/supervise-loop-run/SKILL.md +64 -0
- package/dist/chunk-6BDWTFOS.js.map +0 -1
- package/dist/chunk-XC46B4FD.js.map +0 -1
- package/dist/codex-6I5UZ2HM.js.map +0 -1
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* fixed provider set. (`mock` is constructed directly in tests/examples, not
|
|
10
10
|
* registered by name, so it is intentionally not listed here.)
|
|
11
11
|
*/
|
|
12
|
-
type EngineName = 'agent-sdk' | 'claude-cli' | 'anthropic-api' | (string & {});
|
|
12
|
+
type EngineName = 'agent-sdk' | 'claude-cli' | 'codex' | 'anthropic-api' | (string & {});
|
|
13
13
|
interface Usage {
|
|
14
14
|
inputTokens: number;
|
|
15
15
|
outputTokens: number;
|
|
@@ -30,7 +30,7 @@ interface AgentRequest {
|
|
|
30
30
|
* disallow the sub-agent tool (`SUBAGENT_TOOLS`), so a branch of the graph bottoms out
|
|
31
31
|
* here instead of expanding into an uncontrolled swarm — control over where work stops.
|
|
32
32
|
* Authoritative over `allowedTools` (a disallow wins). Engines with no sub-agent tool
|
|
33
|
-
* (anthropic-api, mock) ignore it.
|
|
33
|
+
* (anthropic-api, codex, mock) ignore it.
|
|
34
34
|
*/
|
|
35
35
|
leaf?: boolean;
|
|
36
36
|
}
|
|
@@ -77,10 +77,10 @@ interface Engine {
|
|
|
77
77
|
type EngineRef = EngineName | Engine;
|
|
78
78
|
declare function isEngine(ref: EngineRef | undefined): ref is Engine;
|
|
79
79
|
/**
|
|
80
|
-
* How a tool-using engine
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
*
|
|
80
|
+
* How a tool-using engine treats permission prompts. Mirrors the Claude Code
|
|
81
|
+
* values. `bypassPermissions` lets a headless worker read/write/run without
|
|
82
|
+
* prompting — required for an unattended agent that must touch the filesystem or
|
|
83
|
+
* shell, and to be set deliberately.
|
|
84
84
|
*/
|
|
85
85
|
type PermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan' | 'dontAsk' | 'auto';
|
|
86
86
|
/** Per-run options that the registry uses to construct engines. */
|
|
@@ -88,13 +88,14 @@ interface EngineOptions {
|
|
|
88
88
|
/** Default model when a request/step does not name one. */
|
|
89
89
|
defaultModel?: string;
|
|
90
90
|
apiKey?: string;
|
|
91
|
-
/** For
|
|
91
|
+
/** For CLI-backed engines: path to the binary. */
|
|
92
92
|
cliBinary?: string;
|
|
93
|
-
/** Extra args appended to
|
|
93
|
+
/** Extra args appended to CLI-backed engine invocations. */
|
|
94
94
|
cliArgs?: string[];
|
|
95
95
|
/**
|
|
96
|
-
* Permission mode for tool-using engines
|
|
97
|
-
*
|
|
96
|
+
* Permission mode for tool-using engines. Unset = the engine/CLI default
|
|
97
|
+
* where applicable; the Codex adapter stays read-only unless explicitly set
|
|
98
|
+
* to `bypassPermissions`.
|
|
98
99
|
*/
|
|
99
100
|
permissionMode?: PermissionMode;
|
|
100
101
|
}
|
|
@@ -199,6 +200,27 @@ interface Skill {
|
|
|
199
200
|
/** The methodology instructions — prepended to the agent's system when it applies them. */
|
|
200
201
|
instructions: string;
|
|
201
202
|
}
|
|
203
|
+
type AgentTier = 'worker' | 'reviewer' | 'lead' | 'specialist' | 'utility' | (string & {});
|
|
204
|
+
type AgentSkillRef = string | Skill;
|
|
205
|
+
interface AgentOutputContract {
|
|
206
|
+
/** Stable output name, such as `patch`, `review`, or `test-report`. */
|
|
207
|
+
name: string;
|
|
208
|
+
description?: string;
|
|
209
|
+
/** Optional structured schema owned by the loop author. Loops stores it; it does not interpret it. */
|
|
210
|
+
schema?: unknown;
|
|
211
|
+
}
|
|
212
|
+
interface AgentHumanGate {
|
|
213
|
+
/** Stable gate name, such as `prod-approval` or `security-signoff`. */
|
|
214
|
+
name: string;
|
|
215
|
+
description?: string;
|
|
216
|
+
when?: string;
|
|
217
|
+
}
|
|
218
|
+
interface AgentFailureMode {
|
|
219
|
+
mode: string;
|
|
220
|
+
recovery: string;
|
|
221
|
+
detection?: string;
|
|
222
|
+
severity?: 'block' | 'should-fix' | 'nice-to-have' | (string & {});
|
|
223
|
+
}
|
|
202
224
|
interface AgentDef {
|
|
203
225
|
/** Identity (also the default job label). */
|
|
204
226
|
name: string;
|
|
@@ -216,15 +238,31 @@ interface AgentDef {
|
|
|
216
238
|
* stop a thorough agent from quietly expanding into a slow, expensive swarm.
|
|
217
239
|
*/
|
|
218
240
|
leaf?: boolean;
|
|
241
|
+
/** Contract tier for humans, `describe` output, and future discovery. No scheduling authority. */
|
|
242
|
+
tier?: AgentTier;
|
|
219
243
|
/** Structured job descriptions (not prose) — for discovery / docs. */
|
|
220
244
|
capabilities?: string[];
|
|
245
|
+
/** Structured outputs this agent is expected to produce. */
|
|
246
|
+
outputs?: AgentOutputContract[];
|
|
221
247
|
/** Methodologies the agent applies; their instructions are folded into the system. */
|
|
222
248
|
skills?: Skill[];
|
|
249
|
+
/** Skills the caller should supply before the turn. Metadata only unless also listed in `skills`. */
|
|
250
|
+
requiresSkills?: AgentSkillRef[];
|
|
251
|
+
/** Skills the agent is known to use. Metadata only unless also listed in `skills`. */
|
|
252
|
+
usesSkills?: AgentSkillRef[];
|
|
253
|
+
/** Human approvals or external handoffs this agent may need. Metadata only. */
|
|
254
|
+
humanGates?: AgentHumanGate[];
|
|
223
255
|
/** Named failure modes + their recovery — first-class contracts, not buried prose. */
|
|
224
|
-
failureModes?:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
256
|
+
failureModes?: AgentFailureMode[];
|
|
257
|
+
}
|
|
258
|
+
interface AgentContractSummary {
|
|
259
|
+
tier?: string;
|
|
260
|
+
capabilities?: string[];
|
|
261
|
+
outputs?: string[];
|
|
262
|
+
requiresSkills?: string[];
|
|
263
|
+
usesSkills?: string[];
|
|
264
|
+
humanGates?: string[];
|
|
265
|
+
failureModes?: string[];
|
|
228
266
|
}
|
|
229
267
|
/** Read a markdown file as a string — for `system` or skill `instructions`. Pass an
|
|
230
268
|
* absolute path, or `new URL('./x.md', import.meta.url)` for a path relative to the file. */
|
|
@@ -233,6 +271,7 @@ declare function fromFile(path: string | URL): string;
|
|
|
233
271
|
declare function defineSkill(skill: Skill): Skill;
|
|
234
272
|
/** Define an agent. Identity + validation; strongly typed (the wrapper around the md). */
|
|
235
273
|
declare function defineAgent(def: AgentDef): AgentDef;
|
|
274
|
+
declare function agentContract(agent: AgentDef | undefined): AgentContractSummary | undefined;
|
|
236
275
|
/**
|
|
237
276
|
* Resolve an agent's system prompt, folding in its skills' methodologies. This is what
|
|
238
277
|
* `agentJob` hands the engine as `system`.
|
|
@@ -269,6 +308,17 @@ interface AgentJobConfig {
|
|
|
269
308
|
* tool), so a branch bottoms out here. Falls back to the agent def's `leaf`.
|
|
270
309
|
*/
|
|
271
310
|
leaf?: boolean;
|
|
311
|
+
/**
|
|
312
|
+
* Append the current `ctx.lastReview` / revision feedback to the prompt. This
|
|
313
|
+
* keeps implementation agents from having to remember to manually read the
|
|
314
|
+
* runtime feedback channel in every prompt function.
|
|
315
|
+
*/
|
|
316
|
+
consumeFeedback?: boolean;
|
|
317
|
+
/**
|
|
318
|
+
* Append a small DAG-position block: this node, its direct dependencies, and
|
|
319
|
+
* its direct dependents, without handing the agent the whole orchestration graph.
|
|
320
|
+
*/
|
|
321
|
+
graphContext?: boolean;
|
|
272
322
|
/** Working dir for the turn. Default: the workspace dir (the worktree). */
|
|
273
323
|
cwd?: string;
|
|
274
324
|
timeoutMs?: number;
|
|
@@ -341,20 +391,122 @@ interface CommitJobConfig {
|
|
|
341
391
|
* dropping the work's record.
|
|
342
392
|
*/
|
|
343
393
|
declare function commitJob(config: CommitJobConfig): Job;
|
|
344
|
-
|
|
345
|
-
* Build an `Outcome` that sends work back to an earlier dag node — real-team
|
|
346
|
-
* feedback ("marketing found the contract drifted; re-run engineering"). Return
|
|
347
|
-
* it from any job or `agentJob({ outcome })` mapper. The enclosing `dag` re-runs
|
|
348
|
-
* `to` and its dependents with `reason` threaded in as `lastReview`, bounded by
|
|
349
|
-
* `DagConfig.maxKickbacks`. Defaults to a `fail` status, so an unresolved
|
|
350
|
-
* kickback (budget spent) leaves the dag failing honestly; override via `over`
|
|
351
|
-
* (e.g. `{ status: 'pass' }`) when the kicking node's own work is fine and it is
|
|
352
|
-
* only requesting an upstream revision.
|
|
353
|
-
*/
|
|
354
|
-
declare function kickback(to: string, reason: string, over?: Partial<Outcome>): Outcome;
|
|
394
|
+
|
|
355
395
|
/** A deterministic step from a plain function — for glue, checks, side effects. */
|
|
356
396
|
declare function fnJob(label: string, fn: (ctx: JobContext) => Outcome | Promise<Outcome>): Job;
|
|
357
397
|
|
|
398
|
+
/**
|
|
399
|
+
* No-progress (stall) detection — the third hard stop, alongside `max` and
|
|
400
|
+
* `budget`. `max` bounds how many attempts a loop gets and `budget` bounds what
|
|
401
|
+
* they cost; neither can tell "slow but real convergence" from "the same failure
|
|
402
|
+
* five turns running". This module supplies that sensor, so a doomed loop exits
|
|
403
|
+
* at iteration N+window instead of burning everything it was given.
|
|
404
|
+
*
|
|
405
|
+
* The decision rule is NOVELTY, not change. An iteration makes progress when it
|
|
406
|
+
* reaches a state this run has never seen:
|
|
407
|
+
*
|
|
408
|
+
* - the workspace fingerprint (HEAD + pending diff + untracked content) is new
|
|
409
|
+
* — so an agent oscillating A→B→A gets no credit for the return trip;
|
|
410
|
+
* - a caller-supplied `signal` value is new — the escape hatch for loops whose
|
|
411
|
+
* progress lives outside the worktree (a queue length, a passing-test count);
|
|
412
|
+
* - the gate confidence beats its previous best by `minConfidenceDelta` — a
|
|
413
|
+
* high-water mark, so judge jitter around a flat score is not progress but
|
|
414
|
+
* slow, steady improvement accumulates until it clears the bar.
|
|
415
|
+
*
|
|
416
|
+
* `window` consecutive iterations with evidence and no novelty = stalled. The
|
|
417
|
+
* default is deliberately conservative (any channel's novelty counts): a false
|
|
418
|
+
* "stalled" on work that was actually converging is worse than one more
|
|
419
|
+
* iteration. An iteration with NO evidence channel at all (no git workspace, no
|
|
420
|
+
* confidence, no signal) is indeterminate — it neither extends nor resets the
|
|
421
|
+
* stall run, and the detector reports itself inert so the loop can warn once.
|
|
422
|
+
* Gate/review reasons are deliberately NOT compared: judge prose varies between
|
|
423
|
+
* identical verdicts, so it is quoted in the report but never used as evidence.
|
|
424
|
+
*/
|
|
425
|
+
|
|
426
|
+
interface NoProgressConfig {
|
|
427
|
+
/** Consecutive no-progress iterations before the loop stalls out. Default 3. */
|
|
428
|
+
window?: number;
|
|
429
|
+
/**
|
|
430
|
+
* How far the gate confidence must beat its previous best to count as
|
|
431
|
+
* progress (the high-water mark). Default 0.02.
|
|
432
|
+
*/
|
|
433
|
+
minConfidenceDelta?: number;
|
|
434
|
+
/**
|
|
435
|
+
* A caller-supplied progress fingerprint for state the workspace cannot see
|
|
436
|
+
* (a queue length, a passing-test count, an external resource). Returning a
|
|
437
|
+
* value this run has already produced counts as no progress; `undefined`
|
|
438
|
+
* leaves the channel out of this iteration's evidence. A throw is a bug in
|
|
439
|
+
* the definition and fails the loop, like any other guarded user code.
|
|
440
|
+
*/
|
|
441
|
+
signal?: (ctx: JobContext, last: Outcome | undefined) => string | number | undefined | Promise<string | number | undefined>;
|
|
442
|
+
/**
|
|
443
|
+
* Read the workspace fingerprint each iteration (a few git subprocesses).
|
|
444
|
+
* Default true; set false when a custom `signal` is the only honest channel.
|
|
445
|
+
*/
|
|
446
|
+
workspace?: boolean;
|
|
447
|
+
}
|
|
448
|
+
/** What `LoopConfig.noProgress` accepts: a bare window, or the full config. */
|
|
449
|
+
type NoProgressInput = number | NoProgressConfig;
|
|
450
|
+
/** The evidence a stalled loop carries out — on the outcome and the event. */
|
|
451
|
+
interface StallReport {
|
|
452
|
+
/** The configured window that was filled. */
|
|
453
|
+
window: number;
|
|
454
|
+
/** The consecutive no-progress iterations, in order. */
|
|
455
|
+
iterations: number[];
|
|
456
|
+
/** The last gate/review reason observed — what kept failing. */
|
|
457
|
+
reason: string;
|
|
458
|
+
/** Per-channel assessment of the tripping iteration. */
|
|
459
|
+
evidence: string[];
|
|
460
|
+
}
|
|
461
|
+
/** One completed, non-converged iteration as the tracker sees it. */
|
|
462
|
+
interface ProgressSample {
|
|
463
|
+
iteration: number;
|
|
464
|
+
/** Workspace fingerprint, when the workspace is a git repo. */
|
|
465
|
+
fingerprint?: string;
|
|
466
|
+
/** The confidence that gated this turn (review ?? until ?? body). */
|
|
467
|
+
confidence?: number;
|
|
468
|
+
/** The custom signal value, when a `signal` fn is configured. */
|
|
469
|
+
signal?: string;
|
|
470
|
+
/** The gate/review reason — reporting only, never evidence. */
|
|
471
|
+
reason?: string;
|
|
472
|
+
}
|
|
473
|
+
/** Resolve the `noProgress` sugar (`3` ⇒ `{ window: 3 }`) with defaults applied. */
|
|
474
|
+
declare function resolveNoProgress(input: NoProgressInput | undefined): Required<Pick<NoProgressConfig, 'window' | 'minConfidenceDelta'>> & NoProgressConfig | undefined;
|
|
475
|
+
/**
|
|
476
|
+
* The novelty tracker behind `LoopConfig.noProgress`. Feed it one sample per
|
|
477
|
+
* non-converged iteration; it returns a `StallReport` the moment `window`
|
|
478
|
+
* consecutive samples show evidence and no novelty.
|
|
479
|
+
*/
|
|
480
|
+
declare class ProgressTracker {
|
|
481
|
+
readonly window: number;
|
|
482
|
+
readonly minConfidenceDelta: number;
|
|
483
|
+
/** Every state this run has reached, namespaced by channel. */
|
|
484
|
+
private readonly seen;
|
|
485
|
+
/** Confidence high-water mark — the best score at the last progress point. */
|
|
486
|
+
private best;
|
|
487
|
+
/** The current run of consecutive no-progress iterations. */
|
|
488
|
+
private stalledRun;
|
|
489
|
+
private lastEvidence;
|
|
490
|
+
private lastReason;
|
|
491
|
+
private indeterminate;
|
|
492
|
+
private sampled;
|
|
493
|
+
constructor(cfg: {
|
|
494
|
+
window: number;
|
|
495
|
+
minConfidenceDelta: number;
|
|
496
|
+
});
|
|
497
|
+
/**
|
|
498
|
+
* Record one iteration. Returns a `StallReport` when this sample fills the
|
|
499
|
+
* window, else undefined.
|
|
500
|
+
*/
|
|
501
|
+
record(sample: ProgressSample): StallReport | undefined;
|
|
502
|
+
/**
|
|
503
|
+
* True when the detector has seen a full window of samples and none carried
|
|
504
|
+
* any evidence channel — detection is configured but cannot fire. The loop
|
|
505
|
+
* uses this to warn once instead of silently staying inert.
|
|
506
|
+
*/
|
|
507
|
+
isInert(): boolean;
|
|
508
|
+
}
|
|
509
|
+
|
|
358
510
|
/**
|
|
359
511
|
* The Environment provider — the third axis, after Engine (where the agent
|
|
360
512
|
* thinks) and Workspace (where the code lives). Environment is where the code
|
|
@@ -556,18 +708,23 @@ interface Outcome {
|
|
|
556
708
|
/** Present when `status` is driven by a failure. */
|
|
557
709
|
error?: LoopError;
|
|
558
710
|
/**
|
|
559
|
-
*
|
|
560
|
-
*
|
|
561
|
-
*
|
|
562
|
-
*
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
711
|
+
* Present when a loop ended `exhausted` because its `noProgress` detector
|
|
712
|
+
* tripped: the evidence that the last `window` iterations reached no state
|
|
713
|
+
* the run had not already seen. Lets a supervisor tell "stalled, re-brief it"
|
|
714
|
+
* from "ran out of runway mid-progress" without parsing the summary.
|
|
715
|
+
*/
|
|
716
|
+
stall?: StallReport;
|
|
717
|
+
/**
|
|
718
|
+
* Structured feedback asking an earlier unit of work for another pass, and the
|
|
719
|
+
* single channel for it. When `revision.target` is set, the enclosing `dag`
|
|
720
|
+
* re-runs that node and its transitive dependents with `revision.reason`
|
|
721
|
+
* threaded in as `lastReview`, bounded by `DagConfig.maxKickbacks` (default
|
|
722
|
+
* 0 — ignored). A feedback cycle is a loop boundary, not a backward edge: the
|
|
723
|
+
* graph stays acyclic and the re-run budget guarantees it terminates. Produce
|
|
724
|
+
* one with `revisionRequest({ target, findings })` or the terse
|
|
725
|
+
* `kickback(to, reason)`.
|
|
726
|
+
*/
|
|
727
|
+
revision?: RevisionRequest;
|
|
571
728
|
}
|
|
572
729
|
type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
573
730
|
/**
|
|
@@ -583,6 +740,33 @@ interface Workspace {
|
|
|
583
740
|
/** The branch checked out in `dir`, when known (undefined on detached HEAD). */
|
|
584
741
|
readonly branch?: string;
|
|
585
742
|
}
|
|
743
|
+
type FeedbackActionSeverity = 'block' | 'should-fix' | 'nice-to-have' | 'approve';
|
|
744
|
+
/** `blocking` and `advisory` are legacy aliases kept for source compatibility. */
|
|
745
|
+
type FeedbackSeverity = FeedbackActionSeverity | 'blocking' | 'advisory';
|
|
746
|
+
type FeedbackDecision = 'accepted' | 'rejected' | 'deferred' | 'escalated';
|
|
747
|
+
interface FeedbackFinding {
|
|
748
|
+
reviewer?: string;
|
|
749
|
+
severity?: FeedbackSeverity;
|
|
750
|
+
decision?: FeedbackDecision;
|
|
751
|
+
evidence: string;
|
|
752
|
+
recommendation?: string;
|
|
753
|
+
}
|
|
754
|
+
type RevisionRerun = 'target-and-dependents';
|
|
755
|
+
interface RevisionRequest {
|
|
756
|
+
target?: string;
|
|
757
|
+
reason: string;
|
|
758
|
+
findings?: FeedbackFinding[];
|
|
759
|
+
rerun?: RevisionRerun;
|
|
760
|
+
source?: string;
|
|
761
|
+
decision?: FeedbackDecision;
|
|
762
|
+
}
|
|
763
|
+
interface GraphPosition {
|
|
764
|
+
dag: string;
|
|
765
|
+
node: string;
|
|
766
|
+
path: readonly string[];
|
|
767
|
+
needs: readonly string[];
|
|
768
|
+
dependents: readonly string[];
|
|
769
|
+
}
|
|
586
770
|
/**
|
|
587
771
|
* Threaded into every `Job`. Carries the engine, the abort signal, the event
|
|
588
772
|
* sink, a mutable scratchpad shared across the run, the workspace the work
|
|
@@ -612,6 +796,8 @@ interface JobContext {
|
|
|
612
796
|
readonly depth: number;
|
|
613
797
|
/** Loop/step names from the root down to here. */
|
|
614
798
|
readonly path: readonly string[];
|
|
799
|
+
/** The current DAG node position, when this job is running inside a dag node. */
|
|
800
|
+
readonly graph?: GraphPosition;
|
|
615
801
|
/** The previous body outcome in the enclosing loop (used by `review`/gates). */
|
|
616
802
|
readonly lastOutcome?: Outcome;
|
|
617
803
|
/** The most recent failed-review outcome, so a restart can act on it. */
|
|
@@ -680,6 +866,17 @@ interface LoopConfig {
|
|
|
680
866
|
stopOn?: ConditionInput;
|
|
681
867
|
/** Iteration cap. Reached without passing => `exhausted`. */
|
|
682
868
|
max?: number;
|
|
869
|
+
/**
|
|
870
|
+
* The third hard stop, alongside `max` and `budget`: end the loop `exhausted`
|
|
871
|
+
* when this many consecutive iterations make no observable progress — no
|
|
872
|
+
* workspace state the run has not already visited, no custom `signal` value
|
|
873
|
+
* not already seen, no gate confidence beating its previous best. A bare
|
|
874
|
+
* number is the window (`3` ⇒ three flat iterations); pass a `NoProgressConfig`
|
|
875
|
+
* for the full knobs. Off by default: a polling loop legitimately makes no
|
|
876
|
+
* progress until the outside world changes, so this is opt-in like `commit`.
|
|
877
|
+
* The stalled outcome carries the evidence as `Outcome.stall`.
|
|
878
|
+
*/
|
|
879
|
+
noProgress?: NoProgressInput;
|
|
683
880
|
/**
|
|
684
881
|
* Runs when `until` is met. If it returns `pass`, the loop completes.
|
|
685
882
|
* Any other status re-enters the loop — this is the "review fails, run the
|
|
@@ -800,12 +997,26 @@ type LoopEvent = {
|
|
|
800
997
|
ts: number;
|
|
801
998
|
path: string[];
|
|
802
999
|
outcome: Outcome;
|
|
1000
|
+
/**
|
|
1001
|
+
* Whether the loop will re-enter to act on a failing review (the review's
|
|
1002
|
+
* revision was accepted), vs give up because it exhausted its iterations or
|
|
1003
|
+
* `maxReviewRestarts`. Mirrors `dag:kickback`'s `accepted`. Only meaningful
|
|
1004
|
+
* for a non-pass review; a downstream consumer that omits it (e.g. a test
|
|
1005
|
+
* fixture) is treated as accepted.
|
|
1006
|
+
*/
|
|
1007
|
+
accepted?: boolean;
|
|
803
1008
|
} | {
|
|
804
1009
|
kind: 'loop:end';
|
|
805
1010
|
ts: number;
|
|
806
1011
|
path: string[];
|
|
807
1012
|
outcome: Outcome;
|
|
808
1013
|
iterations: number;
|
|
1014
|
+
} | {
|
|
1015
|
+
kind: 'loop:stall';
|
|
1016
|
+
ts: number;
|
|
1017
|
+
path: string[];
|
|
1018
|
+
iteration: number;
|
|
1019
|
+
report: StallReport;
|
|
809
1020
|
} | {
|
|
810
1021
|
kind: 'limit:wait';
|
|
811
1022
|
ts: number;
|
|
@@ -834,6 +1045,12 @@ type LoopEvent = {
|
|
|
834
1045
|
node: string;
|
|
835
1046
|
phase: NodePhase;
|
|
836
1047
|
outcome?: Outcome;
|
|
1048
|
+
/**
|
|
1049
|
+
* Which run of this node this is: 1 on the first pass, incremented each time
|
|
1050
|
+
* a kickback re-runs it. Lets a records consumer tell a re-run's completion
|
|
1051
|
+
* from the original and correlate it with the revision that caused it.
|
|
1052
|
+
*/
|
|
1053
|
+
attempt?: number;
|
|
837
1054
|
} | {
|
|
838
1055
|
kind: 'dag:end';
|
|
839
1056
|
ts: number;
|
|
@@ -895,4 +1112,4 @@ type LoopEvent = {
|
|
|
895
1112
|
code: string;
|
|
896
1113
|
};
|
|
897
1114
|
|
|
898
|
-
export {
|
|
1115
|
+
export { type NoProgressInput as $, type AgentDef as A, type BudgetConfig as B, type ConditionInput as C, type DagConfig as D, type Environment as E, type FeedbackFinding as F, type GraphPosition as G, type CommitJobConfig as H, type ConditionResult as I, type Job as J, type DagNode as K, type LoopConfig as L, type EngineStreamEvent as M, type ForgeOpts as N, type Outcome as O, GhForge as P, type GroundConfig as Q, type RevisionRerun as R, type LogLevel as S, LoopError as T, type Usage as U, type LoopErrorCode as V, type Workspace as W, type MergeOptions as X, MockForge as Y, type MockForgeOptions as Z, type NoProgressConfig as _, type FeedbackDecision as a, type OutcomeStatus as a0, type PrInput as a1, type PrPatch as a2, type PrRef as a3, type ProgressSample as a4, ProgressTracker as a5, type RawPredicate as a6, type RetryPolicy as a7, SUBAGENT_TOOLS as a8, type Skill as a9, type StallReport as aa, agentContract as ab, agentJob as ac, buildChecksArgs as ad, buildCreateArgs as ae, buildEditArgs as af, buildMergeArgs as ag, buildViewArgs as ah, commitJob as ai, defineAgent as aj, defineSkill as ak, fnJob as al, fromFile as am, isEngine as an, isEnvironment as ao, isForge as ap, resolveNoProgress as aq, resolveSystem as ar, type FeedbackSeverity as b, type FeedbackActionSeverity as c, type JobContext as d, type RevisionRequest as e, type JobMeta as f, type EngineRef as g, type Condition as h, type EngineOptions as i, type Engine as j, type EngineName as k, type AgentRequest as l, type EngineEventSink as m, type AgentResult as n, type EnvHandle as o, type LoopEvent as p, type Forge as q, type LimitPolicy as r, type AgentContractSummary as s, type AgentFailureMode as t, type AgentHumanGate as u, type AgentJobConfig as v, type AgentOutputContract as w, type AgentSkillRef as x, type AgentTier as y, Budget as z };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loops-adk/core",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "Jonny Neill",
|
|
6
6
|
"description": "Run an agent in a convergence loop with an honest done-gate. A small, nestable loop and DAG primitive: deterministic plus agent-judge conditions, git as memory, review-restart, budgets, and a live TUI.",
|
|
@@ -52,6 +52,7 @@
|
|
|
52
52
|
"dist",
|
|
53
53
|
"bin",
|
|
54
54
|
"skills",
|
|
55
|
+
"assets",
|
|
55
56
|
"README.md",
|
|
56
57
|
"LICENSE"
|
|
57
58
|
],
|
|
@@ -64,7 +65,16 @@
|
|
|
64
65
|
"typecheck": "tsc --noEmit",
|
|
65
66
|
"test": "vitest run",
|
|
66
67
|
"test:watch": "vitest",
|
|
68
|
+
"bench:ab": "tsx bench/ab.ts",
|
|
69
|
+
"bench:graph": "tsx bench/graph.ts",
|
|
70
|
+
"bench:signal": "BENCH_GRAPH_TASK=graph-tasks/stable-store-contract BENCH_OUT=bench/results-signal.json tsx bench/graph.ts",
|
|
71
|
+
"bench:compare": "tsx bench/compare.ts",
|
|
72
|
+
"bench:report": "tsx bench/report.ts",
|
|
73
|
+
"bench:report:sample": "tsx bench/report.ts bench/results.sample.json",
|
|
74
|
+
"bench:context:dry": "BENCH_DRY=1 BENCH_CB_GROUPS=bench/contextbench/groups.dry.json tsx bench/swecontextbench.ts",
|
|
75
|
+
"bench:mechanism": "tsx bench/mechanism.ts",
|
|
67
76
|
"example:poll": "tsx src/index.ts run examples/simple-poll.loop.ts --no-tui",
|
|
77
|
+
"example:stall": "tsx src/index.ts run examples/stall-demo.loop.ts --no-tui",
|
|
68
78
|
"example:gate": "tsx src/index.ts run examples/confidence-gate.loop.ts",
|
|
69
79
|
"prepack": "npm run build",
|
|
70
80
|
"prepublishOnly": "npm run typecheck"
|
|
@@ -97,21 +97,30 @@ dag({
|
|
|
97
97
|
|
|
98
98
|
`needs` are dependencies; `optional` nodes never block; an unmet `when` skips a node; `isolation: 'worktree'` (on the dag) or `isolate: true` (per node) runs writers in parallel worktrees that land back on pass. `sequence` and `parallel` are sugar over `dag`.
|
|
99
99
|
|
|
100
|
+
## Agents and feedback
|
|
101
|
+
|
|
102
|
+
A node can be a named specialist instead of an inline prompt. Define it once with `defineAgent` (persona in markdown via `fromFile`, structure in TS) and hand it to `agentJob({ agent })`; `defineSkill` folds a methodology into its system. The contract fields (`tier`, `outputs`, `failureModes`, …) are metadata for `describe` and validation, not scheduling power: the `dag` orchestrates, agents stay workers.
|
|
103
|
+
|
|
104
|
+
Review feedback is a structured revision request that flows back to the worker on one channel. In a loop, a failing `review` is threaded into the next turn as `ctx.lastReview`; set `consumeFeedback: true` and `agentJob` folds it into the prompt. Aggregate several reviewers with `reviewPanel`; route a fix back to an earlier dag node with a targeted `revisionRequest({ target, findings })` (or the terse `kickback(to, reason)`) when the dag's `maxKickbacks` allows it.
|
|
105
|
+
|
|
106
|
+
Composing a team of specialists, gates, and routed feedback is its own skill: see `skills/design-agent-team/SKILL.md`.
|
|
107
|
+
|
|
100
108
|
## Author → validate → run
|
|
101
109
|
|
|
102
110
|
```bash
|
|
103
|
-
loops validate path/to/feature.loop.ts
|
|
104
|
-
loops describe path/to/feature.loop.ts
|
|
105
|
-
loops
|
|
111
|
+
loops validate path/to/feature.loop.ts # offline pre-flight: loads + prints the shape, no model calls, no spend
|
|
112
|
+
loops describe path/to/feature.loop.ts # print the loop's shape (gate, body, nodes) without running
|
|
113
|
+
loops describe path/to/feature.loop.ts --json # the same shape as JSON (incl. each agent node's contract)
|
|
114
|
+
loops run path/to/feature.loop.ts # live Ink TUI
|
|
106
115
|
loops run path/to/feature.loop.ts --no-tui # plain streamed logs
|
|
107
|
-
loops run path/to/feature.loop.ts --json # NDJSON event
|
|
116
|
+
loops run path/to/feature.loop.ts --json # raw NDJSON event firehose (to supervise a run, prefer --supervise + records, below)
|
|
108
117
|
```
|
|
109
118
|
|
|
110
119
|
Always `loops validate` first. It imports and constructs the loop (catching syntax, import, and bad-export errors) without running it, so you fix authoring mistakes for free before spending a single agent turn. It also prints the loop's shape (its gate, body, and dag nodes), so you can confirm you built what you intended. `loops describe` prints that shape on its own.
|
|
111
120
|
|
|
112
121
|
`loops run` works from any repo, including one that uses `loops` as a submodule or dependency. The recipe's folder must be an ES module scope (a `package.json` with `{"type":"module"}`); repos that consume `loops` already have this. If a load fails with an ES-module error, that scope is what is missing.
|
|
113
122
|
|
|
114
|
-
Add `--supervise` to make a run observable from another process: it registers under `~/.loops/runs
|
|
123
|
+
Add `--supervise` to make a run observable from another process: it registers under `~/.loops/runs/`. From an agent, the primary read API is `loops records <runId>`, the semantic decision stream (dispatch / completion / surfacing / revision), filterable with `--kind`, `--path`, `--last`, `--json`, rather than the raw `run --json` firehose. `loops tail <runId>` streams live events, `loops status <runId>` reports terminal state, and `loops list` enumerates runs. Watching a long run or supervising several at once is its own skill: see `skills/supervise-loop-run/SKILL.md`.
|
|
115
124
|
|
|
116
125
|
## Gotchas
|
|
117
126
|
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: design-agent-team
|
|
3
|
+
description: "Use when composing a team of specialist agents in a loops `dag`: defining an `AgentDef`, folding in `defineSkill` methodologies, wiring review feedback (`reviewPanel`/`consumeFeedback`/`revisionRequest`), and gating nodes so the graph orchestrates and the agents stay workers, never dispatchers. Load this before turning a loop into a multi-agent team."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Designing an agent team
|
|
7
|
+
|
|
8
|
+
A `dag` of specialist agents is a team. The load-bearing rule that keeps it a team and not a swarm:
|
|
9
|
+
|
|
10
|
+
**The graph orchestrates; agents do not.** The `dag` is the manager (toposort + dispatch), `Condition`/`quorum` are the gates, `Outcome` is the result channel. An `AgentDef` is only the *contract*: who the agent is, what it may touch, how it works. It carries no scheduling authority. An agent produces an `Outcome`; the graph decides what runs next. Never build an agent whose job is to dispatch other agents; make the graph do it.
|
|
11
|
+
|
|
12
|
+
**REQUIRED BACKGROUND:** you compose these agents into a loop/dag. First read `skills/author-loop/SKILL.md` for the loop mental model, the honest gate, and git-memory.
|
|
13
|
+
|
|
14
|
+
## Two builders: a skill is a method, an agent is a worker
|
|
15
|
+
|
|
16
|
+
- `defineSkill({ name, instructions })` is a **methodology** (how to work: TDD, writing-plans). Prose only. A skill never dispatches an agent.
|
|
17
|
+
- `defineAgent({ ... })` is a **worker**: a persona plus its contract. It *composes* skills; the skills' instructions fold into its system prompt.
|
|
18
|
+
|
|
19
|
+
Persona and methodology live in editable markdown (`fromFile`); structure and types live in TS. The `.ts` is the typed wrapper around the `.md`.
|
|
20
|
+
|
|
21
|
+
```ts
|
|
22
|
+
import { defineAgent, defineSkill, fromFile, agentJob } from '@loops-adk/core';
|
|
23
|
+
|
|
24
|
+
const tdd = defineSkill({
|
|
25
|
+
name: 'tdd',
|
|
26
|
+
instructions: fromFile(new URL('./skills/tdd.md', import.meta.url)),
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
const storeEngineer = defineAgent({
|
|
30
|
+
name: 'store-engineer',
|
|
31
|
+
system: fromFile(new URL('./agents/store-engineer.md', import.meta.url)), // persona, as markdown
|
|
32
|
+
model: 'sonnet',
|
|
33
|
+
tools: ['edit', 'bash'], // the permission boundary
|
|
34
|
+
leaf: true, // may not spawn sub-agents; bottoms the branch out here
|
|
35
|
+
tier: 'worker', // contract metadata (no scheduling power)
|
|
36
|
+
capabilities: ['storage engine', 'id stability'],
|
|
37
|
+
outputs: [{ name: 'patch' }, { name: 'test-report' }],
|
|
38
|
+
skills: [tdd], // methodologies fold into the system
|
|
39
|
+
requiresSkills: ['contract-first'], // metadata unless also in `skills`
|
|
40
|
+
usesSkills: ['small-diff'],
|
|
41
|
+
humanGates: [{ name: 'prod-approval', when: 'deploying production changes' }],
|
|
42
|
+
failureModes: [{ mode: 'tests-flaky', recovery: 'isolate the flake, retry once', severity: 'should-fix' }],
|
|
43
|
+
});
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`agentJob({ agent: storeEngineer, prompt, ground: true })` resolves the def into the engine request (`system` = persona + folded skills, plus `model`/`tools`). Inline `system`/`model`/`tools`/`allowedTools` on the `agentJob` still override the def. Apart from `skills` (folded into `system`) and `leaf` (the fan-out brake, below), the remaining contract fields are **optional metadata** for validation, `loops describe`, docs, and future discovery. They change nothing at runtime; they do not grant dispatch authority.
|
|
47
|
+
|
|
48
|
+
**`leaf` is the fan-out brake.** A leaf agent cannot spawn sub-agents (the engine withholds the sub-agent tool). Use it to stop a thorough worker from quietly expanding into a slow, expensive swarm. The team's shape stays the graph you drew, not one the agent invents.
|
|
49
|
+
|
|
50
|
+
## Wire the team as a graph
|
|
51
|
+
|
|
52
|
+
```ts
|
|
53
|
+
import { dag, loop, agentJob, gateJob, quorum, agentCheck, commandSucceeds } from '@loops-adk/core';
|
|
54
|
+
|
|
55
|
+
dag({
|
|
56
|
+
name: 'ship',
|
|
57
|
+
nodes: {
|
|
58
|
+
store: loop({ name: 'store', body: agentJob({ agent: storeEngineer, prompt: 'Build the store to its tests.', ground: true }), until: commandSucceeds('npm', ['test']) }),
|
|
59
|
+
api: { needs: ['store'], job: loop({ /* apiEngineer, same shape */ }) },
|
|
60
|
+
review: { needs: ['api'], job: gateJob('review', quorum(2,
|
|
61
|
+
agentCheck({ agent: securityReviewer, question: 'Is it safe?' }),
|
|
62
|
+
agentCheck({ agent: correctnessReviewer, question: 'Is it correct?' }),
|
|
63
|
+
)) },
|
|
64
|
+
},
|
|
65
|
+
});
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Each engineer is a Converge loop (build to a `test` gate); reviewers are gates. `quorum(k, ...)` is a k-of-n jury; `gateJob(name, condition)` turns a `Condition` into a `Job` so it can be a node. Because a reviewer is just an agent and `agentCheck` takes an `engine`/`model`, any reviewer runs on any model, so put the adversarial lens on a second model for a genuinely independent signal.
|
|
69
|
+
|
|
70
|
+
## Feedback is a loop boundary, not a back-edge
|
|
71
|
+
|
|
72
|
+
Review findings are structured, and they flow back to the worker on the same channel whether they come from a loop's `review` slot or a dag kickback.
|
|
73
|
+
|
|
74
|
+
**In a loop:** a failing `review` outcome is threaded into the next body turn as `ctx.lastReview`. Set `consumeFeedback: true` so the worker reads it without you hand-writing "address the feedback" into every prompt:
|
|
75
|
+
|
|
76
|
+
```ts
|
|
77
|
+
const implement = agentJob({ agent: implementationAgent, prompt: brief, consumeFeedback: true });
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Aggregate several reviewers** with `reviewPanel`. Every reviewer is a gate: by default the panel passes only when all of them clear; with `pass: N` it passes when N of them clear (k-of-n). An empty panel is a construction error. Give each reviewer real evidence with `reviewContext`:
|
|
81
|
+
|
|
82
|
+
```ts
|
|
83
|
+
import { reviewPanel, reviewContext, agentCheck } from '@loops-adk/core';
|
|
84
|
+
|
|
85
|
+
const review = reviewPanel({
|
|
86
|
+
pass: 2, // optional: k-of-n instead of all
|
|
87
|
+
reviewers: [
|
|
88
|
+
{ name: 'security', review: agentCheck({ question: 'Is it safe?', context: reviewContext({ diff: true, ledger: true }) }) },
|
|
89
|
+
{ name: 'correctness', review: agentCheck({ question: 'Is it correct?', context: reviewContext({ tests: { command: 'npm', args: ['test'] } }) }) },
|
|
90
|
+
{ name: 'simplicity', review: agentCheck({ question: 'Is it simple?', context: reviewContext({ files: ['src/**'] }) }) },
|
|
91
|
+
],
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
A failing panel emits a `revisionRequest` carrying each failing reviewer's concern as a finding, threaded into the next pass.
|
|
96
|
+
|
|
97
|
+
**Route feedback across a DAG** with a targeted revision. When `DagConfig.maxKickbacks > 0`, a `revisionRequest({ target, findings })` (or the terse `kickback(to, reason)`) re-runs the target node and its transitive dependents, threading the reason in as their `lastReview`. Constrain valid targets with `DagNode.acceptsKickbackTo`. Because every cycle is a bounded re-run, not a graph edge, it always terminates.
|
|
98
|
+
|
|
99
|
+
Set `graphContext: true` to give a worker just enough of a map to act on routed feedback without seeing the whole orchestration: it appends a small block naming this node, its direct dependencies, and its direct dependents.
|
|
100
|
+
|
|
101
|
+
## Verify the contract before spending a turn
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
loops validate team.loop.ts # loads + constructs, no model calls
|
|
105
|
+
loops describe team.loop.ts --json # the shape, incl. each agent node's contract (tier, outputs, failure modes)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
`describe --json` reflects the contract you declared back at you, so you can confirm the team you built is the team you meant. To watch or supervise the team once it runs, see `skills/supervise-loop-run/SKILL.md`.
|