@minhduydev/mdpi 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/index.js +4 -2
  2. package/dist/template/.pi/AGENTS.md +1 -1
  3. package/dist/template/.pi/README.md +2 -3
  4. package/dist/template/.pi/VERSION +1 -1
  5. package/dist/template/.pi/agents/explore.md +1 -1
  6. package/dist/template/.pi/agents/scout.md +1 -1
  7. package/dist/template/.pi/extensions/templates-injector.ts +35 -7
  8. package/dist/template/.pi/prompts/INDEX.md +3 -9
  9. package/dist/template/.pi/prompts/gc.md +2 -1
  10. package/dist/template/.pi/prompts/verify.md +24 -0
  11. package/dist/template/.pi/skills/INDEX.md +40 -8
  12. package/dist/template/.pi/skills/dcp-hygiene/SKILL.md +1 -1
  13. package/dist/template/.pi/skills/frontend-design/SKILL.md +1 -1
  14. package/dist/template/.pi/skills/frontend-design/references/animation/motion-advanced.md +88 -15
  15. package/dist/template/.pi/skills/frontend-design/references/animation/motion-core.md +148 -13
  16. package/dist/template/.pi/skills/frontend-design/references/shadcn/setup.md +127 -20
  17. package/dist/template/.pi/skills/nextjs-app-router/SKILL.md +334 -0
  18. package/dist/template/.pi/skills/nextjs-cache/SKILL.md +262 -0
  19. package/dist/template/.pi/skills/react-best-practices/SKILL.md +79 -1
  20. package/dist/template/.pi/skills/react-compiler/SKILL.md +237 -0
  21. package/dist/template/.pi/skills/react-hook-form/SKILL.md +374 -0
  22. package/dist/template/.pi/skills/react-server-actions/SKILL.md +299 -0
  23. package/dist/template/.pi/skills/shadcn-ui/SKILL.md +404 -0
  24. package/dist/template/.pi/skills/tanstack-query/SKILL.md +330 -0
  25. package/dist/template/.pi/skills/v0/SKILL.md +264 -0
  26. package/dist/template/.pi/skills/zustand/SKILL.md +333 -0
  27. package/package.json +1 -1
  28. package/dist/template/.pi/context/fallow.md +0 -137
  29. package/dist/template/.pi/prompts/loop-check.md +0 -87
  30. package/dist/template/.pi/prompts/loop-init.md +0 -157
  31. package/dist/template/.pi/prompts/loop-review.md +0 -90
  32. package/dist/template/.pi/skills/loop-audit/SKILL.md +0 -141
  33. package/dist/template/.pi/skills/loop-cost/SKILL.md +0 -130
  34. package/dist/template/.pi/skills/loop-engineering/SKILL.md +0 -175
  35. package/dist/template/.pi/templates/loop-github-action.yml +0 -162
  36. package/dist/template/.pi/templates/loop-orchestrator.sh +0 -514
  37. package/dist/template/.pi/templates/loop-orchestrator.test.ts +0 -332
  38. package/dist/template/.pi/templates/loop-orchestrator.ts +0 -936
  39. package/dist/template/.pi/templates/loop-state.json +0 -24
  40. package/dist/template/.pi/templates/loop-state.md +0 -98
  41. package/dist/template/.pi/templates/loop-vision.md +0 -110
@@ -1,936 +0,0 @@
1
- /**
2
- * loop-orchestrator.ts — Node SDK primary orchestrator for the pi
3
- * loop-engineering harness (T9).
4
- *
5
- * Mirrors loop-orchestrator.sh (T10 — the portable bash alt). Composes the
6
- * Node pi-coding-agent SDK: `createAgentSession` with a capability-deprivation
7
- * tool allowlist (the maker structurally cannot ship), git worktree isolation
8
- * (parallel loops never collide), an exit-code gate (computational, never an
9
- * LLM's opinion), dedup state (JSON), and ship-on-pass (push `loop/<name>/<ts>`
10
- * + `gh pr create`).
11
- *
12
- * GRACEFUL DEGRADATION (FR10): every phase is wrapped in try/catch. A loop
13
- * failure is logged + recorded in STATE.json + the scheduler continues. This
14
- * module never throws out of `runOnce` for a loop failure; it only rejects for
15
- * operator/config errors raised before any loop work begins (bad args,
16
- * missing VISION.md).
17
- *
18
- * GATE-PARSE CONTRACT (must match T2's loop-vision.md EXACTLY — the bash
19
- * orchestrator T10 uses the identical contract; keep them in parity):
20
- * The gate command is extracted from `.pi/loops/<name>/VISION.md`:
21
- * THE FIRST fenced ```bash block located DIRECTLY UNDER the `## Gate`
22
- * heading. Extraction: find the `## Gate` heading line (allow trailing
23
- * whitespace), scan forward to the first opening fence line whose
24
- * info-string is `bash` (a line equal to ```bash, trailing whitespace
25
- * tolerated), take every line until the next closing fence (a line equal to
26
- * ```), strip leading/trailing whitespace on each line, drop blank lines,
27
- * join with `\n`, run via `bash -c "<command>"`, read the exit code.
28
- * exit 0 -> PASS -> ship (push `loop/<name>/<ts>` + `gh pr create`)
29
- * non-zero -> FAIL -> no ship; record in STATE.json.failures[]; cleanup
30
- * The gate decision is computational (exit code), never an LLM's opinion
31
- * (avoids the Ralph Wiggum loop). Keep exactly ONE ```bash block directly
32
- * under `## Gate` in VISION.md; this parser returns null on zero or >1 bash
33
- * blocks in the Gate section (ambiguous spec — refuse to guess).
34
- *
35
- * IDEMPOTENCE (FR9): an item already in STATE.json.processed is skipped
36
- * (NOTHING_TO_DO). Re-running is always safe; deleting STATE.json reprocesses.
37
- *
38
- * SDK-SHAPE NOTE: `@earendil-works/pi-coding-agent` is globally installed
39
- * (resolved by the pi runtime loader), NOT resolvable from this repo's
40
- * standalone tsc/tsx. The value import (createAgentSession / SessionManager) is
41
- * therefore deferred to a dynamic `await import()` inside the runtime
42
- * functions so the module remains importable in tests; the type-only
43
- * `import type { ... }` is erased at compile time (TS2307 for that import is
44
- * the one tolerated gap in the verify command). Local structural mirrors of
45
- * the SDK shapes are declared so tsc type-checks cleanly regardless of
46
- * module resolution.
47
- *
48
- * PHASES (each logs INFO/WARN/ERROR with instance id + duration):
49
- * A. parse args → resolve loop dir + repo root
50
- * B. load VISION.md gate (parse contract above) → GATE_CMD
51
- * C. git worktree add --detach <tmp> HEAD; createAgentSession + prompt
52
- * D. budget cap (FR13): `enforceBudgetCap` on accumulated tokens; on kill, abort + skip gate/ship
53
- * E. run gate via `bash -c "$GATE_CMD"`; capture exit code
54
- * F. on 0: ship: git push -u origin loop/<name>/<ts> + gh pr create
55
- * G. update STATE.json (processed/failures/metrics)
56
- * H. git worktree remove --force (try/finally cleanup)
57
- *
58
- * Usage (runtime):
59
- * const r = await runOnce({ loopName: "ci-triage", repoRoot: process.cwd() });
60
- *
61
- * Requires: pi SDK (global), git, gh (authenticated, for PR ship; falls back to
62
- * commit-only/log if absent). API key in env or CI secrets.
63
- */
64
-
65
- import * as fs from "node:fs";
66
- import * as path from "node:path";
67
- import * as os from "node:os";
68
- import { execFileSync, execSync } from "node:child_process";
69
- import { pathToFileURL } from "node:url";
70
-
71
- // Type-only SDK import — erased at compile time. TS2307 for this line is the
72
- // single tolerated gap in the verify command (the package is globally
73
- // installed, not a local dependency). Runtime value import is dynamic below.
74
- import type {
75
- AgentSession as SDKAgentSessionType,
76
- SessionStats as SDKSessionStatsType,
77
- } from "@earendil-works/pi-coding-agent";
78
-
79
- // =============================================================================
80
- // CONFIG BLOCK (mirror of T10's loop-orchestrator.sh config block)
81
- // =============================================================================
82
- // LOOP_NAME — set from runOnce({ loopName }); the .pi/loops/<name>/ dir.
83
- // GATE — auto-loaded from .pi/loops/<name>/VISION.md (parseGateCommand).
84
- // REPO_ROOT — set from runOnce({ repoRoot }); where `git worktree` runs.
85
- // TOKEN_CAP — per-run token ceiling (FR13); null disables enforcement. When non-null and
86
- // the maker's session token usage exceeds it, enforceBudgetCap
87
- // returns kill=true and runOnce calls session.abort().
88
- // MAKER_TOOLS — capability-deprivation allowlist (FR6). The maker CANNOT call
89
- // push/PR/Slack — they are not in this list. The maker only
90
- // stages files in the worktree + writes PR_BODY.md; the
91
- // orchestrator ships after the gate passes.
92
- // SHIP_TOOLS — the audit denylist for the tool-execution-start audit (FR6).
93
- // LOOP_DIR — resolved .pi/loops/<LOOP_NAME>/ (vision + state live here).
94
- // VISION_FILE — .pi/loops/<LOOP_NAME>/VISION.md (anti goal-drift contract).
95
- // STATE_FILE — .pi/loops/<LOOP_NAME>/STATE.json (dedup + metrics ledger).
96
-
97
- export const MAKER_TOOLS = ["read", "edit", "write", "bash", "grep", "find"] as const;
98
- export const SHIP_TOOLS: ReadonlySet<string> = new Set(["push", "pr", "slack"]);
99
-
100
- // FR13 budget cap: per-run token ceiling. null disables enforcement.
101
- // Override per-run via runOnce({ tokenCap }). When the maker's cumulative
102
- // message_end token usage exceeds this, the loop is killed mid-stream
103
- // (session.abort()) and STATE.json.metrics.killed=true is recorded.
104
- export const TOKEN_CAP: number | null = null;
105
-
106
- // =============================================================================
107
- // LOCAL SDK SHAPE MIRRORS (so tsc type-checks without resolving the package)
108
- // =============================================================================
109
-
110
- /** Mirror of the SDK `SessionStats` (subset we consume). */
111
- export interface SessionStatsMirror {
112
- sessionId?: string;
113
- userMessages?: number;
114
- assistantMessages?: number;
115
- toolCalls?: number;
116
- toolResults?: number;
117
- totalMessages?: number;
118
- tokens?: {
119
- input?: number;
120
- output?: number;
121
- cacheRead?: number;
122
- cacheWrite?: number;
123
- total?: number;
124
- // Verified SDK shape: AssistantMessage.usage.totalTokens (the canonical
125
- // per-message token count emitted on message_end events — a per-turn delta, not a session-cumulative total).
126
- totalTokens?: number;
127
- };
128
- cost?: number;
129
- contextUsage?: number;
130
- }
131
-
132
- /** Mirror of the SDK `AgentSessionEvent` union (subset we subscribe to). */
133
- export interface AgentSessionEventMirror {
134
- type: string;
135
- // message_end carries an AssistantMessage with .role + .usage. user
136
- // message_end events have no .usage, so we gate accumulation on role.
137
- message?: { role?: string; usage?: SessionStatsMirror["tokens"] };
138
- // tool_execution_start / tool_execution_end
139
- toolCallId?: string;
140
- toolName?: string;
141
- args?: unknown;
142
- result?: unknown;
143
- isError?: boolean;
144
- // agent_end
145
- willRetry?: boolean;
146
- messages?: unknown[];
147
- }
148
-
149
- /** Mirror of the SDK `AgentSession` (methods we call). */
150
- export interface AgentSessionMirror {
151
- prompt(text: string, options?: Record<string, unknown>): Promise<void>;
152
- subscribe(listener: (event: AgentSessionEventMirror) => void): () => void;
153
- abort(): Promise<void>;
154
- getSessionStats(): SessionStatsMirror;
155
- }
156
-
157
- /** Mirror of the SDK `SessionManager` static surface. */
158
- export interface SessionManagerMirror {
159
- inMemory(cwd?: string): unknown;
160
- create(cwd: string): unknown;
161
- }
162
-
163
- /** Mirror of `createAgentSession` options (subset we pass). */
164
- export interface CreateAgentSessionOptionsMirror {
165
- cwd?: string;
166
- model?: string;
167
- tools?: string[];
168
- noTools?: boolean;
169
- customTools?: unknown[];
170
- sessionManager?: unknown;
171
- settingsManager?: unknown;
172
- modelRegistry?: unknown;
173
- thinkingLevel?: unknown;
174
- }
175
-
176
- export interface CreateAgentSessionResultMirror {
177
- session: AgentSessionMirror;
178
- }
179
-
180
- /** Structural supertype of the SDK module namespace we consume. */
181
- interface PiSdkNamespace {
182
- createAgentSession(options?: CreateAgentSessionOptionsMirror): Promise<CreateAgentSessionResultMirror>;
183
- SessionManager: SessionManagerMirror;
184
- isToolCallEventType?: unknown;
185
- }
186
-
187
- // =============================================================================
188
- // STATE TYPES (mirror of .pi/templates/loop-state.json)
189
- // =============================================================================
190
-
191
- export interface LoopMetrics {
192
- runs: number;
193
- killed: boolean;
194
- kill_reason: string | null;
195
- tokens_used: number;
196
- token_cap: number | null;
197
- pr_opened: number;
198
- items_fixed: number;
199
- items_skipped: number;
200
- items_escalated: number;
201
- }
202
-
203
- export interface LoopState {
204
- loop_name: string;
205
- owner: string;
206
- cadence: string;
207
- last_run: string | null;
208
- in_progress: string[];
209
- completed: Array<{ item: string; branch: string; pr: string; at: string }>;
210
- escalated: unknown[];
211
- failures: Array<{ item: string; reason: string; at: string }>;
212
- lessons: unknown[];
213
- processed: string[];
214
- stop_conditions_met: unknown[];
215
- metrics: LoopMetrics;
216
- }
217
-
218
- // =============================================================================
219
- // PURE HELPERS — no SDK, no I/O. Unit-tested (TDD).
220
- // =============================================================================
221
-
222
- /**
223
- * Extract the gate command from a VISION.md document.
224
- *
225
- * Contract: the FIRST fenced ```bash block located DIRECTLY under the `## Gate`
226
- * heading. The Gate section runs from the `## Gate` heading until the next
227
- * level-1/level-2 heading or EOF. Within that section we count fenced ```bash
228
- * blocks: exactly one non-empty block → return its trimmed content (each line
229
- * trimmed, blank lines dropped, joined with `\n`); zero or >1 block, or an
230
- * unterminated block, or empty content → return null (refuse to guess).
231
- *
232
- * Returns null when: no `## Gate` heading, no bash block under it, an
233
- * unterminated block, an empty block, or multiple bash blocks (ambiguous spec).
234
- */
235
- export function parseGateCommand(visionMd: string): string | null {
236
- if (!visionMd) return null;
237
- const lines = visionMd.split(/\r?\n/);
238
-
239
- // 1. locate the `## Gate` heading (trailing whitespace tolerated).
240
- let gateIdx = -1;
241
- for (let i = 0; i < lines.length; i++) {
242
- if (/^## Gate[ \t]*$/.test(lines[i])) {
243
- gateIdx = i;
244
- break;
245
- }
246
- }
247
- if (gateIdx === -1) return null;
248
-
249
- // 2. collect fenced ```bash blocks until the next level-1/2 heading or EOF.
250
- const blocks: string[] = [];
251
- let j = gateIdx + 1;
252
- while (j < lines.length) {
253
- const line = lines[j];
254
- // A new level-1 or level-2 heading ends the Gate section.
255
- if (/^#{1,2} /.test(line)) break;
256
- if (/^```bash[ \t]*$/.test(line)) {
257
- const buf: string[] = [];
258
- let k = j + 1;
259
- while (k < lines.length && !/^```[ \t]*$/.test(lines[k])) {
260
- buf.push(lines[k].trim());
261
- k++;
262
- }
263
- if (k >= lines.length) return null; // unterminated fence
264
- const content = buf.filter((l) => l.length > 0).join("\n");
265
- blocks.push(content);
266
- j = k + 1;
267
- continue;
268
- }
269
- j++;
270
- }
271
-
272
- if (blocks.length !== 1) return null; // zero or >1 → ambiguous
273
- const cmd = blocks[0];
274
- if (cmd.length === 0) return null; // empty block
275
- return cmd;
276
- }
277
-
278
- /**
279
- * Build the maker prompt. Single source of truth (kept identical in spirit to
280
- * T10's `build_maker_prompt`). The maker is told to reread VISION.md, stay in
281
- * scope, stage changes in the worktree (NO ship — the orchestrator ships after
282
- * the gate passes), and write a PR_BODY.md.
283
- */
284
- export function buildMakerPrompt(
285
- loopName: string,
286
- _vision: string,
287
- _state: LoopState,
288
- opts?: { itemId?: string; instanceId?: string },
289
- ): string {
290
- const itemId = opts?.itemId ?? "unknown";
291
- const instanceId = opts?.instanceId ?? "unknown";
292
- return `You are the MAKER phase of loop "${loopName}" (instance ${instanceId}, item "${itemId}").
293
-
294
- BEFORE ACTING: reread .pi/loops/${loopName}/VISION.md and treat its boundaries as
295
- authoritative. Do NOT act outside that file. If a proposed action is not clearly
296
- inside Scope, treat it as Out-of-scope and write a diagnosis to PR_BODY.md
297
- instead of editing.
298
-
299
- GOAL: achieve the Definition-of-done in VISION.md for item "${itemId}".
300
- SCOPE: only touch paths/actions listed under ## Scope in VISION.md.
301
- HARD STOPS: honor every entry under ## Hard stops in VISION.md.
302
- YOU CANNOT SHIP: the orchestrator pushes the branch and opens the PR after the
303
- gate passes. Do not attempt to push, open a PR, or message anyone — you do not
304
- have those tools. Just stage your changes in this worktree (git add) and write a
305
- PR_BODY.md summarizing what you did and citing VISION.md.
306
-
307
- When done, write nothing to stdout that matters; the orchestrator runs the gate.
308
- `;
309
- }
310
-
311
- /**
312
- * Pick the next item id to process: the first in-progress item, else the
313
- * provided fallback, else a generated manual id.
314
- */
315
- export function nextItemId(state: LoopState, fallback?: string): string {
316
- const head = state?.in_progress?.[0];
317
- if (typeof head === "string" && head.length > 0) return head;
318
- if (fallback && fallback.length > 0) return fallback;
319
- return `manual-${Date.now()}`;
320
- }
321
-
322
- /**
323
- * Pure, immutable state patch. Shallow-merges top-level keys; shallow-merges
324
- * `metrics` so callers can pass a partial metrics object without clobbering
325
- * the rest. Does NOT mutate the input.
326
- */
327
- export function updateStateJson(state: LoopState, patch: Partial<LoopState>): LoopState {
328
- const next: LoopState = { ...state };
329
- if (patch.metrics) {
330
- next.metrics = { ...state.metrics, ...patch.metrics };
331
- }
332
- for (const key of Object.keys(patch) as Array<keyof LoopState>) {
333
- if (key === "metrics") continue;
334
- // @ts-expect-error — generic shallow assignment across LoopState keys.
335
- next[key] = patch[key];
336
- }
337
- return next;
338
- }
339
-
340
- /**
341
- * Idempotence check (FR9): true if the item is already in STATE.processed.
342
- */
343
- export function isAlreadyProcessed(state: LoopState, itemId: string): boolean {
344
- const processed = state?.processed;
345
- return Array.isArray(processed) && processed.includes(itemId);
346
- }
347
-
348
- /**
349
- * Ship-tool audit (FR6): scan the list of tool names the maker actually called
350
- * (captured from `tool_execution_start` events) and flag any ship tool
351
- * (`push`, `pr`, `slack`). The maker's allowlist (MAKER_TOOLS) structurally
352
- * excludes these, so any offender is a policy violation worth recording.
353
- */
354
- export function auditShipToolCalls(toolNames: string[]): { ok: boolean; offenders: string[] } {
355
- const offenders = toolNames.filter((n) => typeof n === "string" && SHIP_TOOLS.has(n.toLowerCase()));
356
- return { ok: offenders.length === 0, offenders };
357
- }
358
-
359
- // =============================================================================
360
- // BUDGET-CAP ENFORCEMENT — Phase D (FR13)
361
- // =============================================================================
362
- //
363
- // Per-event token-cap enforcement. runOnce subscribes to `message_end` events
364
- // and accumulates `event.message.usage.totalTokens` for assistant messages
365
- // (user message_end events carry no .usage). After each accumulation it calls
366
- // enforceBudgetCap; if cumulative > cap, runOnce calls `session.abort()`
367
- // mid-stream, records STATE.json.metrics.killed=true +
368
- // metrics.kill_reason="budget_cap_exceeded", skips the gate + ship phases,
369
- // cleans up the worktree, and resolves gracefully (FR10 — never throws).
370
- //
371
- // Pure predicate (no side effects) so it can be unit-tested without a session.
372
- export interface BudgetCapDecision {
373
- kill: boolean;
374
- reason: string | null;
375
- }
376
-
377
- export function enforceBudgetCap(stats: SessionStatsMirror, cap: number | null): BudgetCapDecision {
378
- if (cap == null) return { kill: false, reason: null };
379
- const used = stats?.tokens?.total ?? 0;
380
- if (used > cap) {
381
- return { kill: true, reason: "budget_cap_exceeded" };
382
- }
383
- return { kill: false, reason: null };
384
- }
385
-
386
- /**
387
- * Pure token accumulator (FR13 — budget cap). Filters the given `message_end`
388
- * events and sums the per-turn token delta for assistant messages.
389
- *
390
- * IMPORTANT: `Usage.totalTokens` (on AssistantMessage.usage, emitted via the
391
- * `message_end` event) is a PER-TURN DELTA built from each API response's
392
- * usage — it is NOT a session-cumulative value. Therefore accumulation with
393
- * `+=` is correct: each assistant turn contributes its own delta, and the
394
- * running sum is the cumulative session total.
395
- *
396
- * Extracted as a pure helper so the accumulation is unit-testable without a
397
- * live SDK session.
398
- */
399
- export function accumulateUsage(
400
- tokenTotal: number,
401
- events: AgentSessionEventMirror[],
402
- ): number {
403
- let total = tokenTotal;
404
- for (const event of events) {
405
- if (
406
- event.type === "message_end" &&
407
- event.message?.role === "assistant" &&
408
- event.message?.usage
409
- ) {
410
- const u = event.message.usage;
411
- const t = u.totalTokens ?? u.total ?? ((u.input ?? 0) + (u.output ?? 0));
412
- // Usage.totalTokens is per-turn delta; accumulate with +=.
413
- total += t;
414
- }
415
- }
416
- return total;
417
- }
418
-
419
- // =============================================================================
420
- // SDK DYNAMIC LOADER — cached; value import deferred (resolution gap).
421
- // =============================================================================
422
-
423
- let sdkCache: PiSdkNamespace | null = null;
424
- async function loadSdk(): Promise<PiSdkNamespace> {
425
- if (sdkCache) return sdkCache;
426
- // Dynamic import: the package is globally installed and resolved by the pi
427
- // runtime loader. A static top-level value import would fail under tsx/tsc
428
- // in this repo (not a local dependency), so we defer it here.
429
- const sdk = (await import("@earendil-works/pi-coding-agent")) as unknown as PiSdkNamespace;
430
- sdkCache = sdk;
431
- return sdk;
432
- }
433
-
434
- // =============================================================================
435
- // RUNTIME HELPERS — file I/O + shell (kept thin; tested only at T15 smoke)
436
- // =============================================================================
437
-
438
- function readState(stateFile: string): LoopState {
439
- const raw = fs.readFileSync(stateFile, "utf8");
440
- return JSON.parse(raw) as LoopState;
441
- }
442
-
443
- function writeState(stateFile: string, state: LoopState): void {
444
- fs.writeFileSync(stateFile, JSON.stringify(state, null, 2) + "\n", "utf8");
445
- }
446
-
447
- function ensureState(stateFile: string, loopName: string, templateFile: string): void {
448
- if (fs.existsSync(stateFile)) return;
449
- let seed: LoopState;
450
- if (fs.existsSync(templateFile)) {
451
- seed = JSON.parse(fs.readFileSync(templateFile, "utf8")) as LoopState;
452
- seed.loop_name = loopName;
453
- } else {
454
- seed = {
455
- loop_name: loopName,
456
- owner: "",
457
- cadence: "manual",
458
- last_run: null,
459
- in_progress: [],
460
- completed: [],
461
- escalated: [],
462
- failures: [],
463
- lessons: [],
464
- processed: [],
465
- stop_conditions_met: [],
466
- metrics: {
467
- runs: 0,
468
- killed: false,
469
- kill_reason: null,
470
- tokens_used: 0,
471
- token_cap: null,
472
- pr_opened: 0,
473
- items_fixed: 0,
474
- items_skipped: 0,
475
- items_escalated: 0,
476
- },
477
- };
478
- }
479
- writeState(stateFile, seed);
480
- }
481
-
482
- function nowIso(): string {
483
- return new Date().toISOString();
484
- }
485
-
486
- function timestampSlug(d = new Date()): string {
487
- const pad = (n: number) => String(n).padStart(2, "0");
488
- return `${d.getUTCFullYear()}${pad(d.getUTCMonth() + 1)}${pad(d.getUTCDate())}T${pad(d.getUTCHours())}${pad(d.getUTCMinutes())}${pad(d.getUTCSeconds())}`;
489
- }
490
-
491
- function log(level: string, instanceId: string, phase: string, msg: string): void {
492
- const stamp = new Date().toISOString();
493
- // eslint-disable-next-line no-console
494
- console.error(`${stamp} [${level}] instance=${instanceId} phase=${phase} — ${msg}`);
495
- }
496
-
497
- function runGate(worktreeDir: string, gateCmd: string): { exitCode: number; stdout: string } {
498
- try {
499
- const stdout = execFileSync("bash", ["-c", gateCmd], {
500
- cwd: worktreeDir,
501
- encoding: "utf8",
502
- stdio: ["ignore", "pipe", "pipe"],
503
- });
504
- return { exitCode: 0, stdout };
505
- } catch (err: unknown) {
506
- const e = err as { status?: number; stdout?: string; stderr?: string };
507
- return { exitCode: typeof e.status === "number" ? e.status : 1, stdout: e.stdout ?? "" };
508
- }
509
- }
510
-
511
- function hasBin(bin: string): boolean {
512
- try {
513
- execSync(`command -v ${bin}`, { stdio: "ignore" });
514
- return true;
515
- } catch {
516
- return false;
517
- }
518
- }
519
-
520
- // =============================================================================
521
- // RUNTIME — runOnce (the primary orchestrator entry point)
522
- // =============================================================================
523
-
524
- export interface RunOnceOptions {
525
- loopName: string;
526
- repoRoot?: string;
527
- itemId?: string;
528
- tokenCap?: number | null;
529
- }
530
-
531
- export interface RunResult {
532
- ok: boolean;
533
- itemId: string;
534
- branch: string | null;
535
- prUrl: string | null;
536
- reason: string | null;
537
- skipped: boolean;
538
- audit: { ok: boolean; offenders: string[] };
539
- }
540
-
541
- /**
542
- * Run one loop cycle. Each phase is wrapped in try/catch (FR10): a loop failure
543
- * is recorded in STATE.json and the function resolves (never rejects for a
544
- * loop failure). Only operator/config errors before any loop work begins
545
- * reject (missing VISION.md, bad repoRoot).
546
- */
547
- export async function runOnce(opts: RunOnceOptions): Promise<RunResult> {
548
- const repoRoot = path.resolve(opts.repoRoot ?? process.cwd());
549
- const loopName = opts.loopName;
550
- const loopDir = path.join(repoRoot, ".pi", "loops", loopName);
551
- const visionFile = path.join(loopDir, "VISION.md");
552
- const stateFile = path.join(loopDir, "STATE.json");
553
- const templateState = path.join(repoRoot, ".pi", "templates", "loop-state.json");
554
- const instanceId = `${loopName}-${timestampSlug()}-${process.pid}`;
555
- const branch = `loop/${loopName}/${timestampSlug()}`;
556
- const tokenCap = opts.tokenCap ?? TOKEN_CAP;
557
-
558
- if (!fs.existsSync(visionFile)) {
559
- return fail(loopName, instanceId, "A_parse_args", `VISION.md missing: ${visionFile}`, opts.itemId ?? "", null);
560
- }
561
- if (!fs.existsSync(loopDir)) fs.mkdirSync(loopDir, { recursive: true });
562
- ensureState(stateFile, loopName, templateState);
563
-
564
- let state = readState(stateFile);
565
-
566
- // FR13 early-exit: if no explicit item was requested and the watchlist
567
- // (STATE.json in_progress) is empty, there is nothing to process — exit
568
- // cheaply (<5k tokens target) before running the maker.
569
- const hasExplicitItem = typeof opts.itemId === "string" && opts.itemId.length > 0;
570
- if (!hasExplicitItem && (!Array.isArray(state.in_progress) || state.in_progress.length === 0)) {
571
- log("INFO", instanceId, "main", "NOTHING_TO_DO — watchlist (in_progress) empty; early-exit (<5k tokens)");
572
- writeState(stateFile, updateStateJson(state, { last_run: nowIso() }));
573
- return { ok: true, itemId: "", branch: null, prUrl: null, reason: "nothing-to-do", skipped: true, audit: { ok: true, offenders: [] } };
574
- }
575
-
576
- const itemId = opts.itemId ?? nextItemId(state);
577
-
578
- // Idempotence (FR9): skip already-processed items.
579
- if (isAlreadyProcessed(state, itemId)) {
580
- log("INFO", instanceId, "main", `NOTHING_TO_DO — item ${itemId} already processed (idempotent skip)`);
581
- state = updateStateJson(state, {
582
- processed: Array.from(new Set([...state.processed, itemId])),
583
- metrics: { ...state.metrics, items_skipped: state.metrics.items_skipped + 1 },
584
- last_run: nowIso(),
585
- });
586
- writeState(stateFile, state);
587
- return {
588
- ok: true,
589
- itemId,
590
- branch: null,
591
- prUrl: null,
592
- reason: "idempotent-skip",
593
- skipped: true,
594
- audit: { ok: true, offenders: [] },
595
- };
596
- }
597
-
598
- state = updateStateJson(state, { metrics: { ...state.metrics, runs: state.metrics.runs + 1 } });
599
- writeState(stateFile, state);
600
-
601
- let worktreeDir = "";
602
- const audit: { ok: boolean; offenders: string[] } = { ok: true, offenders: [] };
603
-
604
- try {
605
- // ---- Phase B: load gate ----
606
- const visionMd = fs.readFileSync(visionFile, "utf8");
607
- const gateCmd = parseGateCommand(visionMd);
608
- if (!gateCmd) {
609
- return recordFailureAndReturn(
610
- stateFile, state, itemId, "gate-parse-failed",
611
- `no fenced bash gate block found under ## Gate in ${visionFile}`,
612
- instanceId, loopName,
613
- );
614
- }
615
- log("INFO", instanceId, "B_load_gate", `gate loaded (${gateCmd.length} chars): ${gateCmd.split("\n")[0]}`);
616
-
617
- // ---- Phase C: worktree + maker session ----
618
- worktreeDir = fs.mkdtempSync(path.join(os.tmpdir(), `loop-${loopName}-`));
619
- fs.rmSync(worktreeDir, { recursive: true, force: true });
620
- execFileSync("git", ["-C", repoRoot, "worktree", "add", "--detach", worktreeDir, "HEAD"], {
621
- stdio: "pipe",
622
- encoding: "utf8",
623
- });
624
- log("INFO", instanceId, "C_worktree", `worktree at ${worktreeDir}`);
625
-
626
- // ---- Phase C (cont.): createAgentSession + prompt ----
627
- const sdk = await loadSdk();
628
- const { session } = await sdk.createAgentSession({
629
- cwd: worktreeDir,
630
- tools: [...MAKER_TOOLS],
631
- sessionManager: sdk.SessionManager.inMemory(worktreeDir),
632
- });
633
-
634
- const toolNames: string[] = [];
635
- let tokenTotal = 0;
636
- let budgetKilled = false;
637
- // FR13: subscribe to message_end events; accumulate assistant message
638
- // token usage (verified SDK shape: AssistantMessage.usage.totalTokens).
639
- // message_end fires for both user and assistant messages — only
640
- // assistant messages carry .usage, so we gate on role === "assistant".
641
- // After each accumulation, call enforceBudgetCap; if cumulative > cap,
642
- // call session.abort() mid-stream and flag the kill (FR10 — never crash).
643
- // NOTE: Usage.totalTokens is a per-turn delta (each API response's usage),
644
- // NOT a session cumulative — hence `+=` is correct (see accumulateUsage).
645
- const unsub = session.subscribe((event: AgentSessionEventMirror) => {
646
- if (
647
- event.type === "message_end" &&
648
- event.message?.role === "assistant" &&
649
- event.message?.usage
650
- ) {
651
- const u = event.message.usage;
652
- // Usage.totalTokens is per-turn delta; accumulate with +=.
653
- const t = u.totalTokens ?? u.total ?? ((u.input ?? 0) + (u.output ?? 0));
654
- tokenTotal += t;
655
- const decision = enforceBudgetCap({ tokens: { total: tokenTotal } }, tokenCap);
656
- if (decision.kill && !budgetKilled) {
657
- budgetKilled = true;
658
- log("WARN", instanceId, "D_budget_cap", `cumulative tokens ${tokenTotal} > cap ${tokenCap}; calling session.abort()`);
659
- // Fire-and-forget abort; the prompt() await unwinds below.
660
- void session.abort().catch(() => {});
661
- }
662
- } else if (event.type === "tool_execution_start" && typeof event.toolName === "string") {
663
- toolNames.push(event.toolName);
664
- }
665
- });
666
-
667
- const prompt = buildMakerPrompt(loopName, visionMd, state, { itemId, instanceId });
668
- log("INFO", instanceId, "C_maker", `running createAgentSession.prompt (cwd=${worktreeDir}, tools=${MAKER_TOOLS.join(",")})`);
669
- try {
670
- await session.prompt(prompt);
671
- } catch (err) {
672
- log("WARN", instanceId, "C_maker", `session.prompt failed (recorded, not fatal — FR10): ${String(err)}`);
673
- }
674
- unsub();
675
- log("INFO", instanceId, "C_maker", `maker phase complete (tokens=${tokenTotal}, killed=${budgetKilled})`);
676
-
677
- // ---- Phase D: BUDGET-CAP enforcement (FR13) ----
678
- // Per-event accumulation above already called session.abort() if the
679
- // cumulative token count exceeded the cap mid-stream. Record the kill
680
- // in STATE.json (metrics.killed=true, kill_reason="budget_cap_exceeded")
681
- // and exit gracefully — FR10 (never throw, never crash).
682
- if (budgetKilled) {
683
- log("WARN", instanceId, "D_budget_cap", `budget cap exceeded (${tokenTotal} > ${tokenCap}); recording kill + skipping gate/ship`);
684
- const killedState = updateStateJson(state, {
685
- metrics: { ...state.metrics, killed: true, kill_reason: "budget_cap_exceeded", tokens_used: tokenTotal, token_cap: tokenCap },
686
- last_run: nowIso(),
687
- });
688
- writeState(stateFile, killedState);
689
- return { ok: false, itemId, branch: null, prUrl: null, reason: "budget-cap-kill: budget_cap_exceeded", skipped: false, audit };
690
- }
691
-
692
- // Fallback: post-prompt check via session stats (covers cases where the
693
- // event stream was not granular enough to trip the per-event check).
694
- const stats = session.getSessionStats();
695
- const capDecision = enforceBudgetCap(stats, tokenCap);
696
- if (capDecision.kill) {
697
- log("WARN", instanceId, "D_budget_cap", `kill: ${capDecision.reason} — calling session.abort()`);
698
- try {
699
- await session.abort();
700
- } catch (err) {
701
- log("WARN", instanceId, "D_budget_cap", `session.abort() failed: ${String(err)}`);
702
- }
703
- const killedState = updateStateJson(state, {
704
- metrics: { ...state.metrics, killed: true, kill_reason: "budget_cap_exceeded", tokens_used: tokenTotal, token_cap: tokenCap },
705
- last_run: nowIso(),
706
- });
707
- writeState(stateFile, killedState);
708
- return { ok: false, itemId, branch: null, prUrl: null, reason: "budget-cap-kill: budget_cap_exceeded", skipped: false, audit };
709
- }
710
-
711
- // ---- Ship-tool audit (FR6): zero push/pr/slack calls expected. ----
712
- audit.ok = auditShipToolCalls(toolNames).ok;
713
- audit.offenders = auditShipToolCalls(toolNames).offenders;
714
- if (!audit.ok) {
715
- log("ERROR", instanceId, "C_audit", `ship-tool audit FAILED: ${audit.offenders.join(",")}`);
716
- }
717
-
718
- // ---- Phase E: gate (exit code is the decision — computational, FR7) ----
719
- log("INFO", instanceId, "E_gate", "running gate via bash -c");
720
- const gateRes = runGate(worktreeDir, gateCmd);
721
- if (gateRes.exitCode !== 0) {
722
- log("ERROR", instanceId, "E_gate", `gate FAILED (exit ${gateRes.exitCode})`);
723
- return recordFailureAndReturn(
724
- stateFile, state, itemId, "gate-failed-exit-nonzero",
725
- `gate exited ${gateRes.exitCode}`, instanceId, loopName, { audit },
726
- );
727
- }
728
- log("INFO", instanceId, "E_gate", "gate PASSED (exit 0)");
729
-
730
- // ---- Phase F: ship (push branch + gh pr create) ----
731
- const ship = shipPass(worktreeDir, repoRoot, loopName, instanceId, itemId, branch);
732
- if (!ship.ok) {
733
- return recordFailureAndReturn(
734
- stateFile, state, itemId, "ship-failed", ship.reason ?? "ship-failed",
735
- instanceId, loopName, { audit },
736
- );
737
- }
738
-
739
- // ---- Phase G: update STATE.json (processed/completed/metrics) ----
740
- const shipped = updateStateJson(state, {
741
- completed: [...state.completed, { item: itemId, branch, pr: ship.prUrl ?? "", at: nowIso() }],
742
- processed: Array.from(new Set([...state.processed, itemId])),
743
- metrics: {
744
- ...state.metrics,
745
- items_fixed: state.metrics.items_fixed + 1,
746
- pr_opened: state.metrics.pr_opened + (ship.prUrl ? 1 : 0),
747
- tokens_used: tokenTotal,
748
- },
749
- last_run: nowIso(),
750
- });
751
- writeState(stateFile, shipped);
752
- log("INFO", instanceId, "main", `DONE — shipped ${itemId} on ${branch} (${ship.prUrl ?? "no-pr"})`);
753
-
754
- return { ok: true, itemId, branch, prUrl: ship.prUrl, reason: null, skipped: false, audit };
755
- } catch (err) {
756
- log("ERROR", instanceId, "runOnce", `loop failure (recorded, not fatal — FR10): ${String(err)}`);
757
- // FR10 hardening: readState can itself throw if STATE.json is corrupt or
758
- // missing at this moment, which would escape runOnce (scheduler crash).
759
- // Prefer the in-memory `state` we already loaded; only re-read as a last
760
- // resort, and fall back to a minimal seed if that also fails.
761
- let catchState: LoopState;
762
- try {
763
- catchState = readState(stateFile);
764
- } catch (readErr) {
765
- log("WARN", instanceId, "runOnce", `readState failed in catch (using minimal seed): ${String(readErr)}`);
766
- catchState = {
767
- loop_name: loopName,
768
- owner: "",
769
- cadence: "manual",
770
- last_run: null,
771
- in_progress: [],
772
- completed: [],
773
- escalated: [],
774
- failures: [],
775
- lessons: [],
776
- processed: [],
777
- stop_conditions_met: [],
778
- metrics: {
779
- runs: 0, killed: false, kill_reason: null, tokens_used: 0,
780
- token_cap: null, pr_opened: 0, items_fixed: 0,
781
- items_skipped: 0, items_escalated: 0,
782
- },
783
- };
784
- }
785
- // Prefer the in-memory state (already loaded); fall back to the file read
786
- // (or seed) only if `state` was never assigned (early throw).
787
- const recordState: LoopState = state ?? catchState;
788
- return recordFailureAndReturn(
789
- stateFile, recordState, itemId, "loop-exception", String(err),
790
- instanceId, loopName, { audit },
791
- );
792
- } finally {
793
- // ---- Phase H: cleanup worktree (FR8) ----
794
- if (worktreeDir) {
795
- try {
796
- execFileSync("git", ["-C", repoRoot, "worktree", "remove", "--force", worktreeDir], {
797
- stdio: "ignore",
798
- encoding: "utf8",
799
- });
800
- } catch {
801
- try {
802
- fs.rmSync(worktreeDir, { recursive: true, force: true });
803
- } catch (err) {
804
- log("WARN", instanceId, "cleanup", `failed to remove ${worktreeDir}: ${String(err)}`);
805
- }
806
- }
807
- }
808
- }
809
- }
810
-
811
/**
 * Phase F helper: stage + commit the worktree, create `branch`, push it to
 * `origin`, and — when the `gh` binary is available — open a pull request.
 *
 * Never throws. Any failure in staging, branch creation or push is reported
 * through the returned `reason`. A failed commit is logged and tolerated, and a
 * failed or unavailable `gh pr create` leaves the branch pushed with no PR.
 *
 * @param worktreeDir - Directory the git commands run against; `PR_BODY.md` is looked up here.
 * @param repoRoot - Working directory used for `gh pr create`.
 * @param loopName - Included in the commit message and the PR title.
 * @param instanceId - Included in log lines, the commit message and the default PR body.
 * @param itemId - Included in the commit message and the PR title.
 * @param branch - Branch name created with `git checkout -b` and pushed to `origin`.
 * @returns `ok: true` with the PR URL (or `null` when no PR was opened);
 *          `ok: false` with a `ship-failed: …` reason when staging, checkout or push fails.
 */
function shipPass(
  worktreeDir: string,
  repoRoot: string,
  loopName: string,
  instanceId: string,
  itemId: string,
  branch: string,
): { ok: boolean; prUrl: string | null; reason: string | null } {
  try {
    execFileSync("git", ["-C", worktreeDir, "add", "-A"], { stdio: "ignore", encoding: "utf8" });
    try {
      execFileSync(
        "git",
        ["-C", worktreeDir, "commit", "-m", `loop(${loopName}): ${itemId} (instance ${instanceId})`],
        { stdio: "ignore", encoding: "utf8" },
      );
    } catch (err) {
      // Best-effort step: the commit may fail simply because there is nothing
      // to commit, so shipping continues — but the failure is logged instead
      // of being silently discarded, so other causes remain diagnosable.
      log("WARN", instanceId, "F_commit", `git commit failed (possibly nothing to commit); continuing: ${String(err)}`);
    }
    execFileSync("git", ["-C", worktreeDir, "checkout", "-b", branch], { stdio: "ignore", encoding: "utf8" });
    execFileSync("git", ["-C", worktreeDir, "push", "-u", "origin", branch], { stdio: "pipe", encoding: "utf8" });
    log("INFO", instanceId, "F_push", `pushed ${branch}`);

    // Stays null unless `gh pr create` succeeds and prints a non-empty last line.
    let prUrl: string | null = null;
    if (hasBin("gh")) {
      const bodyFile = path.join(worktreeDir, "PR_BODY.md");
      const body = fs.existsSync(bodyFile)
        ? fs.readFileSync(bodyFile, "utf8")
        : `Auto-generated by loop-orchestrator.ts (instance ${instanceId}).`;
      try {
        // Shell-injection-safe: execFileSync receives an argv array, so body
        // and itemId are passed as discrete arguments and never interpolated
        // into a shell string.
        const out = execFileSync(
          "gh",
          ["pr", "create", "--base", "main", "--head", branch,
            "--title", `loop(${loopName}): ${itemId}`, "--body", body],
          { stdio: ["ignore", "pipe", "pipe"], encoding: "utf8", cwd: repoRoot },
        );
        // The last line of the trimmed stdout is taken as the PR URL; an empty
        // result is normalised to null.
        prUrl = out.trim().split("\n").pop() || null;
      } catch (err) {
        log("WARN", instanceId, "F_pr", `gh pr create failed; branch pushed, no PR (commit-only fallback): ${String(err)}`);
      }
      log("INFO", instanceId, "F_pr", `PR: ${prUrl ?? "<none>"}`);
    } else {
      log("WARN", instanceId, "F_pr", "gh not installed; branch pushed, no PR (commit-only)");
    }
    return { ok: true, prUrl, reason: null };
  } catch (err) {
    return { ok: false, prUrl: null, reason: `ship-failed: ${String(err)}` };
  }
}
866
-
867
/**
 * Record a failure in STATE.json and return a failing RunResult (FR10 — graceful).
 *
 * Logs `reason: detail` at ERROR level, appends a failure entry for `itemId`
 * to `state.failures`, refreshes `last_run`, and writes the result to
 * `stateFile`. Both deriving the next state and writing it happen inside the
 * same `try`, so a problem while recording is logged at WARN level and does not
 * propagate; the failing result is returned either way.
 *
 * @param stateFile - Path handed to `writeState`.
 * @param state - State the failure entry is appended to.
 * @param itemId - Item the failure is attributed to.
 * @param reason - Short failure code; prefixed to `detail` in the recorded message.
 * @param detail - Human-readable detail.
 * @param instanceId - Instance identifier used in log lines.
 * @param _loopName - Accepted but unused.
 * @param extra - Optional ship-tool audit to echo back; defaults to a clean audit.
 * @returns A RunResult with `ok: false`, `branch`/`prUrl` null and `reason` set to `reason: detail`.
 */
function recordFailureAndReturn(
  stateFile: string,
  state: LoopState,
  itemId: string,
  reason: string,
  detail: string,
  instanceId: string,
  _loopName: string,
  extra?: { audit: { ok: boolean; offenders: string[] } },
): RunResult {
  const audit = extra?.audit ?? { ok: true, offenders: [] };
  const msg = `${reason}: ${detail}`;
  log("ERROR", instanceId, "record", msg);
  try {
    // One timestamp for both fields so the failure entry and last_run agree.
    const at = nowIso();
    const next = updateStateJson(state, {
      failures: [...(state.failures ?? []), { item: itemId, reason: msg, at }],
      last_run: at,
    });
    writeState(stateFile, next);
  } catch (err) {
    log("WARN", instanceId, "record", `failed to write state: ${String(err)}`);
  }
  return { ok: false, itemId, branch: null, prUrl: null, reason: msg, skipped: false, audit };
}
892
-
893
/**
 * Shortcut for failures raised before the loop proper starts.
 *
 * Logs `msg` at ERROR level under `phase` and returns a failing RunResult
 * carrying a clean audit. STATE.json is not written. `_loopName` and
 * `_branch` are accepted but unused; the returned `branch` is always null.
 */
function fail(
  _loopName: string,
  instanceId: string,
  phase: string,
  msg: string,
  itemId: string,
  _branch: string | null,
): RunResult {
  log("ERROR", instanceId, phase, msg);

  const cleanAudit: { ok: boolean; offenders: string[] } = { ok: true, offenders: [] };
  const result: RunResult = {
    ok: false,
    itemId,
    branch: null,
    prUrl: null,
    reason: msg,
    skipped: false,
    audit: cleanAudit,
  };
  return result;
}
905
-
906
- // Keep the type-only SDK mirrors referenced so tsc does not flag them unused
907
- // when the runtime path is the only consumer (and vice-versa for tests).
908
- export type { SDKAgentSessionType, SDKSessionStatsType };
909
-
910
// =============================================================================
// CLI MAIN — `node .../loop-orchestrator.ts run-once <loop-name> [repo-root] [item-id]`
// =============================================================================
// Runs only when this file is the process entry point (import.meta.url equals
// the file URL of argv[1]); importing the module elsewhere triggers nothing.
// Per FR10 every path below exits 0 — a finished run, a rejected run, and a
// malformed command line alike — so a scheduler never aborts on a loop failure.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  // argv[0] is the node binary and argv[1] this script; the rest are ours.
  const [action, loopName, repoRoot, itemId] = process.argv.slice(2);

  if (action !== "run-once" || !loopName) {
    // Operator misuse: print usage, but still exit 0 (see header note).
    console.error("usage: loop-orchestrator.ts run-once <loop-name> [repo-root] [item-id]");
    process.exit(0);
  } else {
    runOnce({ loopName, repoRoot: repoRoot ?? ".", itemId })
      .then(() => {
        // The RunResult is intentionally ignored: loop failures also exit 0.
        process.exit(0);
      })
      .catch((err) => {
        console.error("FATAL:", err?.message ?? err);
        process.exit(0); // FR10
      });
  }
}