npm - @exaudeus/workrail - Versions diffs - 3.31.1 → 3.33.0 - Mend

@exaudeus/workrail 3.31.1 → 3.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

package/dist/cli/commands/index.d.ts +1 -0
package/dist/cli/commands/index.js +3 -1
package/dist/cli/commands/worktrain-await.js +11 -9
package/dist/cli/commands/worktrain-daemon-install.d.ts +35 -0
package/dist/cli/commands/worktrain-daemon-install.js +291 -0
package/dist/cli/commands/worktrain-daemon.d.ts +31 -0
package/dist/cli/commands/worktrain-daemon.js +272 -0
package/dist/cli/commands/worktrain-spawn.js +11 -9
package/dist/cli-worktrain.js +329 -0
package/dist/cli.js +4 -22
package/dist/console/standalone-console.d.ts +28 -0
package/dist/console/standalone-console.js +142 -0
package/dist/{console/assets/index-6H9DeFxj.js → console-ui/assets/index-BuJFLLfY.js} +1 -1
package/dist/{console → console-ui}/index.html +1 -1
package/dist/daemon/agent-loop.d.ts +26 -0
package/dist/daemon/agent-loop.js +53 -2
package/dist/daemon/daemon-events.d.ts +103 -0
package/dist/daemon/daemon-events.js +56 -0
package/dist/daemon/workflow-runner.d.ts +6 -3
package/dist/daemon/workflow-runner.js +229 -33
package/dist/infrastructure/session/HttpServer.js +133 -34
package/dist/manifest.json +134 -70
package/dist/mcp/output-schemas.d.ts +30 -30
package/dist/mcp/transports/bridge-events.d.ts +4 -0
package/dist/mcp/transports/fatal-exit.js +4 -0
package/dist/mcp/transports/http-entry.js +2 -0
package/dist/mcp/transports/stdio-entry.js +26 -6
package/dist/mcp/v2/tools.d.ts +4 -4
package/dist/trigger/adapters/github-poller.d.ts +44 -0
package/dist/trigger/adapters/github-poller.js +190 -0
package/dist/trigger/adapters/gitlab-poller.d.ts +27 -0
package/dist/trigger/adapters/gitlab-poller.js +81 -0
package/dist/trigger/delivery-client.d.ts +2 -1
package/dist/trigger/delivery-client.js +4 -1
package/dist/trigger/index.d.ts +4 -1
package/dist/trigger/index.js +5 -1
package/dist/trigger/polled-event-store.d.ts +22 -0
package/dist/trigger/polled-event-store.js +173 -0
package/dist/trigger/polling-scheduler.d.ts +20 -0
package/dist/trigger/polling-scheduler.js +249 -0
package/dist/trigger/trigger-listener.d.ts +5 -0
package/dist/trigger/trigger-listener.js +53 -4
package/dist/trigger/trigger-router.d.ts +4 -2
package/dist/trigger/trigger-router.js +7 -4
package/dist/trigger/trigger-store.js +114 -33
package/dist/trigger/types.d.ts +17 -1
package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +224 -224
package/dist/v2/durable-core/schemas/session/events.d.ts +42 -42
package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
package/dist/v2/durable-core/tokens/payloads.d.ts +52 -52
package/dist/v2/usecases/console-routes.js +3 -3
package/dist/v2/usecases/console-service.js +133 -9
package/dist/v2/usecases/console-types.d.ts +7 -0
package/docs/design/daemon-conversation-logging-plan.md +98 -0
package/docs/design/daemon-conversation-logging-review.md +55 -0
package/docs/design/daemon-conversation-logging.md +129 -0
package/docs/design/github-polling-adapter-design-candidates.md +226 -0
package/docs/design/github-polling-adapter-design-review-findings.md +131 -0
package/docs/design/github-polling-adapter-implementation-plan.md +284 -0
package/docs/design/implementation_plan.md +192 -0
package/docs/design/workflow-id-validation-at-startup.md +146 -0
package/docs/design/workflow-id-validation-design-review.md +87 -0
package/docs/design/workflow-id-validation-implementation-plan.md +185 -0
package/docs/design/worktrain-system-prompt-report-issue-candidates.md +135 -0
package/docs/design/worktrain-system-prompt-report-issue-design-review.md +73 -0
package/docs/ideas/backlog.md +465 -0
package/package.json +1 -1
package/workflows/architecture-scalability-audit.json +1 -1
package/workflows/bug-investigation.agentic.v2.json +3 -3
package/workflows/coding-task-workflow-agentic.json +32 -32
package/workflows/coding-task-workflow-agentic.lean.v2.json +1 -1
package/workflows/coding-task-workflow-agentic.v2.json +7 -7
package/workflows/mr-review-workflow.agentic.v2.json +21 -12
package/workflows/personal-learning-materials-creation-branched.json +2 -2
package/workflows/production-readiness-audit.json +1 -1
package/workflows/relocation-workflow-us.json +2 -2
package/workflows/ui-ux-design-workflow.json +14 -14
package/workflows/workflow-for-workflows.json +3 -3
package/workflows/workflow-for-workflows.v2.json +2 -2
package/workflows/wr.discovery.json +1 -1
/package/dist/{console → console-ui}/assets/index-8dh0Psu-.css +0 -0

package/dist/v2/usecases/console-service.js CHANGED Viewed

@@ -1,6 +1,42 @@
 "use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // reuse a helper already present on `this`; otherwise define one that exposes m[k] on o under the name k2
+    if (k2 === undefined) k2 = k; // k2 defaults to k
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { // no descriptor, an accessor on a non-__esModule source, or a writable/configurable data property
+      desc = { enumerable: true, get: function() { return m[k]; } }; // substitute a getter so o[k2] reads m[k] on every access
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) { // fallback when Object.create is falsy: plain assignment of the current value
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // sets o.default = v (as an enumerable property when defineProperty is used)
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () { // builds the function used below to wrap require() results
+    var ownKeys = function(o) { // on first call, replaces itself with Object.getOwnPropertyNames or the for-in fallback below
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod; // modules flagged __esModule are returned unchanged
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); // bind every own key except "default"
+        __setModuleDefault(result, mod); // the original module object itself becomes result.default
+        return result;
+    };
+})();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ConsoleService = void 0;
+const fs = __importStar(require("node:fs/promises"));
+const path = __importStar(require("node:path"));
+const os = __importStar(require("node:os"));
 const neverthrow_1 = require("neverthrow");
 const neverthrow_2 = require("neverthrow");
 const session_health_js_1 = require("../projections/session-health.js");
@@ -21,6 +57,57 @@ const DORMANCY_THRESHOLD_MS = (() => {
     return Number.isFinite(override) && override > 0 ? override : 60 * 60 * 1000;
 })();
 const AUTONOMOUS_HEARTBEAT_THRESHOLD_MS = 10 * 60 * 1000;
+const LIVE_ACTIVITY_MAX_ENTRIES = 5; // number of most-recent tool activities getSessionDetail requests from readLiveActivity
+const DAEMON_EVENT_LOG_READ_LIMIT_BYTES = 100 * 1024; // 100 KiB; log files larger than this are read tail-only by readLiveActivity
+const DAEMON_EVENTS_DIR = path.join(os.homedir(), '.workrail', 'events', 'daemon'); // directory containing the `<date>.jsonl` files that readLiveActivity reads
+async function readLiveActivity(workrailSessionId, maxEntries) { // best-effort: resolves to the last `maxEntries` tool_called entries for this session from today's log file, or null if the file cannot be read
+    const date = new Date().toISOString().slice(0, 10); // YYYY-MM-DD taken from toISOString(), i.e. the UTC calendar date
+    const filePath = path.join(DAEMON_EVENTS_DIR, `${date}.jsonl`);
+    try {
+        let raw;
+        const stat = await fs.stat(filePath);
+        if (stat.size > DAEMON_EVENT_LOG_READ_LIMIT_BYTES) { // large file: read only the final DAEMON_EVENT_LOG_READ_LIMIT_BYTES bytes
+            const fd = await fs.open(filePath, 'r');
+            const offset = stat.size - DAEMON_EVENT_LOG_READ_LIMIT_BYTES;
+            const buf = Buffer.alloc(DAEMON_EVENT_LOG_READ_LIMIT_BYTES);
+            try {
+                await fd.read(buf, 0, DAEMON_EVENT_LOG_READ_LIMIT_BYTES, offset); // NOTE(review): the read result (bytes read) is not checked; a short read would leave zero bytes at the end of buf
+            }
+            finally {
+                await fd.close(); // always release the handle, even if the read throws
+            }
+            raw = buf.toString('utf8'); // the tail can start mid-line; presumably that fragment fails JSON.parse below and is skipped
+        }
+        else {
+            raw = await fs.readFile(filePath, 'utf8');
+        }
+        const activities = [];
+        for (const line of raw.split('\n')) {
+            if (!line.trim()) // skip blank lines
+                continue;
+            try {
+                const event = JSON.parse(line);
+                if (event['kind'] !== 'tool_called' || // keep only tool_called events for this session that carry a string toolName and a numeric ts
+                    event['workrailSessionId'] !== workrailSessionId ||
+                    typeof event['toolName'] !== 'string' ||
+                    typeof event['ts'] !== 'number') {
+                    continue;
+                }
+                activities.push({
+                    toolName: event['toolName'],
+                    ...(typeof event['summary'] === 'string' ? { summary: event['summary'] } : {}), // summary is included only when it is a string
+                    ts: event['ts'],
+                });
+            }
+            catch { // a line that fails to parse (or throws during the checks above) is ignored
+            }
+        }
+        return activities.slice(-maxEntries); // file order is preserved; keep the last maxEntries matches
+    }
+    catch { // any failure in stat/open/read (e.g. the file does not exist) yields null
+        return null;
+    }
+}
 class ConsoleService {
     constructor(ports) {
         this.ports = ports;
@@ -49,6 +136,7 @@ class ConsoleService {
     }
     getSessionDetail(sessionIdStr) {
         const sessionId = (0, index_js_1.asSessionId)(sessionIdStr);
+        const nowMs = Date.now();
         return this.ports.sessionStore
             .load(sessionId)
             .mapErr((storeErr) => ({
@@ -57,16 +145,29 @@ class ConsoleService {
         }))
             .andThen((truth) => {
             const dagRes = (0, run_dag_js_1.projectRunDagV2)(truth.events);
-            if (dagRes.isErr()) {
-                return resolveRunCompletion(truth.events, this.ports.snapshotStore)
-                    .map((completionMap) => projectSessionDetail(sessionId, truth, completionMap, {}, {}));
+            const detailRA = (() => {
+                if (dagRes.isErr()) {
+                    return resolveRunCompletion(truth.events, this.ports.snapshotStore)
+                        .map((completionMap) => projectSessionDetail(sessionId, truth, completionMap, {}, {}));
+                }
+                const dag = dagRes.value;
+                return neverthrow_1.ResultAsync.combine([
+                    resolveRunCompletion(truth.events, this.ports.snapshotStore),
+                    resolveStepLabels(dag, this.ports.snapshotStore, this.ports.pinnedWorkflowStore),
+                    resolveWorkflowNames(dag, this.ports.pinnedWorkflowStore),
+                ]).map(([completionMap, stepLabels, workflowNames]) => projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames));
+            })();
+            const registryEntry = this.ports.daemonRegistry?.snapshot().get(sessionId);
+            const isLive = registryEntry !== undefined
+                && (nowMs - registryEntry.lastHeartbeatMs) < AUTONOMOUS_HEARTBEAT_THRESHOLD_MS;
+            if (!isLive) {
+                return detailRA.map((detail) => ({ ...detail, liveActivity: null }));
             }
-            const dag = dagRes.value;
-            return neverthrow_1.ResultAsync.combine([
-                resolveRunCompletion(truth.events, this.ports.snapshotStore),
-                resolveStepLabels(dag, this.ports.snapshotStore, this.ports.pinnedWorkflowStore),
-                resolveWorkflowNames(dag, this.ports.pinnedWorkflowStore),
-            ]).map(([completionMap, stepLabels, workflowNames]) => projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames));
+            const liveActivityRA = neverthrow_1.ResultAsync.fromSafePromise(readLiveActivity(sessionIdStr, LIVE_ACTIVITY_MAX_ENTRIES));
+            return neverthrow_1.ResultAsync.combine([detailRA, liveActivityRA]).map(([detail, liveActivity]) => ({
+                ...detail,
+                liveActivity,
+            }));
         });
     }
     getNodeDetail(sessionIdStr, nodeId) {
@@ -413,6 +514,26 @@ function extractGitBranch(events) {
     }
     return null;
 }
+function extractRepoRoot(events) { // returns the value of the first `repo_root` observation; if there is none, the first non-empty string workspacePath from an initial CONTEXT_SET event; otherwise null
+    let workspacePathFallback = null;
+    for (const e of events) {
+        if (e.kind === constants_js_1.EVENT_KIND.OBSERVATION_RECORDED && e.data.key === 'repo_root') {
+            return e.data.value.value; // an observation wins immediately, even if a fallback was already captured; NOTE(review): the shape of e.data.value is not visible here
+        }
+        if (e.kind === constants_js_1.EVENT_KIND.CONTEXT_SET &&
+            e.data.source === 'initial' &&
+            workspacePathFallback === null) { // only the first qualifying workspacePath is kept
+            const ctx = e.data.context;
+            if (ctx && typeof ctx === 'object' && !Array.isArray(ctx)) { // context must be a non-null, non-array object
+                const wp = ctx['workspacePath'];
+                if (typeof wp === 'string' && wp.length > 0) {
+                    workspacePathFallback = wp;
+                }
+            }
+        }
+    }
+    return workspacePathFallback; // null when neither source was found
+}
 function truncateTitle(text, maxLen = 120) {
     if (text.length <= maxLen)
         return text;
@@ -439,6 +560,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
     const gapsRes = sortedEventsRes.isOk() ? (0, gaps_js_1.projectGapsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
     const sessionTitle = sortedEventsRes.isOk() ? deriveSessionTitle(sortedEventsRes.value) : null;
     const gitBranch = extractGitBranch(events);
+    const repoRoot = extractRepoRoot(events);
     const isAutonomous = (() => {
         if (!sortedEventsRes.isOk())
             return false;
@@ -466,6 +588,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
             hasUnresolvedGaps: false,
             recapSnippet: null,
             gitBranch,
+            repoRoot,
             lastModifiedMs,
             isAutonomous,
             isLive,
@@ -510,6 +633,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
         hasUnresolvedGaps,
         recapSnippet,
         gitBranch,
+        repoRoot,
         lastModifiedMs,
         isAutonomous,
         isLive,

package/dist/v2/usecases/console-types.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@ export interface ConsoleSessionSummary {
     readonly hasUnresolvedGaps: boolean;
     readonly recapSnippet: string | null;
     readonly gitBranch: string | null;
+    readonly repoRoot: string | null;
     readonly lastModifiedMs: number;
     readonly isAutonomous: boolean;
     readonly isLive: boolean;
@@ -76,11 +77,17 @@ export interface ConsoleDagRun {
     readonly executionTraceSummary: ConsoleExecutionTraceSummary | null;
     readonly skippedSteps: readonly ConsoleGhostStep[];
 }
+export interface ConsoleToolActivity { // one tool invocation reported as live activity for a session
+    readonly toolName: string;
+    readonly summary?: string; // optional short description of the call
+    readonly ts: number; // timestamp of the call; units are not stated here (presumably epoch milliseconds - confirm)
+}
 export interface ConsoleSessionDetail {
     readonly sessionId: string;
     readonly sessionTitle: string | null;
     readonly health: ConsoleSessionHealth;
     readonly runs: readonly ConsoleDagRun[];
+    readonly liveActivity?: readonly ConsoleToolActivity[] | null; // recent tool activity; optional and nullable (presumably null when no live activity is available - confirm against the producer)
 }
 export type ConsoleValidationOutcome = 'pass' | 'fail';
 export interface ConsoleValidationResult {

package/docs/design/daemon-conversation-logging-plan.md ADDED Viewed

@@ -0,0 +1,98 @@
+# Implementation Plan: Daemon Conversation Logging
+## Problem Statement
+The WorkRail daemon runs workflows autonomously but provides minimal visibility into what the agent is actually doing. Today you can see `session_started`, `tool_called`, and `session_completed` in the JSONL event log - but you cannot see what the LLM decided, which tools it requested, how long each tool took, or whether a tool succeeded. Adding `llm_turn_started`, `llm_turn_completed`, `tool_call_started`, `tool_call_completed`, and `tool_call_failed` events - plus a `worktrain logs` CLI command - turns the event file into a real-time audit trail of agent behavior.
+## Acceptance Criteria
+1. For every LLM API call in `_runLoop()`, `llm_turn_started` is written before the call and `llm_turn_completed` is written after the response is received.
+2. For every tool execution via `_executeTools()`, `tool_call_started` is written before `tool.execute()`, and either `tool_call_completed` or `tool_call_failed` is written after.
+3. `tool_call_started` args are truncated to max 200 chars. `tool_call_completed` result summary truncated to max 200 chars.
+4. All new events appear in the same daily JSONL file as existing events.
+5. `worktrain logs` reads today's log file and prints each event formatted for humans.
+6. `worktrain logs --follow` polls the file every 500ms and prints new events as they arrive.
+7. `worktrain logs --session <id>` filters events to those with matching `sessionId`.
+8. `worktrain logs --follow` handles midnight file rotation (switches to new date file).
+9. If the log file doesn't exist, `worktrain logs` prints a helpful message; `--follow` waits for the file.
+10. TypeScript compiles without errors. Existing tests pass.
+## Non-Goals
+- NOT putting events in the v2 session event store
+- NOT adding a Console Timeline tab
+- NOT deprecating `tool_called` events (backward compat)
+- NOT implementing accurate pre-call token counting (message count proxy is sufficient)
+- NOT searching across multiple day files for `--session` filter
+## Philosophy-Driven Constraints
+- **Fire-and-forget invariant**: All callbacks in AgentLoop are wrapped in try/catch blocks that swallow errors.
+- **DI for boundaries**: AgentLoop receives callbacks, not DaemonEventEmitter itself.
+- **Make illegal states unrepresentable**: New event kinds added to `DaemonEvent` discriminated union.
+- **YAGNI**: Only the specified event kinds and fields.
+## Invariants
+1. `tool_call_started` is always followed by either `tool_call_completed` or `tool_call_failed`.
+2. `llm_turn_started` may have no matching `llm_turn_completed` on API error - this is intentional signal.
+3. Callbacks in AgentLoop never propagate exceptions to the caller.
+4. `DaemonEvent` union remains exhaustive.
+## Selected Approach
+AgentLoopOptions callbacks: 5 optional callback properties on `AgentLoopOptions` called in `_runLoop()` and `_executeTools()`. workflow-runner.ts wires them to `emitter?.emit()`.
+## Vertical Slices
+### Slice 1: New event types in daemon-events.ts
+- Add interfaces: `LlmTurnStartedEvent`, `LlmTurnCompletedEvent`, `ToolCallStartedEvent`, `ToolCallCompletedEvent`, `ToolCallFailedEvent`
+- Extend `DaemonEvent` union with all 5
+### Slice 2: AgentLoopOptions callbacks + emission in agent-loop.ts
+- Add 5 optional callbacks to `AgentLoopOptions`
+- Call with try/catch in `_runLoop()` before/after `client.messages.create()`
+- Call with try/catch in `_executeTools()` before/after `tool.execute()`
+- Add `Date.now()` timing for tool calls
+### Slice 3: Wire callbacks in workflow-runner.ts
+- In `runWorkflow()`, pass `AgentLoop` constructor the 5 callbacks
+- Each callback calls `emitter?.emit()` with the appropriate new event kind
+### Slice 4: `worktrain logs` CLI command
+- Add `program.command('logs')` with `--follow` and `--session <id>` options
+- Read daily JSONL, format each line, handle ENOENT
+- Polling loop with midnight rotation
+### Slice 5: Tests
+- `daemon-events.test.ts`: Add 5 new event kinds to exhaustiveness test
+- `agent-loop.test.ts`: Add tests for callback timing, completion, failure, and try/catch guards
+## Test Design
+- onToolCallStarted fires before tool execute (verified via call order recording)
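+- onToolCallCompleted fires after successful execute (verified with durationMs >= 0; timing uses `Date.now()`, which has millisecond resolution, so a fast fake tool can legitimately report 0)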
+- onToolCallFailed fires when tool throws (loop continues normally)
+- onLlmTurnStarted fires with correct messageCount before API call
+- onLlmTurnCompleted fires with actual token counts from API response
+- Callbacks that throw do not crash the loop
+## Risk Register
+| Risk | Mitigation |
+|---|---|
+| Callback throws crash the session | try/catch on all 5 callback invocations |
+| --follow misses events at midnight | Date-check on each poll iteration |
+## PR Strategy
+Single PR: `feat/daemon-conversation-logging`
+## Philosophy Alignment
+- DI for boundaries: Satisfied (callbacks, not DaemonEventEmitter in AgentLoop)
+- Make illegal states unrepresentable: Satisfied (discriminated union)
+- Errors are data: Satisfied (tool throws -> tool_call_failed, not propagated)
+- Fire-and-forget: Satisfied (try/catch guards)
+- YAGNI: Satisfied
+- Exhaustiveness: Satisfied (union extended + test updated)

package/docs/design/daemon-conversation-logging-review.md ADDED Viewed

@@ -0,0 +1,55 @@
+# Daemon Conversation Logging: Design Review Findings
+## Tradeoff Review
+| Tradeoff | Assessment | Conditions for failure |
+|---|---|---|
+| AgentLoopOptions gains 5 optional callbacks | Acceptable - all optional, zero cost when absent | Would matter if AgentLoop were a versioned public library |
+| Dual `tool_called` + `tool_call_started` events in log | Minor duplication, harmless - different fields, different consumers | If a consumer enforced "one event per tool execution" |
+| `llm_turn_started` uses message count (proxy) | Spec-compliant - user explicitly said "estimate from message count" | If accurate pre-call token counts were needed for routing |
+| `--follow` polls at a 500ms interval | Acceptable for human-readable monitoring | If sub-100ms streaming were required |
+## Failure Mode Review
+| Failure mode | Status | Mitigation |
+|---|---|---|
+| Callback throws, propagates into agent loop | **UNMITIGATED - REQUIRES FIX** | Add try/catch around all 5 callback invocations in agent-loop.ts |
+| `tool_call_started` without matching `tool_call_completed` | Handled - catch block emits `tool_call_failed` | No action needed |
+| `llm_turn_started` without matching `llm_turn_completed` (API error) | Acceptable - unmatched started = API error signal | No action needed |
+| `--follow` misses events at midnight file rotation | **REQUIRES FIX** | Check `new Date()` on each poll; switch to new file when date changes |
+| Log file doesn't exist (daemon not started) | Handled - ENOENT returns graceful message | No action needed |
+## Runner-Up / Simpler Alternative Review
+- **Runner-up (Candidate B, per-tool factories)**: No elements worth borrowing. Centralizing in `_executeTools()` is strictly better.
+- **Simpler alternative (no AgentLoop changes + `turn_end` subscriber for LLM events)**: Fails spec - `turn_end` fires after tool results, not after API response. Not a valid simplification.
+- **Hybrid (callbacks for LLM, per-factory for tools)**: Two patterns for the same concern. Worse than either pure approach.
+## Philosophy Alignment
+**Satisfied**: DI for boundaries, immutability, make illegal states unrepresentable, errors as data, determinism, YAGNI, validate at boundaries.
+**Under tension (acceptable)**:
+- Type safety: `argsSummary` is deliberately a truncated string - this is spec-required (max 200 chars) and appropriate for JSONL serialization.
+- Exhaustiveness: DaemonEvent union grows by 5; no switch consumers exist so this is theoretical only.
+## Findings
+### Red (blocking)
+None.
+### Orange (should fix before implementation)
+1. **Missing try/catch around callbacks in agent-loop.ts**: A buggy callback passed to `AgentLoop` would propagate a throw into the agent loop and crash the session. This violates the fire-and-forget invariant that all observability in the daemon upholds. Fix: wrap each of the 5 callback invocations with `try { callback(info); } catch { /* swallow */ }`.
+### Yellow (fix during implementation)
+2. **Midnight file rotation in `--follow`**: The polling loop should check `new Date().toISOString().slice(0, 10)` on each iteration and switch to the new file when the date changes. 3-line fix in the polling loop.
+## Recommended Revisions
+1. Add try/catch guards around all callback invocations in `_runLoop()` and `_executeTools()` in `agent-loop.ts`.
+2. Add date-aware file switching in the `--follow` polling loop in `cli-worktrain.ts`.
+## Residual Concerns
+- The `tool_called` + `tool_call_started` dual events: a future cleanup task could deprecate `tool_called` once all consumers migrate to `tool_call_started`. Not in scope for this PR.
+- The `worktrain logs` command reads from the daily JSONL file directly. If sessions span multiple days, `--session <id>` would only find events in the current day's file. A future improvement could search across all files. Not in scope.

package/docs/design/daemon-conversation-logging.md ADDED Viewed

@@ -0,0 +1,129 @@
+# Daemon Conversation Logging: Design Candidates
+## Problem Understanding
+### Core tensions
+1. **AgentLoop decoupling vs. LLM turn visibility**: AgentLoop is intentionally decoupled from all observability infrastructure (no DaemonEventEmitter import). To emit LLM turn events FROM inside `_runLoop()`, we need to bridge this gap without coupling AgentLoop to daemon-specific types. Options: inject callbacks, use the existing AgentEvent subscriber system, or violate the boundary. The subscriber system fires at `turn_end` (after tool results), which is not the right boundary for `llm_turn_started` / `llm_turn_completed`. Callbacks are the right choice.
+2. **Single-source vs. dual-source tool events**: Today each tool factory (`makeBashTool`, `makeReadTool`, etc.) emits `tool_called` directly. Adding `tool_call_started/completed/failed` in `_executeTools()` creates a single centralized emission point. The existing `tool_called` events remain for backward compatibility; new event kinds are additive.
+3. **Input token estimation**: Knowing the input token count before the call would require a tokenizer (e.g. tiktoken); the API only reports actual counts after the call, as `response.usage.input_tokens` and `response.usage.output_tokens`. For `llm_turn_started`, emit the message count as a proxy. For `llm_turn_completed`, emit the actual token counts from the API response.
+4. **`worktrain logs --follow` streaming**: Node.js file watching is noisy; polling every 500ms is reliable and simple for MVP.
+### What makes this hard
+Nothing is architecturally hard. The tricky parts are:
+- Getting tool event timing exactly right (started before execute, completed/failed after)
+- For `worktrain logs --follow`: handling the case where the log file doesn't exist yet
+- TypeScript type checking: callback signatures must be precise for ts-strict
+### Likely seam
+The real seam for tool events is `_executeTools()` in `agent-loop.ts` - it's the single place all tools execute. The real seam for LLM turn events is the `client.messages.create()` call in `_runLoop()`. Both are in `agent-loop.ts`.
+## Philosophy Constraints
+From `CLAUDE.md` (system-wide):
+- **DI for boundaries**: inject external effects (observability) to keep core logic testable
+- **YAGNI with discipline**: no speculative fields beyond what's in the spec
+- **Exhaustiveness everywhere**: new event kinds extend the `DaemonEvent` discriminated union
+- **Fire-and-forget invariant**: `emit()` is void, errors swallowed - observability never affects correctness
+- **Prefer fakes over mocks**: FakeAnthropicClient pattern in agent-loop tests
+No philosophy conflicts found between stated principles and existing repo patterns.
+## Impact Surface
+- `runWorkflow()` in `workflow-runner.ts`: constructs AgentLoop, must pass new callbacks
+- `AgentLoopOptions` interface: extended with optional callbacks (non-breaking)
+- `DaemonEvent` union: extended with new members (exhaustiveness tests must update)
+- `tests/unit/daemon-events.test.ts`: the exhaustiveness test at line 169 must list new event kinds
+- `tests/unit/agent-loop.test.ts`: needs tests for callback invocation timing
+- No public API changes - all daemon-internal
+## Candidates
+### Candidate A: AgentLoopOptions callbacks (recommended)
+**Summary**: Add 5 optional callback properties to `AgentLoopOptions` in `agent-loop.ts`. Call them synchronously in `_runLoop()` and `_executeTools()`. Wire in `workflow-runner.ts` to call `emitter?.emit()`.
+**New properties on AgentLoopOptions**:
+```typescript
+onLlmTurnStarted?: (info: { messageCount: number }) => void
+onLlmTurnCompleted?: (info: {
+  stopReason: string;
+  outputTokens: number;
+  inputTokens: number;
+  toolNamesRequested: string[];
+}) => void
+onToolCallStarted?: (info: { toolName: string; argsSummary: string }) => void
+onToolCallCompleted?: (info: { toolName: string; durationMs: number; resultSummary: string }) => void
+onToolCallFailed?: (info: { toolName: string; durationMs: number; errorMessage: string }) => void
+```
+**Tensions resolved**: AgentLoop stays decoupled from DaemonEventEmitter. Single source of truth for tool event timing.
+**Boundary**: AgentLoop / workflow-runner.ts interface. Correct seam - AgentLoop is a reusable primitive; workflow-runner.ts is the daemon-specific orchestrator.
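+**Failure mode**: If a callback throws, it propagates into the agent loop. Mitigated in the intended wiring because the callbacks only call `emitter?.emit()`, which is fire-and-forget and never throws; a caller-supplied callback that does throw would still propagate unless each invocation is guarded with try/catch.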
+**Follows existing pattern**: `DaemonRegistry` uses the same inject-as-optional pattern. `toolExecution: 'sequential'` is already a strategy parameter on `AgentLoopOptions`.
+**Gains**: Central timing; no changes to individual tool factories; clean separation; new tools get events automatically.
+**Gives up**: `AgentLoopOptions` interface is slightly heavier (5 optional callbacks). Callbacks are less discoverable than per-tool pattern.
+**Scope**: best-fit.
+**Philosophy**: honors DI-for-boundaries, YAGNI, exhaustiveness. No conflicts.
+---
+### Candidate B: Extend per-tool factory pattern (adapt existing)
+**Summary**: Keep the existing per-tool `emitter?.emit({ kind: 'tool_called' })` approach. Add `tool_call_started` emit before `tool.execute()` and `tool_call_completed`/`tool_call_failed` after, inside each of the 5 tool factory closures. Add LLM turn callbacks to `AgentLoopOptions` only for the LLM-specific events.
+**Tensions resolved**: Minimizes changes to AgentLoop (only 2 callbacks instead of 5). Follows the exact existing pattern.
+**Boundary**: Each tool factory is the emission point.
+**Failure mode**: 5 tool factories x 3 events each = 15 new emit calls. Duplication risk. New tools added later won't automatically get events.
+**Follows existing pattern**: Pure adaptation of the existing `tool_called` pattern.
+**Gains**: No callbacks for tool events in AgentLoopOptions; no risk of propagated errors.
+**Gives up**: DRY principle - timing logic duplicated 5x. Maintenance trap.
+**Scope**: best-fit for existing tools, but creates technical debt.
+**Philosophy**: conflicts with "compose with small, pure functions" (duplication). Honors DI-for-boundaries.
+## Comparison and Recommendation
+**Recommendation: Candidate A**
+Candidate A wins on every meaningful dimension:
+- **Best-fit boundary**: `_executeTools()` is the single canonical execution point for all tools.
+- **Most manageable failure mode**: callbacks call `emitter?.emit()` which can never throw.
+- **Best philosophy fit**: "Compose with small, pure functions" and "DI for boundaries" both point to A.
+- **Easiest to evolve**: Adding a 6th tool gets events automatically.
+- **Consistent with repo patterns**: Same pattern as `DaemonRegistry` injection.
+## Self-Critique
+**Strongest argument against**: Candidate A adds 5 callback properties to `AgentLoopOptions`. If `AgentLoop` is used in tests without an emitter, the interface is heavier. Counter: all 5 are optional (`?`), zero cost when absent.
+**Narrower option that was considered**: Only add LLM turn callbacks (skip `tool_call_started/completed/failed`). Doesn't satisfy the spec.
+**Broader option**: Put the emitter directly in `AgentLoopOptions`. Would require `AgentLoop` to import `DaemonEventEmitter`, coupling the modules. Unjustified.
+**Invalidating assumption**: None. `_executeTools()` is the only tool execution path in `AgentLoop`.
+## Open Questions for Implementation
+1. The existing `tool_called` events in per-tool factories (`makeBashTool`, `makeReadTool`, `makeWriteTool`, `makeContinueWorkflowTool`) - keep them as-is for backward compat, or remove them now that `tool_call_started` supersedes them? Decision: keep for backward compat since consumers may depend on them.
+2. For the `worktrain logs --follow` command, should it print historical lines first then follow? Yes - show existing entries then poll for new ones.
+3. Should `worktrain logs --session <id>` filter by exact sessionId match? Yes.