@exaudeus/workrail 3.31.1 → 3.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/cli/commands/index.d.ts +1 -0
  2. package/dist/cli/commands/index.js +3 -1
  3. package/dist/cli/commands/worktrain-await.js +11 -9
  4. package/dist/cli/commands/worktrain-daemon-install.d.ts +35 -0
  5. package/dist/cli/commands/worktrain-daemon-install.js +291 -0
  6. package/dist/cli/commands/worktrain-daemon.d.ts +31 -0
  7. package/dist/cli/commands/worktrain-daemon.js +272 -0
  8. package/dist/cli/commands/worktrain-spawn.js +11 -9
  9. package/dist/cli-worktrain.js +329 -0
  10. package/dist/cli.js +4 -22
  11. package/dist/console/standalone-console.d.ts +28 -0
  12. package/dist/console/standalone-console.js +142 -0
  13. package/dist/{console/assets/index-6H9DeFxj.js → console-ui/assets/index-BuJFLLfY.js} +1 -1
  14. package/dist/{console → console-ui}/index.html +1 -1
  15. package/dist/daemon/agent-loop.d.ts +26 -0
  16. package/dist/daemon/agent-loop.js +53 -2
  17. package/dist/daemon/daemon-events.d.ts +103 -0
  18. package/dist/daemon/daemon-events.js +56 -0
  19. package/dist/daemon/workflow-runner.d.ts +6 -3
  20. package/dist/daemon/workflow-runner.js +229 -33
  21. package/dist/infrastructure/session/HttpServer.js +133 -34
  22. package/dist/manifest.json +134 -70
  23. package/dist/mcp/output-schemas.d.ts +30 -30
  24. package/dist/mcp/transports/bridge-events.d.ts +4 -0
  25. package/dist/mcp/transports/fatal-exit.js +4 -0
  26. package/dist/mcp/transports/http-entry.js +2 -0
  27. package/dist/mcp/transports/stdio-entry.js +26 -6
  28. package/dist/mcp/v2/tools.d.ts +4 -4
  29. package/dist/trigger/adapters/github-poller.d.ts +44 -0
  30. package/dist/trigger/adapters/github-poller.js +190 -0
  31. package/dist/trigger/adapters/gitlab-poller.d.ts +27 -0
  32. package/dist/trigger/adapters/gitlab-poller.js +81 -0
  33. package/dist/trigger/delivery-client.d.ts +2 -1
  34. package/dist/trigger/delivery-client.js +4 -1
  35. package/dist/trigger/index.d.ts +4 -1
  36. package/dist/trigger/index.js +5 -1
  37. package/dist/trigger/polled-event-store.d.ts +22 -0
  38. package/dist/trigger/polled-event-store.js +173 -0
  39. package/dist/trigger/polling-scheduler.d.ts +20 -0
  40. package/dist/trigger/polling-scheduler.js +249 -0
  41. package/dist/trigger/trigger-listener.d.ts +5 -0
  42. package/dist/trigger/trigger-listener.js +53 -4
  43. package/dist/trigger/trigger-router.d.ts +4 -2
  44. package/dist/trigger/trigger-router.js +7 -4
  45. package/dist/trigger/trigger-store.js +114 -33
  46. package/dist/trigger/types.d.ts +17 -1
  47. package/dist/v2/durable-core/schemas/export-bundle/index.d.ts +224 -224
  48. package/dist/v2/durable-core/schemas/session/events.d.ts +42 -42
  49. package/dist/v2/durable-core/schemas/session/manifest.d.ts +6 -6
  50. package/dist/v2/durable-core/schemas/session/validation-event.d.ts +2 -2
  51. package/dist/v2/durable-core/tokens/payloads.d.ts +52 -52
  52. package/dist/v2/usecases/console-routes.js +3 -3
  53. package/dist/v2/usecases/console-service.js +133 -9
  54. package/dist/v2/usecases/console-types.d.ts +7 -0
  55. package/docs/design/daemon-conversation-logging-plan.md +98 -0
  56. package/docs/design/daemon-conversation-logging-review.md +55 -0
  57. package/docs/design/daemon-conversation-logging.md +129 -0
  58. package/docs/design/github-polling-adapter-design-candidates.md +226 -0
  59. package/docs/design/github-polling-adapter-design-review-findings.md +131 -0
  60. package/docs/design/github-polling-adapter-implementation-plan.md +284 -0
  61. package/docs/design/implementation_plan.md +192 -0
  62. package/docs/design/workflow-id-validation-at-startup.md +146 -0
  63. package/docs/design/workflow-id-validation-design-review.md +87 -0
  64. package/docs/design/workflow-id-validation-implementation-plan.md +185 -0
  65. package/docs/design/worktrain-system-prompt-report-issue-candidates.md +135 -0
  66. package/docs/design/worktrain-system-prompt-report-issue-design-review.md +73 -0
  67. package/docs/ideas/backlog.md +465 -0
  68. package/package.json +1 -1
  69. package/workflows/architecture-scalability-audit.json +1 -1
  70. package/workflows/bug-investigation.agentic.v2.json +3 -3
  71. package/workflows/coding-task-workflow-agentic.json +32 -32
  72. package/workflows/coding-task-workflow-agentic.lean.v2.json +1 -1
  73. package/workflows/coding-task-workflow-agentic.v2.json +7 -7
  74. package/workflows/mr-review-workflow.agentic.v2.json +21 -12
  75. package/workflows/personal-learning-materials-creation-branched.json +2 -2
  76. package/workflows/production-readiness-audit.json +1 -1
  77. package/workflows/relocation-workflow-us.json +2 -2
  78. package/workflows/ui-ux-design-workflow.json +14 -14
  79. package/workflows/workflow-for-workflows.json +3 -3
  80. package/workflows/workflow-for-workflows.v2.json +2 -2
  81. package/workflows/wr.discovery.json +1 -1
  82. /package/dist/{console → console-ui}/assets/index-8dh0Psu-.css +0 -0
@@ -1,6 +1,42 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
2
35
  Object.defineProperty(exports, "__esModule", { value: true });
3
36
  exports.ConsoleService = void 0;
37
+ const fs = __importStar(require("node:fs/promises"));
38
+ const path = __importStar(require("node:path"));
39
+ const os = __importStar(require("node:os"));
4
40
  const neverthrow_1 = require("neverthrow");
5
41
  const neverthrow_2 = require("neverthrow");
6
42
  const session_health_js_1 = require("../projections/session-health.js");
@@ -21,6 +57,57 @@ const DORMANCY_THRESHOLD_MS = (() => {
21
57
  return Number.isFinite(override) && override > 0 ? override : 60 * 60 * 1000;
22
58
  })();
23
59
  const AUTONOMOUS_HEARTBEAT_THRESHOLD_MS = 10 * 60 * 1000;
60
+ const LIVE_ACTIVITY_MAX_ENTRIES = 5;
61
+ const DAEMON_EVENT_LOG_READ_LIMIT_BYTES = 100 * 1024;
62
+ const DAEMON_EVENTS_DIR = path.join(os.homedir(), '.workrail', 'events', 'daemon');
63
/**
 * Collects the most recent `tool_called` daemon events for one session.
 *
 * Reads the JSONL log named after today's UTC date inside DAEMON_EVENTS_DIR.
 * When the file is larger than DAEMON_EVENT_LOG_READ_LIMIT_BYTES only its tail
 * is inspected. Lines that are blank, malformed, or that do not describe a
 * `tool_called` event for `workrailSessionId` are skipped.
 *
 * @param {string} workrailSessionId - Session id the events must match exactly.
 * @param {number} maxEntries - Upper bound on returned entries; values <= 0 yield `[]`.
 * @returns {Promise<Array<{toolName: string, summary?: string, ts: number}> | null>}
 *   The newest matching entries in file order, or `null` when the log could not be read.
 */
async function readLiveActivity(workrailSessionId, maxEntries) {
    // toISOString() is always UTC, so the file name follows the UTC calendar day.
    const date = new Date().toISOString().slice(0, 10);
    const filePath = path.join(DAEMON_EVENTS_DIR, `${date}.jsonl`);
    try {
        const raw = await readDaemonLogTail(filePath, DAEMON_EVENT_LOG_READ_LIMIT_BYTES);
        const activities = [];
        for (const line of raw.split('\n')) {
            if (!line.trim()) {
                continue;
            }
            try {
                const event = JSON.parse(line);
                if (event['kind'] !== 'tool_called' ||
                    event['workrailSessionId'] !== workrailSessionId ||
                    typeof event['toolName'] !== 'string' ||
                    typeof event['ts'] !== 'number') {
                    continue;
                }
                activities.push({
                    toolName: event['toolName'],
                    ...(typeof event['summary'] === 'string' ? { summary: event['summary'] } : {}),
                    ts: event['ts'],
                });
            }
            catch {
                // Best effort: a malformed or non-object line must not hide the valid ones.
            }
        }
        // slice(-0) is slice(0) and would return *every* entry, so guard non-positive limits.
        return maxEntries > 0 ? activities.slice(-maxEntries) : [];
    }
    catch {
        // Missing or unreadable log: live activity is optional, report "unknown" as null.
        return null;
    }
}

/**
 * Returns the text of `filePath`, limited to whole lines within its last `limitBytes` bytes.
 *
 * @param {string} filePath - Log file to read.
 * @param {number} limitBytes - Maximum number of trailing bytes to consider.
 * @returns {Promise<string>} File content (or tail) decoded as UTF-8.
 */
async function readDaemonLogTail(filePath, limitBytes) {
    const { size } = await fs.stat(filePath);
    if (size <= limitBytes) {
        return await fs.readFile(filePath, 'utf8');
    }
    // Read one extra byte in front of the window: if it is '\n' the window starts on a
    // line boundary and nothing is lost; otherwise the leading fragment is discarded below.
    const start = size - limitBytes - 1;
    const buf = Buffer.alloc(limitBytes + 1);
    const fd = await fs.open(filePath, 'r');
    let bytesRead = 0;
    try {
        ({ bytesRead } = await fd.read(buf, 0, buf.length, start));
    }
    finally {
        await fd.close();
    }
    // Decode only what was actually read; a short read would otherwise leave NUL padding
    // glued to the final line and make it unparseable.
    const tail = buf.subarray(0, bytesRead).toString('utf8');
    const firstNewline = tail.indexOf('\n');
    return firstNewline === -1 ? '' : tail.slice(firstNewline + 1);
}
24
111
  class ConsoleService {
25
112
  constructor(ports) {
26
113
  this.ports = ports;
@@ -49,6 +136,7 @@ class ConsoleService {
49
136
  }
50
137
  getSessionDetail(sessionIdStr) {
51
138
  const sessionId = (0, index_js_1.asSessionId)(sessionIdStr);
139
+ const nowMs = Date.now();
52
140
  return this.ports.sessionStore
53
141
  .load(sessionId)
54
142
  .mapErr((storeErr) => ({
@@ -57,16 +145,29 @@ class ConsoleService {
57
145
  }))
58
146
  .andThen((truth) => {
59
147
  const dagRes = (0, run_dag_js_1.projectRunDagV2)(truth.events);
60
- if (dagRes.isErr()) {
61
- return resolveRunCompletion(truth.events, this.ports.snapshotStore)
62
- .map((completionMap) => projectSessionDetail(sessionId, truth, completionMap, {}, {}));
148
+ const detailRA = (() => {
149
+ if (dagRes.isErr()) {
150
+ return resolveRunCompletion(truth.events, this.ports.snapshotStore)
151
+ .map((completionMap) => projectSessionDetail(sessionId, truth, completionMap, {}, {}));
152
+ }
153
+ const dag = dagRes.value;
154
+ return neverthrow_1.ResultAsync.combine([
155
+ resolveRunCompletion(truth.events, this.ports.snapshotStore),
156
+ resolveStepLabels(dag, this.ports.snapshotStore, this.ports.pinnedWorkflowStore),
157
+ resolveWorkflowNames(dag, this.ports.pinnedWorkflowStore),
158
+ ]).map(([completionMap, stepLabels, workflowNames]) => projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames));
159
+ })();
160
+ const registryEntry = this.ports.daemonRegistry?.snapshot().get(sessionId);
161
+ const isLive = registryEntry !== undefined
162
+ && (nowMs - registryEntry.lastHeartbeatMs) < AUTONOMOUS_HEARTBEAT_THRESHOLD_MS;
163
+ if (!isLive) {
164
+ return detailRA.map((detail) => ({ ...detail, liveActivity: null }));
63
165
  }
64
- const dag = dagRes.value;
65
- return neverthrow_1.ResultAsync.combine([
66
- resolveRunCompletion(truth.events, this.ports.snapshotStore),
67
- resolveStepLabels(dag, this.ports.snapshotStore, this.ports.pinnedWorkflowStore),
68
- resolveWorkflowNames(dag, this.ports.pinnedWorkflowStore),
69
- ]).map(([completionMap, stepLabels, workflowNames]) => projectSessionDetail(sessionId, truth, completionMap, stepLabels, workflowNames));
166
+ const liveActivityRA = neverthrow_1.ResultAsync.fromSafePromise(readLiveActivity(sessionIdStr, LIVE_ACTIVITY_MAX_ENTRIES));
167
+ return neverthrow_1.ResultAsync.combine([detailRA, liveActivityRA]).map(([detail, liveActivity]) => ({
168
+ ...detail,
169
+ liveActivity,
170
+ }));
70
171
  });
71
172
  }
72
173
  getNodeDetail(sessionIdStr, nodeId) {
@@ -413,6 +514,26 @@ function extractGitBranch(events) {
413
514
  }
414
515
  return null;
415
516
  }
517
/**
 * Determines the repository root recorded for a session.
 *
 * The first `repo_root` observation found wins and is returned immediately.
 * Failing that, the result is the first non-empty string `workspacePath`
 * found in an `initial` context-set event, or `null` when neither exists.
 *
 * @param {Iterable<object>} events - Session events in the order they should be scanned.
 * @returns {*} The observed repo root value, the fallback workspace path, or `null`.
 */
function extractRepoRoot(events) {
    let fallbackPath = null;
    for (const event of events) {
        const isRepoRootObservation = event.kind === constants_js_1.EVENT_KIND.OBSERVATION_RECORDED
            && event.data.key === 'repo_root';
        if (isRepoRootObservation) {
            return event.data.value.value;
        }
        // Only the first usable initial context contributes a fallback.
        if (event.kind !== constants_js_1.EVENT_KIND.CONTEXT_SET ||
            event.data.source !== 'initial' ||
            fallbackPath !== null) {
            continue;
        }
        const context = event.data.context;
        const isPlainObject = Boolean(context) && typeof context === 'object' && !Array.isArray(context);
        if (!isPlainObject) {
            continue;
        }
        const candidate = context['workspacePath'];
        if (typeof candidate === 'string' && candidate.length > 0) {
            fallbackPath = candidate;
        }
    }
    return fallbackPath;
}
416
537
  function truncateTitle(text, maxLen = 120) {
417
538
  if (text.length <= maxLen)
418
539
  return text;
@@ -439,6 +560,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
439
560
  const gapsRes = sortedEventsRes.isOk() ? (0, gaps_js_1.projectGapsV2)(sortedEventsRes.value) : (0, neverthrow_2.err)(sortedEventsRes.error);
440
561
  const sessionTitle = sortedEventsRes.isOk() ? deriveSessionTitle(sortedEventsRes.value) : null;
441
562
  const gitBranch = extractGitBranch(events);
563
+ const repoRoot = extractRepoRoot(events);
442
564
  const isAutonomous = (() => {
443
565
  if (!sortedEventsRes.isOk())
444
566
  return false;
@@ -466,6 +588,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
466
588
  hasUnresolvedGaps: false,
467
589
  recapSnippet: null,
468
590
  gitBranch,
591
+ repoRoot,
469
592
  lastModifiedMs,
470
593
  isAutonomous,
471
594
  isLive,
@@ -510,6 +633,7 @@ function projectSessionSummary(sessionId, truth, completionByRunId, workflowName
510
633
  hasUnresolvedGaps,
511
634
  recapSnippet,
512
635
  gitBranch,
636
+ repoRoot,
513
637
  lastModifiedMs,
514
638
  isAutonomous,
515
639
  isLive,
@@ -16,6 +16,7 @@ export interface ConsoleSessionSummary {
16
16
  readonly hasUnresolvedGaps: boolean;
17
17
  readonly recapSnippet: string | null;
18
18
  readonly gitBranch: string | null;
19
+ readonly repoRoot: string | null;
19
20
  readonly lastModifiedMs: number;
20
21
  readonly isAutonomous: boolean;
21
22
  readonly isLive: boolean;
@@ -76,11 +77,17 @@ export interface ConsoleDagRun {
76
77
  readonly executionTraceSummary: ConsoleExecutionTraceSummary | null;
77
78
  readonly skippedSteps: readonly ConsoleGhostStep[];
78
79
  }
80
/** One `tool_called` daemon log event, reduced to the fields the console surfaces. */
+ export interface ConsoleToolActivity {
81
/** Name of the tool that was invoked. */
+ readonly toolName: string;
82
/** Optional summary text; omitted when the logged event carried no string `summary`. */
+ readonly summary?: string;
83
/** Numeric timestamp copied from the event (presumably epoch milliseconds — TODO confirm against the emitter). */
+ readonly ts: number;
84
+ }
79
85
  /** Detail payload for a single session as returned by the console service. */
  export interface ConsoleSessionDetail {
80
86
  readonly sessionId: string;
81
87
  readonly sessionTitle: string | null;
82
88
  readonly health: ConsoleSessionHealth;
83
89
  readonly runs: readonly ConsoleDagRun[];
90
  /**
   * Most recent tool calls of a live daemon session, in log order.
   * `null` when the session is not considered live or today's daemon event log
   * could not be read; may be absent on payloads that never populate it.
   */
+ readonly liveActivity?: readonly ConsoleToolActivity[] | null;
84
91
  }
85
92
  export type ConsoleValidationOutcome = 'pass' | 'fail';
86
93
  export interface ConsoleValidationResult {
@@ -0,0 +1,98 @@
1
+ # Implementation Plan: Daemon Conversation Logging
2
+
3
+ ## Problem Statement
4
+
5
+ The WorkRail daemon runs workflows autonomously but provides minimal visibility into what the agent is actually doing. Today you can see `session_started`, `tool_called`, and `session_completed` in the JSONL event log - but you cannot see what the LLM decided, which tools it requested, how long each tool took, or whether a tool succeeded. Adding `llm_turn_started`, `llm_turn_completed`, `tool_call_started`, `tool_call_completed`, and `tool_call_failed` events - plus a `worktrain logs` CLI command - turns the event file into a real-time audit trail of agent behavior.
6
+
7
+ ## Acceptance Criteria
8
+
9
+ 1. For every LLM API call in `_runLoop()`, `llm_turn_started` is written before the call and `llm_turn_completed` is written after the response arrives.
10
+ 2. For every tool execution via `_executeTools()`, `tool_call_started` is written before `tool.execute()`, and either `tool_call_completed` or `tool_call_failed` is written after.
11
+ 3. `tool_call_started` args are truncated to max 200 chars. `tool_call_completed` result summary truncated to max 200 chars.
12
+ 4. All new events appear in the same daily JSONL file as existing events.
13
+ 5. `worktrain logs` reads today's log file and prints each event formatted for humans.
14
+ 6. `worktrain logs --follow` polls the file every 500ms and prints new events as they arrive.
15
+ 7. `worktrain logs --session <id>` filters events to those with matching `sessionId`.
16
+ 8. `worktrain logs --follow` handles midnight file rotation (switches to new date file).
17
+ 9. If the log file doesn't exist, `worktrain logs` prints a helpful message; `--follow` waits for the file.
18
+ 10. TypeScript compiles without errors. Existing tests pass.
19
+
20
+ ## Non-Goals
21
+
22
+ - NOT putting events in the v2 session event store
23
+ - NOT adding a Console Timeline tab
24
+ - NOT deprecating `tool_called` events (backward compat)
25
+ - NOT implementing accurate pre-call token counting (message count proxy is sufficient)
26
+ - NOT searching across multiple day files for `--session` filter
27
+
28
+ ## Philosophy-Driven Constraints
29
+
30
+ - **Fire-and-forget invariant**: All callbacks in AgentLoop are wrapped in try/catch that swallow errors.
31
+ - **DI for boundaries**: AgentLoop receives callbacks, not DaemonEventEmitter itself.
32
+ - **Make illegal states unrepresentable**: New event kinds added to `DaemonEvent` discriminated union.
33
+ - **YAGNI**: Only the specified event kinds and fields.
34
+
35
+ ## Invariants
36
+
37
+ 1. `tool_call_started` is always followed by either `tool_call_completed` or `tool_call_failed`.
38
+ 2. `llm_turn_started` may have no matching `llm_turn_completed` on API error - this is intentional signal.
39
+ 3. Callbacks in AgentLoop never propagate exceptions to the caller.
40
+ 4. `DaemonEvent` union remains exhaustive.
41
+
42
+ ## Selected Approach
43
+
44
+ AgentLoopOptions callbacks: 5 optional callback properties on `AgentLoopOptions` called in `_runLoop()` and `_executeTools()`. workflow-runner.ts wires them to `emitter?.emit()`.
45
+
46
+ ## Vertical Slices
47
+
48
+ ### Slice 1: New event types in daemon-events.ts
49
+ - Add interfaces: `LlmTurnStartedEvent`, `LlmTurnCompletedEvent`, `ToolCallStartedEvent`, `ToolCallCompletedEvent`, `ToolCallFailedEvent`
50
+ - Extend `DaemonEvent` union with all 5
51
+
52
+ ### Slice 2: AgentLoopOptions callbacks + emission in agent-loop.ts
53
+ - Add 5 optional callbacks to `AgentLoopOptions`
54
+ - Call with try/catch in `_runLoop()` before/after `client.messages.create()`
55
+ - Call with try/catch in `_executeTools()` before/after `tool.execute()`
56
+ - Add `Date.now()` timing for tool calls
57
+
58
+ ### Slice 3: Wire callbacks in workflow-runner.ts
59
+ - In `runWorkflow()`, pass `AgentLoop` constructor the 5 callbacks
60
+ - Each callback calls `emitter?.emit()` with the appropriate new event kind
61
+
62
+ ### Slice 4: `worktrain logs` CLI command
63
+ - Add `program.command('logs')` with `--follow` and `--session <id>` options
64
+ - Read daily JSONL, format each line, handle ENOENT
65
+ - Polling loop with midnight rotation
66
+
67
+ ### Slice 5: Tests
68
+ - `daemon-events.test.ts`: Add 5 new event kinds to exhaustiveness test
69
+ - `agent-loop.test.ts`: Add tests for callback timing, completion, failure, and try/catch guards
70
+
71
+ ## Test Design
72
+
73
+ - onToolCallStarted fires before tool execute (verified via call order recording)
74
+ - onToolCallCompleted fires after successful execute (verified with a numeric durationMs >= 0; a fake tool that resolves immediately can legitimately measure 0 ms with `Date.now()`)
75
+ - onToolCallFailed fires when tool throws (loop continues normally)
76
+ - onLlmTurnStarted fires with correct messageCount before API call
77
+ - onLlmTurnCompleted fires with actual token counts from API response
78
+ - Callbacks that throw do not crash the loop
79
+
80
+ ## Risk Register
81
+
82
+ | Risk | Mitigation |
83
+ |---|---|
84
+ | Callback throws crash the session | try/catch on all 5 callback invocations |
85
+ | --follow misses events at midnight | Date-check on each poll iteration |
86
+
87
+ ## PR Strategy
88
+
89
+ Single PR: `feat/daemon-conversation-logging`
90
+
91
+ ## Philosophy Alignment
92
+
93
+ - DI for boundaries: Satisfied (callbacks, not DaemonEventEmitter in AgentLoop)
94
+ - Make illegal states unrepresentable: Satisfied (discriminated union)
95
+ - Errors are data: Satisfied (tool throws -> tool_call_failed, not propagated)
96
+ - Fire-and-forget: Satisfied (try/catch guards)
97
+ - YAGNI: Satisfied
98
+ - Exhaustiveness: Satisfied (union extended + test updated)
@@ -0,0 +1,55 @@
1
+ # Daemon Conversation Logging: Design Review Findings
2
+
3
+ ## Tradeoff Review
4
+
5
+ | Tradeoff | Assessment | Conditions for failure |
6
+ |---|---|---|
7
+ | AgentLoopOptions gains 5 optional callbacks | Acceptable - all optional, zero cost when absent | Would matter if AgentLoop were a versioned public library |
8
+ | Dual `tool_called` + `tool_call_started` events in log | Minor duplication, harmless - different fields, different consumers | If a consumer enforced "one event per tool execution" |
9
+ | `llm_turn_started` uses message count (proxy) | Spec-compliant - user explicitly said "estimate from message count" | If accurate pre-call token counts were needed for routing |
10
+ | `--follow` polls at 500ms interval | Acceptable for human-readable monitoring | If sub-100ms streaming latency were required |
11
+
12
+ ## Failure Mode Review
13
+
14
+ | Failure mode | Status | Mitigation |
15
+ |---|---|---|
16
+ | Callback throws, propagates into agent loop | **UNMITIGATED - REQUIRES FIX** | Add try/catch around all 5 callback invocations in agent-loop.ts |
17
+ | `tool_call_started` without matching `tool_call_completed` | Handled - catch block emits `tool_call_failed` | No action needed |
18
+ | `llm_turn_started` without matching `llm_turn_completed` (API error) | Acceptable - unmatched started = API error signal | No action needed |
19
+ | `--follow` misses events at midnight file rotation | **REQUIRES FIX** | Check `new Date()` on each poll; switch to new file when date changes |
20
+ | Log file doesn't exist (daemon not started) | Handled - ENOENT returns graceful message | No action needed |
21
+
22
+ ## Runner-Up / Simpler Alternative Review
23
+
24
+ - **Runner-up (Candidate B, per-tool factories)**: No elements worth borrowing. Centralizing in `_executeTools()` is strictly better.
25
+ - **Simpler alternative (no AgentLoop changes + `turn_end` subscriber for LLM events)**: Fails spec - `turn_end` fires after tool results, not after API response. Not a valid simplification.
26
+ - **Hybrid (callbacks for LLM, per-factory for tools)**: Two patterns for the same concern. Worse than either pure approach.
27
+
28
+ ## Philosophy Alignment
29
+
30
+ **Satisfied**: DI for boundaries, immutability, make illegal states unrepresentable, errors as data, determinism, YAGNI, validate at boundaries.
31
+
32
+ **Under tension (acceptable)**:
33
+ - Type safety: `argsSummary` is deliberately a truncated string - this is spec-required (max 200 chars) and appropriate for JSONL serialization.
34
+ - Exhaustiveness: DaemonEvent union grows by 5; no switch consumers exist so this is theoretical only.
35
+
36
+ ## Findings
37
+
38
+ ### Red (blocking)
39
+ None.
40
+
41
+ ### Orange (should fix before implementation)
42
+ 1. **Missing try/catch around callbacks in agent-loop.ts**: A buggy callback passed to `AgentLoop` would propagate a throw into the agent loop and crash the session. This violates the fire-and-forget invariant that all observability in the daemon upholds. Fix: wrap each of the 5 callback invocations with `try { callback(info); } catch { /* swallow */ }`.
43
+
44
+ ### Yellow (fix during implementation)
45
+ 2. **Midnight file rotation in `--follow`**: The polling loop should check `new Date().toISOString().slice(0, 10)` on each iteration and switch to the new file when the date changes. 3-line fix in the polling loop.
46
+
47
+ ## Recommended Revisions
48
+
49
+ 1. Add try/catch guards around all callback invocations in `_runLoop()` and `_executeTools()` in `agent-loop.ts`.
50
+ 2. Add date-aware file switching in the `--follow` polling loop in `cli-worktrain.ts`.
51
+
52
+ ## Residual Concerns
53
+
54
+ - The `tool_called` + `tool_call_started` dual events: a future cleanup task could deprecate `tool_called` once all consumers migrate to `tool_call_started`. Not in scope for this PR.
55
+ - The `worktrain logs` command reads from the daily JSONL file directly. If sessions span multiple days, `--session <id>` would only find events in the current day's file. A future improvement could search across all files. Not in scope.
@@ -0,0 +1,129 @@
1
+ # Daemon Conversation Logging: Design Candidates
2
+
3
+ ## Problem Understanding
4
+
5
+ ### Core tensions
6
+
7
+ 1. **AgentLoop decoupling vs. LLM turn visibility**: AgentLoop is intentionally decoupled from all observability infrastructure (no DaemonEventEmitter import). To emit LLM turn events FROM inside `_runLoop()`, we need to bridge this gap without coupling AgentLoop to daemon-specific types. Options: inject callbacks, use the existing AgentEvent subscriber system, or violate the boundary. The subscriber system fires at `turn_end` (after tool results), which is not the right boundary for `llm_turn_started` / `llm_turn_completed`. Callbacks are the right choice.
8
+
9
+ 2. **Single-source vs. dual-source tool events**: Today each tool factory (`makeBashTool`, `makeReadTool`, etc.) emits `tool_called` directly. Adding `tool_call_started/completed/failed` in `_executeTools()` creates a single centralized emission point. The existing `tool_called` events remain for backward compatibility; new event kinds are additive.
10
+
11
+ 3. **Input token estimation**: A true token count before the call would require a tokenizer (e.g. tiktoken). Actual counts are only available after the call, when the API returns `response.usage.input_tokens` and `response.usage.output_tokens`. For `llm_turn_started`, emit message count as a proxy. For `llm_turn_completed`, emit actual token counts from the API response.
12
+
13
+ 4. **`worktrain logs --follow` streaming**: Node.js file watching is noisy; polling every 500ms is reliable and simple for MVP.
14
+
15
+ ### What makes this hard
16
+
17
+ Nothing is architecturally hard. The tricky parts are:
18
+ - Getting tool event timing exactly right (started before execute, completed/failed after)
19
+ - For `worktrain logs --follow`: handling the case where the log file doesn't exist yet
20
+ - TypeScript type checking: callback signatures must be precise for ts-strict
21
+
22
+ ### Likely seam
23
+
24
+ The real seam for tool events is `_executeTools()` in `agent-loop.ts` - it's the single place all tools execute. The real seam for LLM turn events is the `client.messages.create()` call in `_runLoop()`. Both are in `agent-loop.ts`.
25
+
26
+ ## Philosophy Constraints
27
+
28
+ From `CLAUDE.md` (system-wide):
29
+ - **DI for boundaries**: inject external effects (observability) to keep core logic testable
30
+ - **YAGNI with discipline**: no speculative fields beyond what's in the spec
31
+ - **Exhaustiveness everywhere**: new event kinds extend the `DaemonEvent` discriminated union
32
+ - **Fire-and-forget invariant**: `emit()` is void, errors swallowed - observability never affects correctness
33
+ - **Prefer fakes over mocks**: FakeAnthropicClient pattern in agent-loop tests
34
+
35
+ No philosophy conflicts found between stated principles and existing repo patterns.
36
+
37
+ ## Impact Surface
38
+
39
+ - `runWorkflow()` in `workflow-runner.ts`: constructs AgentLoop, must pass new callbacks
40
+ - `AgentLoopOptions` interface: extended with optional callbacks (non-breaking)
41
+ - `DaemonEvent` union: extended with new members (exhaustiveness tests must update)
42
+ - `tests/unit/daemon-events.test.ts`: the exhaustiveness test at line 169 must list new event kinds
43
+ - `tests/unit/agent-loop.test.ts`: needs tests for callback invocation timing
44
+ - No public API changes - all daemon-internal
45
+
46
+ ## Candidates
47
+
48
+ ### Candidate A: AgentLoopOptions callbacks (recommended)
49
+
50
+ **Summary**: Add 5 optional callback properties to `AgentLoopOptions` in `agent-loop.ts`. Call them synchronously in `_runLoop()` and `_executeTools()`. Wire in `workflow-runner.ts` to call `emitter?.emit()`.
51
+
52
+ **New properties on AgentLoopOptions**:
53
+ ```typescript
54
+ onLlmTurnStarted?: (info: { messageCount: number }) => void
55
+ onLlmTurnCompleted?: (info: {
56
+ stopReason: string;
57
+ outputTokens: number;
58
+ inputTokens: number;
59
+ toolNamesRequested: string[];
60
+ }) => void
61
+ onToolCallStarted?: (info: { toolName: string; argsSummary: string }) => void
62
+ onToolCallCompleted?: (info: { toolName: string; durationMs: number; resultSummary: string }) => void
63
+ onToolCallFailed?: (info: { toolName: string; durationMs: number; errorMessage: string }) => void
64
+ ```
65
+
66
+ **Tensions resolved**: AgentLoop stays decoupled from DaemonEventEmitter. Single source of truth for tool event timing.
67
+
68
+ **Boundary**: AgentLoop / workflow-runner.ts interface. Correct seam - AgentLoop is a reusable primitive; workflow-runner.ts is the daemon-specific orchestrator.
69
+
70
+ **Failure mode**: If a callback throws, it propagates into the agent loop. Mitigated by: callbacks call `emitter?.emit()` which is fire-and-forget and never throws.
71
+
72
+ **Follows existing pattern**: `DaemonRegistry` uses the same inject-as-optional pattern. `toolExecution: 'sequential'` is already a strategy parameter on `AgentLoopOptions`.
73
+
74
+ **Gains**: Central timing; no changes to individual tool factories; clean separation; new tools get events automatically.
75
+ **Gives up**: `AgentLoopOptions` interface is slightly heavier (5 optional callbacks). Callbacks are less discoverable than per-tool pattern.
76
+
77
+ **Scope**: best-fit.
78
+
79
+ **Philosophy**: honors DI-for-boundaries, YAGNI, exhaustiveness. No conflicts.
80
+
81
+ ---
82
+
83
+ ### Candidate B: Extend per-tool factory pattern (adapt existing)
84
+
85
+ **Summary**: Keep the existing per-tool `emitter?.emit({ kind: 'tool_called' })` approach. Add `tool_call_started` emit before `tool.execute()` and `tool_call_completed`/`tool_call_failed` after, inside each of the 5 tool factory closures. Add LLM turn callbacks to `AgentLoopOptions` only for the LLM-specific events.
86
+
87
+ **Tensions resolved**: Minimizes changes to AgentLoop (only 2 callbacks instead of 5). Follows the exact existing pattern.
88
+
89
+ **Boundary**: Each tool factory is the emission point.
90
+
91
+ **Failure mode**: 5 tool factories x 3 events each = 15 new emit calls. Duplication risk. New tools added later won't automatically get events.
92
+
93
+ **Follows existing pattern**: Pure adaptation of the existing `tool_called` pattern.
94
+
95
+ **Gains**: No callbacks for tool events in AgentLoopOptions; no risk of propagated errors.
96
+ **Gives up**: DRY principle - timing logic duplicated 5x. Maintenance trap.
97
+
98
+ **Scope**: best-fit for existing tools, but creates technical debt.
99
+
100
+ **Philosophy**: conflicts with "compose with small, pure functions" (duplication). Honors DI-for-boundaries.
101
+
102
+ ## Comparison and Recommendation
103
+
104
+ **Recommendation: Candidate A**
105
+
106
+ Candidate A wins on every meaningful dimension:
107
+ - **Best-fit boundary**: `_executeTools()` is the single canonical execution point for all tools.
108
+ - **Most manageable failure mode**: callbacks call `emitter?.emit()` which can never throw.
109
+ - **Best philosophy fit**: "Compose with small, pure functions" and "DI for boundaries" both point to A.
110
+ - **Easiest to evolve**: Adding a 6th tool gets events automatically.
111
+ - **Consistent with repo patterns**: Same pattern as `DaemonRegistry` injection.
112
+
113
+ ## Self-Critique
114
+
115
+ **Strongest argument against**: Candidate A adds 5 callback properties to `AgentLoopOptions`. If `AgentLoop` is used in tests without an emitter, the interface is heavier. Counter: all 5 are optional (`?`), zero cost when absent.
116
+
117
+ **Narrower option that was considered**: Only add LLM turn callbacks (skip `tool_call_started/completed/failed`). Doesn't satisfy the spec.
118
+
119
+ **Broader option**: Put the emitter directly in `AgentLoopOptions`. Would require `AgentLoop` to import `DaemonEventEmitter`, coupling the modules. Unjustified.
120
+
121
+ **Invalidating assumption**: None. `_executeTools()` is the only tool execution path in `AgentLoop`.
122
+
123
+ ## Open Questions for Implementation
124
+
125
+ 1. The existing `tool_called` events in per-tool factories (`makeBashTool`, `makeReadTool`, `makeWriteTool`, `makeContinueWorkflowTool`) - keep them as-is for backward compat, or remove them now that `tool_call_started` supersedes them? Decision: keep for backward compat since consumers may depend on them.
126
+
127
+ 2. For the `worktrain logs --follow` command, should it print historical lines first then follow? Yes - show existing entries then poll for new ones.
128
+
129
+ 3. Should `worktrain logs --session <id>` filter by exact sessionId match? Yes.