macro-agent 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,404 @@
1
+ /**
2
+ * Cascade Diff Server — services `cascade/diff.request` notifications.
3
+ *
4
+ * The OpenHive hub fetches unified diffs from this runtime on demand. Flow:
5
+ *
6
+ * 1. Hub sends `cascade/diff.request` with `request_id`, `stream_id`,
7
+ * `head`, optional `base` / `file_paths` / `files_only`.
8
+ * 2. This handler resolves a worktree path from the stream id (falling
9
+ * back to the bare repo when no live worktree is checked out on the
10
+ * stream), shells out to `git show` or `git diff`, and produces a
11
+ * unified diff blob (or a name-only list when `files_only: true`).
12
+ * 3. Response is emitted as a `cascade/diff.response` notification —
13
+ * inline when ≤ 512 KB, streamed via N `cascade/diff.chunk`
14
+ * notifications when larger.
15
+ *
16
+ * The 50 MB raw cap (`MAX_DIFF_BYTES`) defends against runaway monorepo
17
+ * diffs. Errors fold into the same `cascade/diff.response` method via the
18
+ * `error` shape (mirrors trajectory/content.response).
19
+ *
20
+ * @module map/cascade-diff-server
21
+ */
22
+
23
+ import { spawn } from 'child_process';
24
+ import { createHash, randomBytes } from 'crypto';
25
+ import { existsSync } from 'fs';
26
+ import type { GitCascadeAdapter } from '../workspace/git-cascade-adapter.js';
27
+
28
// JSON-RPC notification method names used by the diff exchange.
const REQUEST_METHOD = 'cascade/diff.request';
const RESPONSE_METHOD = 'cascade/diff.response';
const CHUNK_METHOD = 'cascade/diff.chunk';

/** Largest diff (in bytes) that is sent inline in the response: 512 KiB. */
const INLINE_THRESHOLD_BYTES = 512 * 2 ** 10;
/** Raw byte size of each streamed chunk before base64 encoding: 1 MiB. */
const CHUNK_SIZE_BYTES = 2 ** 20;
/** Hard cap on captured git stdout: 50 MiB. */
const MAX_DIFF_BYTES = 50 * 2 ** 20;
/** How long a git child process may run before it is killed: 30 seconds. */
const GIT_TIMEOUT_MS = 30 * 1000;
36
+
37
/**
 * Minimal connection surface the diff server needs: subscribe and
 * unsubscribe notification handlers, and push notifications back out.
 */
export interface CascadeDiffServerConnection {
  /** Register `handler` for inbound notifications named `method`. */
  onNotification(method: string, handler: (params: unknown) => void | Promise<void>): void;

  /** Remove a handler previously passed to `onNotification`. */
  offNotification(method: string, handler: (params: unknown) => void | Promise<void>): void;

  /** Send a JSON-RPC notification back to the hub. */
  sendNotification(method: string, params: Record<string, unknown>): void | Promise<void>;
}
49
+
50
/** Payload of a `cascade/diff.request` notification. */
export interface CascadeDiffRequestParams {
  /** Correlation id echoed back on the response. */
  request_id: string;
  /** Stream used to pick the working directory git runs in. */
  stream_id: string;
  /** Revision to show, or the right-hand side of the range when `base` is set. */
  head: string;
  /** Optional left-hand side; when present the diff is `base..head`. */
  base?: string;
  /** Optional pathspecs restricting the diff. */
  file_paths?: Array<string>;
  /** When `true`, only the list of touched files is produced. */
  files_only?: boolean;
  format?: 'unified';
}
59
+
60
/** Error codes carried in the `error.code` field of a diff response. */
type DiffErrorCode = 'not_found' | 'bad_request' | 'integrity_failed' | 'internal';
65
+
66
/** Outcome of one git invocation, ready to be sent to the hub. */
interface ProduceResult {
  /** Raw diff bytes (empty for name-only requests). */
  blob: Buffer;
  /** Paths reported as touched by the diff. */
  filesTouched: Array<string>;
  /** Whether the captured output was cut short. */
  truncated: boolean;
}
71
+
72
/**
 * Register the `cascade/diff.request` handler on the connection.
 *
 * The request payload is validated before any git process is started:
 * malformed fields (wrong types, or revisions that start with `-` and
 * would be read by git as options) are answered with a `bad_request`
 * error instead of being passed to the shell-out.
 *
 * @param connection - Connection used to receive requests and send replies.
 * @param adapter - Adapter used to resolve the directory git runs in.
 * @returns A cleanup function that removes the registration.
 */
export function setupCascadeDiffServer(
  connection: CascadeDiffServerConnection,
  adapter: GitCascadeAdapter,
): () => void {
  const handler = async (params: unknown): Promise<void> => {
    const req = params as CascadeDiffRequestParams | null;
    // Without a usable request_id there is nothing to correlate a reply to.
    if (!req?.request_id || typeof req.request_id !== 'string') return;

    const problem = describeInvalidDiffRequest(req);
    if (problem !== null) {
      await sendError(connection, req.request_id, 'bad_request', problem);
      return;
    }

    const workdir = resolveWorkdir(adapter, req.stream_id);
    if (!workdir) {
      await sendError(
        connection,
        req.request_id,
        'not_found',
        `no worktree or repo path for stream ${req.stream_id}`,
      );
      return;
    }

    let produced: ProduceResult;
    try {
      produced = await runGit({
        workdir,
        head: req.head,
        base: req.base,
        filePaths: req.file_paths,
        filesOnly: req.files_only === true,
      });
    } catch (err) {
      // Anything can be thrown; only Error instances carry a `message`.
      const detail = err instanceof Error ? err.message : String(err);
      await sendError(connection, req.request_id, 'internal', `git failed: ${detail}`);
      return;
    }

    // Small results travel inline; larger ones are streamed in chunks.
    if (produced.blob.length <= INLINE_THRESHOLD_BYTES) {
      await connection.sendNotification(RESPONSE_METHOD, {
        request_id: req.request_id,
        streaming: false,
        diff: produced.blob.toString('utf-8'),
        files_touched: produced.filesTouched,
        truncated: produced.truncated,
      });
      return;
    }

    await streamLargeBlob(
      connection,
      req.request_id,
      produced.blob,
      produced.filesTouched,
      produced.truncated,
    );
  };

  connection.onNotification(REQUEST_METHOD, handler);
  return () => {
    try {
      connection.offNotification(REQUEST_METHOD, handler);
    } catch {
      /* non-fatal */
    }
  };
}

/**
 * Check the wire payload of a diff request.
 *
 * @returns A human-readable reason when the request is unusable, else `null`.
 */
function describeInvalidDiffRequest(req: CascadeDiffRequestParams): string | null {
  const raw = req as unknown as Record<string, unknown>;
  const streamId = raw.stream_id;
  const head = raw.head;
  const base = raw.base;
  const filePaths = raw.file_paths;

  if (!streamId || !head) return 'missing stream_id or head';
  if (typeof streamId !== 'string') return 'stream_id must be a string';
  // A leading "-" would make git parse the value as an option.
  if (typeof head !== 'string' || head.startsWith('-')) {
    return 'head must be a revision string that does not start with "-"';
  }
  if (base != null && (typeof base !== 'string' || base.startsWith('-'))) {
    return 'base must be a revision string that does not start with "-"';
  }
  if (
    filePaths != null &&
    (!Array.isArray(filePaths) || filePaths.some((p) => typeof p !== 'string'))
  ) {
    return 'file_paths must be an array of strings';
  }
  return null;
}
152
+
153
+ // ============================================================================
154
+ // Worktree resolution
155
+ // ============================================================================
156
+
157
/**
 * Pick the directory git should run in for a stream.
 *
 * Tries the worktree whose `currentStream` equals `streamId` first, then
 * the adapter's `repoPath`. A candidate is only used when it exists on
 * disk; adapter errors are treated as "no candidate".
 *
 * @returns The chosen directory, or `null` when neither candidate is usable.
 */
function resolveWorkdir(
  adapter: GitCascadeAdapter,
  streamId: string,
): string | null {
  const usable = (candidate: string | null | undefined): string | null =>
    candidate && existsSync(candidate) ? candidate : null;

  // First choice: a live worktree currently checked out on this stream.
  let fromWorktree: string | null = null;
  try {
    const live = adapter.listWorktrees().find((wt) => wt.currentStream === streamId);
    fromWorktree = usable(live?.path);
  } catch {
    /* fall through to repo-path fallback */
  }
  if (fromWorktree !== null) return fromWorktree;

  // Second choice: the repo path itself. `git show <sha>` works repo-wide.
  try {
    return usable(adapter.repoPath);
  } catch {
    return null;
  }
}
180
+
181
+ // ============================================================================
182
+ // Git shell-out
183
+ // ============================================================================
184
+
185
/** Inputs for a single git diff/show invocation. */
interface RunGitArgs {
  /** Directory the git process runs in. */
  workdir: string;
  /** Revision to show, or the right-hand side of the range. */
  head: string;
  /** Optional left-hand side of the range. */
  base?: string;
  /** Optional pathspecs appended after `--`. */
  filePaths?: Array<string>;
  /** Produce a name-only listing instead of a patch. */
  filesOnly: boolean;
}
192
+
193
/**
 * Run git for the request and shape its output.
 *
 * Name-only requests return an empty blob plus the parsed file list.
 * Patch requests return the raw bytes and derive the file list from the
 * patch headers, which avoids a second git invocation.
 */
async function runGit(args: RunGitArgs): Promise<ProduceResult> {
  const { data, truncated } = await spawnCapped(args.workdir, buildGitArgs(args));

  return args.filesOnly
    ? { blob: Buffer.from(''), filesTouched: parseNameOnly(data), truncated }
    : { blob: data, filesTouched: extractFilesFromDiffHeaders(data), truncated };
}
207
+
208
/**
 * Build the argv for `git diff` (when `base` is set) or `git show`.
 *
 * Flags used:
 * - `--no-textconv`: don't apply textconv filters (force raw bytes).
 * - `-U3`: 3 lines of context (default unified-diff window).
 * - `--binary` is deliberately not passed: the default
 *   "Binary files differ" markers are fine.
 *
 * `head` and `base` originate from the remote request and are placed
 * before the `--` separator, so a value starting with `-` would be parsed
 * by git as an option (for example `--output=<file>`). Such values are
 * rejected here. Pathspecs go after `--` and cannot be read as options.
 *
 * @throws Error when `head`/`base` is not a safe revision string or
 *   `filePaths` is not an array of strings.
 */
function buildGitArgs(args: RunGitArgs): string[] {
  assertSafeRevision('head', args.head);
  // A falsy base (undefined / empty) means "no range", as before.
  if (args.base) assertSafeRevision('base', args.base);

  const paths: unknown = args.filePaths ?? [];
  if (!Array.isArray(paths) || paths.some((p) => typeof p !== 'string')) {
    // Spreading a non-array (e.g. a string) would yield one pathspec per character.
    throw new Error('invalid filePaths: must be an array of strings');
  }
  const pathspecs = paths as string[];
  const range = args.base ? `${args.base}..${args.head}` : null;

  if (args.filesOnly) {
    return range !== null
      ? ['diff', '--name-only', range, '--', ...pathspecs]
      : ['show', '--no-textconv', '--format=', '--name-only', args.head, '--', ...pathspecs];
  }
  return range !== null
    ? ['diff', '--no-textconv', '-U3', range, '--', ...pathspecs]
    : ['show', '--no-textconv', '-U3', '--format=', args.head, '--', ...pathspecs];
}

/** Throw unless `rev` is a non-empty string that git cannot mistake for an option. */
function assertSafeRevision(label: string, rev: unknown): void {
  if (typeof rev !== 'string' || rev.length === 0 || rev.startsWith('-')) {
    throw new Error(
      `invalid ${label} revision: must be a non-empty string that does not start with "-"`,
    );
  }
}
246
+
247
/** Captured stdout of a git process plus a flag for cut-short output. */
interface CappedSpawnResult {
  /** Bytes captured from stdout. */
  data: Buffer;
  /** `true` when the capture was stopped before the process finished normally. */
  truncated: boolean;
}
251
+
252
/**
 * Spawn git in `cwd` and capture stdout up to `MAX_DIFF_BYTES`.
 *
 * - When the cap is reached the output is cut at exactly
 *   `MAX_DIFF_BYTES`, the result is marked `truncated: true`, and the
 *   child is killed right away — nothing past the cap is ever used, so
 *   letting git keep producing output would only waste CPU and I/O.
 * - When the process runs longer than `GIT_TIMEOUT_MS` it is killed and
 *   whatever was captured so far is returned with `truncated: true`.
 * - Rejects only when the process cannot be spawned, or when it exits
 *   non-zero without having been truncated.
 *
 * @param cwd - Working directory for the git process.
 * @param args - Arguments passed to `git`.
 */
function spawnCapped(cwd: string, args: string[]): Promise<CappedSpawnResult> {
  const STDERR_CAP_CHARS = 8192;

  return new Promise((resolve, reject) => {
    const proc = spawn('git', args, {
      cwd,
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    const parts: Buffer[] = [];
    let total = 0;
    let truncated = false;
    let stderrBuf = '';

    // Mark the capture as cut short and stop the child.
    const stopEarly = (): void => {
      truncated = true;
      try {
        proc.kill('SIGKILL');
      } catch {
        /* nothing to kill */
      }
    };

    proc.stdout.on('data', (chunk: Buffer) => {
      if (truncated) return;
      const room = MAX_DIFF_BYTES - total;
      if (chunk.length > room) {
        // Keep only the bytes that still fit under the cap.
        if (room > 0) parts.push(chunk.subarray(0, room));
        total = MAX_DIFF_BYTES;
        stopEarly();
        return;
      }
      parts.push(chunk);
      total += chunk.length;
    });

    proc.stderr.on('data', (chunk: Buffer) => {
      // Cap stderr so a flood doesn't OOM us.
      if (stderrBuf.length < STDERR_CAP_CHARS) {
        stderrBuf = (stderrBuf + chunk.toString('utf-8')).slice(0, STDERR_CAP_CHARS);
      }
    });

    const killTimer = setTimeout(stopEarly, GIT_TIMEOUT_MS);

    proc.on('error', (err) => {
      clearTimeout(killTimer);
      reject(err);
    });

    proc.on('close', (code) => {
      clearTimeout(killTimer);
      // A killed child reports a null code; that is expected when truncated.
      if (code !== 0 && !truncated) {
        reject(
          new Error(
            `git ${args.join(' ')} exited ${code}: ${stderrBuf.trim().slice(0, 200)}`,
          ),
        );
        return;
      }
      resolve({ data: Buffer.concat(parts), truncated });
    });
  });
}
314
+
315
+ // ============================================================================
316
+ // Output parsing
317
+ // ============================================================================
318
+
319
/**
 * Turn `--name-only` output into a list of paths: one entry per line,
 * surrounding whitespace removed, empty lines dropped.
 */
function parseNameOnly(buf: Buffer): string[] {
  const names: string[] = [];
  for (const rawLine of buf.toString('utf-8').split('\n')) {
    const name = rawLine.trim();
    if (name.length > 0) names.push(name);
  }
  return names;
}
326
+
327
/**
 * Collect the paths touched by a git patch, in order of first appearance.
 *
 * Sources, all read from header lines only:
 * - `--- a/X` / `+++ b/Y` label lines. Git appends a TAB to these labels
 *   when the path contains a space; that TAB is stripped. `/dev/null`
 *   sides (new files / deletions) never match and are skipped.
 * - `rename from|to` and `copy from|to` extended headers, so renames
 *   without content changes are still reported (both sides).
 * - `diff --git a/P b/P` when both sides name the same path. That form
 *   can be split unambiguously at its midpoint and covers entries with
 *   no `---`/`+++` pair at all (binary, mode-only, empty files). When the
 *   two sides differ the line is ambiguous for names containing ` b/`
 *   and is ignored in favour of the rename/copy headers.
 *
 * Hunk bodies are skipped using the line counts from each `@@ -a,b +c,d @@`
 * header, so a removed line whose content is `-- a/x` (rendered as
 * `--- a/x`) is not mistaken for a file header.
 *
 * C-quoted paths (`"a/…"`) are not decoded and are skipped. Dedup via Set.
 */
function extractFilesFromDiffHeaders(buf: Buffer): string[] {
  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;
  const fileLabel = /^(?:---|\+\+\+) [ab]\/(.+?)\t?$/;
  const renameOrCopy = /^(?:rename|copy) (?:from|to) (.+)$/;
  const gitHeaderPrefix = 'diff --git ';

  const seen = new Set<string>();
  // Lines still owed to the current hunk on the old / new side.
  let oldLeft = 0;
  let newLeft = 0;

  for (const line of buf.toString('utf-8').split('\n')) {
    if (oldLeft > 0 || newLeft > 0) {
      const marker = line.charAt(0);
      // "\ No newline at end of file" belongs to the hunk but counts for neither side.
      if (marker === '\\') continue;
      // An empty line is a context line when blank-context suppression is on.
      if ((marker === ' ' || marker === '') && oldLeft > 0 && newLeft > 0) {
        oldLeft--;
        newLeft--;
        continue;
      }
      if (marker === '-' && oldLeft > 0) {
        oldLeft--;
        continue;
      }
      if (marker === '+' && newLeft > 0) {
        newLeft--;
        continue;
      }
      // Not a valid hunk line: resynchronise and treat it as a header candidate.
      oldLeft = 0;
      newLeft = 0;
    }

    const hunk = hunkHeader.exec(line);
    if (hunk) {
      // An omitted count means exactly one line.
      oldLeft = hunk[1] === undefined ? 1 : Number(hunk[1]);
      newLeft = hunk[2] === undefined ? 1 : Number(hunk[2]);
      continue;
    }

    if (line.startsWith(gitHeaderPrefix)) {
      const rest = line.slice(gitHeaderPrefix.length);
      const mid = (rest.length - 1) / 2;
      if (Number.isInteger(mid) && rest.charAt(mid) === ' ') {
        const left = rest.slice(0, mid);
        const right = rest.slice(mid + 1);
        if (
          left.length > 2 &&
          left.startsWith('a/') &&
          right.startsWith('b/') &&
          left.slice(2) === right.slice(2)
        ) {
          seen.add(left.slice(2));
        }
      }
      continue;
    }

    const header = fileLabel.exec(line) ?? renameOrCopy.exec(line);
    const path = header?.[1];
    if (path !== undefined && !path.startsWith('"')) seen.add(path);
  }

  return Array.from(seen);
}
351
+
352
+ // ============================================================================
353
+ // Streaming
354
+ // ============================================================================
355
+
356
/**
 * Send a diff that is too large to inline.
 *
 * First emits a `streaming: true` response announcing the chunk stream
 * id, total size and touched files, then one chunk notification per
 * `CHUNK_SIZE_BYTES` slice (base64-encoded). The last chunk additionally
 * carries `final`, the SHA-256 of the whole blob, and `truncated`.
 */
async function streamLargeBlob(
  connection: CascadeDiffServerConnection,
  requestId: string,
  blob: Buffer,
  filesTouched: string[],
  truncated: boolean,
): Promise<void> {
  // The hub's request_id should already be unique, but a random nonce
  // keeps chunk stream ids distinct even if the same request_id shows up
  // twice (e.g. on retries) or several sidecars share one hub.
  const nonce = randomBytes(6).toString('hex');
  const chunkStreamId = `cdiff-${requestId}-${nonce}`;

  await connection.sendNotification(RESPONSE_METHOD, {
    request_id: requestId,
    streaming: true,
    chunk_stream_id: chunkStreamId,
    total_size: blob.length,
    files_touched: filesTouched,
  });

  const digest = createHash('sha256').update(blob).digest('hex');
  const lastSeq = Math.ceil(blob.length / CHUNK_SIZE_BYTES) - 1;

  for (let seq = 0, offset = 0; seq <= lastSeq; seq++, offset += CHUNK_SIZE_BYTES) {
    // `subarray` clamps the end offset to the buffer length.
    const payload: Record<string, unknown> = {
      chunk_stream_id: chunkStreamId,
      seq,
      data: blob.subarray(offset, offset + CHUNK_SIZE_BYTES).toString('base64'),
    };
    if (seq === lastSeq) {
      payload.final = true;
      payload.sha256 = digest;
      payload.truncated = truncated;
    }
    await connection.sendNotification(CHUNK_METHOD, payload);
  }
}
393
+
394
/**
 * Report a failed request to the hub. Errors use the regular response
 * method, with an `error` object in place of the diff fields.
 */
async function sendError(
  connection: CascadeDiffServerConnection,
  requestId: string,
  code: DiffErrorCode,
  message: string,
): Promise<void> {
  const payload = {
    request_id: requestId,
    error: { code, message },
  };
  await connection.sendNotification(RESPONSE_METHOD, payload);
}
@@ -173,6 +173,12 @@ export function createMAPSidecar(
173
173
  canUpdate: true,
174
174
  canList: true,
175
175
  },
176
+ // Diff serving is gated on whether a git-cascade adapter is wired
177
+ // — without it there are no worktrees to shell out against, so
178
+ // declaring canServeDiff would just produce timeouts on the hub.
179
+ ...(gitCascadeAdapter
180
+ ? { cascade: { canServeDiff: true } }
181
+ : {}),
176
182
  workspace: workspaceCapability,
177
183
  },
178
184
  metadata: {
@@ -646,9 +652,15 @@ export function createMAPSidecar(
646
652
  const { setupCascadeActionHandlers } = await import("./cascade-action-handler.js");
647
653
  const actionCleanup = setupCascadeActionHandlers(connection, gitCascadeAdapter);
648
654
 
655
+ // 5c. Inbound diff server — receives cascade/diff.request from hub
656
+ // and replies with cascade/diff.response (+ chunks for large blobs).
657
+ const { setupCascadeDiffServer } = await import("./cascade-diff-server.js");
658
+ const diffCleanup = setupCascadeDiffServer(connection, gitCascadeAdapter);
659
+
649
660
  cascadeBridgeCleanup = () => {
650
661
  cascadeBridge.dispose();
651
662
  actionCleanup();
663
+ diffCleanup();
652
664
  };
653
665
  }
654
666
 
@@ -82,6 +82,63 @@ export async function loadTeam(
82
82
  throw mapToTeamLoadError(err, teamName, teamDir);
83
83
  }
84
84
 
85
+ return finalizeTemplate(template, teamName, roleRegistry);
86
+ }
87
+
88
/**
 * Load a team template from in-memory content (no filesystem).
 *
 * Wire-delivery counterpart to {@link loadTeam}: rather than reading the
 * team files from disk, the template is hydrated through
 * `TemplateLoader.fromObject` from a structured snapshot. Intended for
 * hosts that ship the team config inline at boot.
 *
 * Loader failures are converted with `mapToTeamLoadError`, using
 * `<inline:NAME>` where a directory would normally be reported. The
 * hydrated template then goes through the same `finalizeTemplate` step
 * as the disk-based loader.
 *
 * @param teamName - Logical team name, used in error reporting and
 *   passed on to `finalizeTemplate`.
 * @param content - Inline team content, handed to
 *   `TemplateLoader.fromObject` as-is.
 * @param roleRegistry - Role registry used to resolve external roles.
 */
export async function loadTeamFromContent(
  teamName: string,
  content: {
    manifest: OpenTeamsManifest;
    roles?: Record<string, RoleDefinition>;
    loadouts?: Record<string, unknown>;
    prompts?: Record<string, unknown>;
  },
  roleRegistry: RoleRegistry,
): Promise<TeamManifest> {
  const hydrated = (() => {
    try {
      return TemplateLoader.fromObject(content as never, {
        resolveExternalRole: (name) => mapRegistryRole(roleRegistry, name),
        postProcessRole: (role, manifest) =>
          enrichRoleWithSpawnRules(role, manifest),
      });
    } catch (cause) {
      throw mapToTeamLoadError(cause, teamName, `<inline:${teamName}>`);
    }
  })();

  return finalizeTemplate(hydrated, teamName, roleRegistry);
}
130
+
131
+ /**
132
+ * Shared post-load processing applied to whatever `TemplateLoader`
133
+ * (sync or async, disk or in-memory) produces. Builds the enrichment-
134
+ * enriched role map, assembles the multi-file prompt index, validates
135
+ * communication, and returns the macro-agent `TeamManifest` shape.
136
+ */
137
+ function finalizeTemplate(
138
+ template: ReturnType<typeof TemplateLoader.fromObject>,
139
+ teamName: string,
140
+ roleRegistry: RoleRegistry,
141
+ ): TeamManifest {
85
142
  const manifest = template.manifest;
86
143
  const communication = (manifest.communication ?? {}) as CommunicationConfig;
87
144
  const macroAgent = parseMacroAgentExtensions(
@@ -131,9 +131,7 @@ export class TeamManagerV2 {
131
131
  * @returns The team instance ID
132
132
  */
133
133
  async startTeam(name: string, basePath?: string): Promise<string> {
134
- const { agentManager, inboxAdapter, tasksAdapter, workspaceManager } = this.services;
135
-
136
- // Load template
134
+ const { agentManager } = this.services;
137
135
  const { loadTeam } = await import("./team-loader.js");
138
136
  const roleRegistry = agentManager.getRoleRegistry();
139
137
  const manifest: TeamManifest = await loadTeam(
@@ -141,7 +139,47 @@ export class TeamManagerV2 {
141
139
  roleRegistry,
142
140
  basePath ?? process.cwd()
143
141
  );
142
+ return this.startTeamWithManifest(name, manifest);
143
+ }
144
+
145
/**
 * Start a team from an in-memory manifest snapshot instead of files on
 * disk — for hosts that ship the team config inline at boot. The
 * snapshot is loaded with `loadTeamFromContent` and the result is handed
 * to the same `startTeamWithManifest` flow that {@link startTeam} uses.
 *
 * @param name - Team name.
 * @param content - Inline team content forwarded to `loadTeamFromContent`.
 * @returns Whatever `startTeamWithManifest` resolves to.
 */
async startTeamFromContent(
  name: string,
  content: {
    manifest: import("openteams").TeamManifest;
    roles?: Record<string, import("../roles/types.js").RoleDefinition>;
    loadouts?: Record<string, unknown>;
    prompts?: Record<string, unknown>;
  },
): Promise<string> {
  const agents = this.services.agentManager;
  // Loaded lazily, mirroring the disk-based start path.
  const teamLoader = await import("./team-loader.js");
  const manifest: TeamManifest = await teamLoader.loadTeamFromContent(
    name,
    content,
    agents.getRoleRegistry(),
  );
  return this.startTeamWithManifest(name, manifest);
}
144
171
 
172
+ /**
173
+ * Shared post-load flow: wire optional topology, build the runtime,
174
+ * bootstrap the team's root + companions, install scoped filters,
175
+ * and register the instance. Callable from any loader path
176
+ * (disk-based `startTeam` or wire-based `startTeamFromContent`).
177
+ */
178
+ private async startTeamWithManifest(
179
+ name: string,
180
+ manifest: TeamManifest,
181
+ ): Promise<string> {
182
+ const { agentManager, inboxAdapter, tasksAdapter, workspaceManager } = this.services;
145
183
  // V3: auto-wire TopologyPolicy when the team declares
146
184
  // `macro_agent.workspace`. Requires a WorkspaceManager to be present.
147
185
  if (workspaceManager) {