@bastani/atomic 0.5.17 → 0.5.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. package/README.md +14 -1
  2. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts +50 -54
  3. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
  4. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts +17 -36
  5. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +1 -1
  6. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts +1 -1
  7. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts +64 -44
  8. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts.map +1 -1
  9. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts +43 -0
  10. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts.map +1 -0
  11. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts +17 -39
  12. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +1 -1
  13. package/package.json +1 -1
  14. package/src/cli.ts +21 -2
  15. package/src/commands/cli/session.test.ts +223 -0
  16. package/src/commands/cli/session.ts +117 -1
  17. package/src/completions/bash.ts +3 -3
  18. package/src/completions/fish.ts +13 -7
  19. package/src/completions/powershell.ts +3 -0
  20. package/src/completions/zsh.ts +2 -1
  21. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +260 -157
  22. package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +224 -125
  23. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.ts +2 -2
  24. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.ts +428 -469
  25. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.ts +115 -0
  26. package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +249 -137
@@ -1,51 +1,33 @@
1
1
  /**
2
2
  * deep-research-codebase / copilot
3
3
  *
4
- * Copilot replica of the Claude deep-research-codebase workflow. The Claude
5
- * version dispatches specialist sub-agents (codebase-locator, codebase-
6
- * analyzer, etc.) inside a single explorer session via `@"name (agent)"`
7
- * syntax — a Claude-specific feature. Copilot sessions are bound to a single
8
- * agent for their entire lifetime, so we keep the SAME graph topology
9
- * (scout ∥ history → explorer-1..N → aggregator) but drive each explorer
10
- * through the locate → analyze → patterns → synthesize sequence inline using
11
- * the default agent's built-in file tools.
4
+ * Copilot replica of the Claude deep-research-codebase workflow. Specialist
5
+ * sub-agents are dispatched as separate headless `ctx.stage()` calls — each
6
+ * binds the SDK's session to a single named agent via `sessionOpts: { agent }`,
7
+ * which is the SDK-native way to spawn a sub-agent on Copilot.
12
8
  *
13
- * Topology (identical to Claude version):
9
+ * Copilot-specific concerns baked in (see references/failure-modes.md):
14
10
  *
15
- * ┌─→ codebase-scout
16
- * parent ─┤
17
- * └─→ research-history
18
- * │
19
- * ▼
20
- * ┌──────────────────────────────────────────────────┐
21
- * │ explorer-1 explorer-2 ... explorer-N │ (Promise.all, headless)
22
- * └──────────────────────────────────────────────────┘
23
- * │
24
- * ▼
25
- * aggregator
11
+ * F5 — every `ctx.stage()` is a FRESH session. Each specialist receives
12
+ * everything it needs (research question, scope, scout overview, and —
13
+ * for layer-2 specialists — verbatim locator output) in its first prompt.
26
14
  *
27
- * Explorers run headless (in-process, no tmux window) — they are transparent
28
- * to the graph, so the visible topology is: [scout, history] → aggregator.
15
+ * • F1 — Copilot's last assistant turn is often empty when the agent ends
16
+ * on a tool call. We use `getAssistantText()` (canonical concatenation
17
+ * of every top-level non-empty assistant turn, ignoring sub-agent
18
+ * `parentToolCallId` traffic) instead of `.at(-1).data.content`.
29
19
  *
30
- * Copilot-specific concerns baked in:
20
+ * • F6 — every prompt explicitly requires trailing prose AFTER any tool
21
+ * call so `getAssistantText()` and downstream `transcript()` reads are
22
+ * never empty.
31
23
  *
24
+ * • F9 — `s.save()` receives `SessionEvent[]` from `s.session.getMessages()`.
32
25
  *
33
- * F5 — every `ctx.stage()` call is a FRESH session with no memory of prior
34
- * stages. We forward the scout overview, history overview, and partition
35
- * assignment explicitly into each explorer's first prompt. The aggregator
36
- * gets the same plus the explorer scratch file paths.
37
- *
38
- * • F9 — `s.save()` receives `SessionEvent[]` via `s.session.getMessages()`
39
- * (Copilot's correct shape). Passing anything else breaks downstream
40
- * `transcript()` reads.
41
- *
42
- * • F6 — every prompt explicitly requires trailing prose AFTER any tool
43
- * call, so `transcript()` is never empty. A Copilot turn whose final
44
- * message is a tool call produces an empty assistant.message terminator
45
- * (F1); trailing prose is our insurance.
26
+ * See claude/index.ts for the full design rationale and topology diagram.
46
27
  */
47
28
 
48
29
  import { defineWorkflow } from "../../../index.ts";
30
+ import type { SessionEvent } from "@github/copilot-sdk";
49
31
  import { mkdir } from "node:fs/promises";
50
32
  import path from "node:path";
51
33
 
@@ -60,41 +42,65 @@ import {
60
42
  } from "../helpers/heuristic.ts";
61
43
  import {
62
44
  buildAggregatorPrompt,
63
- buildExplorerPromptGeneric,
64
- buildHistoryPromptGeneric,
45
+ buildAnalyzerPrompt,
46
+ buildHistoryAnalyzerPrompt,
47
+ buildHistoryLocatorPrompt,
48
+ buildLocatorPrompt,
49
+ buildOnlineResearcherPrompt,
50
+ buildPatternFinderPrompt,
65
51
  buildScoutPrompt,
66
52
  slugifyPrompt,
67
53
  } from "../helpers/prompts.ts";
54
+ import { writeExplorerScratchFile } from "../helpers/scratch.ts";
55
+
56
+ /**
57
+ * Concatenate every top-level assistant turn's non-empty content. The final
58
+ * `assistant.message` of a Copilot turn is often empty when the agent ends
59
+ * on a tool call (F1), and sub-agent traffic is signalled by `parentToolCallId`.
60
+ */
61
+ function getAssistantText(messages: SessionEvent[]): string {
62
+ return messages
63
+ .filter(
64
+ (m): m is Extract<SessionEvent, { type: "assistant.message" }> =>
65
+ m.type === "assistant.message" && !m.data.parentToolCallId,
66
+ )
67
+ .map((m) => m.data.content)
68
+ .filter((c) => c.length > 0)
69
+ .join("\n\n");
70
+ }
68
71
 
69
72
  export default defineWorkflow({
70
- name: "deep-research-codebase",
71
- description:
72
- "Deterministic deep codebase research: scout → LOC-driven parallel explorers → aggregator",
73
- inputs: [
74
- { name: "prompt", type: "text", required: true, description: "research question" },
75
- ],
76
- })
73
+ name: "deep-research-codebase",
74
+ description:
75
+ "Deterministic deep codebase research: scout → per-partition specialist sub-agents → aggregator",
76
+ inputs: [
77
+ {
78
+ name: "prompt",
79
+ type: "text",
80
+ required: true,
81
+ description: "research question",
82
+ },
83
+ ],
84
+ })
77
85
  .for<"copilot">()
78
86
  .run(async (ctx) => {
79
- // Destructure once so every stage below can close over a bare
80
- // `prompt` string without re-reaching into ctx.inputs.
81
87
  const prompt = ctx.inputs.prompt ?? "";
82
88
  const root = getCodebaseRoot();
83
89
  const startedAt = new Date();
84
90
  const isoDate = startedAt.toISOString().slice(0, 10);
85
91
  const slug = slugifyPrompt(prompt);
86
92
 
87
- // ── Stages 1a + 1b: codebase-scout ∥ research-history ──────────────────
88
- const [scout, history] = await Promise.all([
93
+ // ── Stage 1a: codebase-scout ∥ Stage 1b: research-history pipeline ────
94
+ const [scout, historyOverview] = await Promise.all([
89
95
  ctx.stage(
90
96
  {
91
97
  name: "codebase-scout",
92
- description: "Map codebase, count LOC, partition for parallel explorers",
98
+ description:
99
+ "Map codebase, count LOC, partition for parallel specialists",
93
100
  },
94
101
  {},
95
102
  {},
96
103
  async (s) => {
97
- // 1. Deterministic scouting (pure TypeScript — no LLM).
98
104
  const data = scoutCodebase(root);
99
105
  if (data.units.length === 0) {
100
106
  throw new Error(
@@ -103,13 +109,10 @@ export default defineWorkflow({
103
109
  );
104
110
  }
105
111
 
106
- // 2. Heuristic decides explorer count (capped by available units).
107
112
  const targetCount = calculateExplorerCount(data.totalLoc);
108
113
  const partitions = partitionUnits(data.units, targetCount);
109
114
  const actualCount = partitions.length;
110
115
 
111
- // 3. Scratch directory for explorer outputs (timestamped to avoid
112
- // collisions across runs).
113
116
  const scratchDir = path.join(
114
117
  root,
115
118
  "research",
@@ -118,9 +121,6 @@ export default defineWorkflow({
118
121
  );
119
122
  await mkdir(scratchDir, { recursive: true });
120
123
 
121
- // 4. Short LLM call: architectural orientation for downstream
122
- // explorers. The prompt forbids the agent from answering the
123
- // research question — its only job here is to orient.
124
124
  await s.session.send({
125
125
  prompt: buildScoutPrompt({
126
126
  question: prompt,
@@ -146,88 +146,186 @@ export default defineWorkflow({
146
146
  };
147
147
  },
148
148
  ),
149
- ctx.stage(
150
- {
151
- name: "research-history",
152
- description: "Surface prior research from research/ directory",
153
- },
154
- {},
155
- {},
156
- async (s) => {
157
- // The generic history prompt drives a single default-agent session
158
- // through locate → analyze → synthesize inline, instead of Claude's
159
- // sub-agent dispatch.
160
- await s.session.send({
161
- prompt: buildHistoryPromptGeneric({ question: prompt, root }),
162
- });
163
- s.save(await s.session.getMessages());
164
- },
165
- ),
166
- ]);
167
-
168
- const {
169
- partitions,
170
- explorerCount,
171
- scratchDir,
172
- totalLoc,
173
- totalFiles,
174
- } = scout.result;
175
-
176
- // Pull both scout transcripts ONCE at the workflow level so every
177
- // explorer + the aggregator can embed them in their prompts (F5). Both
178
- // stages have completed here (we're past Promise.all), so these reads
179
- // are safe (F13).
180
- const scoutOverview = (await ctx.transcript(scout)).content;
181
- const historyOverview = (await ctx.transcript(history)).content;
182
-
183
- // ── Stage 2: parallel headless explorers ─────────────────────────────────
184
- // Each explorer runs headless (in-process, no tmux pane) via Promise.all.
185
- // They are invisible in the workflow graph but tracked by the background
186
- // task counter in the statusline. Because each session is fresh (F5),
187
- // every piece of context it needs — question, architectural orientation,
188
- // historical context, partition assignment, scratch path — is injected
189
- // into the first prompt via buildExplorerPromptGeneric.
190
- const explorerHandles = await Promise.all(
191
- partitions.map((partition, idx) => {
192
- const i = idx + 1;
193
- const scratchPath = path.join(scratchDir, `explorer-${i}.md`);
194
- return ctx.stage(
149
+ // research-history pipeline: sequential locator → analyzer, both headless.
150
+ (async (): Promise<string> => {
151
+ const historyLocator = await ctx.stage(
195
152
  {
196
- name: `explorer-${i}`,
153
+ name: "history-locator",
197
154
  headless: true,
198
- description: `Explore ${partition
199
- .map((u) => u.path)
200
- .join(", ")} (${partition.reduce((s, u) => s + u.fileCount, 0)} files)`,
155
+ description: "Locate prior research docs (codebase-research-locator)",
201
156
  },
202
157
  {},
158
+ { agent: "codebase-research-locator" },
159
+ async (s) => {
160
+ await s.session.send({
161
+ prompt: buildHistoryLocatorPrompt({ question: prompt, root }),
162
+ });
163
+ const messages = await s.session.getMessages();
164
+ s.save(messages);
165
+ return getAssistantText(messages);
166
+ },
167
+ );
168
+
169
+ const historyAnalyzer = await ctx.stage(
170
+ {
171
+ name: "history-analyzer",
172
+ headless: true,
173
+ description: "Synthesize prior research (codebase-research-analyzer)",
174
+ },
203
175
  {},
176
+ { agent: "codebase-research-analyzer" },
204
177
  async (s) => {
205
178
  await s.session.send({
206
- prompt: buildExplorerPromptGeneric({
179
+ prompt: buildHistoryAnalyzerPrompt({
207
180
  question: prompt,
208
- index: i,
209
- total: explorerCount,
210
- partition,
211
- scoutOverview,
212
- historyOverview,
213
- scratchPath,
181
+ locatorOutput: historyLocator.result,
214
182
  root,
215
183
  }),
216
184
  });
217
- s.save(await s.session.getMessages());
218
-
219
- // Returning structured metadata lets the aggregator stage reach
220
- // each explorer's scratch path without re-parsing transcripts.
221
- return { index: i, scratchPath, partition };
185
+ const messages = await s.session.getMessages();
186
+ s.save(messages);
187
+ return getAssistantText(messages);
222
188
  },
223
189
  );
190
+
191
+ return historyAnalyzer.result;
192
+ })(),
193
+ ]);
194
+
195
+ const { partitions, explorerCount, scratchDir, totalLoc, totalFiles } =
196
+ scout.result;
197
+
198
+ const scoutOverview = (await ctx.transcript(scout)).content;
199
+
200
+ // ── Stage 2: per-partition specialist fan-out ─────────────────────────
201
+ const explorerHandles = await Promise.all(
202
+ partitions.map(async (partition, idx) => {
203
+ const i = idx + 1;
204
+ const scratchPath = path.join(scratchDir, `explorer-${i}.md`);
205
+
206
+ // Layer 1: locator + pattern-finder run independently.
207
+ const [locator, patternFinder] = await Promise.all([
208
+ ctx.stage(
209
+ {
210
+ name: `locator-${i}`,
211
+ headless: true,
212
+ description: `codebase-locator over partition ${i}`,
213
+ },
214
+ {},
215
+ { agent: "codebase-locator" },
216
+ async (s) => {
217
+ await s.session.send({
218
+ prompt: buildLocatorPrompt({
219
+ question: prompt,
220
+ partition,
221
+ root,
222
+ scoutOverview,
223
+ index: i,
224
+ total: explorerCount,
225
+ }),
226
+ });
227
+ const messages = await s.session.getMessages();
228
+ s.save(messages);
229
+ return getAssistantText(messages);
230
+ },
231
+ ),
232
+ ctx.stage(
233
+ {
234
+ name: `pattern-finder-${i}`,
235
+ headless: true,
236
+ description: `codebase-pattern-finder over partition ${i}`,
237
+ },
238
+ {},
239
+ { agent: "codebase-pattern-finder" },
240
+ async (s) => {
241
+ await s.session.send({
242
+ prompt: buildPatternFinderPrompt({
243
+ question: prompt,
244
+ partition,
245
+ root,
246
+ scoutOverview,
247
+ index: i,
248
+ total: explorerCount,
249
+ }),
250
+ });
251
+ const messages = await s.session.getMessages();
252
+ s.save(messages);
253
+ return getAssistantText(messages);
254
+ },
255
+ ),
256
+ ]);
257
+
258
+ const locatorOutput = locator.result;
259
+ const patternsOutput = patternFinder.result;
260
+
261
+ // Layer 2: analyzer + online-researcher consume locator output.
262
+ const [analyzer, onlineResearcher] = await Promise.all([
263
+ ctx.stage(
264
+ {
265
+ name: `analyzer-${i}`,
266
+ headless: true,
267
+ description: `codebase-analyzer over partition ${i}`,
268
+ },
269
+ {},
270
+ { agent: "codebase-analyzer" },
271
+ async (s) => {
272
+ await s.session.send({
273
+ prompt: buildAnalyzerPrompt({
274
+ question: prompt,
275
+ partition,
276
+ locatorOutput,
277
+ root,
278
+ scoutOverview,
279
+ index: i,
280
+ total: explorerCount,
281
+ }),
282
+ });
283
+ const messages = await s.session.getMessages();
284
+ s.save(messages);
285
+ return getAssistantText(messages);
286
+ },
287
+ ),
288
+ ctx.stage(
289
+ {
290
+ name: `online-researcher-${i}`,
291
+ headless: true,
292
+ description: `codebase-online-researcher over partition ${i}`,
293
+ },
294
+ {},
295
+ { agent: "codebase-online-researcher" },
296
+ async (s) => {
297
+ await s.session.send({
298
+ prompt: buildOnlineResearcherPrompt({
299
+ question: prompt,
300
+ partition,
301
+ locatorOutput,
302
+ root,
303
+ index: i,
304
+ total: explorerCount,
305
+ }),
306
+ });
307
+ const messages = await s.session.getMessages();
308
+ s.save(messages);
309
+ return getAssistantText(messages);
310
+ },
311
+ ),
312
+ ]);
313
+
314
+ await writeExplorerScratchFile(scratchPath, {
315
+ index: i,
316
+ total: explorerCount,
317
+ partition,
318
+ locatorOutput,
319
+ patternsOutput,
320
+ analyzerOutput: analyzer.result,
321
+ onlineOutput: onlineResearcher.result,
322
+ });
323
+
324
+ return { index: i, scratchPath, partition };
224
325
  }),
225
326
  );
226
327
 
227
- // ── Stage 3: aggregator ────────────────────────────────────────────────
228
- // Reads explorer findings via FILE PATHS (filesystem-context skill) to
229
- // keep the aggregator's own context lean — we deliberately do NOT inline
230
- // N transcripts into the prompt. Token cost stays roughly constant in N.
328
+ // ── Stage 3: aggregator ───────────────────────────────────────────────
231
329
  const finalPath = path.join(
232
330
  root,
233
331
  "research",
@@ -238,7 +336,8 @@ export default defineWorkflow({
238
336
  await ctx.stage(
239
337
  {
240
338
  name: "aggregator",
241
- description: "Synthesize explorer findings + history into final research doc",
339
+ description:
340
+ "Synthesize partition findings + history into final research doc",
242
341
  },
243
342
  {},
244
343
  {},
@@ -249,7 +348,7 @@ export default defineWorkflow({
249
348
  totalLoc,
250
349
  totalFiles,
251
350
  explorerCount,
252
- explorerFiles: explorerHandles.map((h) => h.result),
351
+ explorerFiles: explorerHandles,
253
352
  finalPath,
254
353
  scoutOverview,
255
354
  historyOverview,
@@ -1,11 +1,11 @@
1
1
  /** Target LOC per explorer sub-agent. */
2
- const LOC_PER_EXPLORER = 2_500;
2
+ const LOC_PER_EXPLORER = 5_000;
3
3
 
4
4
  /**
5
5
  * Determine how many parallel explorer sub-agents to spawn for the
6
6
  * deep-research-codebase workflow, based on lines of code in the codebase.
7
7
  *
8
- * Scales linearly: one explorer per `LOC_PER_EXPLORER` (2.5K) lines of code,
8
+ * Scales linearly: one explorer per `LOC_PER_EXPLORER` (5K) lines of code,
9
9
  * with a floor of 2 for tiny or empty codebases. The actual number of
10
10
  * spawned explorers is still bounded by the number of partition units
11
11
  * the scout finds (see `partitionUnits` in ./scout.ts), so we never get