@gswangg/duncan-cc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/query.ts ADDED
@@ -0,0 +1,626 @@
1
+ /**
2
+ * Duncan Query Dispatch
3
+ *
4
+ * Queries CC sessions using the Anthropic API with structured output
5
+ * via the duncan_response tool.
6
+ */
7
+
8
+ import Anthropic from "@anthropic-ai/sdk";
9
+ import { randomUUID } from "node:crypto";
10
+ import { readFileSync, existsSync } from "node:fs";
11
+ import { join } from "node:path";
12
+ import { homedir } from "node:os";
13
+ import { processSessionFile, processSessionWindows, type PipelineResult, type WindowPipelineResult } from "./pipeline.js";
14
+ import { resolveSessionFilesExcludingSelf, findCallingSession, listAllSessionFiles, type RoutingParams, type RoutingResult } from "./discovery.js";
15
+
16
+ // ============================================================================
17
+ // OAuth token resolution
18
+ // ============================================================================
19
+
20
+ /**
21
+ * Resolve Anthropic auth from:
22
+ * 1. Explicit apiKey/token parameter
23
+ * 2. CC's OAuth credentials (~/.claude/.credentials.json)
24
+ * 3. ANTHROPIC_API_KEY env var
25
+ */
26
+
27
+
28
+
29
// Credential bundle spread into the Anthropic SDK client constructor.
interface ResolvedAuth {
  // Explicit null disables API-key auth when a bearer token is used instead.
  apiKey?: string | null;
  // OAuth access token (bearer auth), e.g. from Claude Code's credentials.
  authToken?: string;
  // Extra headers required by the OAuth endpoint (see oauthClientConfig).
  defaultHeaders?: Record<string, string>;
}
34
+
35
+ function resolveAuth(explicit?: string): ResolvedAuth {
36
+ if (explicit) {
37
+ if (explicit.includes("sk-ant-oat")) {
38
+ return oauthClientConfig(explicit);
39
+ }
40
+ return { apiKey: explicit };
41
+ }
42
+
43
+ // CC's OAuth — primary auth for CC users
44
+ const ccCredsPath = join(homedir(), ".claude", ".credentials.json");
45
+ if (existsSync(ccCredsPath)) {
46
+ try {
47
+ const creds = JSON.parse(readFileSync(ccCredsPath, "utf-8"));
48
+ if (creds.claudeAiOauth?.accessToken) {
49
+ return oauthClientConfig(creds.claudeAiOauth.accessToken);
50
+ }
51
+ } catch {}
52
+ }
53
+
54
+ // Fallback: API key from environment
55
+ if (process.env.ANTHROPIC_API_KEY) return { apiKey: process.env.ANTHROPIC_API_KEY };
56
+
57
+ throw new Error("No Anthropic auth found. Authenticate via Claude Code or set ANTHROPIC_API_KEY.");
58
+ }
59
+
60
+ function oauthClientConfig(token: string): ResolvedAuth {
61
+ return {
62
+ apiKey: null,
63
+ authToken: token,
64
+ defaultHeaders: {
65
+ "accept": "application/json",
66
+ "anthropic-dangerous-direct-browser-access": "true",
67
+ "anthropic-beta": "claude-code-20250219,oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
68
+ "user-agent": "duncan-cc/0.1.0",
69
+ "x-app": "cli",
70
+ },
71
+ };
72
+ }
73
+
74
+ // ============================================================================
75
+ // Duncan Response Tool
76
+ // ============================================================================
77
+
78
// Tool definition that forces the model to answer in a machine-readable shape
// ({ hasContext, answer }) instead of free text; querySingleWindow extracts
// its tool_use block and retries when the model answers in plain text.
const DUNCAN_RESPONSE_TOOL: Anthropic.Tool = {
  name: "duncan_response",
  description: "Provide your answer to the query.",
  input_schema: {
    type: "object" as const,
    properties: {
      hasContext: {
        type: "boolean",
        description: "true if the conversation contained specific information to answer the question, false if it did not",
      },
      answer: {
        type: "string",
        description: "Your answer based on the conversation context, or a brief explanation of why you lack context",
      },
    },
    // Both fields are mandatory; querySingleWindow validates their types.
    required: ["hasContext", "answer"],
  },
};
96
+
97
// Instruction prepended to the caller's question; the trailing blank line
// separates it from the question text. Sent verbatim to the API — do not edit
// the literal without considering prompt-cache invalidation.
const DUNCAN_PREFIX = `Answer solely based on the conversation above. If you don't explicitly have context from the conversation on this topic, say so. Use the duncan_response tool to provide your answer.

`;
100
+
101
+ // ============================================================================
102
+ // Types
103
+ // ============================================================================
104
+
105
/** Structured answer extracted from a duncan_response tool call. */
export interface DuncanResult {
  // true if the conversation contained information answering the question
  hasContext: boolean;
  // The answer, or a brief explanation of why context was lacking.
  answer: string;
}
109
+
110
/** One answer from one compaction window of one session. */
export interface DuncanQueryResult {
  // Batch-level UUID shared by all results of the same query.
  queryId: string;
  // Absolute path of the session's JSONL file.
  sessionFile: string;
  sessionId: string;
  // Which compaction window of the session was queried.
  windowIndex: number;
  // Model used for this window ("unknown" when not recorded).
  model: string;
  result: DuncanResult;
}
118
+
119
/** Aggregate result of a multi-window query, with pagination metadata. */
export interface DuncanBatchResult {
  queryId: string;
  // The question that was asked, echoed back for convenience.
  question: string;
  results: DuncanQueryResult[];
  // Total windows matched (may exceed results.length when paginated).
  totalWindows: number;
  // true when more windows exist beyond offset + limit.
  hasMore: boolean;
  // Pagination offset this batch started at.
  offset: number;
}
127
+
128
+ // ============================================================================
129
+ // Single Session Query
130
+ // ============================================================================
131
+
132
// Number of corrective re-prompts after a response that lacks a valid
// duncan_response tool call (so up to MAX_RETRIES + 1 API calls per window).
const MAX_RETRIES = 3;
133
+
134
+ /**
135
+ * Query a single session window with a question.
136
+ */
137
+ export async function querySingleWindow(
138
+ pipeline: PipelineResult | WindowPipelineResult,
139
+ question: string,
140
+ opts: {
141
+ apiKey?: string;
142
+ model?: string;
143
+ signal?: AbortSignal;
144
+ } = {},
145
+ ): Promise<DuncanResult> {
146
+ const auth = resolveAuth(opts.apiKey);
147
+ const isOAuth = !!auth.authToken;
148
+ const client = new Anthropic({
149
+ ...auth,
150
+ dangerouslyAllowBrowser: true,
151
+ } as any);
152
+ const model = opts.model ?? pipeline.modelInfo?.modelId ?? "claude-sonnet-4-20250514";
153
+
154
+ // Build messages: session context + question
155
+ const messages: Anthropic.MessageParam[] = [
156
+ ...pipeline.messages.map((m) => ({
157
+ role: m.role as "user" | "assistant",
158
+ content: m.content,
159
+ })),
160
+ {
161
+ role: "user" as const,
162
+ content: DUNCAN_PREFIX + question,
163
+ },
164
+ ];
165
+
166
+ // Ensure messages alternate correctly (the question might create user→user)
167
+ const fixedMessages = ensureAlternation(messages);
168
+
169
+ // Add cache_control breakpoints for prompt caching.
170
+ // Strategy: cache the session context (stable across queries), let the
171
+ // duncan query question (last user message) vary without invalidating cache.
172
+ // Place breakpoint on the last content block of the penultimate message.
173
+ addCacheBreakpoints(fixedMessages);
174
+
175
+ // Build system prompt — OAuth requires Claude Code identity prefix
176
+ // Each section gets cache_control for system prompt caching.
177
+ const systemBlocks: Anthropic.TextBlockParam[] = [];
178
+ if (isOAuth) {
179
+ systemBlocks.push({
180
+ type: "text",
181
+ text: "You are Claude Code, Anthropic's official CLI for Claude.",
182
+ cache_control: { type: "ephemeral" },
183
+ } as any);
184
+ }
185
+ if (pipeline.systemPrompt) {
186
+ systemBlocks.push({
187
+ type: "text",
188
+ text: pipeline.systemPrompt,
189
+ cache_control: { type: "ephemeral" },
190
+ } as any);
191
+ }
192
+
193
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
194
+ const response = await client.messages.create({
195
+ model,
196
+ system: systemBlocks.length > 0 ? systemBlocks : undefined,
197
+ messages: fixedMessages,
198
+ tools: [DUNCAN_RESPONSE_TOOL],
199
+ max_tokens: 16384,
200
+ });
201
+
202
+ // Look for duncan_response tool call
203
+ const toolCall = response.content.find(
204
+ (c): c is Anthropic.ToolUseBlock => c.type === "tool_use" && c.name === "duncan_response",
205
+ );
206
+
207
+ if (toolCall) {
208
+ const input = toolCall.input as { hasContext: boolean; answer: string };
209
+ if (typeof input.hasContext === "boolean" && typeof input.answer === "string") {
210
+ return { hasContext: input.hasContext, answer: input.answer };
211
+ }
212
+ }
213
+
214
+ // Retry: ask the model to use the tool
215
+ if (attempt < MAX_RETRIES) {
216
+ fixedMessages.push(
217
+ { role: "assistant", content: response.content },
218
+ {
219
+ role: "user",
220
+ content: "You must respond by calling the duncan_response tool with { hasContext: boolean, answer: string }. Do not respond with plain text.",
221
+ },
222
+ );
223
+ }
224
+ }
225
+
226
+ throw new Error(`Duncan query failed after ${MAX_RETRIES} retries: model did not produce a valid duncan_response tool call`);
227
+ }
228
+
229
+ // ============================================================================
230
+ // Batch Query
231
+ // ============================================================================
232
+
233
+ /**
234
+ * Query multiple sessions with a question.
235
+ */
236
+ export async function queryBatch(
237
+ question: string,
238
+ routing: RoutingParams & { toolUseId?: string },
239
+ opts: {
240
+ apiKey?: string;
241
+ model?: string;
242
+ signal?: AbortSignal;
243
+ batchSize?: number;
244
+ onProgress?: (completed: number, total: number) => void;
245
+ } = {},
246
+ ): Promise<DuncanBatchResult> {
247
+ const queryId = randomUUID();
248
+ const resolved = resolveSessionFilesExcludingSelf(routing);
249
+
250
+ if (resolved.sessions.length === 0) {
251
+ return {
252
+ queryId,
253
+ question,
254
+ results: [],
255
+ totalWindows: 0,
256
+ hasMore: false,
257
+ offset: routing.offset ?? 0,
258
+ };
259
+ }
260
+
261
+ // Process each session into windows
262
+ const targets: Array<{
263
+ sessionFile: string;
264
+ sessionId: string;
265
+ pipeline: WindowPipelineResult;
266
+ }> = [];
267
+
268
+ for (const session of resolved.sessions) {
269
+ try {
270
+ const windows = processSessionWindows(session.path);
271
+ for (const w of windows) {
272
+ if (w.messages.length === 0) continue;
273
+ targets.push({
274
+ sessionFile: session.path,
275
+ sessionId: session.sessionId,
276
+ pipeline: w,
277
+ });
278
+ }
279
+ } catch {
280
+ // Skip unprocessable sessions
281
+ }
282
+ }
283
+
284
+ const batchSize = opts.batchSize ?? 5;
285
+ const results: DuncanQueryResult[] = [];
286
+ let completed = 0;
287
+
288
+ for (let i = 0; i < targets.length; i += batchSize) {
289
+ if (opts.signal?.aborted) break;
290
+
291
+ const batch = targets.slice(i, i + batchSize);
292
+ const batchResults = await Promise.all(
293
+ batch.map(async (target) => {
294
+ try {
295
+ const result = await querySingleWindow(target.pipeline, question, {
296
+ apiKey: opts.apiKey,
297
+ model: opts.model ?? target.pipeline.modelInfo?.modelId,
298
+ signal: opts.signal,
299
+ });
300
+ completed++;
301
+ opts.onProgress?.(completed, targets.length);
302
+ return {
303
+ queryId,
304
+ sessionFile: target.sessionFile,
305
+ sessionId: target.sessionId,
306
+ windowIndex: target.pipeline.windowIndex,
307
+ model: target.pipeline.modelInfo?.modelId ?? "unknown",
308
+ result,
309
+ };
310
+ } catch (err: any) {
311
+ completed++;
312
+ opts.onProgress?.(completed, targets.length);
313
+ return {
314
+ queryId,
315
+ sessionFile: target.sessionFile,
316
+ sessionId: target.sessionId,
317
+ windowIndex: target.pipeline.windowIndex,
318
+ model: target.pipeline.modelInfo?.modelId ?? "unknown",
319
+ result: {
320
+ hasContext: false,
321
+ answer: `Error: ${err.message}`,
322
+ },
323
+ };
324
+ }
325
+ }),
326
+ );
327
+
328
+ results.push(...batchResults);
329
+ }
330
+
331
+ return {
332
+ queryId,
333
+ question,
334
+ results,
335
+ totalWindows: targets.length,
336
+ hasMore: resolved.hasMore,
337
+ offset: routing.offset ?? 0,
338
+ };
339
+ }
340
+
341
+ // ============================================================================
342
+ // Self Query — multiple samples from the active window
343
+ // ============================================================================
344
+
345
+ /**
346
+ * Query the calling session's own active window N times for sampling diversity.
347
+ *
348
+ * Uses a two-wave strategy to leverage prompt caching:
349
+ * 1. Wave 1: Send 1 query to prime the cache (pays full input cost)
350
+ * 2. Wave 2: Send remaining N-1 queries in batches (hit cached prefix)
351
+ *
352
+ * The active session is identified by toolUseId (from MCP _meta).
353
+ */
354
+ export async function querySelf(
355
+ question: string,
356
+ opts: {
357
+ toolUseId: string;
358
+ copies?: number;
359
+ batchSize?: number;
360
+ apiKey?: string;
361
+ model?: string;
362
+ signal?: AbortSignal;
363
+ onProgress?: (completed: number, total: number) => void;
364
+ },
365
+ ): Promise<DuncanBatchResult> {
366
+ const queryId = randomUUID();
367
+ const copies = opts.copies ?? 3;
368
+
369
+ // Find the calling session by toolUseId
370
+ const allSessions = listAllSessionFiles();
371
+ const callingSessionId = findCallingSession(opts.toolUseId, allSessions);
372
+ if (!callingSessionId) {
373
+ return {
374
+ queryId, question, results: [], totalWindows: 0, hasMore: false, offset: 0,
375
+ };
376
+ }
377
+
378
+ const session = allSessions.find(s => s.sessionId === callingSessionId);
379
+ if (!session) {
380
+ return {
381
+ queryId, question, results: [], totalWindows: 0, hasMore: false, offset: 0,
382
+ };
383
+ }
384
+
385
+ // Process the session and get the LAST (active) window
386
+ const windows = processSessionWindows(session.path);
387
+ if (windows.length === 0) {
388
+ return {
389
+ queryId, question, results: [], totalWindows: 0, hasMore: false, offset: 0,
390
+ };
391
+ }
392
+ const activeWindow = windows[windows.length - 1];
393
+ if (activeWindow.messages.length === 0) {
394
+ return {
395
+ queryId, question, results: [], totalWindows: 0, hasMore: false, offset: 0,
396
+ };
397
+ }
398
+
399
+ const total = copies;
400
+ let completed = 0;
401
+ const results: DuncanQueryResult[] = [];
402
+
403
+ const queryOnce = async (): Promise<DuncanQueryResult> => {
404
+ try {
405
+ const result = await querySingleWindow(activeWindow, question, {
406
+ apiKey: opts.apiKey,
407
+ model: opts.model ?? activeWindow.modelInfo?.modelId,
408
+ signal: opts.signal,
409
+ });
410
+ completed++;
411
+ opts.onProgress?.(completed, total);
412
+ return {
413
+ queryId,
414
+ sessionFile: session.path,
415
+ sessionId: session.sessionId,
416
+ windowIndex: activeWindow.windowIndex,
417
+ model: activeWindow.modelInfo?.modelId ?? "unknown",
418
+ result,
419
+ };
420
+ } catch (err: any) {
421
+ completed++;
422
+ opts.onProgress?.(completed, total);
423
+ return {
424
+ queryId,
425
+ sessionFile: session.path,
426
+ sessionId: session.sessionId,
427
+ windowIndex: activeWindow.windowIndex,
428
+ model: activeWindow.modelInfo?.modelId ?? "unknown",
429
+ result: { hasContext: false, answer: `Error: ${err.message}` },
430
+ };
431
+ }
432
+ };
433
+
434
+ // Wave 1: prime the cache with a single query
435
+ results.push(await queryOnce());
436
+ if (opts.signal?.aborted || copies <= 1) {
437
+ return { queryId, question, results, totalWindows: total, hasMore: false, offset: 0 };
438
+ }
439
+
440
+ // Wave 2: remaining copies in batches, hitting cached prefix
441
+ const remaining = copies - 1;
442
+ const batchSize = opts.batchSize ?? 5;
443
+ for (let i = 0; i < remaining; i += batchSize) {
444
+ if (opts.signal?.aborted) break;
445
+ const batchCount = Math.min(batchSize, remaining - i);
446
+ const batchResults = await Promise.all(
447
+ Array.from({ length: batchCount }, () => queryOnce()),
448
+ );
449
+ results.push(...batchResults);
450
+ }
451
+
452
+ return { queryId, question, results, totalWindows: total, hasMore: false, offset: 0 };
453
+ }
454
+
455
+ // ============================================================================
456
+ // Ancestors Query — prior compaction windows of the active session
457
+ // ============================================================================
458
+
459
+ /**
460
+ * Query the calling session's prior compaction windows (excluding active).
461
+ *
462
+ * In CC (no dfork), "ancestors" means the compacted windows of the current
463
+ * session — the context that was summarized away. Returns nothing if the
464
+ * session has no compaction boundaries.
465
+ */
466
+ export async function queryAncestors(
467
+ question: string,
468
+ opts: {
469
+ toolUseId: string;
470
+ limit?: number;
471
+ offset?: number;
472
+ batchSize?: number;
473
+ apiKey?: string;
474
+ model?: string;
475
+ signal?: AbortSignal;
476
+ onProgress?: (completed: number, total: number) => void;
477
+ },
478
+ ): Promise<DuncanBatchResult> {
479
+ const queryId = randomUUID();
480
+ const limit = opts.limit ?? 50;
481
+ const offset = opts.offset ?? 0;
482
+
483
+ // Find the calling session
484
+ const allSessions = listAllSessionFiles();
485
+ const callingSessionId = findCallingSession(opts.toolUseId, allSessions);
486
+ if (!callingSessionId) {
487
+ return { queryId, question, results: [], totalWindows: 0, hasMore: false, offset };
488
+ }
489
+
490
+ const session = allSessions.find(s => s.sessionId === callingSessionId);
491
+ if (!session) {
492
+ return { queryId, question, results: [], totalWindows: 0, hasMore: false, offset };
493
+ }
494
+
495
+ // Get all windows, drop the last (active) one
496
+ const allWindows = processSessionWindows(session.path);
497
+ const ancestorWindows = allWindows.slice(0, -1).filter(w => w.messages.length > 0);
498
+
499
+ if (ancestorWindows.length === 0) {
500
+ return { queryId, question, results: [], totalWindows: 0, hasMore: false, offset };
501
+ }
502
+
503
+ const totalWindows = ancestorWindows.length;
504
+ const page = ancestorWindows.slice(offset, offset + limit);
505
+
506
+ const batchSize = opts.batchSize ?? 5;
507
+ const results: DuncanQueryResult[] = [];
508
+ let completed = 0;
509
+
510
+ for (let i = 0; i < page.length; i += batchSize) {
511
+ if (opts.signal?.aborted) break;
512
+
513
+ const batch = page.slice(i, i + batchSize);
514
+ const batchResults = await Promise.all(
515
+ batch.map(async (window) => {
516
+ try {
517
+ const result = await querySingleWindow(window, question, {
518
+ apiKey: opts.apiKey,
519
+ model: opts.model ?? window.modelInfo?.modelId,
520
+ signal: opts.signal,
521
+ });
522
+ completed++;
523
+ opts.onProgress?.(completed, page.length);
524
+ return {
525
+ queryId,
526
+ sessionFile: session.path,
527
+ sessionId: session.sessionId,
528
+ windowIndex: window.windowIndex,
529
+ model: window.modelInfo?.modelId ?? "unknown",
530
+ result,
531
+ };
532
+ } catch (err: any) {
533
+ completed++;
534
+ opts.onProgress?.(completed, page.length);
535
+ return {
536
+ queryId,
537
+ sessionFile: session.path,
538
+ sessionId: session.sessionId,
539
+ windowIndex: window.windowIndex,
540
+ model: window.modelInfo?.modelId ?? "unknown",
541
+ result: { hasContext: false, answer: `Error: ${err.message}` },
542
+ };
543
+ }
544
+ }),
545
+ );
546
+ results.push(...batchResults);
547
+ }
548
+
549
+ return {
550
+ queryId,
551
+ question,
552
+ results,
553
+ totalWindows,
554
+ hasMore: offset + limit < totalWindows,
555
+ offset,
556
+ };
557
+ }
558
+
559
+ // ============================================================================
560
+ // Helpers
561
+ // ============================================================================
562
+
563
+ /**
564
+ * Add cache_control breakpoints to messages for prompt caching.
565
+ *
566
+ * Places an ephemeral cache breakpoint on the last content block of the
567
+ * penultimate message. This caches all session context while allowing
568
+ * the duncan query (last message) to vary without invalidating the cache.
569
+ *
570
+ * Matches CC's caching strategy (CC API format functions) where the last content block
571
+ * of each message gets cache_control when caching is enabled.
572
+ */
573
+ function addCacheBreakpoints(messages: Anthropic.MessageParam[]): void {
574
+ if (messages.length < 2) return;
575
+
576
+ // Find the penultimate message (last session context message before the duncan query)
577
+ const penultimate = messages[messages.length - 2];
578
+ if (!penultimate) return;
579
+
580
+ const content = penultimate.content;
581
+ if (typeof content === "string") {
582
+ // Convert to block format to add cache_control
583
+ penultimate.content = [
584
+ {
585
+ type: "text" as const,
586
+ text: content,
587
+ cache_control: { type: "ephemeral" as const },
588
+ } as any,
589
+ ];
590
+ } else if (Array.isArray(content) && content.length > 0) {
591
+ // Add cache_control to the last block
592
+ const lastBlock = content[content.length - 1] as any;
593
+ content[content.length - 1] = {
594
+ ...lastBlock,
595
+ cache_control: { type: "ephemeral" as const },
596
+ };
597
+ }
598
+ }
599
+
600
+ /** Ensure messages alternate user/assistant */
601
+ function ensureAlternation(messages: Anthropic.MessageParam[]): Anthropic.MessageParam[] {
602
+ if (messages.length === 0) return messages;
603
+
604
+ const result: Anthropic.MessageParam[] = [messages[0]];
605
+ for (let i = 1; i < messages.length; i++) {
606
+ const prev = result[result.length - 1];
607
+ if (messages[i].role === prev.role) {
608
+ // Merge same-role messages
609
+ const prevContent = Array.isArray(prev.content) ? prev.content : [{ type: "text" as const, text: prev.content }];
610
+ const curContent = Array.isArray(messages[i].content) ? messages[i].content : [{ type: "text" as const, text: messages[i].content as string }];
611
+ result[result.length - 1] = {
612
+ role: prev.role,
613
+ content: [...prevContent, ...curContent] as any,
614
+ };
615
+ } else {
616
+ result.push(messages[i]);
617
+ }
618
+ }
619
+
620
+ // Ensure first message is user
621
+ if (result[0].role !== "user") {
622
+ result.unshift({ role: "user", content: "[Session context follows]" });
623
+ }
624
+
625
+ return result;
626
+ }