aiwcli 0.12.6 → 0.12.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/bin/dev.cmd +3 -3
  2. package/bin/dev.js +16 -16
  3. package/bin/run.cmd +3 -3
  4. package/bin/run.js +21 -21
  5. package/dist/commands/branch.js +7 -2
  6. package/dist/lib/bmad-installer.js +37 -37
  7. package/dist/lib/terminal.d.ts +2 -0
  8. package/dist/lib/terminal.js +57 -7
  9. package/dist/templates/CLAUDE.md +205 -205
  10. package/dist/templates/_shared/.claude/commands/handoff-resume.md +12 -12
  11. package/dist/templates/_shared/.claude/commands/handoff.md +12 -12
  12. package/dist/templates/_shared/.claude/settings.json +65 -65
  13. package/dist/templates/_shared/.codex/workflows/handoff.md +226 -226
  14. package/dist/templates/_shared/.windsurf/workflows/handoff.md +226 -226
  15. package/dist/templates/_shared/handoff-system/CLAUDE.md +421 -421
  16. package/dist/templates/_shared/handoff-system/lib/document-generator.ts +215 -215
  17. package/dist/templates/_shared/handoff-system/lib/handoff-reader.ts +158 -158
  18. package/dist/templates/_shared/handoff-system/scripts/resume_handoff.ts +373 -373
  19. package/dist/templates/_shared/handoff-system/scripts/save_handoff.ts +469 -469
  20. package/dist/templates/_shared/handoff-system/workflows/handoff-resume.md +66 -66
  21. package/dist/templates/_shared/handoff-system/workflows/handoff.md +254 -254
  22. package/dist/templates/_shared/hooks-ts/_utils/git-state.ts +2 -2
  23. package/dist/templates/_shared/hooks-ts/archive_plan.ts +159 -159
  24. package/dist/templates/_shared/hooks-ts/context_monitor.ts +147 -147
  25. package/dist/templates/_shared/hooks-ts/file-suggestion.ts +128 -128
  26. package/dist/templates/_shared/hooks-ts/pre_compact.ts +49 -49
  27. package/dist/templates/_shared/hooks-ts/session_end.ts +196 -196
  28. package/dist/templates/_shared/hooks-ts/session_start.ts +163 -163
  29. package/dist/templates/_shared/hooks-ts/task_create_capture.ts +48 -48
  30. package/dist/templates/_shared/hooks-ts/task_update_capture.ts +74 -74
  31. package/dist/templates/_shared/hooks-ts/user_prompt_submit.ts +93 -93
  32. package/dist/templates/_shared/lib-ts/CLAUDE.md +367 -367
  33. package/dist/templates/_shared/lib-ts/base/atomic-write.ts +138 -138
  34. package/dist/templates/_shared/lib-ts/base/constants.ts +303 -303
  35. package/dist/templates/_shared/lib-ts/base/git-state.ts +58 -58
  36. package/dist/templates/_shared/lib-ts/base/hook-utils.ts +582 -582
  37. package/dist/templates/_shared/lib-ts/base/inference.ts +301 -301
  38. package/dist/templates/_shared/lib-ts/base/logger.ts +247 -247
  39. package/dist/templates/_shared/lib-ts/base/state-io.ts +202 -202
  40. package/dist/templates/_shared/lib-ts/base/stop-words.ts +184 -184
  41. package/dist/templates/_shared/lib-ts/base/utils.ts +184 -184
  42. package/dist/templates/_shared/lib-ts/context/context-formatter.ts +566 -566
  43. package/dist/templates/_shared/lib-ts/context/context-selector.ts +524 -524
  44. package/dist/templates/_shared/lib-ts/context/context-store.ts +712 -712
  45. package/dist/templates/_shared/lib-ts/context/plan-manager.ts +312 -312
  46. package/dist/templates/_shared/lib-ts/context/task-tracker.ts +185 -185
  47. package/dist/templates/_shared/lib-ts/package.json +20 -20
  48. package/dist/templates/_shared/lib-ts/templates/formatters.ts +102 -102
  49. package/dist/templates/_shared/lib-ts/templates/plan-context.ts +58 -58
  50. package/dist/templates/_shared/lib-ts/tsconfig.json +13 -13
  51. package/dist/templates/_shared/lib-ts/types.ts +186 -186
  52. package/dist/templates/_shared/scripts/resolve_context.ts +33 -33
  53. package/dist/templates/_shared/scripts/status_line.ts +690 -690
  54. package/dist/templates/cc-native/.claude/commands/cc-native/rlm/ask.md +136 -136
  55. package/dist/templates/cc-native/.claude/commands/cc-native/rlm/index.md +21 -21
  56. package/dist/templates/cc-native/.claude/commands/cc-native/rlm/overview.md +56 -56
  57. package/dist/templates/cc-native/.claude/commands/cc-native/specdev.md +10 -10
  58. package/dist/templates/cc-native/.windsurf/workflows/cc-native/fix.md +8 -8
  59. package/dist/templates/cc-native/.windsurf/workflows/cc-native/implement.md +8 -8
  60. package/dist/templates/cc-native/.windsurf/workflows/cc-native/research.md +8 -8
  61. package/dist/templates/cc-native/CC-NATIVE-README.md +189 -189
  62. package/dist/templates/cc-native/TEMPLATE-SCHEMA.md +304 -304
  63. package/dist/templates/cc-native/_cc-native/agents/CLAUDE.md +143 -143
  64. package/dist/templates/cc-native/_cc-native/agents/PLAN-ORCHESTRATOR.md +213 -213
  65. package/dist/templates/cc-native/_cc-native/agents/plan-questions/PLAN-QUESTIONER.md +70 -70
  66. package/dist/templates/cc-native/_cc-native/cc-native.config.json +96 -96
  67. package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md +247 -247
  68. package/dist/templates/cc-native/_cc-native/hooks/cc-native-plan-review.ts +76 -76
  69. package/dist/templates/cc-native/_cc-native/hooks/enhance_plan_post_subagent.ts +54 -54
  70. package/dist/templates/cc-native/_cc-native/hooks/enhance_plan_post_write.ts +51 -51
  71. package/dist/templates/cc-native/_cc-native/hooks/mark_questions_asked.ts +53 -53
  72. package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts +61 -61
  73. package/dist/templates/cc-native/_cc-native/lib-ts/agent-selection.ts +163 -163
  74. package/dist/templates/cc-native/_cc-native/lib-ts/aggregate-agents.ts +156 -156
  75. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/format.ts +597 -597
  76. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/index.ts +26 -26
  77. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/tracker.ts +107 -107
  78. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/write.ts +119 -119
  79. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts.ts +21 -21
  80. package/dist/templates/cc-native/_cc-native/lib-ts/cc-native-state.ts +319 -319
  81. package/dist/templates/cc-native/_cc-native/lib-ts/cli-output-parser.ts +144 -144
  82. package/dist/templates/cc-native/_cc-native/lib-ts/config.ts +57 -57
  83. package/dist/templates/cc-native/_cc-native/lib-ts/constants.ts +83 -83
  84. package/dist/templates/cc-native/_cc-native/lib-ts/corroboration.ts +119 -119
  85. package/dist/templates/cc-native/_cc-native/lib-ts/debug.ts +79 -79
  86. package/dist/templates/cc-native/_cc-native/lib-ts/graduation.ts +132 -132
  87. package/dist/templates/cc-native/_cc-native/lib-ts/index.ts +116 -116
  88. package/dist/templates/cc-native/_cc-native/lib-ts/json-parser.ts +168 -168
  89. package/dist/templates/cc-native/_cc-native/lib-ts/orchestrator.ts +70 -70
  90. package/dist/templates/cc-native/_cc-native/lib-ts/output-builder.ts +130 -130
  91. package/dist/templates/cc-native/_cc-native/lib-ts/plan-discovery.ts +80 -80
  92. package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts +41 -41
  93. package/dist/templates/cc-native/_cc-native/lib-ts/plan-questions.ts +101 -101
  94. package/dist/templates/cc-native/_cc-native/lib-ts/review-pipeline.ts +511 -511
  95. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/agent.ts +71 -71
  96. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/base/base-agent.ts +217 -217
  97. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/index.ts +12 -12
  98. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/claude-agent.ts +66 -66
  99. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/codex-agent.ts +184 -184
  100. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/gemini-agent.ts +39 -39
  101. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/orchestrator-claude-agent.ts +196 -196
  102. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/schemas.ts +201 -201
  103. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/types.ts +21 -21
  104. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/CLAUDE.md +480 -480
  105. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/embedding-indexer.ts +287 -287
  106. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/hyde.ts +148 -148
  107. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/index.ts +54 -54
  108. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/logger.ts +58 -58
  109. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/ollama-client.ts +208 -208
  110. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/retrieval-pipeline.ts +460 -460
  111. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-indexer.ts +446 -446
  112. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-loader.ts +280 -280
  113. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-searcher.ts +274 -274
  114. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/types.ts +201 -201
  115. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/vector-store.ts +278 -278
  116. package/dist/templates/cc-native/_cc-native/lib-ts/settings.ts +184 -184
  117. package/dist/templates/cc-native/_cc-native/lib-ts/state.ts +275 -275
  118. package/dist/templates/cc-native/_cc-native/lib-ts/tsconfig.json +18 -18
  119. package/dist/templates/cc-native/_cc-native/lib-ts/types.ts +329 -329
  120. package/dist/templates/cc-native/_cc-native/lib-ts/verdict.ts +72 -72
  121. package/dist/templates/cc-native/_cc-native/workflows/specdev.md +9 -9
  122. package/oclif.manifest.json +1 -1
  123. package/package.json +108 -108
  124. package/dist/templates/cc-native/_cc-native/lib-ts/nul +0 -3
@@ -1,460 +1,460 @@
1
- #!/usr/bin/env bun
2
- /**
3
- * Retrieval Pipeline — Semantic search across session transcripts.
4
- *
5
- * Orchestrates a 4-stage pipeline:
6
- * Stage 2: Embed query → KNN search → top chunks
7
- * Stage 3: Parallel haiku summarizers per session
8
- * Stage 4: Sonnet ranker → structured JSON per session
9
- * Stage 5: Sonnet synthesizer → final markdown answer
10
- *
11
- * Usage:
12
- * bun retrieval-pipeline.ts "query" [--top=20] [--project=name]
13
- */
14
-
15
- import { z } from "zod";
16
- import {
17
- VECTOR_TOP_K,
18
- MAX_PARALLEL_SUMMARIZERS,
19
- HYDE_ENABLED,
20
- HYDE_NUM_RESPONSES,
21
- HYDE_MAX_TOKENS,
22
- HYDE_TIMEOUT_MS,
23
- HYDE_FALLBACK_TO_QUERY,
24
- type VectorSearchResult,
25
- type ChunkSummary,
26
- type RankedSession,
27
- type RetrievalResult,
28
- } from "./types.js";
29
- import { logInfo, logWarn, logError, logDebug } from "./logger.js";
30
- import { checkOllamaHealth, embedOne } from "./ollama-client.js";
31
- import { openVectorDb, searchKnn } from "./vector-store.js";
32
- import { loadTranscript } from "./transcript-loader.js";
33
- import { hydeQueryEmbedding } from "./hyde.js";
34
-
35
- const HOOK_NAME = "rlm_retrieve";
36
-
37
- // Dynamic import for inference (crosses package boundary)
38
- let inferenceAsync: typeof import("../../../../_shared/lib-ts/base/inference.js").inferenceAsync;
39
-
40
- try {
41
- const mod = await import("../../../../_shared/lib-ts/base/inference.js");
42
- inferenceAsync = mod.inferenceAsync;
43
- } catch {
44
- // Fallback: warn and provide a stub that always fails
45
- logWarn(HOOK_NAME, "Could not import inferenceAsync, AI stages will fail");
46
- inferenceAsync = async () => ({
47
- success: false,
48
- output: "",
49
- error: "inferenceAsync not available",
50
- latency_ms: 0,
51
- });
52
- }
53
-
54
- // Zod schema for AI ranking response
55
- const RankingItemSchema = z.object({
56
- index: z.number(),
57
- relevant: z.boolean(),
58
- confidence: z.number(),
59
- topics: z.array(z.string()),
60
- key_findings: z.array(z.string()),
61
- });
62
- const RankingsSchema = z.array(RankingItemSchema);
63
-
64
- // ---------------------------------------------------------------------------
65
- // CLI entry
66
- // ---------------------------------------------------------------------------
67
-
68
- const args = process.argv.slice(2);
69
- const query = args.find((a) => !a.startsWith("--"));
70
- const topArg = args.find((a) => a.startsWith("--top="));
71
- const topK = topArg ? parseInt(topArg.split("=")[1], 10) : VECTOR_TOP_K;
72
- const projectArg = args.find((a) => a.startsWith("--project="));
73
- const projectFilter = projectArg ? projectArg.split("=")[1] : undefined;
74
-
75
- if (!query) {
76
- process.stderr.write(
77
- 'Usage: bun retrieval-pipeline.ts "query" [--top=20] [--project=name]\n',
78
- );
79
- process.exitCode = 1;
80
- } else {
81
- runPipeline(query, topK, projectFilter).catch((e) => {
82
- logError(HOOK_NAME, `Fatal: ${e}`, { stderr: true });
83
- process.exitCode = 1;
84
- });
85
- }
86
-
87
- // ---------------------------------------------------------------------------
88
- // Pipeline orchestrator
89
- // ---------------------------------------------------------------------------
90
-
91
- async function runPipeline(
92
- query: string,
93
- topK: number,
94
- project?: string,
95
- ): Promise<void> {
96
- const totalStart = Date.now();
97
- const timings = {
98
- embed_query_ms: 0,
99
- vector_search_ms: 0,
100
- summarize_ms: 0,
101
- rank_ms: 0,
102
- synthesize_ms: 0,
103
- total_ms: 0,
104
- };
105
-
106
- // Pre-flight: check Ollama
107
- const health = await checkOllamaHealth();
108
- if (!health.ok) {
109
- logError(HOOK_NAME, health.error ?? "Unknown Ollama health check error", { stderr: true });
110
- process.exitCode = 1;
111
- return;
112
- }
113
-
114
- // Stage 2: Embed query + KNN search
115
- let t = Date.now();
116
- let queryEmbedding: Float32Array;
117
- let hydeTiming = 0;
118
-
119
- if (HYDE_ENABLED) {
120
- try {
121
- const hydeStart = Date.now();
122
- queryEmbedding = await hydeQueryEmbedding(query, {
123
- numResponses: HYDE_NUM_RESPONSES,
124
- maxTokens: HYDE_MAX_TOKENS,
125
- timeout: HYDE_TIMEOUT_MS,
126
- fallbackToQuery: HYDE_FALLBACK_TO_QUERY,
127
- });
128
- hydeTiming = Date.now() - hydeStart;
129
- logInfo(HOOK_NAME, `HyDE query embedding completed in ${hydeTiming}ms`);
130
- } catch (e) {
131
- logWarn(HOOK_NAME, `HyDE failed: ${e}, falling back to direct query embedding`);
132
- queryEmbedding = await embedOne(query);
133
- }
134
- } else {
135
- queryEmbedding = await embedOne(query);
136
- }
137
-
138
- timings.embed_query_ms = Date.now() - t;
139
- if (hydeTiming > 0) {
140
- (timings as any).hyde_ms = hydeTiming;
141
- }
142
-
143
- t = Date.now();
144
- const db = openVectorDb();
145
- let results: VectorSearchResult[];
146
- try {
147
- results = searchKnn(db, queryEmbedding, topK, project);
148
- } finally {
149
- db.close();
150
- }
151
- timings.vector_search_ms = Date.now() - t;
152
-
153
- if (results.length === 0) {
154
- const empty: RetrievalResult = {
155
- query,
156
- synthesis:
157
- "No results found. Suggestions:\n" +
158
- "- Try a different query\n" +
159
- "- Run `/rlm:embed-index` to build/refresh the vector index\n" +
160
- "- Use `/rlm:search` for keyword-based fallback",
161
- sources: [],
162
- stage_timings: { ...timings, total_ms: Date.now() - totalStart },
163
- };
164
- process.stdout.write(JSON.stringify(empty, null, 2) + "\n");
165
- return;
166
- }
167
-
168
- // Deduplicate by session_id (keep best chunk per session)
169
- const sessionMap = new Map<
170
- string,
171
- { result: VectorSearchResult; chunks: VectorSearchResult[] }
172
- >();
173
- for (const r of results) {
174
- const key = `${r.session_id}:${r.project}`;
175
- const existing = sessionMap.get(key);
176
- if (!existing) {
177
- sessionMap.set(key, { result: r, chunks: [r] });
178
- } else {
179
- existing.chunks.push(r);
180
- if (r.distance < existing.result.distance) {
181
- existing.result = r;
182
- }
183
- }
184
- }
185
- const sessions = Array.from(sessionMap.values());
186
- logInfo(
187
- HOOK_NAME,
188
- `Stage 2: ${results.length} chunks → ${sessions.length} sessions`,
189
- );
190
-
191
- // Stage 3: Parallel haiku summarization
192
- t = Date.now();
193
- const summaries = await summarizeSessions(query, sessions);
194
- timings.summarize_ms = Date.now() - t;
195
-
196
- if (summaries.length === 0) {
197
- const noSummaries: RetrievalResult = {
198
- query,
199
- synthesis: "Found matching chunks but all summarization attempts failed.",
200
- sources: [],
201
- stage_timings: { ...timings, total_ms: Date.now() - totalStart },
202
- };
203
- process.stdout.write(JSON.stringify(noSummaries, null, 2) + "\n");
204
- return;
205
- }
206
-
207
- // Stage 4: Sonnet ranking
208
- t = Date.now();
209
- const ranked = await rankSessions(query, summaries);
210
- timings.rank_ms = Date.now() - t;
211
-
212
- // Stage 5: Sonnet synthesis
213
- t = Date.now();
214
- const relevant = ranked.filter((r) => r.relevant);
215
- let synthesis: string;
216
- if (relevant.length > 0) {
217
- synthesis = await synthesize(query, relevant, summaries);
218
- } else {
219
- synthesis =
220
- "No sessions were deemed relevant to your query.\n" +
221
- "Suggestions:\n" +
222
- "- Try a different or broader query\n" +
223
- "- Use `/rlm:search` for keyword-based fallback";
224
- }
225
- timings.synthesize_ms = Date.now() - t;
226
- timings.total_ms = Date.now() - totalStart;
227
-
228
- const output: RetrievalResult = {
229
- query,
230
- synthesis,
231
- sources: ranked,
232
- stage_timings: timings,
233
- };
234
-
235
- process.stdout.write(JSON.stringify(output, null, 2) + "\n");
236
- }
237
-
238
- // ---------------------------------------------------------------------------
239
- // Stage 3: Parallel haiku summarization
240
- // ---------------------------------------------------------------------------
241
-
242
- async function summarizeSessions(
243
- query: string,
244
- sessions: Array<{
245
- result: VectorSearchResult;
246
- chunks: VectorSearchResult[];
247
- }>,
248
- ): Promise<ChunkSummary[]> {
249
- const results: ChunkSummary[] = [];
250
-
251
- // Process in batches of MAX_PARALLEL_SUMMARIZERS
252
- for (let i = 0; i < sessions.length; i += MAX_PARALLEL_SUMMARIZERS) {
253
- const batch = sessions.slice(i, i + MAX_PARALLEL_SUMMARIZERS);
254
- const promises = batch.map(async (session) => {
255
- try {
256
- return await summarizeOneSession(query, session);
257
- } catch (e) {
258
- logWarn(
259
- HOOK_NAME,
260
- `Summarize failed for ${session.result.session_id}: ${e}`,
261
- );
262
- return null;
263
- }
264
- });
265
-
266
- const batchResults = await Promise.all(promises);
267
- for (const r of batchResults) {
268
- if (r) results.push(r);
269
- }
270
- }
271
-
272
- return results;
273
- }
274
-
275
- async function summarizeOneSession(
276
- query: string,
277
- session: { result: VectorSearchResult; chunks: VectorSearchResult[] },
278
- ): Promise<ChunkSummary | null> {
279
- const best = session.result;
280
-
281
- // Load transcript segment
282
- let content: string;
283
- try {
284
- const loaded = await loadTranscript(
285
- best.source_path,
286
- [best.line_start, best.line_end],
287
- 4000,
288
- );
289
- content = loaded.content;
290
- } catch {
291
- content = `[Could not load transcript. Topic: ${best.topic}]`;
292
- }
293
-
294
- if (!content || content.length < 20) return null;
295
-
296
- const systemPrompt =
297
- "You are a session transcript summarizer. Extract ONLY information relevant to the query. " +
298
- "Mention specific file names, function names, decisions made, and outcomes. " +
299
- "If nothing in the transcript is relevant to the query, respond with exactly: Not relevant. " +
300
- "Keep your summary under 200 words.";
301
-
302
- const userPrompt =
303
- `Query: ${query}\n\n` +
304
- `Session: ${best.session_id} (${best.project}, ${best.date})\n` +
305
- `Topic: ${best.topic}\n\n` +
306
- `Transcript:\n${content}`;
307
-
308
- const result = await inferenceAsync(systemPrompt, userPrompt, "fast", 30);
309
-
310
- if (!result.success || !result.output) {
311
- logWarn(HOOK_NAME, `Summarize inference failed: ${result.error}`);
312
- return null;
313
- }
314
-
315
- if (result.output.trim().toLowerCase() === "not relevant.") {
316
- return null;
317
- }
318
-
319
- return {
320
- session_id: best.session_id,
321
- project: best.project,
322
- date: best.date,
323
- segment_lines: [best.line_start, best.line_end],
324
- summary: result.output.trim(),
325
- source_path: best.source_path,
326
- };
327
- }
328
-
329
- // ---------------------------------------------------------------------------
330
- // Stage 4: Sonnet ranking
331
- // ---------------------------------------------------------------------------
332
-
333
- async function rankSessions(
334
- query: string,
335
- summaries: ChunkSummary[],
336
- ): Promise<RankedSession[]> {
337
- const summaryText = summaries
338
- .map(
339
- (s, i) =>
340
- `[${i + 1}] Session: ${s.session_id} | Project: ${s.project} | Date: ${s.date}\nSummary: ${s.summary}`,
341
- )
342
- .join("\n\n");
343
-
344
- const systemPrompt =
345
- "You are a session relevance ranker. Given a query and session summaries, " +
346
- "evaluate each session's relevance. Output a JSON array where each element has:\n" +
347
- ' { "index": number, "relevant": boolean, "confidence": number (0-1), "topics": string[], "key_findings": string[] }\n' +
348
- "Output ONLY the JSON array, no other text.";
349
-
350
- const userPrompt = `Query: ${query}\n\nSessions:\n${summaryText}`;
351
-
352
- const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);
353
-
354
- if (!result.success || !result.output) {
355
- logWarn(HOOK_NAME, `Rank inference failed: ${result.error}, marking all as relevant`);
356
- return summaries.map((s) => ({
357
- session_id: s.session_id,
358
- project: s.project,
359
- date: s.date,
360
- relevant: true,
361
- confidence: 0.3,
362
- topics: [],
363
- key_findings: [s.summary.slice(0, 200)],
364
- }));
365
- }
366
-
367
- try {
368
- // Extract JSON array from response (may be wrapped in markdown code blocks)
369
- let jsonStr = result.output.trim();
370
- const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
371
- if (codeBlockMatch) {
372
- jsonStr = codeBlockMatch[1].trim();
373
- }
374
-
375
- const rawJson = JSON.parse(jsonStr);
376
- const parseResult = RankingsSchema.safeParse(rawJson);
377
- if (!parseResult.success) {
378
- throw new Error(`Invalid ranking response format: ${parseResult.error.message}`);
379
- }
380
-
381
- const rankings = parseResult.data;
382
-
383
- return rankings.map((r) => {
384
- // Safe array indexing with bounds check
385
- if (r.index < 1 || r.index > summaries.length) {
386
- logWarn(HOOK_NAME, `Rank index ${r.index} out of bounds (1-${summaries.length})`);
387
- return null;
388
- }
389
- const summary = summaries[r.index - 1];
390
- return {
391
- session_id: summary.session_id,
392
- project: summary.project,
393
- date: summary.date,
394
- relevant: r.relevant,
395
- confidence: r.confidence,
396
- topics: r.topics,
397
- key_findings: r.key_findings,
398
- };
399
- }).filter((r): r is RankedSession => r !== null);
400
- } catch (e) {
401
- logWarn(HOOK_NAME, `Rank parse failed: ${e}, marking all as relevant`);
402
- return summaries.map((s) => ({
403
- session_id: s.session_id,
404
- project: s.project,
405
- date: s.date,
406
- relevant: true,
407
- confidence: 0.3,
408
- topics: [],
409
- key_findings: [s.summary.slice(0, 200)],
410
- }));
411
- }
412
- }
413
-
414
- // ---------------------------------------------------------------------------
415
- // Stage 5: Sonnet synthesis
416
- // ---------------------------------------------------------------------------
417
-
418
- async function synthesize(
419
- query: string,
420
- relevant: RankedSession[],
421
- summaries: ChunkSummary[],
422
- ): Promise<string> {
423
- // Build context from relevant sessions
424
- const summaryMap = new Map(summaries.map((s) => [s.session_id, s]));
425
-
426
- const context = relevant
427
- .map((r) => {
428
- const summary = summaryMap.get(r.session_id);
429
- return (
430
- `Session: ${r.session_id} | Project: ${r.project} | Date: ${r.date}\n` +
431
- `Topics: ${r.topics.join(", ")}\n` +
432
- `Key Findings: ${r.key_findings.join("; ")}\n` +
433
- `Full Summary: ${summary?.summary ?? "(no summary)"}`
434
- );
435
- })
436
- .join("\n\n---\n\n");
437
-
438
- const systemPrompt =
439
- "You are a knowledge synthesizer. Given a query and relevant session findings, " +
440
- "produce a coherent markdown answer. Include session citations inline as " +
441
- '"(session: {date}, {project})". Highlight the most recent and relevant information. ' +
442
- "Note any contradictions or evolution across sessions. Be concise but thorough.";
443
-
444
- const userPrompt = `Query: ${query}\n\nRelevant Sessions:\n${context}`;
445
-
446
- const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);
447
-
448
- if (!result.success || !result.output) {
449
- logWarn(HOOK_NAME, `Synthesize inference failed: ${result.error}`);
450
- // Fallback: concatenate key findings
451
- return relevant
452
- .map(
453
- (r) =>
454
- `**${r.date} (${r.project}):** ${r.key_findings.join(". ")}`,
455
- )
456
- .join("\n\n");
457
- }
458
-
459
- return result.output.trim();
460
- }
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Retrieval Pipeline — Semantic search across session transcripts.
4
+ *
5
+ * Orchestrates a 4-stage pipeline:
6
+ * Stage 2: Embed query → KNN search → top chunks
7
+ * Stage 3: Parallel haiku summarizers per session
8
+ * Stage 4: Sonnet ranker → structured JSON per session
9
+ * Stage 5: Sonnet synthesizer → final markdown answer
10
+ *
11
+ * Usage:
12
+ * bun retrieval-pipeline.ts "query" [--top=20] [--project=name]
13
+ */
14
+
15
+ import { z } from "zod";
16
+ import {
17
+ VECTOR_TOP_K,
18
+ MAX_PARALLEL_SUMMARIZERS,
19
+ HYDE_ENABLED,
20
+ HYDE_NUM_RESPONSES,
21
+ HYDE_MAX_TOKENS,
22
+ HYDE_TIMEOUT_MS,
23
+ HYDE_FALLBACK_TO_QUERY,
24
+ type VectorSearchResult,
25
+ type ChunkSummary,
26
+ type RankedSession,
27
+ type RetrievalResult,
28
+ } from "./types.js";
29
+ import { logInfo, logWarn, logError, logDebug } from "./logger.js";
30
+ import { checkOllamaHealth, embedOne } from "./ollama-client.js";
31
+ import { openVectorDb, searchKnn } from "./vector-store.js";
32
+ import { loadTranscript } from "./transcript-loader.js";
33
+ import { hydeQueryEmbedding } from "./hyde.js";
34
+
35
+ const HOOK_NAME = "rlm_retrieve";
36
+
37
+ // Dynamic import for inference (crosses package boundary)
38
+ let inferenceAsync: typeof import("../../../../_shared/lib-ts/base/inference.js").inferenceAsync;
39
+
40
+ try {
41
+ const mod = await import("../../../../_shared/lib-ts/base/inference.js");
42
+ inferenceAsync = mod.inferenceAsync;
43
+ } catch {
44
+ // Fallback: warn and provide a stub that always fails
45
+ logWarn(HOOK_NAME, "Could not import inferenceAsync, AI stages will fail");
46
+ inferenceAsync = async () => ({
47
+ success: false,
48
+ output: "",
49
+ error: "inferenceAsync not available",
50
+ latency_ms: 0,
51
+ });
52
+ }
53
+
54
+ // Zod schema for AI ranking response
55
+ const RankingItemSchema = z.object({
56
+ index: z.number(),
57
+ relevant: z.boolean(),
58
+ confidence: z.number(),
59
+ topics: z.array(z.string()),
60
+ key_findings: z.array(z.string()),
61
+ });
62
+ const RankingsSchema = z.array(RankingItemSchema);
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // CLI entry
66
+ // ---------------------------------------------------------------------------
67
+
68
+ const args = process.argv.slice(2);
69
+ const query = args.find((a) => !a.startsWith("--"));
70
+ const topArg = args.find((a) => a.startsWith("--top="));
71
+ const topK = topArg ? parseInt(topArg.split("=")[1], 10) : VECTOR_TOP_K;
72
+ const projectArg = args.find((a) => a.startsWith("--project="));
73
+ const projectFilter = projectArg ? projectArg.split("=")[1] : undefined;
74
+
75
+ if (!query) {
76
+ process.stderr.write(
77
+ 'Usage: bun retrieval-pipeline.ts "query" [--top=20] [--project=name]\n',
78
+ );
79
+ process.exitCode = 1;
80
+ } else {
81
+ runPipeline(query, topK, projectFilter).catch((e) => {
82
+ logError(HOOK_NAME, `Fatal: ${e}`, { stderr: true });
83
+ process.exitCode = 1;
84
+ });
85
+ }
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // Pipeline orchestrator
89
+ // ---------------------------------------------------------------------------
90
+
91
+ async function runPipeline(
92
+ query: string,
93
+ topK: number,
94
+ project?: string,
95
+ ): Promise<void> {
96
+ const totalStart = Date.now();
97
+ const timings = {
98
+ embed_query_ms: 0,
99
+ vector_search_ms: 0,
100
+ summarize_ms: 0,
101
+ rank_ms: 0,
102
+ synthesize_ms: 0,
103
+ total_ms: 0,
104
+ };
105
+
106
+ // Pre-flight: check Ollama
107
+ const health = await checkOllamaHealth();
108
+ if (!health.ok) {
109
+ logError(HOOK_NAME, health.error ?? "Unknown Ollama health check error", { stderr: true });
110
+ process.exitCode = 1;
111
+ return;
112
+ }
113
+
114
+ // Stage 2: Embed query + KNN search
115
+ let t = Date.now();
116
+ let queryEmbedding: Float32Array;
117
+ let hydeTiming = 0;
118
+
119
+ if (HYDE_ENABLED) {
120
+ try {
121
+ const hydeStart = Date.now();
122
+ queryEmbedding = await hydeQueryEmbedding(query, {
123
+ numResponses: HYDE_NUM_RESPONSES,
124
+ maxTokens: HYDE_MAX_TOKENS,
125
+ timeout: HYDE_TIMEOUT_MS,
126
+ fallbackToQuery: HYDE_FALLBACK_TO_QUERY,
127
+ });
128
+ hydeTiming = Date.now() - hydeStart;
129
+ logInfo(HOOK_NAME, `HyDE query embedding completed in ${hydeTiming}ms`);
130
+ } catch (e) {
131
+ logWarn(HOOK_NAME, `HyDE failed: ${e}, falling back to direct query embedding`);
132
+ queryEmbedding = await embedOne(query);
133
+ }
134
+ } else {
135
+ queryEmbedding = await embedOne(query);
136
+ }
137
+
138
+ timings.embed_query_ms = Date.now() - t;
139
+ if (hydeTiming > 0) {
140
+ (timings as any).hyde_ms = hydeTiming;
141
+ }
142
+
143
+ t = Date.now();
144
+ const db = openVectorDb();
145
+ let results: VectorSearchResult[];
146
+ try {
147
+ results = searchKnn(db, queryEmbedding, topK, project);
148
+ } finally {
149
+ db.close();
150
+ }
151
+ timings.vector_search_ms = Date.now() - t;
152
+
153
+ if (results.length === 0) {
154
+ const empty: RetrievalResult = {
155
+ query,
156
+ synthesis:
157
+ "No results found. Suggestions:\n" +
158
+ "- Try a different query\n" +
159
+ "- Run `/rlm:embed-index` to build/refresh the vector index\n" +
160
+ "- Use `/rlm:search` for keyword-based fallback",
161
+ sources: [],
162
+ stage_timings: { ...timings, total_ms: Date.now() - totalStart },
163
+ };
164
+ process.stdout.write(JSON.stringify(empty, null, 2) + "\n");
165
+ return;
166
+ }
167
+
168
+ // Deduplicate by session_id (keep best chunk per session)
169
+ const sessionMap = new Map<
170
+ string,
171
+ { result: VectorSearchResult; chunks: VectorSearchResult[] }
172
+ >();
173
+ for (const r of results) {
174
+ const key = `${r.session_id}:${r.project}`;
175
+ const existing = sessionMap.get(key);
176
+ if (!existing) {
177
+ sessionMap.set(key, { result: r, chunks: [r] });
178
+ } else {
179
+ existing.chunks.push(r);
180
+ if (r.distance < existing.result.distance) {
181
+ existing.result = r;
182
+ }
183
+ }
184
+ }
185
+ const sessions = Array.from(sessionMap.values());
186
+ logInfo(
187
+ HOOK_NAME,
188
+ `Stage 2: ${results.length} chunks → ${sessions.length} sessions`,
189
+ );
190
+
191
+ // Stage 3: Parallel haiku summarization
192
+ t = Date.now();
193
+ const summaries = await summarizeSessions(query, sessions);
194
+ timings.summarize_ms = Date.now() - t;
195
+
196
+ if (summaries.length === 0) {
197
+ const noSummaries: RetrievalResult = {
198
+ query,
199
+ synthesis: "Found matching chunks but all summarization attempts failed.",
200
+ sources: [],
201
+ stage_timings: { ...timings, total_ms: Date.now() - totalStart },
202
+ };
203
+ process.stdout.write(JSON.stringify(noSummaries, null, 2) + "\n");
204
+ return;
205
+ }
206
+
207
+ // Stage 4: Sonnet ranking
208
+ t = Date.now();
209
+ const ranked = await rankSessions(query, summaries);
210
+ timings.rank_ms = Date.now() - t;
211
+
212
+ // Stage 5: Sonnet synthesis
213
+ t = Date.now();
214
+ const relevant = ranked.filter((r) => r.relevant);
215
+ let synthesis: string;
216
+ if (relevant.length > 0) {
217
+ synthesis = await synthesize(query, relevant, summaries);
218
+ } else {
219
+ synthesis =
220
+ "No sessions were deemed relevant to your query.\n" +
221
+ "Suggestions:\n" +
222
+ "- Try a different or broader query\n" +
223
+ "- Use `/rlm:search` for keyword-based fallback";
224
+ }
225
+ timings.synthesize_ms = Date.now() - t;
226
+ timings.total_ms = Date.now() - totalStart;
227
+
228
+ const output: RetrievalResult = {
229
+ query,
230
+ synthesis,
231
+ sources: ranked,
232
+ stage_timings: timings,
233
+ };
234
+
235
+ process.stdout.write(JSON.stringify(output, null, 2) + "\n");
236
+ }
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Stage 3: Parallel haiku summarization
240
+ // ---------------------------------------------------------------------------
241
+
242
/**
 * Stage 3 driver: summarizes the deduplicated sessions in fixed-size batches.
 *
 * Sessions are taken MAX_PARALLEL_SUMMARIZERS at a time. Every member of a
 * batch is summarized concurrently, and the next batch starts only once the
 * current one has finished. A session whose summarization throws is logged
 * and skipped, as is one for which summarizeOneSession yields null.
 *
 * @param query - Retrieval query forwarded to each summarizer call.
 * @param sessions - One entry per session: its best-matching chunk plus all of its chunks.
 * @returns The summaries that were produced, in the same order as `sessions`.
 */
async function summarizeSessions(
  query: string,
  sessions: Array<{
    result: VectorSearchResult;
    chunks: VectorSearchResult[];
  }>,
): Promise<ChunkSummary[]> {
  // Wraps a single summarization so one failure cannot reject the whole batch.
  const attempt = async (entry: {
    result: VectorSearchResult;
    chunks: VectorSearchResult[];
  }): Promise<ChunkSummary | null> => {
    try {
      return await summarizeOneSession(query, entry);
    } catch (e) {
      logWarn(
        HOOK_NAME,
        `Summarize failed for ${entry.result.session_id}: ${e}`,
      );
      return null;
    }
  };

  const collected: ChunkSummary[] = [];
  let offset = 0;

  while (offset < sessions.length) {
    const group = sessions.slice(offset, offset + MAX_PARALLEL_SUMMARIZERS);
    offset += MAX_PARALLEL_SUMMARIZERS;

    const settled = await Promise.all(group.map(attempt));
    for (const item of settled) {
      // Null marks a skipped or failed session; only real summaries are kept.
      if (item) {
        collected.push(item);
      }
    }
  }

  return collected;
}
274
+
275
/**
 * Summarizes one session's best-matching transcript segment with respect to
 * the query.
 *
 * The segment covered by the best chunk's line range is loaded via
 * loadTranscript (the trailing 4000 is presumably a size cap — confirm against
 * loadTranscript's definition), sent to the "fast" inference tier (the
 * trailing 30 is presumably a timeout — confirm against inferenceAsync), and
 * the reply is packaged as a ChunkSummary.
 *
 * @param query - Retrieval query the summary should focus on.
 * @param session - The session's best chunk (`result`) and all of its chunks.
 * @returns The summary, or null when the content is missing or shorter than
 *   20 characters, inference fails, the reply is blank, or the model declares
 *   the transcript not relevant.
 */
async function summarizeOneSession(
  query: string,
  session: { result: VectorSearchResult; chunks: VectorSearchResult[] },
): Promise<ChunkSummary | null> {
  const best = session.result;

  // Load transcript segment
  let content: string;
  try {
    const loaded = await loadTranscript(
      best.source_path,
      [best.line_start, best.line_end],
      4000,
    );
    content = loaded.content;
  } catch (e) {
    // Best-effort: keep going with a placeholder built from the topic, but
    // record why the transcript was unavailable instead of hiding the error.
    logWarn(
      HOOK_NAME,
      `Transcript load failed for ${best.session_id}: ${e}`,
    );
    content = `[Could not load transcript. Topic: ${best.topic}]`;
  }

  if (!content || content.length < 20) return null;

  const systemPrompt =
    "You are a session transcript summarizer. Extract ONLY information relevant to the query. " +
    "Mention specific file names, function names, decisions made, and outcomes. " +
    "If nothing in the transcript is relevant to the query, respond with exactly: Not relevant. " +
    "Keep your summary under 200 words.";

  const userPrompt =
    `Query: ${query}\n\n` +
    `Session: ${best.session_id} (${best.project}, ${best.date})\n` +
    `Topic: ${best.topic}\n\n` +
    `Transcript:\n${content}`;

  const result = await inferenceAsync(systemPrompt, userPrompt, "fast", 30);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Summarize inference failed: ${result.error}`);
    return null;
  }

  const summary = result.output.trim();

  // A whitespace-only reply carries no information for the later stages.
  if (summary.length === 0) return null;

  // The sentinel in the prompt ends with a period that reads as sentence
  // punctuation, so accept the reply with or without it (case-insensitive).
  if (/^not relevant\.?$/i.test(summary)) {
    return null;
  }

  return {
    session_id: best.session_id,
    project: best.project,
    date: best.date,
    segment_lines: [best.line_start, best.line_end],
    summary,
    source_path: best.source_path,
  };
}
328
+
329
+ // ---------------------------------------------------------------------------
330
+ // Stage 4: Sonnet ranking
331
+ // ---------------------------------------------------------------------------
332
+
333
/**
 * Stage 4: asks the "standard" inference tier to judge each summary's
 * relevance to the query.
 *
 * The summaries are presented to the model as a 1-based numbered list and the
 * reply is expected to be a JSON array (optionally inside a markdown code
 * fence) that validates against RankingsSchema. Each ranking entry is mapped
 * back to its summary through its 1-based `index`.
 *
 * If inference fails, or the reply cannot be parsed or validated, every
 * summary is reported as relevant with confidence 0.3 so the pipeline can
 * still produce an answer.
 *
 * @param query - Retrieval query the sessions are ranked against.
 * @param summaries - Stage 3 summaries, in the order shown to the model.
 * @returns One RankedSession per usable ranking entry. Entries whose index
 *   does not identify a summary are dropped with a warning.
 */
async function rankSessions(
  query: string,
  summaries: ChunkSummary[],
): Promise<RankedSession[]> {
  // Single source of truth for the degraded result used by both failure paths.
  const assumeAllRelevant = (): RankedSession[] =>
    summaries.map((s) => ({
      session_id: s.session_id,
      project: s.project,
      date: s.date,
      relevant: true,
      confidence: 0.3,
      topics: [],
      key_findings: [s.summary.slice(0, 200)],
    }));

  const summaryText = summaries
    .map(
      (s, i) =>
        `[${i + 1}] Session: ${s.session_id} | Project: ${s.project} | Date: ${s.date}\nSummary: ${s.summary}`,
    )
    .join("\n\n");

  const systemPrompt =
    "You are a session relevance ranker. Given a query and session summaries, " +
    "evaluate each session's relevance. Output a JSON array where each element has:\n" +
    ' { "index": number, "relevant": boolean, "confidence": number (0-1), "topics": string[], "key_findings": string[] }\n' +
    "Output ONLY the JSON array, no other text.";

  const userPrompt = `Query: ${query}\n\nSessions:\n${summaryText}`;

  const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Rank inference failed: ${result.error}, marking all as relevant`);
    return assumeAllRelevant();
  }

  try {
    // Extract JSON array from response (may be wrapped in markdown code blocks)
    let jsonStr = result.output.trim();
    const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (codeBlockMatch) {
      jsonStr = codeBlockMatch[1].trim();
    }

    const rawJson = JSON.parse(jsonStr);
    const parseResult = RankingsSchema.safeParse(rawJson);
    if (!parseResult.success) {
      throw new Error(`Invalid ranking response format: ${parseResult.error.message}`);
    }

    const rankings = parseResult.data;

    return rankings
      .map((r) => {
        // The index must be a whole number in 1..summaries.length. Without the
        // integer check a value such as 1.5 would pass the range test, look up
        // an undefined element and throw, discarding every valid ranking.
        if (
          !Number.isInteger(r.index) ||
          r.index < 1 ||
          r.index > summaries.length
        ) {
          logWarn(HOOK_NAME, `Rank index ${r.index} out of bounds (1-${summaries.length})`);
          return null;
        }
        const summary = summaries[r.index - 1];
        return {
          session_id: summary.session_id,
          project: summary.project,
          date: summary.date,
          relevant: r.relevant,
          confidence: r.confidence,
          topics: r.topics,
          key_findings: r.key_findings,
        };
      })
      .filter((r): r is RankedSession => r !== null);
  } catch (e) {
    logWarn(HOOK_NAME, `Rank parse failed: ${e}, marking all as relevant`);
    return assumeAllRelevant();
  }
}
413
+
414
+ // ---------------------------------------------------------------------------
415
+ // Stage 5: Sonnet synthesis
416
+ // ---------------------------------------------------------------------------
417
+
418
/**
 * Stage 5: turns the relevant ranked sessions into a single markdown answer.
 *
 * Each relevant session contributes its topics, key findings and full Stage 3
 * summary to the prompt sent to the "standard" inference tier. If inference
 * fails, the answer falls back to one bolded "date (project)" line per
 * session followed by its key findings.
 *
 * @param query - Retrieval query being answered.
 * @param relevant - Ranked sessions that were marked relevant.
 * @param summaries - All Stage 3 summaries, used to look up each session's full text.
 * @returns The synthesized answer, or the key-findings fallback text.
 */
async function synthesize(
  query: string,
  relevant: RankedSession[],
  summaries: ChunkSummary[],
): Promise<string> {
  // Index summaries by session id AND project. Sessions are deduplicated on
  // that same pair earlier in the pipeline, so a session id alone may be
  // shared by two projects and would return the wrong summary.
  const summaryByKey = new Map<string, ChunkSummary>();
  for (const s of summaries) {
    summaryByKey.set(`${s.session_id}:${s.project}`, s);
  }

  // Build context from relevant sessions
  const context = relevant
    .map((r) => {
      const summary = summaryByKey.get(`${r.session_id}:${r.project}`);
      return (
        `Session: ${r.session_id} | Project: ${r.project} | Date: ${r.date}\n` +
        `Topics: ${r.topics.join(", ")}\n` +
        `Key Findings: ${r.key_findings.join("; ")}\n` +
        `Full Summary: ${summary?.summary ?? "(no summary)"}`
      );
    })
    .join("\n\n---\n\n");

  const systemPrompt =
    "You are a knowledge synthesizer. Given a query and relevant session findings, " +
    "produce a coherent markdown answer. Include session citations inline as " +
    '"(session: {date}, {project})". Highlight the most recent and relevant information. ' +
    "Note any contradictions or evolution across sessions. Be concise but thorough.";

  const userPrompt = `Query: ${query}\n\nRelevant Sessions:\n${context}`;

  const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Synthesize inference failed: ${result.error}`);
    // Fallback: concatenate key findings
    return relevant
      .map(
        (r) =>
          `**${r.date} (${r.project}):** ${r.key_findings.join(". ")}`,
      )
      .join("\n\n");
  }

  return result.output.trim();
}