aiwcli 0.12.3 → 0.12.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/bin/dev.cmd +3 -3
  2. package/bin/dev.js +16 -16
  3. package/bin/run.cmd +3 -3
  4. package/bin/run.js +21 -21
  5. package/dist/commands/branch.js +7 -2
  6. package/dist/lib/bmad-installer.js +37 -37
  7. package/dist/lib/terminal.d.ts +2 -0
  8. package/dist/lib/terminal.js +57 -7
  9. package/dist/templates/CLAUDE.md +205 -205
  10. package/dist/templates/_shared/.claude/commands/handoff-resume.md +12 -64
  11. package/dist/templates/_shared/.claude/commands/handoff.md +12 -198
  12. package/dist/templates/_shared/.claude/settings.json +65 -65
  13. package/dist/templates/_shared/.codex/workflows/handoff.md +226 -226
  14. package/dist/templates/_shared/.windsurf/workflows/handoff.md +226 -226
  15. package/dist/templates/_shared/handoff-system/CLAUDE.md +421 -0
  16. package/dist/templates/_shared/{lib-ts/handoff → handoff-system/lib}/document-generator.ts +215 -216
  17. package/dist/templates/_shared/{lib-ts/handoff → handoff-system/lib}/handoff-reader.ts +157 -158
  18. package/dist/templates/_shared/{scripts → handoff-system/scripts}/resume_handoff.ts +373 -373
  19. package/dist/templates/_shared/{scripts → handoff-system/scripts}/save_handoff.ts +469 -358
  20. package/dist/templates/_shared/handoff-system/workflows/handoff-resume.md +66 -0
  21. package/dist/templates/_shared/{workflows → handoff-system/workflows}/handoff.md +254 -254
  22. package/dist/templates/_shared/hooks-ts/_utils/git-state.ts +2 -2
  23. package/dist/templates/_shared/hooks-ts/archive_plan.ts +159 -159
  24. package/dist/templates/_shared/hooks-ts/context_monitor.ts +147 -147
  25. package/dist/templates/_shared/hooks-ts/file-suggestion.ts +128 -128
  26. package/dist/templates/_shared/hooks-ts/pre_compact.ts +49 -49
  27. package/dist/templates/_shared/hooks-ts/session_end.ts +196 -183
  28. package/dist/templates/_shared/hooks-ts/session_start.ts +163 -151
  29. package/dist/templates/_shared/hooks-ts/task_create_capture.ts +48 -48
  30. package/dist/templates/_shared/hooks-ts/task_update_capture.ts +74 -74
  31. package/dist/templates/_shared/hooks-ts/user_prompt_submit.ts +93 -93
  32. package/dist/templates/_shared/lib-ts/CLAUDE.md +367 -367
  33. package/dist/templates/_shared/lib-ts/base/atomic-write.ts +138 -138
  34. package/dist/templates/_shared/lib-ts/base/constants.ts +303 -303
  35. package/dist/templates/_shared/lib-ts/base/git-state.ts +58 -58
  36. package/dist/templates/_shared/lib-ts/base/hook-utils.ts +582 -582
  37. package/dist/templates/_shared/lib-ts/base/inference.ts +301 -301
  38. package/dist/templates/_shared/lib-ts/base/logger.ts +247 -247
  39. package/dist/templates/_shared/lib-ts/base/state-io.ts +202 -130
  40. package/dist/templates/_shared/lib-ts/base/stop-words.ts +184 -184
  41. package/dist/templates/_shared/lib-ts/base/subprocess-utils.ts +56 -0
  42. package/dist/templates/_shared/lib-ts/base/utils.ts +184 -184
  43. package/dist/templates/_shared/lib-ts/context/context-formatter.ts +566 -560
  44. package/dist/templates/_shared/lib-ts/context/context-selector.ts +524 -515
  45. package/dist/templates/_shared/lib-ts/context/context-store.ts +712 -668
  46. package/dist/templates/_shared/lib-ts/context/plan-manager.ts +312 -312
  47. package/dist/templates/_shared/lib-ts/context/task-tracker.ts +185 -185
  48. package/dist/templates/_shared/lib-ts/package.json +20 -20
  49. package/dist/templates/_shared/lib-ts/templates/formatters.ts +102 -102
  50. package/dist/templates/_shared/lib-ts/templates/plan-context.ts +58 -58
  51. package/dist/templates/_shared/lib-ts/tsconfig.json +13 -13
  52. package/dist/templates/_shared/lib-ts/types.ts +186 -180
  53. package/dist/templates/_shared/scripts/resolve_context.ts +33 -33
  54. package/dist/templates/_shared/scripts/status_line.ts +690 -690
  55. package/dist/templates/cc-native/.claude/commands/{rlm → cc-native/rlm}/ask.md +136 -136
  56. package/dist/templates/cc-native/.claude/commands/{rlm → cc-native/rlm}/index.md +21 -21
  57. package/dist/templates/cc-native/.claude/commands/{rlm → cc-native/rlm}/overview.md +56 -56
  58. package/dist/templates/cc-native/.claude/commands/cc-native/specdev.md +10 -10
  59. package/dist/templates/cc-native/.windsurf/workflows/cc-native/fix.md +8 -8
  60. package/dist/templates/cc-native/.windsurf/workflows/cc-native/implement.md +8 -8
  61. package/dist/templates/cc-native/.windsurf/workflows/cc-native/research.md +8 -8
  62. package/dist/templates/cc-native/CC-NATIVE-README.md +189 -189
  63. package/dist/templates/cc-native/TEMPLATE-SCHEMA.md +304 -304
  64. package/dist/templates/cc-native/_cc-native/agents/CLAUDE.md +143 -143
  65. package/dist/templates/cc-native/_cc-native/agents/PLAN-ORCHESTRATOR.md +213 -213
  66. package/dist/templates/cc-native/_cc-native/agents/plan-questions/PLAN-QUESTIONER.md +70 -70
  67. package/dist/templates/cc-native/_cc-native/cc-native.config.json +96 -96
  68. package/dist/templates/cc-native/_cc-native/hooks/CLAUDE.md +247 -247
  69. package/dist/templates/cc-native/_cc-native/hooks/cc-native-plan-review.ts +76 -76
  70. package/dist/templates/cc-native/_cc-native/hooks/enhance_plan_post_subagent.ts +54 -54
  71. package/dist/templates/cc-native/_cc-native/hooks/enhance_plan_post_write.ts +51 -51
  72. package/dist/templates/cc-native/_cc-native/hooks/mark_questions_asked.ts +53 -53
  73. package/dist/templates/cc-native/_cc-native/hooks/plan_questions_early.ts +61 -61
  74. package/dist/templates/cc-native/_cc-native/lib-ts/agent-selection.ts +163 -163
  75. package/dist/templates/cc-native/_cc-native/lib-ts/aggregate-agents.ts +156 -156
  76. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/format.ts +597 -597
  77. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/index.ts +26 -26
  78. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/tracker.ts +107 -107
  79. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts/write.ts +119 -119
  80. package/dist/templates/cc-native/_cc-native/lib-ts/artifacts.ts +21 -21
  81. package/dist/templates/cc-native/_cc-native/lib-ts/cc-native-state.ts +319 -319
  82. package/dist/templates/cc-native/_cc-native/lib-ts/cli-output-parser.ts +144 -144
  83. package/dist/templates/cc-native/_cc-native/lib-ts/config.ts +57 -57
  84. package/dist/templates/cc-native/_cc-native/lib-ts/constants.ts +83 -83
  85. package/dist/templates/cc-native/_cc-native/lib-ts/corroboration.ts +119 -119
  86. package/dist/templates/cc-native/_cc-native/lib-ts/debug.ts +79 -79
  87. package/dist/templates/cc-native/_cc-native/lib-ts/graduation.ts +132 -132
  88. package/dist/templates/cc-native/_cc-native/lib-ts/index.ts +116 -116
  89. package/dist/templates/cc-native/_cc-native/lib-ts/json-parser.ts +168 -168
  90. package/dist/templates/cc-native/_cc-native/lib-ts/orchestrator.ts +70 -70
  91. package/dist/templates/cc-native/_cc-native/lib-ts/output-builder.ts +130 -130
  92. package/dist/templates/cc-native/_cc-native/lib-ts/plan-discovery.ts +80 -80
  93. package/dist/templates/cc-native/_cc-native/lib-ts/plan-enhancement.ts +41 -41
  94. package/dist/templates/cc-native/_cc-native/lib-ts/plan-questions.ts +101 -101
  95. package/dist/templates/cc-native/_cc-native/lib-ts/review-pipeline.ts +511 -511
  96. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/agent.ts +71 -71
  97. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/base/base-agent.ts +217 -217
  98. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/index.ts +12 -12
  99. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/claude-agent.ts +66 -65
  100. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/codex-agent.ts +184 -184
  101. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/gemini-agent.ts +39 -39
  102. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/providers/orchestrator-claude-agent.ts +196 -195
  103. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/schemas.ts +201 -201
  104. package/dist/templates/cc-native/_cc-native/lib-ts/reviewers/types.ts +21 -21
  105. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/CLAUDE.md +480 -480
  106. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/embedding-indexer.ts +287 -287
  107. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/hyde.ts +148 -148
  108. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/index.ts +54 -54
  109. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/logger.ts +58 -58
  110. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/ollama-client.ts +208 -208
  111. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/retrieval-pipeline.ts +460 -460
  112. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-indexer.ts +446 -447
  113. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-loader.ts +280 -280
  114. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/transcript-searcher.ts +274 -274
  115. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/types.ts +201 -201
  116. package/dist/templates/cc-native/_cc-native/lib-ts/rlm/vector-store.ts +278 -278
  117. package/dist/templates/cc-native/_cc-native/lib-ts/settings.ts +184 -184
  118. package/dist/templates/cc-native/_cc-native/lib-ts/state.ts +275 -275
  119. package/dist/templates/cc-native/_cc-native/lib-ts/tsconfig.json +18 -18
  120. package/dist/templates/cc-native/_cc-native/lib-ts/types.ts +329 -329
  121. package/dist/templates/cc-native/_cc-native/lib-ts/verdict.ts +72 -72
  122. package/dist/templates/cc-native/_cc-native/workflows/specdev.md +9 -9
  123. package/oclif.manifest.json +1 -1
  124. package/package.json +108 -108
  125. package/dist/templates/cc-native/_cc-native/lib-ts/nul +0 -3
@@ -1,460 +1,460 @@
1
- #!/usr/bin/env bun
2
- /**
3
- * Retrieval Pipeline — Semantic search across session transcripts.
4
- *
5
- * Orchestrates a 4-stage pipeline:
6
- * Stage 2: Embed query → KNN search → top chunks
7
- * Stage 3: Parallel haiku summarizers per session
8
- * Stage 4: Sonnet ranker → structured JSON per session
9
- * Stage 5: Sonnet synthesizer → final markdown answer
10
- *
11
- * Usage:
12
- * bun retrieval-pipeline.ts "query" [--top=20] [--project=name]
13
- */
14
-
15
- import { z } from "zod";
16
- import {
17
- VECTOR_TOP_K,
18
- MAX_PARALLEL_SUMMARIZERS,
19
- HYDE_ENABLED,
20
- HYDE_NUM_RESPONSES,
21
- HYDE_MAX_TOKENS,
22
- HYDE_TIMEOUT_MS,
23
- HYDE_FALLBACK_TO_QUERY,
24
- type VectorSearchResult,
25
- type ChunkSummary,
26
- type RankedSession,
27
- type RetrievalResult,
28
- } from "./types.js";
29
- import { logInfo, logWarn, logError, logDebug } from "./logger.js";
30
- import { checkOllamaHealth, embedOne } from "./ollama-client.js";
31
- import { openVectorDb, searchKnn } from "./vector-store.js";
32
- import { loadTranscript } from "./transcript-loader.js";
33
- import { hydeQueryEmbedding } from "./hyde.js";
34
-
35
- const HOOK_NAME = "rlm_retrieve";
36
-
37
- // Dynamic import for inference (crosses package boundary)
38
- let inferenceAsync: typeof import("../../../../_shared/lib-ts/base/inference.js").inferenceAsync;
39
-
40
- try {
41
- const mod = await import("../../../../_shared/lib-ts/base/inference.js");
42
- inferenceAsync = mod.inferenceAsync;
43
- } catch {
44
- // Fallback: warn and provide a stub that always fails
45
- logWarn(HOOK_NAME, "Could not import inferenceAsync, AI stages will fail");
46
- inferenceAsync = async () => ({
47
- success: false,
48
- output: "",
49
- error: "inferenceAsync not available",
50
- latency_ms: 0,
51
- });
52
- }
53
-
54
- // Zod schema for AI ranking response
55
- const RankingItemSchema = z.object({
56
- index: z.number(),
57
- relevant: z.boolean(),
58
- confidence: z.number(),
59
- topics: z.array(z.string()),
60
- key_findings: z.array(z.string()),
61
- });
62
- const RankingsSchema = z.array(RankingItemSchema);
63
-
64
- // ---------------------------------------------------------------------------
65
- // CLI entry
66
- // ---------------------------------------------------------------------------
67
-
68
- const args = process.argv.slice(2);
69
- const query = args.find((a) => !a.startsWith("--"));
70
- const topArg = args.find((a) => a.startsWith("--top="));
71
- const topK = topArg ? parseInt(topArg.split("=")[1], 10) : VECTOR_TOP_K;
72
- const projectArg = args.find((a) => a.startsWith("--project="));
73
- const projectFilter = projectArg ? projectArg.split("=")[1] : undefined;
74
-
75
- if (!query) {
76
- process.stderr.write(
77
- 'Usage: bun retrieval-pipeline.ts "query" [--top=20] [--project=name]\n',
78
- );
79
- process.exitCode = 1;
80
- } else {
81
- runPipeline(query, topK, projectFilter).catch((e) => {
82
- logError(HOOK_NAME, `Fatal: ${e}`, { stderr: true });
83
- process.exitCode = 1;
84
- });
85
- }
86
-
87
- // ---------------------------------------------------------------------------
88
- // Pipeline orchestrator
89
- // ---------------------------------------------------------------------------
90
-
91
- async function runPipeline(
92
- query: string,
93
- topK: number,
94
- project?: string,
95
- ): Promise<void> {
96
- const totalStart = Date.now();
97
- const timings = {
98
- embed_query_ms: 0,
99
- vector_search_ms: 0,
100
- summarize_ms: 0,
101
- rank_ms: 0,
102
- synthesize_ms: 0,
103
- total_ms: 0,
104
- };
105
-
106
- // Pre-flight: check Ollama
107
- const health = await checkOllamaHealth();
108
- if (!health.ok) {
109
- logError(HOOK_NAME, health.error ?? "Unknown Ollama health check error", { stderr: true });
110
- process.exitCode = 1;
111
- return;
112
- }
113
-
114
- // Stage 2: Embed query + KNN search
115
- let t = Date.now();
116
- let queryEmbedding: Float32Array;
117
- let hydeTiming = 0;
118
-
119
- if (HYDE_ENABLED) {
120
- try {
121
- const hydeStart = Date.now();
122
- queryEmbedding = await hydeQueryEmbedding(query, {
123
- numResponses: HYDE_NUM_RESPONSES,
124
- maxTokens: HYDE_MAX_TOKENS,
125
- timeout: HYDE_TIMEOUT_MS,
126
- fallbackToQuery: HYDE_FALLBACK_TO_QUERY,
127
- });
128
- hydeTiming = Date.now() - hydeStart;
129
- logInfo(HOOK_NAME, `HyDE query embedding completed in ${hydeTiming}ms`);
130
- } catch (e) {
131
- logWarn(HOOK_NAME, `HyDE failed: ${e}, falling back to direct query embedding`);
132
- queryEmbedding = await embedOne(query);
133
- }
134
- } else {
135
- queryEmbedding = await embedOne(query);
136
- }
137
-
138
- timings.embed_query_ms = Date.now() - t;
139
- if (hydeTiming > 0) {
140
- (timings as any).hyde_ms = hydeTiming;
141
- }
142
-
143
- t = Date.now();
144
- const db = openVectorDb();
145
- let results: VectorSearchResult[];
146
- try {
147
- results = searchKnn(db, queryEmbedding, topK, project);
148
- } finally {
149
- db.close();
150
- }
151
- timings.vector_search_ms = Date.now() - t;
152
-
153
- if (results.length === 0) {
154
- const empty: RetrievalResult = {
155
- query,
156
- synthesis:
157
- "No results found. Suggestions:\n" +
158
- "- Try a different query\n" +
159
- "- Run `/rlm:embed-index` to build/refresh the vector index\n" +
160
- "- Use `/rlm:search` for keyword-based fallback",
161
- sources: [],
162
- stage_timings: { ...timings, total_ms: Date.now() - totalStart },
163
- };
164
- process.stdout.write(JSON.stringify(empty, null, 2) + "\n");
165
- return;
166
- }
167
-
168
- // Deduplicate by session_id (keep best chunk per session)
169
- const sessionMap = new Map<
170
- string,
171
- { result: VectorSearchResult; chunks: VectorSearchResult[] }
172
- >();
173
- for (const r of results) {
174
- const key = `${r.session_id}:${r.project}`;
175
- const existing = sessionMap.get(key);
176
- if (!existing) {
177
- sessionMap.set(key, { result: r, chunks: [r] });
178
- } else {
179
- existing.chunks.push(r);
180
- if (r.distance < existing.result.distance) {
181
- existing.result = r;
182
- }
183
- }
184
- }
185
- const sessions = Array.from(sessionMap.values());
186
- logInfo(
187
- HOOK_NAME,
188
- `Stage 2: ${results.length} chunks → ${sessions.length} sessions`,
189
- );
190
-
191
- // Stage 3: Parallel haiku summarization
192
- t = Date.now();
193
- const summaries = await summarizeSessions(query, sessions);
194
- timings.summarize_ms = Date.now() - t;
195
-
196
- if (summaries.length === 0) {
197
- const noSummaries: RetrievalResult = {
198
- query,
199
- synthesis: "Found matching chunks but all summarization attempts failed.",
200
- sources: [],
201
- stage_timings: { ...timings, total_ms: Date.now() - totalStart },
202
- };
203
- process.stdout.write(JSON.stringify(noSummaries, null, 2) + "\n");
204
- return;
205
- }
206
-
207
- // Stage 4: Sonnet ranking
208
- t = Date.now();
209
- const ranked = await rankSessions(query, summaries);
210
- timings.rank_ms = Date.now() - t;
211
-
212
- // Stage 5: Sonnet synthesis
213
- t = Date.now();
214
- const relevant = ranked.filter((r) => r.relevant);
215
- let synthesis: string;
216
- if (relevant.length > 0) {
217
- synthesis = await synthesize(query, relevant, summaries);
218
- } else {
219
- synthesis =
220
- "No sessions were deemed relevant to your query.\n" +
221
- "Suggestions:\n" +
222
- "- Try a different or broader query\n" +
223
- "- Use `/rlm:search` for keyword-based fallback";
224
- }
225
- timings.synthesize_ms = Date.now() - t;
226
- timings.total_ms = Date.now() - totalStart;
227
-
228
- const output: RetrievalResult = {
229
- query,
230
- synthesis,
231
- sources: ranked,
232
- stage_timings: timings,
233
- };
234
-
235
- process.stdout.write(JSON.stringify(output, null, 2) + "\n");
236
- }
237
-
238
- // ---------------------------------------------------------------------------
239
- // Stage 3: Parallel haiku summarization
240
- // ---------------------------------------------------------------------------
241
-
242
- async function summarizeSessions(
243
- query: string,
244
- sessions: Array<{
245
- result: VectorSearchResult;
246
- chunks: VectorSearchResult[];
247
- }>,
248
- ): Promise<ChunkSummary[]> {
249
- const results: ChunkSummary[] = [];
250
-
251
- // Process in batches of MAX_PARALLEL_SUMMARIZERS
252
- for (let i = 0; i < sessions.length; i += MAX_PARALLEL_SUMMARIZERS) {
253
- const batch = sessions.slice(i, i + MAX_PARALLEL_SUMMARIZERS);
254
- const promises = batch.map(async (session) => {
255
- try {
256
- return await summarizeOneSession(query, session);
257
- } catch (e) {
258
- logWarn(
259
- HOOK_NAME,
260
- `Summarize failed for ${session.result.session_id}: ${e}`,
261
- );
262
- return null;
263
- }
264
- });
265
-
266
- const batchResults = await Promise.all(promises);
267
- for (const r of batchResults) {
268
- if (r) results.push(r);
269
- }
270
- }
271
-
272
- return results;
273
- }
274
-
275
- async function summarizeOneSession(
276
- query: string,
277
- session: { result: VectorSearchResult; chunks: VectorSearchResult[] },
278
- ): Promise<ChunkSummary | null> {
279
- const best = session.result;
280
-
281
- // Load transcript segment
282
- let content: string;
283
- try {
284
- const loaded = await loadTranscript(
285
- best.source_path,
286
- [best.line_start, best.line_end],
287
- 4000,
288
- );
289
- content = loaded.content;
290
- } catch {
291
- content = `[Could not load transcript. Topic: ${best.topic}]`;
292
- }
293
-
294
- if (!content || content.length < 20) return null;
295
-
296
- const systemPrompt =
297
- "You are a session transcript summarizer. Extract ONLY information relevant to the query. " +
298
- "Mention specific file names, function names, decisions made, and outcomes. " +
299
- "If nothing in the transcript is relevant to the query, respond with exactly: Not relevant. " +
300
- "Keep your summary under 200 words.";
301
-
302
- const userPrompt =
303
- `Query: ${query}\n\n` +
304
- `Session: ${best.session_id} (${best.project}, ${best.date})\n` +
305
- `Topic: ${best.topic}\n\n` +
306
- `Transcript:\n${content}`;
307
-
308
- const result = await inferenceAsync(systemPrompt, userPrompt, "fast", 30);
309
-
310
- if (!result.success || !result.output) {
311
- logWarn(HOOK_NAME, `Summarize inference failed: ${result.error}`);
312
- return null;
313
- }
314
-
315
- if (result.output.trim().toLowerCase() === "not relevant.") {
316
- return null;
317
- }
318
-
319
- return {
320
- session_id: best.session_id,
321
- project: best.project,
322
- date: best.date,
323
- segment_lines: [best.line_start, best.line_end],
324
- summary: result.output.trim(),
325
- source_path: best.source_path,
326
- };
327
- }
328
-
329
- // ---------------------------------------------------------------------------
330
- // Stage 4: Sonnet ranking
331
- // ---------------------------------------------------------------------------
332
-
333
- async function rankSessions(
334
- query: string,
335
- summaries: ChunkSummary[],
336
- ): Promise<RankedSession[]> {
337
- const summaryText = summaries
338
- .map(
339
- (s, i) =>
340
- `[${i + 1}] Session: ${s.session_id} | Project: ${s.project} | Date: ${s.date}\nSummary: ${s.summary}`,
341
- )
342
- .join("\n\n");
343
-
344
- const systemPrompt =
345
- "You are a session relevance ranker. Given a query and session summaries, " +
346
- "evaluate each session's relevance. Output a JSON array where each element has:\n" +
347
- ' { "index": number, "relevant": boolean, "confidence": number (0-1), "topics": string[], "key_findings": string[] }\n' +
348
- "Output ONLY the JSON array, no other text.";
349
-
350
- const userPrompt = `Query: ${query}\n\nSessions:\n${summaryText}`;
351
-
352
- const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);
353
-
354
- if (!result.success || !result.output) {
355
- logWarn(HOOK_NAME, `Rank inference failed: ${result.error}, marking all as relevant`);
356
- return summaries.map((s) => ({
357
- session_id: s.session_id,
358
- project: s.project,
359
- date: s.date,
360
- relevant: true,
361
- confidence: 0.3,
362
- topics: [],
363
- key_findings: [s.summary.slice(0, 200)],
364
- }));
365
- }
366
-
367
- try {
368
- // Extract JSON array from response (may be wrapped in markdown code blocks)
369
- let jsonStr = result.output.trim();
370
- const codeBlockMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
371
- if (codeBlockMatch) {
372
- jsonStr = codeBlockMatch[1].trim();
373
- }
374
-
375
- const rawJson = JSON.parse(jsonStr);
376
- const parseResult = RankingsSchema.safeParse(rawJson);
377
- if (!parseResult.success) {
378
- throw new Error(`Invalid ranking response format: ${parseResult.error.message}`);
379
- }
380
-
381
- const rankings = parseResult.data;
382
-
383
- return rankings.map((r) => {
384
- // Safe array indexing with bounds check
385
- if (r.index < 1 || r.index > summaries.length) {
386
- logWarn(HOOK_NAME, `Rank index ${r.index} out of bounds (1-${summaries.length})`);
387
- return null;
388
- }
389
- const summary = summaries[r.index - 1];
390
- return {
391
- session_id: summary.session_id,
392
- project: summary.project,
393
- date: summary.date,
394
- relevant: r.relevant,
395
- confidence: r.confidence,
396
- topics: r.topics,
397
- key_findings: r.key_findings,
398
- };
399
- }).filter((r): r is RankedSession => r !== null);
400
- } catch (e) {
401
- logWarn(HOOK_NAME, `Rank parse failed: ${e}, marking all as relevant`);
402
- return summaries.map((s) => ({
403
- session_id: s.session_id,
404
- project: s.project,
405
- date: s.date,
406
- relevant: true,
407
- confidence: 0.3,
408
- topics: [],
409
- key_findings: [s.summary.slice(0, 200)],
410
- }));
411
- }
412
- }
413
-
414
- // ---------------------------------------------------------------------------
415
- // Stage 5: Sonnet synthesis
416
- // ---------------------------------------------------------------------------
417
-
418
- async function synthesize(
419
- query: string,
420
- relevant: RankedSession[],
421
- summaries: ChunkSummary[],
422
- ): Promise<string> {
423
- // Build context from relevant sessions
424
- const summaryMap = new Map(summaries.map((s) => [s.session_id, s]));
425
-
426
- const context = relevant
427
- .map((r) => {
428
- const summary = summaryMap.get(r.session_id);
429
- return (
430
- `Session: ${r.session_id} | Project: ${r.project} | Date: ${r.date}\n` +
431
- `Topics: ${r.topics.join(", ")}\n` +
432
- `Key Findings: ${r.key_findings.join("; ")}\n` +
433
- `Full Summary: ${summary?.summary ?? "(no summary)"}`
434
- );
435
- })
436
- .join("\n\n---\n\n");
437
-
438
- const systemPrompt =
439
- "You are a knowledge synthesizer. Given a query and relevant session findings, " +
440
- "produce a coherent markdown answer. Include session citations inline as " +
441
- '"(session: {date}, {project})". Highlight the most recent and relevant information. ' +
442
- "Note any contradictions or evolution across sessions. Be concise but thorough.";
443
-
444
- const userPrompt = `Query: ${query}\n\nRelevant Sessions:\n${context}`;
445
-
446
- const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);
447
-
448
- if (!result.success || !result.output) {
449
- logWarn(HOOK_NAME, `Synthesize inference failed: ${result.error}`);
450
- // Fallback: concatenate key findings
451
- return relevant
452
- .map(
453
- (r) =>
454
- `**${r.date} (${r.project}):** ${r.key_findings.join(". ")}`,
455
- )
456
- .join("\n\n");
457
- }
458
-
459
- return result.output.trim();
460
- }
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Retrieval Pipeline — Semantic search across session transcripts.
4
+ *
5
+ * Orchestrates a 4-stage pipeline:
6
+ * Stage 2: Embed query → KNN search → top chunks
7
+ * Stage 3: Parallel haiku summarizers per session
8
+ * Stage 4: Sonnet ranker → structured JSON per session
9
+ * Stage 5: Sonnet synthesizer → final markdown answer
10
+ *
11
+ * Usage:
12
+ * bun retrieval-pipeline.ts "query" [--top=20] [--project=name]
13
+ */
14
+
15
+ import { z } from "zod";
16
+ import {
17
+ VECTOR_TOP_K,
18
+ MAX_PARALLEL_SUMMARIZERS,
19
+ HYDE_ENABLED,
20
+ HYDE_NUM_RESPONSES,
21
+ HYDE_MAX_TOKENS,
22
+ HYDE_TIMEOUT_MS,
23
+ HYDE_FALLBACK_TO_QUERY,
24
+ type VectorSearchResult,
25
+ type ChunkSummary,
26
+ type RankedSession,
27
+ type RetrievalResult,
28
+ } from "./types.js";
29
+ import { logInfo, logWarn, logError, logDebug } from "./logger.js";
30
+ import { checkOllamaHealth, embedOne } from "./ollama-client.js";
31
+ import { openVectorDb, searchKnn } from "./vector-store.js";
32
+ import { loadTranscript } from "./transcript-loader.js";
33
+ import { hydeQueryEmbedding } from "./hyde.js";
34
+
35
+ const HOOK_NAME = "rlm_retrieve";
36
+
37
+ // Dynamic import for inference (crosses package boundary)
38
+ let inferenceAsync: typeof import("../../../../_shared/lib-ts/base/inference.js").inferenceAsync;
39
+
40
+ try {
41
+ const mod = await import("../../../../_shared/lib-ts/base/inference.js");
42
+ inferenceAsync = mod.inferenceAsync;
43
+ } catch {
44
+ // Fallback: warn and provide a stub that always fails
45
+ logWarn(HOOK_NAME, "Could not import inferenceAsync, AI stages will fail");
46
+ inferenceAsync = async () => ({
47
+ success: false,
48
+ output: "",
49
+ error: "inferenceAsync not available",
50
+ latency_ms: 0,
51
+ });
52
+ }
53
+
54
+ // Zod schema for AI ranking response
55
+ const RankingItemSchema = z.object({
56
+ index: z.number(),
57
+ relevant: z.boolean(),
58
+ confidence: z.number(),
59
+ topics: z.array(z.string()),
60
+ key_findings: z.array(z.string()),
61
+ });
62
+ const RankingsSchema = z.array(RankingItemSchema);
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // CLI entry
66
+ // ---------------------------------------------------------------------------
67
+
68
+ const args = process.argv.slice(2);
69
+ const query = args.find((a) => !a.startsWith("--"));
70
+ const topArg = args.find((a) => a.startsWith("--top="));
71
+ const topK = topArg ? parseInt(topArg.split("=")[1], 10) : VECTOR_TOP_K;
72
+ const projectArg = args.find((a) => a.startsWith("--project="));
73
+ const projectFilter = projectArg ? projectArg.split("=")[1] : undefined;
74
+
75
+ if (!query) {
76
+ process.stderr.write(
77
+ 'Usage: bun retrieval-pipeline.ts "query" [--top=20] [--project=name]\n',
78
+ );
79
+ process.exitCode = 1;
80
+ } else {
81
+ runPipeline(query, topK, projectFilter).catch((e) => {
82
+ logError(HOOK_NAME, `Fatal: ${e}`, { stderr: true });
83
+ process.exitCode = 1;
84
+ });
85
+ }
86
+
87
+ // ---------------------------------------------------------------------------
88
+ // Pipeline orchestrator
89
+ // ---------------------------------------------------------------------------
90
+
91
+ async function runPipeline(
92
+ query: string,
93
+ topK: number,
94
+ project?: string,
95
+ ): Promise<void> {
96
+ const totalStart = Date.now();
97
+ const timings = {
98
+ embed_query_ms: 0,
99
+ vector_search_ms: 0,
100
+ summarize_ms: 0,
101
+ rank_ms: 0,
102
+ synthesize_ms: 0,
103
+ total_ms: 0,
104
+ };
105
+
106
+ // Pre-flight: check Ollama
107
+ const health = await checkOllamaHealth();
108
+ if (!health.ok) {
109
+ logError(HOOK_NAME, health.error ?? "Unknown Ollama health check error", { stderr: true });
110
+ process.exitCode = 1;
111
+ return;
112
+ }
113
+
114
+ // Stage 2: Embed query + KNN search
115
+ let t = Date.now();
116
+ let queryEmbedding: Float32Array;
117
+ let hydeTiming = 0;
118
+
119
+ if (HYDE_ENABLED) {
120
+ try {
121
+ const hydeStart = Date.now();
122
+ queryEmbedding = await hydeQueryEmbedding(query, {
123
+ numResponses: HYDE_NUM_RESPONSES,
124
+ maxTokens: HYDE_MAX_TOKENS,
125
+ timeout: HYDE_TIMEOUT_MS,
126
+ fallbackToQuery: HYDE_FALLBACK_TO_QUERY,
127
+ });
128
+ hydeTiming = Date.now() - hydeStart;
129
+ logInfo(HOOK_NAME, `HyDE query embedding completed in ${hydeTiming}ms`);
130
+ } catch (e) {
131
+ logWarn(HOOK_NAME, `HyDE failed: ${e}, falling back to direct query embedding`);
132
+ queryEmbedding = await embedOne(query);
133
+ }
134
+ } else {
135
+ queryEmbedding = await embedOne(query);
136
+ }
137
+
138
+ timings.embed_query_ms = Date.now() - t;
139
+ if (hydeTiming > 0) {
140
+ (timings as any).hyde_ms = hydeTiming;
141
+ }
142
+
143
+ t = Date.now();
144
+ const db = openVectorDb();
145
+ let results: VectorSearchResult[];
146
+ try {
147
+ results = searchKnn(db, queryEmbedding, topK, project);
148
+ } finally {
149
+ db.close();
150
+ }
151
+ timings.vector_search_ms = Date.now() - t;
152
+
153
+ if (results.length === 0) {
154
+ const empty: RetrievalResult = {
155
+ query,
156
+ synthesis:
157
+ "No results found. Suggestions:\n" +
158
+ "- Try a different query\n" +
159
+ "- Run `/rlm:embed-index` to build/refresh the vector index\n" +
160
+ "- Use `/rlm:search` for keyword-based fallback",
161
+ sources: [],
162
+ stage_timings: { ...timings, total_ms: Date.now() - totalStart },
163
+ };
164
+ process.stdout.write(JSON.stringify(empty, null, 2) + "\n");
165
+ return;
166
+ }
167
+
168
+ // Deduplicate by session_id (keep best chunk per session)
169
+ const sessionMap = new Map<
170
+ string,
171
+ { result: VectorSearchResult; chunks: VectorSearchResult[] }
172
+ >();
173
+ for (const r of results) {
174
+ const key = `${r.session_id}:${r.project}`;
175
+ const existing = sessionMap.get(key);
176
+ if (!existing) {
177
+ sessionMap.set(key, { result: r, chunks: [r] });
178
+ } else {
179
+ existing.chunks.push(r);
180
+ if (r.distance < existing.result.distance) {
181
+ existing.result = r;
182
+ }
183
+ }
184
+ }
185
+ const sessions = Array.from(sessionMap.values());
186
+ logInfo(
187
+ HOOK_NAME,
188
+ `Stage 2: ${results.length} chunks → ${sessions.length} sessions`,
189
+ );
190
+
191
+ // Stage 3: Parallel haiku summarization
192
+ t = Date.now();
193
+ const summaries = await summarizeSessions(query, sessions);
194
+ timings.summarize_ms = Date.now() - t;
195
+
196
+ if (summaries.length === 0) {
197
+ const noSummaries: RetrievalResult = {
198
+ query,
199
+ synthesis: "Found matching chunks but all summarization attempts failed.",
200
+ sources: [],
201
+ stage_timings: { ...timings, total_ms: Date.now() - totalStart },
202
+ };
203
+ process.stdout.write(JSON.stringify(noSummaries, null, 2) + "\n");
204
+ return;
205
+ }
206
+
207
+ // Stage 4: Sonnet ranking
208
+ t = Date.now();
209
+ const ranked = await rankSessions(query, summaries);
210
+ timings.rank_ms = Date.now() - t;
211
+
212
+ // Stage 5: Sonnet synthesis
213
+ t = Date.now();
214
+ const relevant = ranked.filter((r) => r.relevant);
215
+ let synthesis: string;
216
+ if (relevant.length > 0) {
217
+ synthesis = await synthesize(query, relevant, summaries);
218
+ } else {
219
+ synthesis =
220
+ "No sessions were deemed relevant to your query.\n" +
221
+ "Suggestions:\n" +
222
+ "- Try a different or broader query\n" +
223
+ "- Use `/rlm:search` for keyword-based fallback";
224
+ }
225
+ timings.synthesize_ms = Date.now() - t;
226
+ timings.total_ms = Date.now() - totalStart;
227
+
228
+ const output: RetrievalResult = {
229
+ query,
230
+ synthesis,
231
+ sources: ranked,
232
+ stage_timings: timings,
233
+ };
234
+
235
+ process.stdout.write(JSON.stringify(output, null, 2) + "\n");
236
+ }
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // Stage 3: Parallel haiku summarization
240
+ // ---------------------------------------------------------------------------
241
+
242
/**
 * Summarizes every deduplicated session with respect to the query, running
 * at most MAX_PARALLEL_SUMMARIZERS summarizations concurrently.
 *
 * A session whose summarization throws is logged via logWarn and skipped;
 * sessions for which summarizeOneSession resolves to null are skipped too.
 * The returned array can therefore be shorter than `sessions`, but the
 * surviving summaries keep the input order.
 *
 * @param query - The user query forwarded to each summarization call.
 * @param sessions - One entry per session: its best chunk plus all chunks.
 * @returns The summaries that were produced successfully.
 */
async function summarizeSessions(
  query: string,
  sessions: Array<{
    result: VectorSearchResult;
    chunks: VectorSearchResult[];
  }>,
): Promise<ChunkSummary[]> {
  // Never rejects: a failure is logged and reported as null so that one bad
  // session cannot reject the whole Promise.all for its batch.
  const attempt = async (
    entry: (typeof sessions)[number],
  ): Promise<ChunkSummary | null> => {
    try {
      return await summarizeOneSession(query, entry);
    } catch (err) {
      logWarn(
        HOOK_NAME,
        `Summarize failed for ${entry.result.session_id}: ${err}`,
      );
      return null;
    }
  };

  const collected: ChunkSummary[] = [];
  let offset = 0;

  // Walk the sessions one fixed-size window at a time; each window finishes
  // completely before the next one starts.
  while (offset < sessions.length) {
    const group = sessions.slice(offset, offset + MAX_PARALLEL_SUMMARIZERS);
    offset += MAX_PARALLEL_SUMMARIZERS;

    const outcomes = await Promise.all(group.map((entry) => attempt(entry)));
    for (const outcome of outcomes) {
      if (!outcome) continue;
      collected.push(outcome);
    }
  }

  return collected;
}
274
+
275
/**
 * Produces a query-focused summary of one session's best-matching chunk.
 *
 * Only `session.result` (the best chunk) is read; `session.chunks` is not
 * used here. The transcript segment between the chunk's line_start and
 * line_end is loaded and sent to the "fast" inference tier together with
 * the query.
 *
 * @param query - The user query the summary must be relevant to.
 * @param session - The session's best chunk and all of its chunks.
 * @returns The summary, or null when the loaded content is shorter than 20
 *   characters, when inference fails or returns no output, or when the
 *   model answers with the "Not relevant" sentinel.
 */
async function summarizeOneSession(
  query: string,
  session: { result: VectorSearchResult; chunks: VectorSearchResult[] },
): Promise<ChunkSummary | null> {
  const best = session.result;

  // Load transcript segment. If loading throws, fall back to a placeholder
  // that still carries the chunk's topic, so inference is attempted anyway.
  let content: string;
  try {
    const loaded = await loadTranscript(
      best.source_path,
      [best.line_start, best.line_end],
      4000, // NOTE(review): presumably a size cap on the loaded text; confirm against loadTranscript
    );
    content = loaded.content;
  } catch {
    content = `[Could not load transcript. Topic: ${best.topic}]`;
  }

  if (!content || content.length < 20) return null;

  const systemPrompt =
    "You are a session transcript summarizer. Extract ONLY information relevant to the query. " +
    "Mention specific file names, function names, decisions made, and outcomes. " +
    "If nothing in the transcript is relevant to the query, respond with exactly: Not relevant. " +
    "Keep your summary under 200 words.";

  const userPrompt =
    `Query: ${query}\n\n` +
    `Session: ${best.session_id} (${best.project}, ${best.date})\n` +
    `Topic: ${best.topic}\n\n` +
    `Transcript:\n${content}`;

  // NOTE(review): 30 is presumably a timeout in seconds; confirm against inferenceAsync.
  const result = await inferenceAsync(systemPrompt, userPrompt, "fast", 30);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Summarize inference failed: ${result.error}`);
    return null;
  }

  const summary = result.output.trim();

  // The prompt asks for "exactly: Not relevant." but the trailing period is
  // easily read as sentence punctuation. Accept the sentinel with or without
  // surrounding quotes / markdown emphasis and trailing punctuation, so that
  // a bare "Not relevant" is not passed downstream as a real summary.
  const sentinel = summary
    .toLowerCase()
    .replace(/^["'`*_\s]+|["'`*_.!\s]+$/g, "");
  if (sentinel === "not relevant") {
    return null;
  }

  return {
    session_id: best.session_id,
    project: best.project,
    date: best.date,
    segment_lines: [best.line_start, best.line_end],
    summary,
    source_path: best.source_path,
  };
}
328
+
329
+ // ---------------------------------------------------------------------------
330
+ // Stage 4: Sonnet ranking
331
+ // ---------------------------------------------------------------------------
332
+
333
/**
 * Asks the "standard" inference tier to judge how relevant each session
 * summary is to the query and maps the answer back onto the summaries.
 *
 * The model is instructed to return a JSON array whose elements carry a
 * 1-based `index` into `summaries`. Entries whose index does not resolve to
 * a summary (out of range or not an integer) are logged and dropped.
 *
 * Degraded mode: when inference fails, or the response cannot be parsed or
 * does not satisfy RankingsSchema, every summary is returned as relevant
 * with confidence 0.3 and the first 200 characters of its summary as the
 * sole key finding.
 *
 * @param query - The user query.
 * @param summaries - Summaries produced by the summarization stage.
 * @returns One RankedSession per usable ranking entry, in the model's order.
 */
async function rankSessions(
  query: string,
  summaries: ChunkSummary[],
): Promise<RankedSession[]> {
  // Single definition of the degraded result shared by both failure paths.
  const markAllRelevant = (): RankedSession[] =>
    summaries.map((s) => ({
      session_id: s.session_id,
      project: s.project,
      date: s.date,
      relevant: true,
      confidence: 0.3,
      topics: [],
      key_findings: [s.summary.slice(0, 200)],
    }));

  const summaryText = summaries
    .map(
      (s, i) =>
        `[${i + 1}] Session: ${s.session_id} | Project: ${s.project} | Date: ${s.date}\nSummary: ${s.summary}`,
    )
    .join("\n\n");

  const systemPrompt =
    "You are a session relevance ranker. Given a query and session summaries, " +
    "evaluate each session's relevance. Output a JSON array where each element has:\n" +
    ' { "index": number, "relevant": boolean, "confidence": number (0-1), "topics": string[], "key_findings": string[] }\n' +
    "Output ONLY the JSON array, no other text.";

  const userPrompt = `Query: ${query}\n\nSessions:\n${summaryText}`;

  // NOTE(review): 60 is presumably a timeout in seconds; confirm against inferenceAsync.
  const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Rank inference failed: ${result.error}, marking all as relevant`);
    return markAllRelevant();
  }

  try {
    // Extract JSON array from response (may be wrapped in markdown code blocks)
    let jsonStr = result.output.trim();
    const fenced = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/)?.[1];
    if (fenced !== undefined) {
      jsonStr = fenced.trim();
    }

    const rawJson: unknown = JSON.parse(jsonStr);
    const parseResult = RankingsSchema.safeParse(rawJson);
    if (!parseResult.success) {
      throw new Error(`Invalid ranking response format: ${parseResult.error.message}`);
    }

    const ranked: RankedSession[] = [];
    for (const r of parseResult.data) {
      // Resolve the 1-based index defensively. A fractional index would pass
      // a plain range check yet index to undefined, and the resulting
      // TypeError would throw away every valid ranking via the catch below.
      const summary = Number.isInteger(r.index)
        ? summaries[r.index - 1]
        : undefined;
      if (!summary) {
        logWarn(HOOK_NAME, `Rank index ${r.index} out of bounds (1-${summaries.length})`);
        continue;
      }
      ranked.push({
        session_id: summary.session_id,
        project: summary.project,
        date: summary.date,
        relevant: r.relevant,
        confidence: r.confidence,
        topics: r.topics,
        key_findings: r.key_findings,
      });
    }
    return ranked;
  } catch (e) {
    logWarn(HOOK_NAME, `Rank parse failed: ${e}, marking all as relevant`);
    return markAllRelevant();
  }
}
413
+
414
+ // ---------------------------------------------------------------------------
415
+ // Stage 5: Sonnet synthesis
416
+ // ---------------------------------------------------------------------------
417
+
418
/**
 * Turns the relevant ranked sessions into a single markdown answer using
 * the "standard" inference tier.
 *
 * Each relevant session contributes its topics, key findings and full
 * summary to the prompt context. If inference fails or returns no output,
 * the function falls back to one bolded "date (project)" line per session
 * followed by its key findings.
 *
 * @param query - The user query being answered.
 * @param relevant - Ranked sessions that were marked relevant.
 * @param summaries - All summaries, used to look up each session's full text.
 * @returns The synthesized (or fallback) markdown, trimmed on success.
 */
async function synthesize(
  query: string,
  relevant: RankedSession[],
  summaries: ChunkSummary[],
): Promise<string> {
  // Sessions are deduplicated upstream by session_id AND project, so the
  // lookup must use the same composite key; keying on session_id alone would
  // let two projects that share a session id overwrite each other's summary.
  const keyOf = (s: { session_id: string; project: string }): string =>
    `${s.session_id}:${s.project}`;
  const summaryMap = new Map(summaries.map((s) => [keyOf(s), s]));

  // Build context from relevant sessions
  const context = relevant
    .map((r) => {
      const summary = summaryMap.get(keyOf(r));
      return (
        `Session: ${r.session_id} | Project: ${r.project} | Date: ${r.date}\n` +
        `Topics: ${r.topics.join(", ")}\n` +
        `Key Findings: ${r.key_findings.join("; ")}\n` +
        `Full Summary: ${summary?.summary ?? "(no summary)"}`
      );
    })
    .join("\n\n---\n\n");

  const systemPrompt =
    "You are a knowledge synthesizer. Given a query and relevant session findings, " +
    "produce a coherent markdown answer. Include session citations inline as " +
    '"(session: {date}, {project})". Highlight the most recent and relevant information. ' +
    "Note any contradictions or evolution across sessions. Be concise but thorough.";

  const userPrompt = `Query: ${query}\n\nRelevant Sessions:\n${context}`;

  // NOTE(review): 60 is presumably a timeout in seconds; confirm against inferenceAsync.
  const result = await inferenceAsync(systemPrompt, userPrompt, "standard", 60);

  if (!result.success || !result.output) {
    logWarn(HOOK_NAME, `Synthesize inference failed: ${result.error}`);
    // Fallback: concatenate key findings
    return relevant
      .map(
        (r) =>
          `**${r.date} (${r.project}):** ${r.key_findings.join(". ")}`,
      )
      .join("\n\n");
  }

  return result.output.trim();
}