@luckydraw/cumulus 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/README.md +148 -0
  2. package/dist/cli/cumulus.d.ts +3 -0
  3. package/dist/cli/cumulus.d.ts.map +1 -0
  4. package/dist/cli/cumulus.js +233 -0
  5. package/dist/cli/cumulus.js.map +1 -0
  6. package/dist/index.d.ts +33 -0
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +43 -0
  9. package/dist/index.js.map +1 -0
  10. package/dist/lib/config.d.ts +86 -0
  11. package/dist/lib/config.d.ts.map +1 -0
  12. package/dist/lib/config.js +241 -0
  13. package/dist/lib/config.js.map +1 -0
  14. package/dist/lib/content-detector.d.ts +46 -0
  15. package/dist/lib/content-detector.d.ts.map +1 -0
  16. package/dist/lib/content-detector.js +359 -0
  17. package/dist/lib/content-detector.js.map +1 -0
  18. package/dist/lib/content-store.d.ts +255 -0
  19. package/dist/lib/content-store.d.ts.map +1 -0
  20. package/dist/lib/content-store.js +955 -0
  21. package/dist/lib/content-store.js.map +1 -0
  22. package/dist/lib/context-budget.d.ts +83 -0
  23. package/dist/lib/context-budget.d.ts.map +1 -0
  24. package/dist/lib/context-budget.js +101 -0
  25. package/dist/lib/context-budget.js.map +1 -0
  26. package/dist/lib/embeddings.d.ts +64 -0
  27. package/dist/lib/embeddings.d.ts.map +1 -0
  28. package/dist/lib/embeddings.js +176 -0
  29. package/dist/lib/embeddings.js.map +1 -0
  30. package/dist/lib/history.d.ts +120 -0
  31. package/dist/lib/history.d.ts.map +1 -0
  32. package/dist/lib/history.js +205 -0
  33. package/dist/lib/history.js.map +1 -0
  34. package/dist/lib/image-utils.d.ts +41 -0
  35. package/dist/lib/image-utils.d.ts.map +1 -0
  36. package/dist/lib/image-utils.js +288 -0
  37. package/dist/lib/image-utils.js.map +1 -0
  38. package/dist/lib/migrate.d.ts +35 -0
  39. package/dist/lib/migrate.d.ts.map +1 -0
  40. package/dist/lib/migrate.js +196 -0
  41. package/dist/lib/migrate.js.map +1 -0
  42. package/dist/lib/retriever.d.ts +56 -0
  43. package/dist/lib/retriever.d.ts.map +1 -0
  44. package/dist/lib/retriever.js +644 -0
  45. package/dist/lib/retriever.js.map +1 -0
  46. package/dist/lib/revert.d.ts +23 -0
  47. package/dist/lib/revert.d.ts.map +1 -0
  48. package/dist/lib/revert.js +75 -0
  49. package/dist/lib/revert.js.map +1 -0
  50. package/dist/lib/session.d.ts +65 -0
  51. package/dist/lib/session.d.ts.map +1 -0
  52. package/dist/lib/session.js +289 -0
  53. package/dist/lib/session.js.map +1 -0
  54. package/dist/lib/snapshots.d.ts +39 -0
  55. package/dist/lib/snapshots.d.ts.map +1 -0
  56. package/dist/lib/snapshots.js +99 -0
  57. package/dist/lib/snapshots.js.map +1 -0
  58. package/dist/lib/stream-processor.d.ts +149 -0
  59. package/dist/lib/stream-processor.d.ts.map +1 -0
  60. package/dist/lib/stream-processor.js +389 -0
  61. package/dist/lib/stream-processor.js.map +1 -0
  62. package/dist/lib/summarizer.d.ts +67 -0
  63. package/dist/lib/summarizer.d.ts.map +1 -0
  64. package/dist/lib/summarizer.js +213 -0
  65. package/dist/lib/summarizer.js.map +1 -0
  66. package/dist/mcp/index.d.ts +3 -0
  67. package/dist/mcp/index.d.ts.map +1 -0
  68. package/dist/mcp/index.js +16 -0
  69. package/dist/mcp/index.js.map +1 -0
  70. package/dist/mcp/proxy.d.ts +19 -0
  71. package/dist/mcp/proxy.d.ts.map +1 -0
  72. package/dist/mcp/proxy.js +120 -0
  73. package/dist/mcp/proxy.js.map +1 -0
  74. package/dist/mcp/server.d.ts +6 -0
  75. package/dist/mcp/server.d.ts.map +1 -0
  76. package/dist/mcp/server.js +29 -0
  77. package/dist/mcp/server.js.map +1 -0
  78. package/dist/mcp/shared-server.d.ts +21 -0
  79. package/dist/mcp/shared-server.d.ts.map +1 -0
  80. package/dist/mcp/shared-server.js +210 -0
  81. package/dist/mcp/shared-server.js.map +1 -0
  82. package/dist/mcp/tool-handler.d.ts +20 -0
  83. package/dist/mcp/tool-handler.d.ts.map +1 -0
  84. package/dist/mcp/tool-handler.js +1405 -0
  85. package/dist/mcp/tool-handler.js.map +1 -0
  86. package/dist/tui/components/App.d.ts +11 -0
  87. package/dist/tui/components/App.d.ts.map +1 -0
  88. package/dist/tui/components/App.js +607 -0
  89. package/dist/tui/components/App.js.map +1 -0
  90. package/dist/tui/components/DebugContextView.d.ts +13 -0
  91. package/dist/tui/components/DebugContextView.d.ts.map +1 -0
  92. package/dist/tui/components/DebugContextView.js +78 -0
  93. package/dist/tui/components/DebugContextView.js.map +1 -0
  94. package/dist/tui/components/IncludeMenu.d.ts +12 -0
  95. package/dist/tui/components/IncludeMenu.d.ts.map +1 -0
  96. package/dist/tui/components/IncludeMenu.js +127 -0
  97. package/dist/tui/components/IncludeMenu.js.map +1 -0
  98. package/dist/tui/components/InputArea.d.ts +27 -0
  99. package/dist/tui/components/InputArea.d.ts.map +1 -0
  100. package/dist/tui/components/InputArea.js +366 -0
  101. package/dist/tui/components/InputArea.js.map +1 -0
  102. package/dist/tui/components/MarkdownText.d.ts +38 -0
  103. package/dist/tui/components/MarkdownText.d.ts.map +1 -0
  104. package/dist/tui/components/MarkdownText.js +234 -0
  105. package/dist/tui/components/MarkdownText.js.map +1 -0
  106. package/dist/tui/components/MessageBubble.d.ts +11 -0
  107. package/dist/tui/components/MessageBubble.d.ts.map +1 -0
  108. package/dist/tui/components/MessageBubble.js +16 -0
  109. package/dist/tui/components/MessageBubble.js.map +1 -0
  110. package/dist/tui/components/MessageHistory.d.ts +11 -0
  111. package/dist/tui/components/MessageHistory.d.ts.map +1 -0
  112. package/dist/tui/components/MessageHistory.js +12 -0
  113. package/dist/tui/components/MessageHistory.js.map +1 -0
  114. package/dist/tui/components/RevertMenu.d.ts +17 -0
  115. package/dist/tui/components/RevertMenu.d.ts.map +1 -0
  116. package/dist/tui/components/RevertMenu.js +144 -0
  117. package/dist/tui/components/RevertMenu.js.map +1 -0
  118. package/dist/tui/components/StatusBar.d.ts +14 -0
  119. package/dist/tui/components/StatusBar.d.ts.map +1 -0
  120. package/dist/tui/components/StatusBar.js +13 -0
  121. package/dist/tui/components/StatusBar.js.map +1 -0
  122. package/dist/tui/components/StreamingResponse.d.ts +15 -0
  123. package/dist/tui/components/StreamingResponse.d.ts.map +1 -0
  124. package/dist/tui/components/StreamingResponse.js +52 -0
  125. package/dist/tui/components/StreamingResponse.js.map +1 -0
  126. package/dist/tui/hooks/useAppState.d.ts +147 -0
  127. package/dist/tui/hooks/useAppState.d.ts.map +1 -0
  128. package/dist/tui/hooks/useAppState.js +110 -0
  129. package/dist/tui/hooks/useAppState.js.map +1 -0
  130. package/dist/tui/hooks/useClaudeProcess.d.ts +19 -0
  131. package/dist/tui/hooks/useClaudeProcess.d.ts.map +1 -0
  132. package/dist/tui/hooks/useClaudeProcess.js +185 -0
  133. package/dist/tui/hooks/useClaudeProcess.js.map +1 -0
  134. package/dist/tui/index.d.ts +10 -0
  135. package/dist/tui/index.d.ts.map +1 -0
  136. package/dist/tui/index.js +11 -0
  137. package/dist/tui/index.js.map +1 -0
  138. package/dist/tui/utils/streamParser.d.ts +31 -0
  139. package/dist/tui/utils/streamParser.d.ts.map +1 -0
  140. package/dist/tui/utils/streamParser.js +63 -0
  141. package/dist/tui/utils/streamParser.js.map +1 -0
  142. package/package.json +94 -0
@@ -0,0 +1,955 @@
1
+ import * as crypto from 'crypto';
2
+ import * as fs from 'fs/promises';
3
+ import * as path from 'path';
4
+ import { nanoid } from 'nanoid';
5
+ import { generateHeuristicSummary } from './content-detector.js';
6
+ import { estimateTokens } from './context-budget.js';
7
+ import { cosineSimilarity, embeddingsAvailable, getEmbeddingProvider } from './embeddings.js';
8
/** Default soft budget per chunk, in estimated tokens (see estimateTokens). */
const DEFAULT_CHUNK_SIZE = 800;
/** Structural units smaller than this (in tokens) are merged with a sibling unit. */
const MIN_UNIT_TOKENS = 100;
/** Maximum snippet length (characters) returned with search results. */
const SNIPPET_LENGTH = 300;
/** Minimum cosine-similarity score for a semantic search hit. */
const SEMANTIC_THRESHOLD = 0.3;
16
+ /**
17
+ * ContentStore manages externalized content storage and retrieval.
18
+ *
19
+ * Content is stored in a thread-specific directory:
20
+ * - index.jsonl: Metadata for all stored content
21
+ * - chunks/: Directory containing actual content chunks
22
+ */
23
+ export class ContentStore {
24
    /** Root directory for this thread's externalized content. */
    basePath;
    /** Path to the JSONL metadata index (one JSON object per line). */
    indexPath;
    /** Directory containing the individual chunk files (`<id>_<n>.txt`). */
    chunksDir;
    /**
     * @param {string} basePath - Thread-specific storage directory; the
     *     index file and chunks directory are created beneath it.
     */
    constructor(basePath) {
        this.basePath = basePath;
        this.indexPath = path.join(basePath, 'index.jsonl');
        this.chunksDir = path.join(basePath, 'chunks');
    }
32
+ /**
33
+ * Ensure storage directories exist.
34
+ */
35
+ async ensureDirectories() {
36
+ await fs.mkdir(this.chunksDir, { recursive: true });
37
+ }
38
+ /**
39
+ * Get path to the images directory for cached image files.
40
+ */
41
+ getImagesDir() {
42
+ return path.join(this.basePath, 'images');
43
+ }
44
+ /**
45
+ * Ensure the images directory exists.
46
+ */
47
+ async ensureImagesDir() {
48
+ const imagesDir = this.getImagesDir();
49
+ await fs.mkdir(imagesDir, { recursive: true });
50
+ return imagesDir;
51
+ }
52
+ /**
53
+ * Compute SHA-256 hash of content for deduplication.
54
+ */
55
+ computeHash(content) {
56
+ return crypto.createHash('sha256').update(content).digest('hex');
57
+ }
58
+ /**
59
+ * Split content into chunks of approximately chunkSize tokens.
60
+ */
61
+ createChunks(content, chunkSize = DEFAULT_CHUNK_SIZE) {
62
+ const lines = content.split('\n');
63
+ const chunks = [];
64
+ let currentChunk = [];
65
+ let currentTokens = 0;
66
+ for (const line of lines) {
67
+ const lineTokens = estimateTokens(line);
68
+ // If adding this line would exceed chunk size, finalize current chunk
69
+ if (currentTokens + lineTokens > chunkSize && currentChunk.length > 0) {
70
+ const chunkContent = currentChunk.join('\n');
71
+ chunks.push({
72
+ index: chunks.length,
73
+ content: chunkContent,
74
+ tokenEstimate: currentTokens,
75
+ });
76
+ currentChunk = [];
77
+ currentTokens = 0;
78
+ }
79
+ currentChunk.push(line);
80
+ currentTokens += lineTokens;
81
+ }
82
+ // Don't forget the last chunk
83
+ if (currentChunk.length > 0) {
84
+ const chunkContent = currentChunk.join('\n');
85
+ chunks.push({
86
+ index: chunks.length,
87
+ content: chunkContent,
88
+ tokenEstimate: currentTokens,
89
+ });
90
+ }
91
+ return chunks;
92
+ }
93
    /**
     * Detect structural boundaries in non-code content and split into structural units.
     * Each unit is a coherent block: a headed section, table, code fence, paragraph, etc.
     *
     * @param {string[]} lines - Content already split into individual lines.
     * @returns {string[][]} Units, each an array of lines. Blank separator
     *     lines are dropped and belong to no unit; fences and tables are
     *     kept atomic (never split across units).
     */
    detectStructuralUnits(lines) {
        const units = [];
        let current = [];
        let inCodeFence = false;
        let inTable = false;
        // Push the accumulated unit (if non-empty) and start a fresh one.
        const flushCurrent = () => {
            if (current.length > 0) {
                units.push(current);
                current = [];
            }
        };
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const trimmed = line.trimStart();
            // Code fence toggle
            // NOTE(review): the flag is a single toggle, so a fence opened
            // with ``` can be "closed" by ~~~ (and vice versa) — confirm
            // whether mixed fence markers should match.
            if (trimmed.startsWith('```') || trimmed.startsWith('~~~')) {
                if (!inCodeFence) {
                    // Starting a code fence — flush anything before it
                    flushCurrent();
                    inCodeFence = true;
                    current.push(line);
                }
                else {
                    // Closing a code fence — include closing line, flush as atomic unit
                    inCodeFence = false;
                    current.push(line);
                    flushCurrent();
                }
                continue;
            }
            // Inside a code fence — accumulate without any splitting
            if (inCodeFence) {
                current.push(line);
                continue;
            }
            // Table detection: lines starting with | and containing a second |
            const isTableLine = trimmed.startsWith('|') && trimmed.includes('|', 1);
            if (isTableLine) {
                if (!inTable) {
                    // Starting a table — flush anything before it
                    flushCurrent();
                    inTable = true;
                }
                current.push(line);
                continue;
            }
            else if (inTable) {
                // Leaving a table — flush the table as atomic unit
                inTable = false;
                flushCurrent();
            }
            // Markdown heading — starts a new section
            if (/^#{1,6}\s/.test(trimmed)) {
                flushCurrent();
                current.push(line);
                continue;
            }
            // Horizontal rule — a one-line unit of its own
            if (/^(?:---+|___+|\*\*\*+)\s*$/.test(trimmed)) {
                flushCurrent();
                current.push(line);
                flushCurrent();
                continue;
            }
            // Blank line — paragraph boundary
            if (trimmed === '') {
                if (current.length > 0) {
                    // End of a paragraph/section
                    flushCurrent();
                }
                // Skip blank lines (don't include in any unit)
                continue;
            }
            // Regular line — accumulate into current unit
            current.push(line);
        }
        // Flush remaining (including unclosed code fences)
        flushCurrent();
        return units;
    }
177
    /**
     * Split non-code content into structure-aware chunks.
     * Respects headers, tables, code fences, and paragraph boundaries.
     * Merges small units and splits oversized ones.
     *
     * @param {string} content - Non-code text to split.
     * @param {number} [chunkSize] - Soft token budget per chunk.
     * @returns {{index: number, content: string, tokenEstimate: number}[]}
     */
    createStructuralChunks(content, chunkSize = DEFAULT_CHUNK_SIZE) {
        const lines = content.split('\n');
        const units = this.detectStructuralUnits(lines);
        // Phase 1: merge tiny units (< MIN_UNIT_TOKENS) with their next sibling
        const mergedUnits = [];
        let pendingUnit = null;
        let pendingTokens = 0;
        for (const unit of units) {
            const unitContent = unit.join('\n');
            const unitTokens = estimateTokens(unitContent);
            if (pendingUnit) {
                // Merge pending tiny unit with this one
                pendingUnit.push('', ...unit); // blank line separator
                pendingTokens += unitTokens;
                // Keep absorbing siblings until the merged unit is big enough
                if (pendingTokens >= MIN_UNIT_TOKENS) {
                    mergedUnits.push(pendingUnit);
                    pendingUnit = null;
                    pendingTokens = 0;
                }
            }
            else if (unitTokens < MIN_UNIT_TOKENS) {
                // Too small on its own — hold it to merge with what follows
                pendingUnit = [...unit];
                pendingTokens = unitTokens;
            }
            else {
                mergedUnits.push(unit);
            }
        }
        // Flush any remaining pending unit
        if (pendingUnit) {
            if (mergedUnits.length > 0) {
                // Append to previous unit (no following sibling to merge with)
                const last = mergedUnits[mergedUnits.length - 1];
                last.push('', ...pendingUnit);
            }
            else {
                mergedUnits.push(pendingUnit);
            }
        }
        // Phase 2: pack units into chunks up to the token budget
        const chunks = [];
        let chunkLines = [];
        let chunkTokens = 0;
        for (const unit of mergedUnits) {
            const unitContent = unit.join('\n');
            const unitTokens = estimateTokens(unitContent);
            // If a single unit exceeds chunk size, split it with the fallback line-based splitter
            if (unitTokens > chunkSize) {
                // Flush accumulated chunk first so sub-chunks stay contiguous
                if (chunkLines.length > 0) {
                    const cc = chunkLines.join('\n');
                    chunks.push({ index: chunks.length, content: cc, tokenEstimate: chunkTokens });
                    chunkLines = [];
                    chunkTokens = 0;
                }
                // Split the oversized unit at line boundaries
                const subChunks = this.createChunks(unitContent, chunkSize);
                for (const sub of subChunks) {
                    // Re-index: sub.index restarts at 0 for each oversized unit
                    chunks.push({
                        index: chunks.length,
                        content: sub.content,
                        tokenEstimate: sub.tokenEstimate,
                    });
                }
                continue;
            }
            // If adding this unit exceeds chunk size, finalize current chunk
            if (chunkTokens + unitTokens > chunkSize && chunkLines.length > 0) {
                const cc = chunkLines.join('\n');
                chunks.push({ index: chunks.length, content: cc, tokenEstimate: chunkTokens });
                chunkLines = [];
                chunkTokens = 0;
            }
            // Add blank line separator between units within a chunk
            // (separator's token cost is not counted toward chunkTokens)
            if (chunkLines.length > 0) {
                chunkLines.push('');
            }
            chunkLines.push(...unit);
            chunkTokens += unitTokens;
        }
        // Final chunk
        if (chunkLines.length > 0) {
            const cc = chunkLines.join('\n');
            chunks.push({ index: chunks.length, content: cc, tokenEstimate: chunkTokens });
        }
        return chunks;
    }
269
    /**
     * Regex patterns that identify top-level code block boundaries.
     * All patterns are anchored at column 0, so only unindented
     * (top-level) declarations match — nested definitions do not.
     */
    static CODE_BLOCK_PATTERNS = [
        // JavaScript/TypeScript
        /^(?:export\s+)?(?:async\s+)?function\s+\w+/,
        /^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s*)?\(/,
        /^(?:export\s+)?class\s+\w+/,
        /^(?:export\s+)?(?:interface|type|enum)\s+\w+/,
        // Python
        /^(?:async\s+)?def\s+\w+/,
        // Rust
        /^(?:pub\s+)?(?:async\s+)?fn\s+\w+/,
        /^(?:pub\s+)?(?:struct|enum|trait)\s+/,
        /^impl\s+/,
        // Go
        /^func\s+/,
        /^type\s+\w+\s+(?:struct|interface)/,
    ];
    /**
     * Pattern matching import/use/include lines (JS/TS, Python, Rust,
     * C/C++, and generic `include`), also anchored at column 0.
     */
    static IMPORT_PATTERN = /^(?:import\s|from\s|require\(|use\s|#include\s|include\s)/;
292
+ /**
293
+ * Check if a line is a code block boundary (top-level declaration).
294
+ */
295
+ isBlockBoundary(line) {
296
+ return ContentStore.CODE_BLOCK_PATTERNS.some(p => p.test(line));
297
+ }
298
    /**
     * Split code content into chunks at function/class/type boundaries.
     * Falls back to line-based splitting for oversized single blocks.
     *
     * @param {string} content - Source code to split.
     * @param {number} [chunkSize] - Soft token budget per chunk.
     * @returns {{index: number, content: string, tokenEstimate: number}[]}
     */
    createCodeChunks(content, chunkSize = DEFAULT_CHUNK_SIZE) {
        const lines = content.split('\n');
        // Phase 1: group lines into logical blocks (import run, declaration, ...)
        const blocks = [];
        let currentBlock = [];
        let inImportBlock = false;
        for (const line of lines) {
            const isImport = ContentStore.IMPORT_PATTERN.test(line);
            const isBoundary = this.isBlockBoundary(line);
            if (isImport) {
                // Group consecutive imports together
                if (!inImportBlock && currentBlock.length > 0) {
                    blocks.push(currentBlock);
                    currentBlock = [];
                }
                inImportBlock = true;
                currentBlock.push(line);
            }
            else if (isBoundary) {
                // New top-level declaration starts a new block
                if (currentBlock.length > 0) {
                    blocks.push(currentBlock);
                }
                inImportBlock = false;
                currentBlock = [line];
            }
            else {
                // Continue current block (or start a new implicit block after imports)
                // Blank lines after imports stay with the import block.
                if (inImportBlock && line.trim() !== '') {
                    blocks.push(currentBlock);
                    currentBlock = [line];
                    inImportBlock = false;
                }
                else {
                    currentBlock.push(line);
                }
            }
        }
        if (currentBlock.length > 0) {
            blocks.push(currentBlock);
        }
        // Phase 2: accumulate blocks into chunks, respecting size limits
        const chunks = [];
        let chunkLines = [];
        let chunkTokens = 0;
        for (const block of blocks) {
            const blockContent = block.join('\n');
            const blockTokens = estimateTokens(blockContent);
            // If a single block exceeds chunk size, fall back to line-based splitting
            if (blockTokens > chunkSize) {
                // First, flush any accumulated lines
                if (chunkLines.length > 0) {
                    // NOTE(review): `content` here shadows the method parameter;
                    // harmless (the parameter is no longer read) but worth renaming.
                    const content = chunkLines.join('\n');
                    chunks.push({ index: chunks.length, content, tokenEstimate: chunkTokens });
                    chunkLines = [];
                    chunkTokens = 0;
                }
                // Split the oversized block by lines
                const subChunks = this.createChunks(blockContent, chunkSize);
                for (const sub of subChunks) {
                    chunks.push({
                        index: chunks.length,
                        content: sub.content,
                        tokenEstimate: sub.tokenEstimate,
                    });
                }
                continue;
            }
            // If adding this block would exceed chunk size, finalize current chunk
            if (chunkTokens + blockTokens > chunkSize && chunkLines.length > 0) {
                const content = chunkLines.join('\n');
                chunks.push({ index: chunks.length, content, tokenEstimate: chunkTokens });
                chunkLines = [];
                chunkTokens = 0;
            }
            chunkLines.push(...block);
            chunkTokens += blockTokens;
        }
        // Final chunk
        if (chunkLines.length > 0) {
            const content = chunkLines.join('\n');
            chunks.push({ index: chunks.length, content, tokenEstimate: chunkTokens });
        }
        return chunks;
    }
386
+ /**
387
+ * Generate a brief summary of content.
388
+ * Delegates to the shared heuristic summary generator in content-detector.
389
+ */
390
+ generateSimpleSummary(content, contentType) {
391
+ return generateHeuristicSummary(content, contentType, 500);
392
+ }
393
    /**
     * Store content externally and return metadata.
     * Returns existing content ID if content hash matches (deduplication).
     *
     * @param {string} content - Raw content to externalize.
     * @param {object} options - sourceType (required for metadata), plus
     *     optional sourceTool, contentType, summary, metadata.
     * @returns {Promise<object>} The stored (or pre-existing) metadata record.
     */
    async store(content, options) {
        await this.ensureDirectories();
        const contentHash = this.computeHash(content);
        // Check for existing content with same hash
        // NOTE(review): dedup check + appendFile below is not atomic; two
        // concurrent store() calls with the same content could both append.
        const existing = await this.findByHash(contentHash);
        if (existing) {
            return existing;
        }
        const id = `cnt_${nanoid(10)}`;
        const contentType = options.contentType ?? this.detectContentType(content);
        // Code gets boundary-aware chunking; everything else structural chunking.
        const chunks = contentType === 'code'
            ? this.createCodeChunks(content)
            : this.createStructuralChunks(content);
        const summary = options.summary ?? this.generateSimpleSummary(content, contentType);
        // Context propagation: prepend document-level context to chunks 1..N
        // (chunk 0 is left untouched so the document opening stays intact)
        if (chunks.length > 1) {
            const sourceName = options.metadata?.filePath ??
                options.metadata?.command ??
                options.sourceTool ??
                options.sourceType;
            const contextHeader = `[Source: ${sourceName} | ${contentType} | ${summary.slice(0, 150)}]`;
            const headerTokens = estimateTokens(contextHeader);
            for (let i = 1; i < chunks.length; i++) {
                chunks[i].content = contextHeader + '\n\n' + chunks[i].content;
                chunks[i].tokenEstimate += headerTokens;
            }
        }
        const meta = {
            id,
            timestamp: Date.now(),
            sourceType: options.sourceType,
            sourceTool: options.sourceTool,
            originalSize: content.length,
            tokenEstimate: estimateTokens(content),
            contentType,
            summary,
            chunkCount: chunks.length,
            contentHash,
            metadata: options.metadata ?? {},
        };
        // Write chunks to files (one file per chunk: <id>_<index>.txt)
        for (const chunk of chunks) {
            const chunkPath = path.join(this.chunksDir, `${id}_${chunk.index}.txt`);
            await fs.writeFile(chunkPath, chunk.content, 'utf-8');
        }
        // Append metadata to index only after all chunks are on disk
        const indexLine = JSON.stringify(meta) + '\n';
        await fs.appendFile(this.indexPath, indexLine, 'utf-8');
        return meta;
    }
447
+ /**
448
+ * Simple content type detection based on heuristics.
449
+ */
450
+ detectContentType(content) {
451
+ const lines = content.split('\n');
452
+ const sample = content.slice(0, 2000);
453
+ // Check for JSON
454
+ if (sample.trim().startsWith('{') || sample.trim().startsWith('[')) {
455
+ try {
456
+ JSON.parse(content);
457
+ return 'json';
458
+ }
459
+ catch {
460
+ // Not valid JSON
461
+ }
462
+ }
463
+ // Check for code patterns
464
+ const codePatterns = [
465
+ /^import\s+/m,
466
+ /^export\s+/m,
467
+ /^(?:const|let|var)\s+\w+\s*=/m,
468
+ /^(?:function|def|fn|func)\s+\w+/m,
469
+ /^(?:class|interface|struct)\s+\w+/m,
470
+ /^(?:public|private|protected)\s+/m,
471
+ /^\s*(?:if|for|while|switch)\s*\(/m,
472
+ ];
473
+ const codeScore = codePatterns.filter(p => p.test(sample)).length;
474
+ if (codeScore >= 2) {
475
+ return 'code';
476
+ }
477
+ // Check for log patterns
478
+ const logPatterns = [
479
+ /^\d{4}-\d{2}-\d{2}/m, // Date stamps
480
+ /^\[\w+\]/m, // [INFO], [ERROR], etc.
481
+ /^(?:DEBUG|INFO|WARN|ERROR|FATAL):/m,
482
+ /^\d+:\d+:\d+/m, // Time stamps
483
+ ];
484
+ const logScore = logPatterns.filter(p => p.test(sample)).length;
485
+ if (logScore >= 2) {
486
+ return 'logs';
487
+ }
488
+ // Check for prose (sentences, paragraphs)
489
+ const avgLineLength = content.length / Math.max(lines.length, 1);
490
+ const hasLongLines = avgLineLength > 60;
491
+ const hasPunctuation = /[.!?]\s+[A-Z]/.test(sample);
492
+ if (hasLongLines && hasPunctuation) {
493
+ return 'prose';
494
+ }
495
+ return 'mixed';
496
+ }
497
+ /**
498
+ * Find content by hash (for deduplication).
499
+ */
500
+ async findByHash(hash) {
501
+ const index = await this.loadIndex();
502
+ return index.find(meta => meta.contentHash === hash) ?? null;
503
+ }
504
+ /**
505
+ * Load the content index.
506
+ */
507
+ async loadIndex() {
508
+ try {
509
+ const content = await fs.readFile(this.indexPath, 'utf-8');
510
+ if (!content.trim()) {
511
+ return [];
512
+ }
513
+ return content
514
+ .trim()
515
+ .split('\n')
516
+ .filter(line => line.trim())
517
+ .map(line => JSON.parse(line));
518
+ }
519
+ catch (error) {
520
+ if (error.code === 'ENOENT') {
521
+ return [];
522
+ }
523
+ throw error;
524
+ }
525
+ }
526
+ /**
527
+ * Retrieve content by ID.
528
+ * @param id - Content ID
529
+ * @param chunkIndex - Optional specific chunk index
530
+ * @returns Full content or specific chunk
531
+ */
532
+ async retrieve(id, chunkIndex) {
533
+ const index = await this.loadIndex();
534
+ const meta = index.find(m => m.id === id);
535
+ if (!meta) {
536
+ return null;
537
+ }
538
+ if (chunkIndex !== undefined) {
539
+ if (chunkIndex < 0 || chunkIndex >= meta.chunkCount) {
540
+ return null;
541
+ }
542
+ const chunkPath = path.join(this.chunksDir, `${id}_${chunkIndex}.txt`);
543
+ try {
544
+ return await fs.readFile(chunkPath, 'utf-8');
545
+ }
546
+ catch {
547
+ return null;
548
+ }
549
+ }
550
+ // Retrieve all chunks
551
+ const chunks = [];
552
+ for (let i = 0; i < meta.chunkCount; i++) {
553
+ const chunkPath = path.join(this.chunksDir, `${id}_${i}.txt`);
554
+ try {
555
+ const chunk = await fs.readFile(chunkPath, 'utf-8');
556
+ chunks.push(chunk);
557
+ }
558
+ catch {
559
+ // Skip missing chunks
560
+ }
561
+ }
562
+ return chunks.join('\n');
563
+ }
564
+ /**
565
+ * Get metadata for stored content.
566
+ */
567
+ async getMeta(id) {
568
+ const index = await this.loadIndex();
569
+ return index.find(m => m.id === id) ?? null;
570
+ }
571
+ /**
572
+ * Get previews of all chunks for a stored content item.
573
+ * Returns a brief preview of each chunk for navigation (table of contents).
574
+ */
575
+ async getChunkPreviews(id) {
576
+ const meta = await this.getMeta(id);
577
+ if (!meta)
578
+ return null;
579
+ const previews = [];
580
+ for (let i = 0; i < meta.chunkCount; i++) {
581
+ const chunkPath = path.join(this.chunksDir, `${id}_${i}.txt`);
582
+ try {
583
+ const content = await fs.readFile(chunkPath, 'utf-8');
584
+ const lines = content.split('\n');
585
+ // Skip context propagation header (starts with [Source:)
586
+ let previewLine = '';
587
+ for (const line of lines) {
588
+ const trimmed = line.trim();
589
+ if (trimmed === '' || trimmed.startsWith('[Source:'))
590
+ continue;
591
+ previewLine = trimmed;
592
+ break;
593
+ }
594
+ // Truncate to 120 chars
595
+ const preview = previewLine.length > 120 ? previewLine.slice(0, 120) + '...' : previewLine;
596
+ const tokens = estimateTokens(content);
597
+ previews.push({ index: i, tokens, preview });
598
+ }
599
+ catch {
600
+ previews.push({ index: i, tokens: 0, preview: '(chunk not found)' });
601
+ }
602
+ }
603
+ return previews;
604
+ }
605
+ /**
606
+ * List all stored content.
607
+ */
608
+ async list(options = {}) {
609
+ let index = await this.loadIndex();
610
+ if (options.sourceTypes && options.sourceTypes.length > 0) {
611
+ index = index.filter(m => options.sourceTypes.includes(m.sourceType));
612
+ }
613
+ // Sort by timestamp descending (most recent first)
614
+ index.sort((a, b) => b.timestamp - a.timestamp);
615
+ if (options.limit) {
616
+ index = index.slice(0, options.limit);
617
+ }
618
+ return index;
619
+ }
620
+ /**
621
+ * Delete stored content by ID.
622
+ * Removes from index, chunks, and embeddings.
623
+ * @returns true if content was found and deleted, false if not found
624
+ */
625
+ async delete(id) {
626
+ const index = await this.loadIndex();
627
+ const metaIndex = index.findIndex(m => m.id === id);
628
+ if (metaIndex === -1) {
629
+ return false;
630
+ }
631
+ const meta = index[metaIndex];
632
+ // Remove chunk files
633
+ for (let i = 0; i < meta.chunkCount; i++) {
634
+ const chunkPath = path.join(this.chunksDir, `${id}_${i}.txt`);
635
+ try {
636
+ await fs.unlink(chunkPath);
637
+ }
638
+ catch {
639
+ // Ignore if chunk file doesn't exist
640
+ }
641
+ }
642
+ // Remove from index
643
+ index.splice(metaIndex, 1);
644
+ await this.saveIndex(index);
645
+ // Remove from embeddings
646
+ await this.deleteEmbeddings(id);
647
+ return true;
648
+ }
649
+ /**
650
+ * Delete embeddings for a content ID.
651
+ */
652
+ async deleteEmbeddings(contentId) {
653
+ const embeddingsPath = this.getEmbeddingsPath();
654
+ try {
655
+ const content = await fs.readFile(embeddingsPath, 'utf-8');
656
+ const data = JSON.parse(content);
657
+ // Filter out entries for this content ID
658
+ data.entries = data.entries.filter(e => e.contentId !== contentId);
659
+ await fs.writeFile(embeddingsPath, JSON.stringify(data), 'utf-8');
660
+ }
661
+ catch {
662
+ // No embeddings file or parse error - nothing to delete
663
+ }
664
+ }
665
+ /**
666
+ * Save the index file.
667
+ */
668
+ async saveIndex(index) {
669
+ await this.ensureDirectories();
670
+ const content = index.map(m => JSON.stringify(m)).join('\n');
671
+ await fs.writeFile(this.indexPath, content + (content ? '\n' : ''), 'utf-8');
672
+ }
673
+ /**
674
+ * Get path to embeddings file.
675
+ */
676
+ getEmbeddingsPath() {
677
+ return path.join(this.basePath, 'embeddings.json');
678
+ }
679
+ /**
680
+ * Get embeddings for all chunks of a specific content item.
681
+ * Generates missing embeddings first if needed.
682
+ * Returns a Map keyed by chunk index.
683
+ */
684
+ async getEmbeddingsForContent(contentId) {
685
+ const meta = await this.getMeta(contentId);
686
+ if (!meta)
687
+ return null;
688
+ await this.generateMissingEmbeddings();
689
+ const allEmbeddings = await this.loadContentEmbeddings();
690
+ const result = new Map();
691
+ for (let i = 0; i < meta.chunkCount; i++) {
692
+ const embedding = allEmbeddings.get(`${contentId}:${i}`);
693
+ if (embedding) {
694
+ result.set(i, embedding);
695
+ }
696
+ }
697
+ return result;
698
+ }
699
+ /**
700
+ * Load content embeddings.
701
+ */
702
+ async loadContentEmbeddings() {
703
+ const embeddingsPath = this.getEmbeddingsPath();
704
+ try {
705
+ const content = await fs.readFile(embeddingsPath, 'utf-8');
706
+ const data = JSON.parse(content);
707
+ const map = new Map();
708
+ for (const entry of data.entries) {
709
+ // Key is contentId:chunkIndex
710
+ map.set(`${entry.contentId}:${entry.chunkIndex}`, entry.embedding);
711
+ }
712
+ return map;
713
+ }
714
+ catch (error) {
715
+ if (error.code === 'ENOENT') {
716
+ return new Map();
717
+ }
718
+ throw error;
719
+ }
720
+ }
721
+ /**
722
+ * Save content embeddings.
723
+ */
724
+ async saveContentEmbeddings(entries) {
725
+ const embeddingsPath = this.getEmbeddingsPath();
726
+ const provider = getEmbeddingProvider();
727
+ let existingData;
728
+ try {
729
+ const content = await fs.readFile(embeddingsPath, 'utf-8');
730
+ existingData = JSON.parse(content);
731
+ }
732
+ catch {
733
+ existingData = {
734
+ version: 1,
735
+ model: provider.name,
736
+ dimensions: provider.dimensions,
737
+ entries: [],
738
+ };
739
+ }
740
+ existingData.entries.push(...entries);
741
+ await fs.mkdir(this.basePath, { recursive: true });
742
+ await fs.writeFile(embeddingsPath, JSON.stringify(existingData), 'utf-8');
743
+ }
744
    /**
     * Generate embeddings for content that doesn't have them yet.
     * No-op when no embedding provider is available.
     *
     * @returns {Promise<number>} Count of newly generated embeddings.
     */
    async generateMissingEmbeddings() {
        if (!(await embeddingsAvailable())) {
            return 0;
        }
        const index = await this.loadIndex();
        const existingEmbeddings = await this.loadContentEmbeddings();
        const provider = getEmbeddingProvider();
        const newEntries = [];
        for (const meta of index) {
            for (let i = 0; i < meta.chunkCount; i++) {
                const key = `${meta.id}:${i}`;
                // Skip chunks that already have an embedding on disk.
                if (existingEmbeddings.has(key)) {
                    continue;
                }
                const chunkContent = await this.retrieve(meta.id, i);
                // Skip missing/empty chunks.
                if (!chunkContent)
                    continue;
                // One provider call per chunk, awaited sequentially.
                const [embedding] = await provider.embed([chunkContent]);
                if (embedding) {
                    newEntries.push({
                        contentId: meta.id,
                        chunkIndex: i,
                        embedding,
                        timestamp: new Date().toISOString(),
                    });
                }
            }
        }
        // Persist once at the end rather than per chunk.
        if (newEntries.length > 0) {
            await this.saveContentEmbeddings(newEntries);
        }
        return newEntries.length;
    }
780
+ /**
781
+ * Search stored content with keyword, semantic, or hybrid search.
782
+ */
783
+ async search(query, options = {}) {
784
+ const index = await this.loadIndex();
785
+ const queryLower = query.toLowerCase();
786
+ // Use pre-expanded terms if provided, otherwise split query into terms
787
+ const queryTerms = options.expandedTerms ?? queryLower.split(/\s+/).filter(t => t.length > 2);
788
+ const limit = options.limit ?? 10;
789
+ const mode = options.mode ?? 'hybrid';
790
+ // Keyword scores
791
+ const keywordResults = new Map();
792
+ for (const meta of index) {
793
+ // Apply filters
794
+ if (options.sourceTypes && !options.sourceTypes.includes(meta.sourceType)) {
795
+ continue;
796
+ }
797
+ if (options.contentTypes && !options.contentTypes.includes(meta.contentType)) {
798
+ continue;
799
+ }
800
+ const summaryLower = meta.summary.toLowerCase();
801
+ let summaryScore = 0;
802
+ for (const term of queryTerms) {
803
+ if (summaryLower.includes(term)) {
804
+ summaryScore++;
805
+ }
806
+ }
807
+ // Check summary for keyword match (any term matches)
808
+ if (summaryScore > 0) {
809
+ keywordResults.set(meta.id, {
810
+ meta,
811
+ score: Math.min(0.5 * (summaryScore / queryTerms.length), 0.5),
812
+ snippet: meta.summary.slice(0, SNIPPET_LENGTH),
813
+ });
814
+ continue;
815
+ }
816
+ // Check content chunks for keyword match
817
+ const content = await this.retrieve(meta.id);
818
+ if (content) {
819
+ const contentLower = content.toLowerCase();
820
+ let contentScore = 0;
821
+ let bestMatchIndex = -1;
822
+ for (const term of queryTerms) {
823
+ if (contentLower.includes(term)) {
824
+ contentScore++;
825
+ // Track first significant term match for snippet
826
+ if (bestMatchIndex < 0 && term.length > 3) {
827
+ bestMatchIndex = contentLower.indexOf(term);
828
+ }
829
+ }
830
+ }
831
+ if (contentScore > 0) {
832
+ // Generate snippet around the best match
833
+ const matchIndex = bestMatchIndex >= 0 ? bestMatchIndex : 0;
834
+ const start = Math.max(0, matchIndex - 50);
835
+ const end = Math.min(content.length, matchIndex + 250);
836
+ const snippet = (start > 0 ? '...' : '') +
837
+ content.slice(start, end) +
838
+ (end < content.length ? '...' : '');
839
+ keywordResults.set(meta.id, {
840
+ meta,
841
+ score: Math.min(0.8 * (contentScore / queryTerms.length), 0.8),
842
+ snippet,
843
+ });
844
+ }
845
+ }
846
+ }
847
+ // If keyword-only mode, return keyword results
848
+ if (mode === 'keyword') {
849
+ const results = Array.from(keywordResults.values());
850
+ results.sort((a, b) => b.score - a.score);
851
+ return results.slice(0, limit);
852
+ }
853
+ // Semantic search
854
+ const semanticResults = new Map();
855
+ if (await embeddingsAvailable()) {
856
+ try {
857
+ // Generate embeddings for any content that doesn't have them
858
+ await this.generateMissingEmbeddings();
859
+ const embeddings = await this.loadContentEmbeddings();
860
+ const provider = getEmbeddingProvider();
861
+ const [queryEmbedding] = await provider.embed([query]);
862
+ if (queryEmbedding) {
863
+ for (const meta of index) {
864
+ // Apply filters
865
+ if (options.sourceTypes && !options.sourceTypes.includes(meta.sourceType)) {
866
+ continue;
867
+ }
868
+ if (options.contentTypes && !options.contentTypes.includes(meta.contentType)) {
869
+ continue;
870
+ }
871
+ // Find best matching chunk
872
+ let bestScore = 0;
873
+ let bestChunkIndex = 0;
874
+ for (let i = 0; i < meta.chunkCount; i++) {
875
+ const embedding = embeddings.get(`${meta.id}:${i}`);
876
+ if (!embedding)
877
+ continue;
878
+ const similarity = cosineSimilarity(queryEmbedding, embedding);
879
+ if (similarity > bestScore) {
880
+ bestScore = similarity;
881
+ bestChunkIndex = i;
882
+ }
883
+ }
884
+ if (bestScore > SEMANTIC_THRESHOLD) {
885
+ const chunkContent = await this.retrieve(meta.id, bestChunkIndex);
886
+ const snippet = chunkContent?.slice(0, SNIPPET_LENGTH) + '...' || meta.summary;
887
+ semanticResults.set(meta.id, {
888
+ meta,
889
+ score: bestScore,
890
+ snippet,
891
+ });
892
+ }
893
+ }
894
+ }
895
+ }
896
+ catch {
897
+ // Fall back to keyword-only if semantic search fails
898
+ }
899
+ }
900
+ // If semantic-only mode, return semantic results
901
+ if (mode === 'semantic') {
902
+ const results = Array.from(semanticResults.values());
903
+ results.sort((a, b) => b.score - a.score);
904
+ return results.slice(0, limit);
905
+ }
906
+ // Hybrid mode: combine scores
907
+ const combinedResults = new Map();
908
+ for (const [id, result] of keywordResults) {
909
+ combinedResults.set(id, result);
910
+ }
911
+ for (const [id, result] of semanticResults) {
912
+ const existing = combinedResults.get(id);
913
+ if (existing) {
914
+ // Combine scores: weight keyword 0.3, semantic 0.7, boost if both match
915
+ const combinedScore = (existing.score * 0.3 + result.score * 0.7) * 1.2;
916
+ combinedResults.set(id, {
917
+ meta: result.meta,
918
+ score: Math.min(combinedScore, 1),
919
+ snippet: existing.snippet, // Keep keyword snippet as it's more targeted
920
+ });
921
+ }
922
+ else {
923
+ combinedResults.set(id, {
924
+ meta: result.meta,
925
+ score: result.score * 0.7, // Semantic-only gets weighted down
926
+ snippet: result.snippet,
927
+ });
928
+ }
929
+ }
930
+ const results = Array.from(combinedResults.values());
931
+ results.sort((a, b) => b.score - a.score);
932
+ return results.slice(0, limit);
933
+ }
934
+ /**
935
+ * Get statistics about stored content.
936
+ */
937
+ async getStats() {
938
+ const index = await this.loadIndex();
939
+ const stats = {
940
+ totalItems: index.length,
941
+ totalSize: 0,
942
+ totalTokens: 0,
943
+ bySourceType: {},
944
+ byContentType: {},
945
+ };
946
+ for (const meta of index) {
947
+ stats.totalSize += meta.originalSize;
948
+ stats.totalTokens += meta.tokenEstimate;
949
+ stats.bySourceType[meta.sourceType] = (stats.bySourceType[meta.sourceType] ?? 0) + 1;
950
+ stats.byContentType[meta.contentType] = (stats.byContentType[meta.contentType] ?? 0) + 1;
951
+ }
952
+ return stats;
953
+ }
954
+ }
955
+ //# sourceMappingURL=content-store.js.map