bluera-knowledge 0.12.11 → 0.13.0
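Summary of this release diff: the bundled chunk files are renamed (chunk-7DZZHYDU → chunk-6ZVW2P2F, chunk-S5VW7NPH → chunk-GCUKVV33, chunk-XVVMSRLO → chunk-H5AKKHY7) with all imports and sourceMappingURL references updated to match; the package version is bumped from 0.12.11 to 0.13.0; and IndexService now embeds each file's chunks in a single embedBatch call and processes files in parallel batches governed by a new concurrency option (default 4), with new tests covering batch embedding and parallel processing.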

@@ -3,7 +3,7 @@ import {
   createLogger,
   summarizePayload,
   truncateForLog
-} from "./chunk-7DZZHYDU.js";
+} from "./chunk-6ZVW2P2F.js";
 
 // src/crawl/intelligent-crawler.ts
 import { EventEmitter } from "events";
@@ -753,4 +753,4 @@ var IntelligentCrawler = class extends EventEmitter {
 export {
   IntelligentCrawler
 };
-//# sourceMappingURL=chunk-S5VW7NPH.js.map
+//# sourceMappingURL=chunk-GCUKVV33.js.map
@@ -7,7 +7,7 @@ import {
   createStoreId,
   destroyServices,
   summarizePayload
-} from "./chunk-7DZZHYDU.js";
+} from "./chunk-6ZVW2P2F.js";
 
 // src/mcp/server.ts
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
@@ -2151,4 +2151,4 @@ export {
   createMCPServer,
   runMCPServer
 };
-//# sourceMappingURL=chunk-XVVMSRLO.js.map
+//# sourceMappingURL=chunk-H5AKKHY7.js.map
package/dist/index.js CHANGED
@@ -7,10 +7,10 @@ import {
   isWebStoreDefinition,
   runMCPServer,
   spawnBackgroundWorker
-} from "./chunk-XVVMSRLO.js";
+} from "./chunk-H5AKKHY7.js";
 import {
   IntelligentCrawler
-} from "./chunk-S5VW7NPH.js";
+} from "./chunk-GCUKVV33.js";
 import {
   ASTParser,
   AdapterRegistry,
@@ -24,7 +24,7 @@ import {
   err,
   extractRepoName,
   ok
-} from "./chunk-7DZZHYDU.js";
+} from "./chunk-6ZVW2P2F.js";
 import "./chunk-HRQD3MPH.js";
 
 // src/index.ts
@@ -1,8 +1,8 @@
 import {
   createMCPServer,
   runMCPServer
-} from "../chunk-XVVMSRLO.js";
-import "../chunk-7DZZHYDU.js";
+} from "../chunk-H5AKKHY7.js";
+import "../chunk-6ZVW2P2F.js";
 import "../chunk-HRQD3MPH.js";
 export {
   createMCPServer,
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 import {
   IntelligentCrawler
-} from "../chunk-S5VW7NPH.js";
+} from "../chunk-GCUKVV33.js";
 import {
   JobService,
   createDocumentId,
@@ -10,7 +10,7 @@ import {
   createStoreId,
   destroyServices,
   shutdownLogger
-} from "../chunk-7DZZHYDU.js";
+} from "../chunk-6ZVW2P2F.js";
 import "../chunk-HRQD3MPH.js";
 
 // src/workers/background-worker-cli.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "bluera-knowledge",
-  "version": "0.12.11",
+  "version": "0.13.0",
   "description": "CLI tool for managing knowledge stores with semantic search",
   "type": "module",
   "bin": {
@@ -1952,3 +1952,350 @@ describe('IndexService - Symlink Handling', () => {
     // (on most systems, readdir with withFileTypes shows symlinks as isFile() if target is file)
   });
 });
+
+describe('IndexService - Batch Embedding', () => {
+  let indexService: IndexService;
+  let lanceStore: LanceStore;
+  let embeddingEngine: EmbeddingEngine;
+  let tempDir: string;
+  let testFilesDir: string;
+  const storeId = createStoreId('batch-embed-test');
+
+  beforeAll(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'index-batch-embed-test-'));
+    testFilesDir = join(tempDir, 'files');
+    await mkdir(testFilesDir, { recursive: true });
+
+    lanceStore = new LanceStore(tempDir);
+    embeddingEngine = new EmbeddingEngine();
+
+    await embeddingEngine.initialize();
+    await lanceStore.initialize(storeId);
+
+    indexService = new IndexService(lanceStore, embeddingEngine);
+  }, 120000);
+
+  afterAll(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  beforeEach(async () => {
+    // Clear test directory for fresh state
+    await rm(testFilesDir, { recursive: true, force: true });
+    await mkdir(testFilesDir, { recursive: true });
+  });
+
+  it('calls embedBatch instead of sequential embed for multiple chunks', async () => {
+    // Create a file large enough to produce multiple chunks (>1500 chars)
+    const largeContent = Array(50)
+      .fill('This is a paragraph of text that will be chunked. ')
+      .join('\n\n');
+    await writeFile(join(testFilesDir, 'large.md'), largeContent);
+
+    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
+
+    const store: FileStore = {
+      type: 'file',
+      id: storeId,
+      name: 'Batch Embed Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const result = await indexService.indexStore(store);
+
+    expect(result.success).toBe(true);
+    if (result.success) {
+      // Should have created multiple chunks
+      expect(result.data.chunksCreated).toBeGreaterThan(1);
+    }
+
+    // embedBatch should be called (it internally uses embed via Promise.all)
+    expect(embedBatchSpy).toHaveBeenCalled();
+    // Verify batch was called with multiple items
+    const callArgs = embedBatchSpy.mock.calls[0];
+    expect(callArgs).toBeDefined();
+    expect(callArgs[0].length).toBeGreaterThan(1);
+
+    embedBatchSpy.mockRestore();
+  });
+
+  it('preserves chunk order when using batch embedding', async () => {
+    // Create file with distinct, ordered sections that will produce multiple chunks
+    const sections = Array(10)
+      .fill(null)
+      .map((_, i) => `# Section ${String(i + 1)}\n\n${'Content for section. '.repeat(50)}`)
+      .join('\n\n');
+
+    await writeFile(join(testFilesDir, 'ordered.md'), sections);
+
+    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
+
+    const store: FileStore = {
+      type: 'file',
+      id: storeId,
+      name: 'Order Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const result = await indexService.indexStore(store);
+
+    expect(result.success).toBe(true);
+    if (result.success) {
+      // Verify chunks are in correct order
+      expect(result.data.chunksCreated).toBeGreaterThan(1);
+    }
+
+    // embedBatch should be called with chunks in order
+    expect(embedBatchSpy).toHaveBeenCalled();
+    const callArgs = embedBatchSpy.mock.calls[0];
+    expect(callArgs).toBeDefined();
+
+    // Verify that if content has "Section 1", it comes before "Section 2" in the array
+    const batchedTexts = callArgs[0];
+    const section1Index = batchedTexts.findIndex((t: string) => t.includes('Section 1'));
+    const section2Index = batchedTexts.findIndex((t: string) => t.includes('Section 2'));
+
+    // Section 1 should appear before Section 2 in the batch (or they may be in different chunks)
+    if (section1Index !== -1 && section2Index !== -1) {
+      expect(section1Index).toBeLessThan(section2Index);
+    }
+
+    embedBatchSpy.mockRestore();
+  });
+
+  it('handles single-chunk files correctly', async () => {
+    // Create a small file that won't be chunked
+    await writeFile(join(testFilesDir, 'small.ts'), 'export const x = 42;');
+
+    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
+
+    const store: FileStore = {
+      type: 'file',
+      id: storeId,
+      name: 'Single Chunk Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const result = await indexService.indexStore(store);
+
+    expect(result.success).toBe(true);
+    if (result.success) {
+      // Should have exactly one chunk
+      expect(result.data.chunksCreated).toBe(1);
+    }
+
+    // embedBatch should still be called (with a single item)
+    expect(embedBatchSpy).toHaveBeenCalled();
+
+    embedBatchSpy.mockRestore();
+  });
+
+  it('handles multiple files with batch embedding', async () => {
+    // Create multiple files to verify batch embedding works across files
+    await writeFile(join(testFilesDir, 'file1.ts'), 'export const a = 1;');
+    await writeFile(join(testFilesDir, 'file2.ts'), 'export const b = 2;');
+    await writeFile(join(testFilesDir, 'file3.ts'), 'export const c = 3;');
+
+    const embedBatchSpy = vi.spyOn(embeddingEngine, 'embedBatch');
+
+    const multiStoreId = createStoreId('multi-file-test');
+    await lanceStore.initialize(multiStoreId);
+
+    const multiIndexService = new IndexService(lanceStore, embeddingEngine);
+
+    const store: FileStore = {
+      type: 'file',
+      id: multiStoreId,
+      name: 'Multi File Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const result = await multiIndexService.indexStore(store);
+
+    expect(result.success).toBe(true);
+    if (result.success) {
+      // Should index all 3 files
+      expect(result.data.documentsIndexed).toBe(3);
+    }
+
+    // embedBatch should be called once per file (3 times)
+    expect(embedBatchSpy).toHaveBeenCalledTimes(3);
+
+    embedBatchSpy.mockRestore();
+  });
+});
+
+describe('IndexService - Parallel File Processing', () => {
+  let lanceStore: LanceStore;
+  let embeddingEngine: EmbeddingEngine;
+  let tempDir: string;
+  let testFilesDir: string;
+  const storeId = createStoreId('parallel-test');
+
+  beforeAll(async () => {
+    tempDir = await mkdtemp(join(tmpdir(), 'index-parallel-test-'));
+    testFilesDir = join(tempDir, 'files');
+    await mkdir(testFilesDir, { recursive: true });
+
+    lanceStore = new LanceStore(tempDir);
+    embeddingEngine = new EmbeddingEngine();
+
+    await embeddingEngine.initialize();
+    await lanceStore.initialize(storeId);
+  }, 120000);
+
+  afterAll(async () => {
+    await rm(tempDir, { recursive: true, force: true });
+  });
+
+  beforeEach(async () => {
+    // Clear test directory for fresh state
+    await rm(testFilesDir, { recursive: true, force: true });
+    await mkdir(testFilesDir, { recursive: true });
+  });
+
+  it('uses concurrency option from IndexService constructor', async () => {
+    // Create 10 test files
+    for (let i = 0; i < 10; i++) {
+      await writeFile(
+        join(testFilesDir, `file${String(i)}.ts`),
+        `export const x${String(i)} = ${String(i)};`
+      );
+    }
+
+    // Track when files start being processed
+    const processingTimestamps: number[] = [];
+    const originalEmbedBatch = embeddingEngine.embedBatch.bind(embeddingEngine);
+
+    vi.spyOn(embeddingEngine, 'embedBatch').mockImplementation(async (texts: string[]) => {
+      processingTimestamps.push(Date.now());
+      // Small delay to simulate processing time
+      await new Promise((resolve) => setTimeout(resolve, 50));
+      return originalEmbedBatch(texts);
+    });
+
+    const concurrency = 4;
+    const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });
+
+    const parallelStoreId = createStoreId('parallel-concurrency-test');
+    await lanceStore.initialize(parallelStoreId);
+
+    const store: FileStore = {
+      type: 'file',
+      id: parallelStoreId,
+      name: 'Parallel Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const result = await indexService.indexStore(store);
+
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.documentsIndexed).toBe(10);
+    }
+
+    vi.restoreAllMocks();
+  });
+
+  it('reports progress correctly with parallel processing', async () => {
+    // Create test files
+    for (let i = 0; i < 5; i++) {
+      await writeFile(
+        join(testFilesDir, `progress${String(i)}.ts`),
+        `export const p${String(i)} = ${String(i)};`
+      );
+    }
+
+    const concurrency = 2;
+    const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });
+
+    const progressStoreId = createStoreId('progress-test');
+    await lanceStore.initialize(progressStoreId);
+
+    const store: FileStore = {
+      type: 'file',
+      id: progressStoreId,
+      name: 'Progress Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    const progressEvents: Array<{ type: string; current: number; total: number }> = [];
+    const onProgress = (event: { type: string; current: number; total: number }): void => {
+      progressEvents.push(event);
+    };
+
+    const result = await indexService.indexStore(store, onProgress);
+
+    expect(result.success).toBe(true);
+
+    // Should have start event
+    expect(progressEvents.some((e) => e.type === 'start')).toBe(true);
+
+    // Should have progress events
+    const progressOnly = progressEvents.filter((e) => e.type === 'progress');
+    expect(progressOnly.length).toBeGreaterThan(0);
+
+    // Should have complete event
+    expect(progressEvents.some((e) => e.type === 'complete')).toBe(true);
+
+    // Current should never exceed total
+    for (const event of progressEvents) {
+      expect(event.current).toBeLessThanOrEqual(event.total);
+    }
+  });
+
+  it('continues processing remaining files if one file fails to read', async () => {
+    // Create valid test files
+    await writeFile(join(testFilesDir, 'valid1.ts'), 'export const a = 1;');
+    await writeFile(join(testFilesDir, 'valid2.ts'), 'export const b = 2;');
+    await writeFile(join(testFilesDir, 'valid3.ts'), 'export const c = 3;');
+
+    // Create a file that will fail to read (remove read permission)
+    const unreadablePath = join(testFilesDir, 'unreadable.ts');
+    await writeFile(unreadablePath, 'export const x = 999;');
+    await chmod(unreadablePath, 0o000);
+
+    const concurrency = 2;
+    const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency });
+
+    const errorStoreId = createStoreId('error-handling-test');
+    await lanceStore.initialize(errorStoreId);
+
+    const store: FileStore = {
+      type: 'file',
+      id: errorStoreId,
+      name: 'Error Handling Test Store',
+      path: testFilesDir,
+      createdAt: new Date(),
+      updatedAt: new Date(),
+    };
+
+    // The indexing should either succeed with partial results or fail gracefully
+    const result = await indexService.indexStore(store);
+
+    // Restore permissions for cleanup
+    await chmod(unreadablePath, 0o644);
+
+    // With current implementation, it may fail completely on the first error
+    // This test documents the current behavior
+    if (result.success) {
+      // If it succeeds, it should have indexed at least the valid files
+      expect(result.data.documentsIndexed).toBeGreaterThanOrEqual(0);
+    } else {
+      // If it fails, it should have an error
+      expect(result.error).toBeDefined();
+    }
+  });
+});
@@ -25,6 +25,7 @@ interface IndexOptions {
   chunkSize?: number;
   chunkOverlap?: number;
   codeGraphService?: CodeGraphService;
+  concurrency?: number;
 }
 
 const TEXT_EXTENSIONS = new Set([
@@ -62,6 +63,7 @@ export class IndexService {
   private readonly embeddingEngine: EmbeddingEngine;
   private readonly chunker: ChunkingService;
   private readonly codeGraphService: CodeGraphService | undefined;
+  private readonly concurrency: number;
 
   constructor(
     lanceStore: LanceStore,
@@ -75,6 +77,7 @@ export class IndexService {
       chunkOverlap: options.chunkOverlap ?? 100,
     });
     this.codeGraphService = options.codeGraphService;
+    this.concurrency = options.concurrency ?? 4;
   }
 
   async indexStore(store: Store, onProgress?: ProgressCallback): Promise<Result<IndexResult>> {
@@ -123,6 +126,7 @@ export class IndexService {
         storeId: store.id,
         path: store.path,
         fileCount: files.length,
+        concurrency: this.concurrency,
       },
       'Files scanned for indexing'
     );
@@ -138,59 +142,30 @@
       message: 'Starting index',
     });
 
-    for (const filePath of files) {
-      const content = await readFile(filePath, 'utf-8');
-      const fileHash = createHash('md5').update(content).digest('hex');
-      // Pass file path for semantic Markdown chunking
-      const chunks = this.chunker.chunk(content, filePath);
+    // Process files in parallel batches
+    for (let i = 0; i < files.length; i += this.concurrency) {
+      const batch = files.slice(i, i + this.concurrency);
 
-      // Determine file type for ranking
-      const ext = extname(filePath).toLowerCase();
-      const fileName = basename(filePath).toLowerCase();
-      const fileType = this.classifyFileType(ext, fileName, filePath);
+      const batchResults = await Promise.all(
+        batch.map((filePath) => this.processFile(filePath, store))
+      );
 
-      // Collect source files for code graph
-      if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
-        sourceFiles.push({ path: filePath, content });
+      // Collect results from batch
+      for (const result of batchResults) {
+        documents.push(...result.documents);
+        if (result.sourceFile !== undefined) {
+          sourceFiles.push(result.sourceFile);
+        }
       }
 
-      for (const chunk of chunks) {
-        const vector = await this.embeddingEngine.embed(chunk.content);
-        const chunkId =
-          chunks.length > 1
-            ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}`
-            : `${store.id}-${fileHash}`;
-
-        const doc: Document = {
-          id: createDocumentId(chunkId),
-          content: chunk.content,
-          vector,
-          metadata: {
-            type: chunks.length > 1 ? 'chunk' : 'file',
-            storeId: store.id,
-            path: filePath,
-            indexedAt: new Date(),
-            fileHash,
-            chunkIndex: chunk.chunkIndex,
-            totalChunks: chunk.totalChunks,
-            // New metadata for ranking
-            fileType,
-            sectionHeader: chunk.sectionHeader,
-            functionName: chunk.functionName,
-            hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
-            docSummary: chunk.docSummary,
-          },
-        };
-        documents.push(doc);
-      }
-      filesProcessed++;
+      filesProcessed += batch.length;
 
-      // Emit progress event
+      // Emit progress event after each batch
      onProgress?.({
         type: 'progress',
         current: filesProcessed,
         total: files.length,
-        message: `Indexing ${filePath}`,
+        message: `Indexed ${String(filesProcessed)}/${String(files.length)} files`,
       });
     }
 
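Note on the pattern in the hunk above: the rewritten loop gives batch-level concurrency rather than a sliding worker pool. Each group of `concurrency` files is mapped through processFile under Promise.all, and the next group starts only once the whole batch settles, so one slow file can hold up its batch. A minimal standalone sketch of the pattern, assuming nothing beyond standard TypeScript (mapInBatches and processOne are illustrative names, not part of the package):

async function mapInBatches<T, R>(
  items: T[],
  batchSize: number,
  processOne: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    // At most batchSize promises are in flight; the next batch starts
    // only after every promise in this one resolves (or one rejects).
    const batch = items.slice(i, i + batchSize);
    results.push(...(await Promise.all(batch.map(processOne))));
  }
  return results;
}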
@@ -235,6 +210,80 @@
     });
   }
 
+  /**
+   * Process a single file: read, chunk, embed, and return documents.
+   * Extracted for parallel processing.
+   */
+  private async processFile(
+    filePath: string,
+    store: FileStore | RepoStore
+  ): Promise<{
+    documents: Document[];
+    sourceFile: { path: string; content: string } | undefined;
+  }> {
+    const content = await readFile(filePath, 'utf-8');
+    const fileHash = createHash('md5').update(content).digest('hex');
+    const chunks = this.chunker.chunk(content, filePath);
+
+    const ext = extname(filePath).toLowerCase();
+    const fileName = basename(filePath).toLowerCase();
+    const fileType = this.classifyFileType(ext, fileName, filePath);
+
+    // Track source file for code graph
+    const sourceFile = ['.ts', '.tsx', '.js', '.jsx'].includes(ext)
+      ? { path: filePath, content }
+      : undefined;
+
+    // Skip files with no chunks (empty files)
+    if (chunks.length === 0) {
+      return { documents: [], sourceFile };
+    }
+
+    // Batch embed all chunks from this file
+    const chunkContents = chunks.map((c) => c.content);
+    const vectors = await this.embeddingEngine.embedBatch(chunkContents);
+
+    const documents: Document[] = [];
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+      const vector = vectors[i];
+
+      // Fail fast if chunk/vector mismatch (should never happen)
+      if (chunk === undefined || vector === undefined) {
+        throw new Error(
+          `Chunk/vector mismatch at index ${String(i)}: chunk=${String(chunk !== undefined)}, vector=${String(vector !== undefined)}`
+        );
+      }
+
+      const chunkId =
+        chunks.length > 1
+          ? `${store.id}-${fileHash}-${String(chunk.chunkIndex)}`
+          : `${store.id}-${fileHash}`;
+
+      documents.push({
+        id: createDocumentId(chunkId),
+        content: chunk.content,
+        vector,
+        metadata: {
+          type: chunks.length > 1 ? 'chunk' : 'file',
+          storeId: store.id,
+          path: filePath,
+          indexedAt: new Date(),
+          fileHash,
+          chunkIndex: chunk.chunkIndex,
+          totalChunks: chunk.totalChunks,
+          fileType,
+          sectionHeader: chunk.sectionHeader,
+          functionName: chunk.functionName,
+          hasDocComments: /\/\*\*[\s\S]*?\*\//.test(chunk.content),
+          docSummary: chunk.docSummary,
+        },
+      });
+    }
+
+    return { documents, sourceFile };
+  }
+
   private async scanDirectory(dir: string): Promise<string[]> {
     const files: string[] = [];
  const entries = await readdir(dir, { withFileTypes: true });
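
Taken together, these changes let callers tune indexing throughput at construction time. A hedged usage sketch follows: the module paths are assumptions, while the identifiers (IndexService, LanceStore, EmbeddingEngine, createStoreId) and the constructor and indexStore signatures are taken from the diff above.

import { IndexService } from './services/index.service'; // path assumed
import { LanceStore } from './db/lance-store'; // path assumed
import { EmbeddingEngine } from './embedding/engine'; // path assumed
import { createStoreId } from './types/ids'; // path assumed

const lanceStore = new LanceStore('/tmp/knowledge-example');
const embeddingEngine = new EmbeddingEngine();
await embeddingEngine.initialize();

const storeId = createStoreId('docs');
await lanceStore.initialize(storeId);

// concurrency defaults to 4 when omitted (options.concurrency ?? 4).
const indexService = new IndexService(lanceStore, embeddingEngine, { concurrency: 8 });

// Progress events carry type ('start' | 'progress' | 'complete'), current, and total,
// as exercised by the new tests; progress is now reported per batch, not per file.
const result = await indexService.indexStore(
  {
    type: 'file',
    id: storeId,
    name: 'Docs',
    path: '/path/to/docs',
    createdAt: new Date(),
    updatedAt: new Date(),
  },
  (e) => console.log(`${e.type}: ${String(e.current)}/${String(e.total)}`)
);

if (result.success) {
  console.log(`indexed ${String(result.data.documentsIndexed)} documents`);
}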