@renseiai/agentfactory-code-intelligence 0.8.8 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/src/embedding/__tests__/embedding.test.d.ts +2 -0
  2. package/dist/src/embedding/__tests__/embedding.test.d.ts.map +1 -0
  3. package/dist/src/embedding/__tests__/embedding.test.js +339 -0
  4. package/dist/src/embedding/chunker.d.ts +40 -0
  5. package/dist/src/embedding/chunker.d.ts.map +1 -0
  6. package/dist/src/embedding/chunker.js +135 -0
  7. package/dist/src/embedding/embedding-provider.d.ts +15 -0
  8. package/dist/src/embedding/embedding-provider.d.ts.map +1 -0
  9. package/dist/src/embedding/embedding-provider.js +1 -0
  10. package/dist/src/embedding/voyage-provider.d.ts +39 -0
  11. package/dist/src/embedding/voyage-provider.d.ts.map +1 -0
  12. package/dist/src/embedding/voyage-provider.js +146 -0
  13. package/dist/src/index.d.ts +14 -2
  14. package/dist/src/index.d.ts.map +1 -1
  15. package/dist/src/index.js +10 -1
  16. package/dist/src/indexing/__tests__/vector-indexing.test.d.ts +2 -0
  17. package/dist/src/indexing/__tests__/vector-indexing.test.d.ts.map +1 -0
  18. package/dist/src/indexing/__tests__/vector-indexing.test.js +291 -0
  19. package/dist/src/indexing/incremental-indexer.d.ts +4 -0
  20. package/dist/src/indexing/incremental-indexer.d.ts.map +1 -1
  21. package/dist/src/indexing/incremental-indexer.js +45 -0
  22. package/dist/src/indexing/vector-indexer.d.ts +63 -0
  23. package/dist/src/indexing/vector-indexer.d.ts.map +1 -0
  24. package/dist/src/indexing/vector-indexer.js +197 -0
  25. package/dist/src/plugin/code-intelligence-plugin.d.ts.map +1 -1
  26. package/dist/src/plugin/code-intelligence-plugin.js +4 -2
  27. package/dist/src/reranking/__tests__/reranker.test.d.ts +2 -0
  28. package/dist/src/reranking/__tests__/reranker.test.d.ts.map +1 -0
  29. package/dist/src/reranking/__tests__/reranker.test.js +503 -0
  30. package/dist/src/reranking/cohere-reranker.d.ts +26 -0
  31. package/dist/src/reranking/cohere-reranker.d.ts.map +1 -0
  32. package/dist/src/reranking/cohere-reranker.js +110 -0
  33. package/dist/src/reranking/reranker-provider.d.ts +40 -0
  34. package/dist/src/reranking/reranker-provider.d.ts.map +1 -0
  35. package/dist/src/reranking/reranker-provider.js +6 -0
  36. package/dist/src/reranking/voyage-reranker.d.ts +27 -0
  37. package/dist/src/reranking/voyage-reranker.d.ts.map +1 -0
  38. package/dist/src/reranking/voyage-reranker.js +111 -0
  39. package/dist/src/search/__tests__/hybrid-search.test.d.ts +2 -0
  40. package/dist/src/search/__tests__/hybrid-search.test.d.ts.map +1 -0
  41. package/dist/src/search/__tests__/hybrid-search.test.js +437 -0
  42. package/dist/src/search/__tests__/query-classifier.test.d.ts +2 -0
  43. package/dist/src/search/__tests__/query-classifier.test.d.ts.map +1 -0
  44. package/dist/src/search/__tests__/query-classifier.test.js +136 -0
  45. package/dist/src/search/hybrid-search.d.ts +56 -0
  46. package/dist/src/search/hybrid-search.d.ts.map +1 -0
  47. package/dist/src/search/hybrid-search.js +299 -0
  48. package/dist/src/search/query-classifier.d.ts +20 -0
  49. package/dist/src/search/query-classifier.d.ts.map +1 -0
  50. package/dist/src/search/query-classifier.js +58 -0
  51. package/dist/src/search/score-normalizer.d.ts +16 -0
  52. package/dist/src/search/score-normalizer.d.ts.map +1 -0
  53. package/dist/src/search/score-normalizer.js +26 -0
  54. package/dist/src/types.d.ts +83 -0
  55. package/dist/src/types.d.ts.map +1 -1
  56. package/dist/src/types.js +36 -2
  57. package/dist/src/vector/__tests__/vector-store.test.d.ts +2 -0
  58. package/dist/src/vector/__tests__/vector-store.test.d.ts.map +1 -0
  59. package/dist/src/vector/__tests__/vector-store.test.js +278 -0
  60. package/dist/src/vector/hnsw-store.d.ts +48 -0
  61. package/dist/src/vector/hnsw-store.d.ts.map +1 -0
  62. package/dist/src/vector/hnsw-store.js +437 -0
  63. package/dist/src/vector/vector-store.d.ts +15 -0
  64. package/dist/src/vector/vector-store.d.ts.map +1 -0
  65. package/dist/src/vector/vector-store.js +1 -0
  66. package/package.json +1 -1
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=embedding.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.test.d.ts","sourceRoot":"","sources":["../../../../src/embedding/__tests__/embedding.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,339 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
+ import { VoyageCodeProvider } from '../voyage-provider.js';
3
+ import { Chunker } from '../chunker.js';
4
+ // ── Helpers ──────────────────────────────────────────────────────────
5
/**
 * Build a minimal symbol fixture for tests.
 *
 * Starts from `line: 0` / `exported: true` and lets any field in `extra`
 * override or extend those defaults.
 */
function makeSymbol(name, kind, filePath, extra) {
    const symbol = { name, kind, filePath, line: 0, exported: true };
    // Object.assign ignores a null/undefined source, same as spreading it.
    return Object.assign(symbol, extra);
}
15
/**
 * Build a FileAST fixture for tests: the given path, language and symbols,
 * with empty import and export lists.
 */
function makeFileAST(filePath, language, symbols) {
    const noImports = [];
    const noExports = [];
    return { filePath, language, symbols, imports: noImports, exports: noExports };
}
18
/**
 * Create a minimal stand-in for a fetch response.
 *
 * `ok` is true for 2xx statuses; `json()` resolves to `body` and `text()`
 * resolves to its JSON serialization. `headers` is optional.
 */
function mockFetchResponse(status, body, headers) {
    const ok = status >= 200 && status < 300;
    const responseHeaders = new Headers(headers ?? {});
    return {
        ok,
        status,
        headers: responseHeaders,
        async json() {
            return body;
        },
        async text() {
            return JSON.stringify(body);
        },
    };
}
28
/**
 * Build a Voyage API success payload containing `count` embeddings of
 * `dimensions` components each (256 when omitted).
 *
 * Values are deterministic: component `col` of embedding `row` is
 * `(row + 1) * 0.01 + col * 0.001`. Usage reports 10 tokens per input.
 */
function voyageSuccessResponse(count, dimensions = 256) {
    const buildEmbedding = (row) => Array.from({ length: dimensions }).map((_unused, col) => (row + 1) * 0.01 + col * 0.001);
    const data = Array.from({ length: count }).map((_unused, row) => ({
        embedding: buildEmbedding(row),
        index: row,
    }));
    const usage = { total_tokens: count * 10 };
    return { data, model: 'voyage-code-3', usage };
}
39
+ // ── VoyageCodeProvider ───────────────────────────────────────────────
40
+ describe('VoyageCodeProvider', () => {
41
+ let originalEnv;
42
+ beforeEach(() => {
43
+ originalEnv = process.env.VOYAGE_API_KEY;
44
+ process.env.VOYAGE_API_KEY = 'test-api-key';
45
+ });
46
+ afterEach(() => {
47
+ if (originalEnv !== undefined) {
48
+ process.env.VOYAGE_API_KEY = originalEnv;
49
+ }
50
+ else {
51
+ delete process.env.VOYAGE_API_KEY;
52
+ }
53
+ vi.restoreAllMocks();
54
+ });
55
+ it('throws on missing API key', () => {
56
+ delete process.env.VOYAGE_API_KEY;
57
+ expect(() => new VoyageCodeProvider()).toThrow('VOYAGE_API_KEY');
58
+ });
59
+ it('uses default config values', () => {
60
+ const provider = new VoyageCodeProvider();
61
+ expect(provider.model).toBe('voyage-code-3');
62
+ expect(provider.dimensions).toBe(256);
63
+ });
64
+ it('accepts custom config values', () => {
65
+ const provider = new VoyageCodeProvider({
66
+ model: 'voyage-code-3',
67
+ dimensions: 1024,
68
+ batchSize: 64,
69
+ maxRetries: 5,
70
+ });
71
+ expect(provider.dimensions).toBe(1024);
72
+ });
73
+ it('embeds a single query', async () => {
74
+ const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(1)));
75
+ const provider = new VoyageCodeProvider();
76
+ const result = await provider.embedQuery('function hello()');
77
+ expect(result).toHaveLength(256);
78
+ expect(fetchSpy).toHaveBeenCalledTimes(1);
79
+ // Verify the request body includes input_type: 'query'
80
+ const callArgs = fetchSpy.mock.calls[0];
81
+ const body = JSON.parse(callArgs[1].body);
82
+ expect(body.input_type).toBe('query');
83
+ expect(body.output_dimension).toBe(256);
84
+ });
85
+ it('embeds a batch of texts', async () => {
86
+ const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(3)));
87
+ const provider = new VoyageCodeProvider();
88
+ const results = await provider.embed(['text 1', 'text 2', 'text 3']);
89
+ expect(results).toHaveLength(3);
90
+ expect(results[0]).toHaveLength(256);
91
+ expect(fetchSpy).toHaveBeenCalledTimes(1);
92
+ // Verify the request body includes input_type: 'document'
93
+ const callArgs = fetchSpy.mock.calls[0];
94
+ const body = JSON.parse(callArgs[1].body);
95
+ expect(body.input_type).toBe('document');
96
+ });
97
+ it('returns empty array for empty input', async () => {
98
+ const fetchSpy = vi.spyOn(globalThis, 'fetch');
99
+ const provider = new VoyageCodeProvider();
100
+ const results = await provider.embed([]);
101
+ expect(results).toHaveLength(0);
102
+ expect(fetchSpy).not.toHaveBeenCalled();
103
+ });
104
+ it('splits large batches into chunks of 128', async () => {
105
+ // Create 300 texts, should be split into 3 batches (128 + 128 + 44)
106
+ const texts = Array.from({ length: 300 }, (_, i) => `text ${i}`);
107
+ const fetchSpy = vi.spyOn(globalThis, 'fetch')
108
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(128)))
109
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(128)))
110
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(44)));
111
+ const provider = new VoyageCodeProvider();
112
+ const results = await provider.embed(texts);
113
+ expect(results).toHaveLength(300);
114
+ expect(fetchSpy).toHaveBeenCalledTimes(3);
115
+ });
116
+ it('retries on 429 rate limit', async () => {
117
+ const fetchSpy = vi.spyOn(globalThis, 'fetch')
118
+ .mockResolvedValueOnce(mockFetchResponse(429, { detail: 'Rate limited' }))
119
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(1)));
120
+ const provider = new VoyageCodeProvider({ maxRetries: 3 });
121
+ // Mock sleep to avoid waiting
122
+ vi.spyOn(provider, 'sleep').mockResolvedValue(undefined);
123
+ const result = await provider.embedQuery('test');
124
+ expect(result).toHaveLength(256);
125
+ expect(fetchSpy).toHaveBeenCalledTimes(2);
126
+ });
127
+ it('retries on 5xx server errors', async () => {
128
+ const fetchSpy = vi.spyOn(globalThis, 'fetch')
129
+ .mockResolvedValueOnce(mockFetchResponse(503, { detail: 'Service unavailable' }))
130
+ .mockResolvedValueOnce(mockFetchResponse(500, { detail: 'Internal error' }))
131
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(1)));
132
+ const provider = new VoyageCodeProvider({ maxRetries: 3 });
133
+ vi.spyOn(provider, 'sleep').mockResolvedValue(undefined);
134
+ const result = await provider.embedQuery('test');
135
+ expect(result).toHaveLength(256);
136
+ expect(fetchSpy).toHaveBeenCalledTimes(3);
137
+ });
138
+ it('throws after exhausting retries', async () => {
139
+ vi.spyOn(globalThis, 'fetch')
140
+ .mockResolvedValue(mockFetchResponse(429, { detail: 'Rate limited' }));
141
+ const provider = new VoyageCodeProvider({ maxRetries: 2 });
142
+ vi.spyOn(provider, 'sleep').mockResolvedValue(undefined);
143
+ await expect(provider.embedQuery('test')).rejects.toThrow('429');
144
+ });
145
+ it('throws immediately on 4xx non-retryable errors', async () => {
146
+ vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockFetchResponse(401, { detail: 'Invalid API key' }));
147
+ const provider = new VoyageCodeProvider();
148
+ await expect(provider.embedQuery('test')).rejects.toThrow('Invalid API key');
149
+ });
150
+ it('respects Retry-After header', async () => {
151
+ const fetchSpy = vi.spyOn(globalThis, 'fetch')
152
+ .mockResolvedValueOnce(mockFetchResponse(429, { detail: 'Rate limited' }, { 'retry-after': '5' }))
153
+ .mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(1)));
154
+ const provider = new VoyageCodeProvider({ maxRetries: 3 });
155
+ const sleepSpy = vi.spyOn(provider, 'sleep').mockResolvedValue(undefined);
156
+ await provider.embedQuery('test');
157
+ // Should have waited 5000ms (5 seconds from Retry-After header)
158
+ expect(sleepSpy).toHaveBeenCalledWith(5000);
159
+ });
160
+ it('sends correct Authorization header', async () => {
161
+ const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(mockFetchResponse(200, voyageSuccessResponse(1)));
162
+ const provider = new VoyageCodeProvider();
163
+ await provider.embedQuery('test');
164
+ const callArgs = fetchSpy.mock.calls[0];
165
+ const headers = callArgs[1].headers;
166
+ expect(headers['Authorization']).toBe('Bearer test-api-key');
167
+ });
168
+ });
169
+ // ── Chunker ──────────────────────────────────────────────────────────
170
+ describe('Chunker', () => {
171
+ it('creates a chunk from a simple symbol', () => {
172
+ const chunker = new Chunker();
173
+ const ast = makeFileAST('src/utils.ts', 'typescript', [
174
+ makeSymbol('add', 'function', 'src/utils.ts', {
175
+ line: 0,
176
+ endLine: 3,
177
+ signature: 'function add(a: number, b: number): number',
178
+ documentation: 'Adds two numbers.',
179
+ }),
180
+ ]);
181
+ const fileContent = [
182
+ 'function add(a: number, b: number): number {',
183
+ ' // Adds two numbers.',
184
+ ' return a + b',
185
+ '}',
186
+ ].join('\n');
187
+ const chunks = chunker.chunkFile(ast, fileContent);
188
+ expect(chunks).toHaveLength(1);
189
+ expect(chunks[0].id).toBe('src/utils.ts:add:0');
190
+ expect(chunks[0].content).toContain('function add(a: number, b: number): number');
191
+ expect(chunks[0].content).toContain('Adds two numbers.');
192
+ expect(chunks[0].metadata.filePath).toBe('src/utils.ts');
193
+ expect(chunks[0].metadata.symbolName).toBe('add');
194
+ expect(chunks[0].metadata.symbolKind).toBe('function');
195
+ expect(chunks[0].metadata.startLine).toBe(0);
196
+ expect(chunks[0].metadata.endLine).toBe(3);
197
+ expect(chunks[0].metadata.language).toBe('typescript');
198
+ });
199
+ it('creates chunks for multiple symbols in a file', () => {
200
+ const chunker = new Chunker();
201
+ const ast = makeFileAST('src/math.ts', 'typescript', [
202
+ makeSymbol('add', 'function', 'src/math.ts', { line: 0, endLine: 2 }),
203
+ makeSymbol('subtract', 'function', 'src/math.ts', { line: 4, endLine: 6 }),
204
+ makeSymbol('MathHelper', 'class', 'src/math.ts', { line: 8, endLine: 20 }),
205
+ ]);
206
+ const chunks = chunker.chunkFile(ast);
207
+ expect(chunks).toHaveLength(3);
208
+ expect(chunks[0].metadata.symbolName).toBe('add');
209
+ expect(chunks[1].metadata.symbolName).toBe('subtract');
210
+ expect(chunks[2].metadata.symbolName).toBe('MathHelper');
211
+ });
212
+ it('handles symbols without endLine as single-line', () => {
213
+ const chunker = new Chunker();
214
+ const ast = makeFileAST('src/types.ts', 'typescript', [
215
+ makeSymbol('Config', 'type', 'src/types.ts', { line: 5 }),
216
+ ]);
217
+ const chunks = chunker.chunkFile(ast);
218
+ expect(chunks).toHaveLength(1);
219
+ expect(chunks[0].metadata.startLine).toBe(5);
220
+ expect(chunks[0].metadata.endLine).toBe(5);
221
+ });
222
+ it('handles symbols without signature or documentation', () => {
223
+ const chunker = new Chunker();
224
+ const ast = makeFileAST('src/utils.ts', 'typescript', [
225
+ makeSymbol('helper', 'function', 'src/utils.ts', { line: 0, endLine: 2 }),
226
+ ]);
227
+ const fileContent = 'function helper() {\n return true\n}';
228
+ const chunks = chunker.chunkFile(ast, fileContent);
229
+ expect(chunks).toHaveLength(1);
230
+ expect(chunks[0].content).toContain('function helper()');
231
+ });
232
+ it('uses sliding window for large symbols', () => {
233
+ const chunker = new Chunker({ maxChunkLines: 10, overlapLines: 2 });
234
+ const ast = makeFileAST('src/big.ts', 'typescript', [
235
+ makeSymbol('bigFunction', 'function', 'src/big.ts', {
236
+ line: 0,
237
+ endLine: 24,
238
+ signature: 'function bigFunction(): void',
239
+ }),
240
+ ]);
241
+ // Create 25 lines of content
242
+ const lines = Array.from({ length: 25 }, (_, i) => ` line ${i}`);
243
+ const fileContent = lines.join('\n');
244
+ const chunks = chunker.chunkFile(ast, fileContent);
245
+ // 25 lines with maxChunkLines=10, overlap=2 => step=8
246
+ // Window 0: 0-9, Window 1: 8-17, Window 2: 16-24, Window 3: 24-24
247
+ expect(chunks.length).toBe(4);
248
+ // First chunk should have signature prepended
249
+ expect(chunks[0].content).toContain('function bigFunction(): void');
250
+ expect(chunks[0].id).toBe('src/big.ts:bigFunction:0');
251
+ expect(chunks[0].metadata.startLine).toBe(0);
252
+ expect(chunks[0].metadata.endLine).toBe(9);
253
+ // Second chunk
254
+ expect(chunks[1].id).toBe('src/big.ts:bigFunction:8:1');
255
+ expect(chunks[1].metadata.startLine).toBe(8);
256
+ expect(chunks[1].metadata.endLine).toBe(17);
257
+ // Third chunk
258
+ expect(chunks[2].id).toBe('src/big.ts:bigFunction:16:2');
259
+ expect(chunks[2].metadata.startLine).toBe(16);
260
+ expect(chunks[2].metadata.endLine).toBe(24);
261
+ // Fourth chunk (tail)
262
+ expect(chunks[3].id).toBe('src/big.ts:bigFunction:24:3');
263
+ expect(chunks[3].metadata.startLine).toBe(24);
264
+ expect(chunks[3].metadata.endLine).toBe(24);
265
+ });
266
+ it('sliding window chunks overlap correctly', () => {
267
+ const chunker = new Chunker({ maxChunkLines: 10, overlapLines: 3 });
268
+ const ast = makeFileAST('src/big.ts', 'typescript', [
269
+ makeSymbol('fn', 'function', 'src/big.ts', { line: 0, endLine: 19 }),
270
+ ]);
271
+ // 20 lines total
272
+ const lines = Array.from({ length: 20 }, (_, i) => `line ${i}`);
273
+ const fileContent = lines.join('\n');
274
+ const chunks = chunker.chunkFile(ast, fileContent);
275
+ // step = 10 - 3 = 7
276
+ // Window 0: 0-9, Window 1: 7-16, Window 2: 14-19 => 3 chunks
277
+ expect(chunks.length).toBe(3);
278
+ // Lines 7-9 should appear in both first and second chunk (overlap)
279
+ expect(chunks[0].content).toContain('line 7');
280
+ expect(chunks[0].content).toContain('line 9');
281
+ expect(chunks[1].content).toContain('line 7');
282
+ expect(chunks[1].content).toContain('line 9');
283
+ });
284
+ it('chunks multiple FileASTs via chunkFiles', () => {
285
+ const chunker = new Chunker();
286
+ const asts = [
287
+ makeFileAST('a.ts', 'typescript', [
288
+ makeSymbol('foo', 'function', 'a.ts', { line: 0, endLine: 5 }),
289
+ ]),
290
+ makeFileAST('b.py', 'python', [
291
+ makeSymbol('bar', 'function', 'b.py', { line: 0, endLine: 3 }),
292
+ makeSymbol('baz', 'class', 'b.py', { line: 5, endLine: 15 }),
293
+ ]),
294
+ ];
295
+ const chunks = chunker.chunkFiles(asts);
296
+ expect(chunks).toHaveLength(3);
297
+ expect(chunks[0].metadata.language).toBe('typescript');
298
+ expect(chunks[1].metadata.language).toBe('python');
299
+ expect(chunks[2].metadata.language).toBe('python');
300
+ });
301
+ it('uses file contents map in chunkFiles', () => {
302
+ const chunker = new Chunker();
303
+ const asts = [
304
+ makeFileAST('a.ts', 'typescript', [
305
+ makeSymbol('hello', 'function', 'a.ts', { line: 0, endLine: 2 }),
306
+ ]),
307
+ ];
308
+ const contents = new Map([
309
+ ['a.ts', 'function hello() {\n return "world"\n}'],
310
+ ]);
311
+ const chunks = chunker.chunkFiles(asts, contents);
312
+ expect(chunks).toHaveLength(1);
313
+ expect(chunks[0].content).toContain('return "world"');
314
+ });
315
+ it('produces valid chunk ids', () => {
316
+ const chunker = new Chunker();
317
+ const ast = makeFileAST('src/index.ts', 'typescript', [
318
+ makeSymbol('main', 'function', 'src/index.ts', { line: 10, endLine: 20 }),
319
+ ]);
320
+ const chunks = chunker.chunkFile(ast);
321
+ expect(chunks[0].id).toBe('src/index.ts:main:10');
322
+ });
323
+ it('handles empty symbol list', () => {
324
+ const chunker = new Chunker();
325
+ const ast = makeFileAST('empty.ts', 'typescript', []);
326
+ const chunks = chunker.chunkFile(ast);
327
+ expect(chunks).toHaveLength(0);
328
+ });
329
+ it('uses symbol name and kind when no body, signature, or docs available', () => {
330
+ const chunker = new Chunker();
331
+ const ast = makeFileAST('src/types.ts', 'typescript', [
332
+ makeSymbol('MyInterface', 'interface', 'src/types.ts', { line: 0 }),
333
+ ]);
334
+ // No file content provided, no signature, no docs
335
+ const chunks = chunker.chunkFile(ast);
336
+ expect(chunks).toHaveLength(1);
337
+ expect(chunks[0].content).toBe('interface MyInterface');
338
+ });
339
+ });
@@ -0,0 +1,40 @@
1
+ import type { FileAST, EmbeddingChunk } from '../types.js';
2
+ export interface ChunkerOptions {
3
+ /** Maximum number of lines per chunk before sliding window kicks in. Default: 200 */
4
+ maxChunkLines?: number;
5
+ /** Overlap lines between sliding window chunks. Default: 20 */
6
+ overlapLines?: number;
7
+ }
8
+ /**
9
+ * Converts FileAST / CodeSymbol data into embedding-ready chunks.
10
+ *
11
+ * Each chunk contains a symbol's signature, documentation, and body.
12
+ * Large symbols exceeding maxChunkLines are split using a sliding window with overlap.
13
+ */
14
+ export declare class Chunker {
15
+ private maxChunkLines;
16
+ private overlapLines;
17
+ constructor(options?: ChunkerOptions);
18
+ /**
19
+ * Convert a FileAST into embedding chunks.
20
+ * Each symbol with line/endLine info becomes one or more chunks.
21
+ * Symbols without endLine are treated as single-line.
22
+ */
23
+ chunkFile(ast: FileAST, fileContent?: string): EmbeddingChunk[];
24
+ /**
25
+ * Convert multiple FileASTs into chunks.
26
+ * Optionally accepts a map of filePath -> fileContent for body extraction.
27
+ */
28
+ chunkFiles(asts: FileAST[], fileContents?: Map<string, string>): EmbeddingChunk[];
29
+ /** Convert a single CodeSymbol into one or more chunks. */
30
+ private chunkSymbol;
31
+ /** Extract the body text of a symbol from source lines. */
32
+ private extractBody;
33
+ /** Build the full text content for a chunk from symbol metadata + body. */
34
+ private buildChunkContent;
35
+ /** Split a large symbol into overlapping window chunks. */
36
+ private slidingWindowChunks;
37
+ /** Create an EmbeddingChunk with a deterministic ID. */
38
+ private makeChunk;
39
+ }
40
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../../src/embedding/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAc,cAAc,EAAE,MAAM,aAAa,CAAA;AAEtE,MAAM,WAAW,cAAc;IAC7B,qFAAqF;IACrF,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,+DAA+D;IAC/D,YAAY,CAAC,EAAE,MAAM,CAAA;CACtB;AAKD;;;;;GAKG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,aAAa,CAAQ;IAC7B,OAAO,CAAC,YAAY,CAAQ;gBAEhB,OAAO,GAAE,cAAmB;IAKxC;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,OAAO,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,cAAc,EAAE;IAY/D;;;OAGG;IACH,UAAU,CACR,IAAI,EAAE,OAAO,EAAE,EACf,YAAY,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GACjC,cAAc,EAAE;IASnB,2DAA2D;IAC3D,OAAO,CAAC,WAAW;IAsBnB,2DAA2D;IAC3D,OAAO,CAAC,WAAW;IAanB,2EAA2E;IAC3E,OAAO,CAAC,iBAAiB;IAuBzB,2DAA2D;IAC3D,OAAO,CAAC,mBAAmB;IAmD3B,wDAAwD;IACxD,OAAO,CAAC,SAAS;CA0BlB"}
@@ -0,0 +1,135 @@
1
const DEFAULT_MAX_CHUNK_LINES = 200;
const DEFAULT_OVERLAP_LINES = 20;
/**
 * Converts FileAST / CodeSymbol data into embedding-ready chunks.
 *
 * Each chunk contains a symbol's signature, documentation, and body.
 * Large symbols exceeding maxChunkLines are split using a sliding window with overlap.
 */
export class Chunker {
    maxChunkLines;
    overlapLines;
    /**
     * @param options Optional overrides for `maxChunkLines` (default 200) and
     *   `overlapLines` (default 20). Out-of-range values are clamped rather
     *   than rejected: a window holds at least one line, and the overlap is
     *   kept in `[0, maxChunkLines - 1]`.
     */
    constructor(options = {}) {
        const requestedMax = options.maxChunkLines ?? DEFAULT_MAX_CHUNK_LINES;
        const requestedOverlap = options.overlapLines ?? DEFAULT_OVERLAP_LINES;
        this.maxChunkLines = Math.max(1, requestedMax);
        // An overlap >= the window size would make the sliding window stop
        // advancing (or move backwards) and loop forever.
        this.overlapLines = Math.min(Math.max(0, requestedOverlap), this.maxChunkLines - 1);
    }
    /**
     * Convert a FileAST into embedding chunks.
     * Each symbol with line/endLine info becomes one or more chunks.
     * Symbols without endLine are treated as single-line.
     *
     * @param ast Parsed file; `ast.symbols` and `ast.language` are read.
     * @param fileContent Optional source text, used to extract symbol bodies.
     * @returns Chunks for every symbol, in symbol order.
     */
    chunkFile(ast, fileContent) {
        const lines = fileContent ? fileContent.split('\n') : undefined;
        const chunks = [];
        for (const symbol of ast.symbols) {
            chunks.push(...this.chunkSymbol(symbol, ast.language, lines));
        }
        return chunks;
    }
    /**
     * Convert multiple FileASTs into chunks.
     * Optionally accepts a map of filePath -> fileContent for body extraction.
     *
     * @returns Chunks of all files concatenated in input order.
     */
    chunkFiles(asts, fileContents) {
        const chunks = [];
        for (const ast of asts) {
            chunks.push(...this.chunkFile(ast, fileContents?.get(ast.filePath)));
        }
        return chunks;
    }
    /**
     * Convert a single CodeSymbol into one or more chunks.
     * Symbols spanning at most `maxChunkLines` lines yield exactly one chunk;
     * larger ones are split by `slidingWindowChunks`.
     */
    chunkSymbol(symbol, language, lines) {
        const startLine = symbol.line;
        const endLine = symbol.endLine ?? symbol.line;
        const totalLines = endLine - startLine + 1;
        if (totalLines > this.maxChunkLines) {
            return this.slidingWindowChunks(symbol, language, lines, startLine, endLine);
        }
        // Only build the full-symbol content when it is actually used.
        const body = this.extractBody(symbol, lines, startLine, endLine);
        const content = this.buildChunkContent(symbol, body);
        return [this.makeChunk(symbol, language, content, startLine, endLine)];
    }
    /**
     * Extract the body text of a symbol from source lines.
     * Both `lines` and the symbol line numbers are 0-indexed; the range is
     * inclusive and clipped to the available lines. Returns '' without lines.
     */
    extractBody(symbol, lines, startLine, endLine) {
        if (!lines)
            return '';
        const start = Math.max(0, startLine);
        const end = Math.min(lines.length - 1, endLine);
        return lines.slice(start, end + 1).join('\n');
    }
    /**
     * Build the full text content for a chunk from symbol metadata + body.
     * Non-empty parts (signature, documentation, body) are joined by a blank
     * line; when all are missing the content is "<kind> <name>".
     */
    buildChunkContent(symbol, body) {
        const parts = [];
        if (symbol.signature) {
            parts.push(symbol.signature);
        }
        if (symbol.documentation) {
            parts.push(symbol.documentation);
        }
        if (body) {
            parts.push(body);
        }
        if (parts.length === 0) {
            parts.push(`${symbol.kind} ${symbol.name}`);
        }
        return parts.join('\n\n');
    }
    /**
     * Split a large symbol into overlapping window chunks.
     *
     * Windows cover `maxChunkLines` lines each and start every
     * `maxChunkLines - overlapLines` lines until the start passes `endLine`.
     * Signature/documentation are prepended to the first window only. A window
     * with no text (no file content, or range past EOF) falls back to
     * "<kind> <name>" so no chunk has empty content.
     */
    slidingWindowChunks(symbol, language, lines, startLine, endLine) {
        const chunks = [];
        // Guard against a non-advancing window even if the fields were
        // changed after construction.
        const step = Math.max(1, this.maxChunkLines - this.overlapLines);
        let windowIndex = 0;
        for (let windowStart = startLine; windowStart <= endLine; windowStart += step) {
            const windowEnd = Math.min(windowStart + this.maxChunkLines - 1, endLine);
            const body = lines
                ? lines.slice(Math.max(0, windowStart), Math.min(lines.length, windowEnd + 1)).join('\n')
                : '';
            const content = windowIndex === 0
                ? this.buildChunkContent(symbol, body)
                : body || `${symbol.kind} ${symbol.name}`;
            chunks.push(this.makeChunk(symbol, language, content, windowStart, windowEnd, windowIndex));
            windowIndex++;
        }
        return chunks;
    }
    /**
     * Create an EmbeddingChunk with a deterministic ID.
     * The ID is "<filePath>:<name>:<startLine>", with ":<windowIndex>"
     * appended for every sliding window after the first.
     */
    makeChunk(symbol, language, content, startLine, endLine, windowIndex) {
        const idParts = [symbol.filePath, symbol.name, String(startLine)];
        if (windowIndex !== undefined && windowIndex > 0) {
            idParts.push(String(windowIndex));
        }
        return {
            id: idParts.join(':'),
            content,
            metadata: {
                filePath: symbol.filePath,
                symbolName: symbol.name,
                symbolKind: symbol.kind,
                startLine,
                endLine,
                language,
            },
        };
    }
}
@@ -0,0 +1,15 @@
1
+ import type { EmbeddingProviderConfig } from '../types.js';
2
+ /**
3
+ * Abstract interface for embedding providers.
4
+ * Implementations produce dense vectors from text for semantic search.
5
+ */
6
+ export interface EmbeddingProvider {
7
+ readonly model: string;
8
+ readonly dimensions: number;
9
+ /** Batch embed multiple texts, returning one vector per text. */
10
+ embed(texts: string[]): Promise<number[][]>;
11
+ /** Embed a single query text. */
12
+ embedQuery(text: string): Promise<number[]>;
13
+ }
14
+ export type { EmbeddingProviderConfig };
15
+ //# sourceMappingURL=embedding-provider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-provider.d.ts","sourceRoot":"","sources":["../../../src/embedding/embedding-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,aAAa,CAAA;AAE1D;;;GAGG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAA;IACtB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAC3B,iEAAiE;IACjE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;IAC3C,iCAAiC;IACjC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;CAC5C;AAED,YAAY,EAAE,uBAAuB,EAAE,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,39 @@
1
+ import type { EmbeddingProvider } from './embedding-provider.js';
2
+ import type { EmbeddingProviderConfig } from '../types.js';
3
+ /**
4
+ * Embedding provider for Voyage AI's voyage-code-3 model.
5
+ *
6
+ * Features:
7
+ * - 32K token context window
8
+ * - 2048 native dimensions with Matryoshka support (256–2048)
9
+ * - Batched requests (max 128 texts per API call)
10
+ * - Exponential backoff retry on 429 / 5xx errors
11
+ *
12
+ * Requires the VOYAGE_API_KEY environment variable.
13
+ */
14
+ export declare class VoyageCodeProvider implements EmbeddingProvider {
15
+ readonly model: string;
16
+ readonly dimensions: number;
17
+ private batchSize;
18
+ private maxRetries;
19
+ private apiKey;
20
+ constructor(config?: Partial<EmbeddingProviderConfig>);
21
+ /**
22
+ * Embed multiple texts in batches.
23
+ * Returns one vector per input text, in the same order.
24
+ */
25
+ embed(texts: string[]): Promise<number[][]>;
26
+ /**
27
+ * Embed a single query text.
28
+ * Uses input_type "query" for asymmetric retrieval.
29
+ */
30
+ embedQuery(text: string): Promise<number[]>;
31
+ /** Split texts into batches of at most batchSize. */
32
+ private splitIntoBatches;
33
+ /** Call the Voyage embeddings API with retry logic. */
34
+ private callAPI;
35
+ /** Calculate retry delay with exponential backoff, respecting Retry-After header. */
36
+ private getRetryDelay;
37
+ private sleep;
38
+ }
39
+ //# sourceMappingURL=voyage-provider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"voyage-provider.d.ts","sourceRoot":"","sources":["../../../src/embedding/voyage-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAA;AAChE,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,aAAa,CAAA;AAoB1D;;;;;;;;;;GAUG;AACH,qBAAa,kBAAmB,YAAW,iBAAiB;IAC1D,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAA;IACtB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAA;IAC3B,OAAO,CAAC,SAAS,CAAQ;IACzB,OAAO,CAAC,UAAU,CAAQ;IAC1B,OAAO,CAAC,MAAM,CAAQ;gBAEV,MAAM,GAAE,OAAO,CAAC,uBAAuB,CAAM;IAezD;;;OAGG;IACG,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAkBjD;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKjD,qDAAqD;IACrD,OAAO,CAAC,gBAAgB;IAQxB,uDAAuD;YACzC,OAAO;IAoErB,qFAAqF;IACrF,OAAO,CAAC,aAAa;IAWrB,OAAO,CAAC,KAAK;CAGd"}