ted-mosby 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/rag/index.js CHANGED
@@ -7,15 +7,22 @@
7
7
  import * as fs from 'fs';
8
8
  import * as path from 'path';
9
9
  import { glob } from 'glob';
10
- import Anthropic from '@anthropic-ai/sdk';
11
- // FAISS types (faiss-node)
10
+ import { simpleGit } from 'simple-git';
11
+ import { createRequire } from 'module';
12
+ import { pipeline, env } from '@huggingface/transformers';
13
+ // Configure transformers.js to use local cache
14
+ env.cacheDir = './.ted-mosby-models';
15
+ // FAISS types (faiss-node) - use createRequire for CommonJS module in ESM context
12
16
  let faiss;
13
17
  try {
18
+ const require = createRequire(import.meta.url);
14
19
  faiss = require('faiss-node');
15
20
  }
16
21
  catch (e) {
17
22
  console.warn('Warning: faiss-node not available, using fallback similarity search');
18
23
  }
24
+ // Embedding model - will be initialized lazily
25
+ let embeddingPipeline = null;
19
26
  // File extensions to index
20
27
  const INDEXABLE_EXTENSIONS = [
21
28
  '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
@@ -52,37 +59,90 @@ const EXCLUDE_PATTERNS = [
52
59
  ];
53
60
  export class RAGSystem {
54
61
  config;
55
- anthropic;
56
62
  index = null; // FAISS index
57
63
  metadata = new Map();
58
- embeddingDimension = 1024; // Voyage embeddings dimension
64
+ embeddingDimension = 384; // all-MiniLM-L6-v2 dimension
59
65
  documentCount = 0;
66
+ indexState = null;
60
67
  constructor(config) {
61
68
  this.config = {
62
69
  chunkSize: 1500,
63
70
  chunkOverlap: 200,
64
71
  ...config
65
72
  };
66
- this.anthropic = new Anthropic();
67
73
  // Ensure cache directory exists
68
74
  if (!fs.existsSync(this.config.storePath)) {
69
75
  fs.mkdirSync(this.config.storePath, { recursive: true });
70
76
  }
71
77
  }
78
+ /**
79
+ * Initialize the embedding model (lazy loading)
80
+ */
81
+ async getEmbeddingPipeline() {
82
+ if (!embeddingPipeline) {
83
+ console.log(' Loading embedding model (first run only)...');
84
+ // Use all-MiniLM-L6-v2 - small, fast, good quality for code search
85
+ embeddingPipeline = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
86
+ }
87
+ return embeddingPipeline;
88
+ }
89
+ /**
90
+ * Get the current git commit hash for the repository
91
+ */
92
+ async getCurrentCommitHash() {
93
+ try {
94
+ const git = simpleGit(this.config.repoPath);
95
+ const log = await git.log({ maxCount: 1 });
96
+ return log.latest?.hash || 'unknown';
97
+ }
98
+ catch {
99
+ return 'unknown';
100
+ }
101
+ }
102
+ /**
103
+ * Get the index state (last indexed commit)
104
+ */
105
+ getIndexState() {
106
+ return this.indexState;
107
+ }
108
+ /**
109
+ * Get files changed since a specific commit
110
+ */
111
+ async getChangedFilesSince(commitHash) {
112
+ try {
113
+ const git = simpleGit(this.config.repoPath);
114
+ const diff = await git.diff(['--name-only', commitHash, 'HEAD']);
115
+ return diff.split('\n').filter(f => f.trim().length > 0);
116
+ }
117
+ catch {
118
+ return [];
119
+ }
120
+ }
72
121
  /**
73
122
  * Index the repository for semantic search
74
123
  */
75
124
  async indexRepository() {
76
125
  const cachedIndexPath = path.join(this.config.storePath, 'index.faiss');
77
126
  const cachedMetaPath = path.join(this.config.storePath, 'metadata.json');
127
+ const indexStatePath = path.join(this.config.storePath, 'index-state.json');
128
+ // Get current commit hash
129
+ const currentCommit = await this.getCurrentCommitHash();
78
130
  // Try to load cached index
79
131
  if (fs.existsSync(cachedIndexPath) && fs.existsSync(cachedMetaPath) && faiss) {
80
132
  try {
81
- this.index = faiss.read_index(cachedIndexPath);
133
+ // faiss-node API: IndexFlatIP.read(path) to load index
134
+ this.index = faiss.IndexFlatIP.read(cachedIndexPath);
82
135
  const metaData = JSON.parse(fs.readFileSync(cachedMetaPath, 'utf-8'));
83
136
  this.metadata = new Map(Object.entries(metaData).map(([k, v]) => [parseInt(k), v]));
84
137
  this.documentCount = this.metadata.size;
85
- console.log(`Loaded cached index with ${this.documentCount} chunks`);
138
+ // Load index state if available
139
+ if (fs.existsSync(indexStatePath)) {
140
+ this.indexState = JSON.parse(fs.readFileSync(indexStatePath, 'utf-8'));
141
+ console.log(`Loaded cached index with ${this.documentCount} chunks (indexed at commit ${this.indexState?.commitHash?.slice(0, 7) || 'unknown'})`);
142
+ }
143
+ else {
144
+ console.log(`Loaded cached index with ${this.documentCount} chunks`);
145
+ }
86
146
  return;
87
147
  }
88
148
  catch (e) {
@@ -121,36 +181,74 @@ export class RAGSystem {
121
181
  console.warn('No code chunks to index');
122
182
  return;
123
183
  }
184
+ // Apply maxChunks limit if configured (for large codebases)
185
+ let chunksToIndex = chunks;
186
+ if (this.config.maxChunks && chunks.length > this.config.maxChunks) {
187
+ console.log(` ⚠️ Limiting to ${this.config.maxChunks} chunks (was ${chunks.length}) to manage memory`);
188
+ // Prioritize chunks from smaller files and main source directories
189
+ chunksToIndex = this.prioritizeChunks(chunks, this.config.maxChunks);
190
+ }
124
191
  // Generate embeddings
125
- console.log(` Generating embeddings for ${chunks.length} chunks...`);
126
- const embeddings = await this.generateEmbeddings(chunks);
192
+ console.log(` Generating embeddings for ${chunksToIndex.length} chunks...`);
193
+ const embeddings = await this.generateEmbeddings(chunksToIndex);
127
194
  // Build FAISS index
128
195
  console.log(` Building search index...`);
129
196
  if (faiss && embeddings.length > 0) {
197
+ // Get actual dimension from first embedding
198
+ const actualDimension = embeddings[0].length;
199
+ if (actualDimension !== this.embeddingDimension) {
200
+ console.log(` Adjusting dimension: expected ${this.embeddingDimension}, got ${actualDimension}`);
201
+ this.embeddingDimension = actualDimension;
202
+ }
130
203
  this.index = new faiss.IndexFlatIP(this.embeddingDimension); // Inner product for cosine similarity
131
- // Add all embeddings
204
+ // Normalize all embeddings and prepare for batch add
205
+ const normalizedEmbeddings = [];
132
206
  for (let i = 0; i < embeddings.length; i++) {
133
- // Normalize for cosine similarity
134
207
  const normalized = this.normalizeVector(embeddings[i]);
135
- this.index.add([normalized]);
208
+ normalizedEmbeddings.push(normalized);
136
209
  this.metadata.set(i, {
137
- id: chunks[i].id,
138
- filePath: chunks[i].filePath,
139
- startLine: chunks[i].startLine,
140
- endLine: chunks[i].endLine,
141
- content: chunks[i].content,
142
- language: chunks[i].language
210
+ id: chunksToIndex[i].id,
211
+ filePath: chunksToIndex[i].filePath,
212
+ startLine: chunksToIndex[i].startLine,
213
+ endLine: chunksToIndex[i].endLine,
214
+ content: chunksToIndex[i].content,
215
+ language: chunksToIndex[i].language
143
216
  });
144
217
  }
145
- // Save index and metadata
146
- faiss.write_index(this.index, cachedIndexPath);
147
- fs.writeFileSync(cachedMetaPath, JSON.stringify(Object.fromEntries(this.metadata)), 'utf-8');
148
- this.documentCount = chunks.length;
149
- console.log(` ✓ Indexed ${this.documentCount} chunks with FAISS`);
218
+ // Add all vectors in one batch to avoid threading issues
219
+ // IMPORTANT: faiss-node expects a flat array, not array of arrays
220
+ // e.g., [v1_d1, v1_d2, ..., v2_d1, v2_d2, ...] not [[v1], [v2], ...]
221
+ try {
222
+ const flatEmbeddings = normalizedEmbeddings.flat();
223
+ this.index.add(flatEmbeddings);
224
+ }
225
+ catch (faissError) {
226
+ console.warn(` FAISS batch add failed, falling back to keyword search: ${faissError}`);
227
+ // Fall through to keyword search fallback
228
+ this.index = null;
229
+ }
230
+ if (this.index) {
231
+ // Save index and metadata
232
+ // faiss-node API: index.write(path) to save index
233
+ this.index.write(cachedIndexPath);
234
+ fs.writeFileSync(cachedMetaPath, JSON.stringify(Object.fromEntries(this.metadata)), 'utf-8');
235
+ // Save index state with commit hash
236
+ this.indexState = {
237
+ commitHash: currentCommit,
238
+ indexedAt: new Date().toISOString(),
239
+ fileCount: files.length,
240
+ chunkCount: chunksToIndex.length
241
+ };
242
+ fs.writeFileSync(indexStatePath, JSON.stringify(this.indexState, null, 2), 'utf-8');
243
+ this.documentCount = chunksToIndex.length;
244
+ console.log(` ✓ Indexed ${this.documentCount} chunks with FAISS (commit ${currentCommit.slice(0, 7)})`);
245
+ return;
246
+ }
150
247
  }
151
- else {
152
- // Fallback: just store chunks for simple search
153
- chunks.forEach((chunk, i) => {
248
+ // Fallback: keyword search mode (when FAISS not available or failed)
249
+ // Metadata may already be populated from the FAISS attempt, but ensure it's complete
250
+ if (this.metadata.size === 0) {
251
+ chunksToIndex.forEach((chunk, i) => {
154
252
  this.metadata.set(i, {
155
253
  id: chunk.id,
156
254
  filePath: chunk.filePath,
@@ -160,9 +258,19 @@ export class RAGSystem {
160
258
  language: chunk.language
161
259
  });
162
260
  });
163
- this.documentCount = chunks.length;
164
- console.log(` ✓ Indexed ${this.documentCount} chunks (keyword search mode)`);
165
261
  }
262
+ // Save metadata for fallback search
263
+ fs.writeFileSync(cachedMetaPath, JSON.stringify(Object.fromEntries(this.metadata)), 'utf-8');
264
+ // Save index state with commit hash (even in fallback mode)
265
+ this.indexState = {
266
+ commitHash: currentCommit,
267
+ indexedAt: new Date().toISOString(),
268
+ fileCount: files.length,
269
+ chunkCount: chunksToIndex.length
270
+ };
271
+ fs.writeFileSync(indexStatePath, JSON.stringify(this.indexState, null, 2), 'utf-8');
272
+ this.documentCount = chunksToIndex.length;
273
+ console.log(` ✓ Indexed ${this.documentCount} chunks (keyword search mode, commit ${currentCommit.slice(0, 7)})`);
166
274
  }
167
275
  /**
168
276
  * Discover all indexable files in the repository
@@ -246,20 +354,25 @@ export class RAGSystem {
246
354
  return chunks;
247
355
  }
248
356
  /**
249
- * Generate embeddings for code chunks using Anthropic's Voyage embeddings via their SDK
357
+ * Generate embeddings for code chunks using local Transformers.js model
358
+ * Uses all-MiniLM-L6-v2 - a fast, high-quality embedding model
250
359
  */
251
360
  async generateEmbeddings(chunks) {
252
361
  const embeddings = [];
253
- // Process in batches
254
- const batchSize = 20;
362
+ const extractor = await this.getEmbeddingPipeline();
363
+ // Process in batches for memory efficiency
364
+ const batchSize = 32;
255
365
  for (let i = 0; i < chunks.length; i += batchSize) {
256
366
  const batch = chunks.slice(i, i + batchSize);
257
367
  try {
258
- // Use Anthropic to generate embeddings via message
259
- // Note: Anthropic doesn't have a direct embedding API, so we use a workaround
260
- // In production, you'd use Voyage AI or OpenAI embeddings
261
- const batchEmbeddings = await this.generateSimpleEmbeddings(batch);
262
- embeddings.push(...batchEmbeddings);
368
+ // Generate embeddings for the batch
369
+ for (const chunk of batch) {
370
+ // Truncate content to avoid memory issues (model max is 512 tokens)
371
+ const text = chunk.content.slice(0, 2000);
372
+ const output = await extractor(text, { pooling: 'mean', normalize: true });
373
+ // Convert to array
374
+ embeddings.push(Array.from(output.data));
375
+ }
263
376
  // Progress update every batch
264
377
  const processed = Math.min(i + batchSize, chunks.length);
265
378
  const percent = Math.floor(processed / chunks.length * 100);
@@ -277,36 +390,13 @@ export class RAGSystem {
277
390
  return embeddings;
278
391
  }
279
392
  /**
280
- * Simple TF-IDF-like embedding for fallback when API embedding isn't available
281
- * This is a simplified implementation - production should use proper embeddings
282
- */
283
- async generateSimpleEmbeddings(chunks) {
284
- return chunks.map(chunk => {
285
- // Create a simple bag-of-words vector
286
- const words = chunk.content.toLowerCase()
287
- .replace(/[^a-z0-9_]/g, ' ')
288
- .split(/\s+/)
289
- .filter(w => w.length > 2);
290
- // Simple hash-based embedding
291
- const vector = new Array(this.embeddingDimension).fill(0);
292
- for (const word of words) {
293
- const hash = this.simpleHash(word) % this.embeddingDimension;
294
- vector[hash] += 1;
295
- }
296
- return this.normalizeVector(vector);
297
- });
298
- }
299
- /**
300
- * Simple string hash function
393
+ * Generate embedding for a single text (used for queries)
301
394
  */
302
- simpleHash(str) {
303
- let hash = 0;
304
- for (let i = 0; i < str.length; i++) {
305
- const char = str.charCodeAt(i);
306
- hash = ((hash << 5) - hash) + char;
307
- hash = hash & hash;
308
- }
309
- return Math.abs(hash);
395
+ async generateSingleEmbedding(text) {
396
+ const extractor = await this.getEmbeddingPipeline();
397
+ const truncated = text.slice(0, 2000);
398
+ const output = await extractor(truncated, { pooling: 'mean', normalize: true });
399
+ return Array.from(output.data);
310
400
  }
311
401
  /**
312
402
  * Normalize a vector for cosine similarity
@@ -325,22 +415,16 @@ export class RAGSystem {
325
415
  if (this.metadata.size === 0) {
326
416
  return [];
327
417
  }
328
- // Generate query embedding
329
- const [queryEmbedding] = await this.generateSimpleEmbeddings([{
330
- id: 'query',
331
- filePath: '',
332
- startLine: 0,
333
- endLine: 0,
334
- content: query,
335
- language: ''
336
- }]);
418
+ // Generate query embedding using local model
419
+ const queryEmbedding = await this.generateSingleEmbedding(query);
337
420
  let results = [];
338
421
  if (this.index && faiss) {
339
- // FAISS search
422
+ // FAISS search - pass flat array (faiss-node expects flat, not nested)
423
+ // Results are also flat arrays: { distances: [d1, d2, ...], labels: [l1, l2, ...] }
340
424
  const normalized = this.normalizeVector(queryEmbedding);
341
- const { distances, labels } = this.index.search([normalized], maxResults * 2);
342
- for (let i = 0; i < labels[0].length; i++) {
343
- const label = labels[0][i];
425
+ const { distances, labels } = this.index.search(normalized, maxResults * 2);
426
+ for (let i = 0; i < labels.length; i++) {
427
+ const label = labels[i];
344
428
  if (label === -1)
345
429
  continue;
346
430
  const meta = this.metadata.get(label);
@@ -353,7 +437,7 @@ export class RAGSystem {
353
437
  continue;
354
438
  results.push({
355
439
  ...meta,
356
- score: distances[0][i]
440
+ score: distances[i]
357
441
  });
358
442
  if (results.length >= maxResults)
359
443
  break;
@@ -442,5 +526,226 @@ export class RAGSystem {
442
526
  getDocumentCount() {
443
527
  return this.documentCount;
444
528
  }
529
+ /**
530
+ * Discover total chunk count without indexing (for batch planning)
531
+ */
532
+ async discoverChunkCount() {
533
+ const files = await this.discoverFiles();
534
+ let totalChunks = 0;
535
+ for (const file of files) {
536
+ try {
537
+ const fileChunks = await this.chunkFile(file);
538
+ totalChunks += fileChunks.length;
539
+ }
540
+ catch {
541
+ // Skip files that fail
542
+ }
543
+ }
544
+ return { files: files.length, chunks: totalChunks };
545
+ }
546
+ /**
547
+ * Index a specific batch of chunks (for chunked generation mode).
548
+ * Returns batch info for progress tracking.
549
+ */
550
+ async indexBatch(batchNumber, batchSize) {
551
+ const batchStatePath = path.join(this.config.storePath, `batch-${batchNumber}-state.json`);
552
+ // Discover all files and chunks
553
+ console.log(` [Batch ${batchNumber}] Discovering files...`);
554
+ const files = await this.discoverFiles();
555
+ // Chunk all files
556
+ const allChunks = [];
557
+ for (const file of files) {
558
+ try {
559
+ const fileChunks = await this.chunkFile(file);
560
+ allChunks.push(...fileChunks);
561
+ }
562
+ catch {
563
+ // Skip files that fail
564
+ }
565
+ }
566
+ const totalChunks = allChunks.length;
567
+ const totalBatches = Math.ceil(totalChunks / batchSize);
568
+ const batchStart = batchNumber * batchSize;
569
+ const batchEnd = Math.min(batchStart + batchSize, totalChunks);
570
+ if (batchStart >= totalChunks) {
571
+ return {
572
+ totalChunks,
573
+ totalBatches,
574
+ currentBatch: batchNumber,
575
+ batchStart,
576
+ batchEnd: batchStart,
577
+ chunksInBatch: 0
578
+ };
579
+ }
580
+ // Get chunks for this batch
581
+ const batchChunks = allChunks.slice(batchStart, batchEnd);
582
+ console.log(` [Batch ${batchNumber}] Processing chunks ${batchStart + 1}-${batchEnd} of ${totalChunks}`);
583
+ // Generate embeddings for batch (validates chunks are processable)
584
+ console.log(` [Batch ${batchNumber}] Generating embeddings for ${batchChunks.length} chunks...`);
585
+ await this.generateEmbeddings(batchChunks);
586
+ // Store metadata for this batch (append to main metadata)
587
+ const mainMetaPath = path.join(this.config.storePath, 'metadata.json');
588
+ let existingMeta = {};
589
+ if (fs.existsSync(mainMetaPath)) {
590
+ existingMeta = JSON.parse(fs.readFileSync(mainMetaPath, 'utf-8'));
591
+ }
592
+ // Add batch chunks to metadata with global indices
593
+ batchChunks.forEach((chunk, i) => {
594
+ const globalIndex = batchStart + i;
595
+ existingMeta[globalIndex.toString()] = {
596
+ id: chunk.id,
597
+ filePath: chunk.filePath,
598
+ startLine: chunk.startLine,
599
+ endLine: chunk.endLine,
600
+ content: chunk.content,
601
+ language: chunk.language
602
+ };
603
+ });
604
+ // Save updated metadata
605
+ fs.writeFileSync(mainMetaPath, JSON.stringify(existingMeta), 'utf-8');
606
+ // Save batch state
607
+ const batchState = {
608
+ batchNumber,
609
+ batchSize,
610
+ batchStart,
611
+ batchEnd,
612
+ chunksProcessed: batchChunks.length,
613
+ completedAt: new Date().toISOString()
614
+ };
615
+ fs.writeFileSync(batchStatePath, JSON.stringify(batchState, null, 2), 'utf-8');
616
+ // Update in-memory metadata
617
+ this.metadata = new Map(Object.entries(existingMeta).map(([k, v]) => [parseInt(k), v]));
618
+ this.documentCount = this.metadata.size;
619
+ console.log(` [Batch ${batchNumber}] ✓ Indexed ${batchChunks.length} chunks (total: ${this.documentCount})`);
620
+ return {
621
+ totalChunks,
622
+ totalBatches,
623
+ currentBatch: batchNumber,
624
+ batchStart,
625
+ batchEnd,
626
+ chunksInBatch: batchChunks.length
627
+ };
628
+ }
629
+ /**
630
+ * Load metadata only (for batched mode - metadata was already saved during batches)
631
+ * This avoids regenerating embeddings which is expensive and was causing the issue.
632
+ */
633
+ async loadMetadataOnly() {
634
+ const mainMetaPath = path.join(this.config.storePath, 'metadata.json');
635
+ const cachedIndexPath = path.join(this.config.storePath, 'index.faiss');
636
+ if (!fs.existsSync(mainMetaPath)) {
637
+ console.warn('No metadata found to load');
638
+ return;
639
+ }
640
+ // Load metadata
641
+ const metaData = JSON.parse(fs.readFileSync(mainMetaPath, 'utf-8'));
642
+ this.metadata = new Map(Object.entries(metaData).map(([k, v]) => [parseInt(k), v]));
643
+ this.documentCount = this.metadata.size;
644
+ // Try to load FAISS index if it exists
645
+ if (fs.existsSync(cachedIndexPath) && faiss) {
646
+ try {
647
+ this.index = faiss.IndexFlatIP.read(cachedIndexPath);
648
+ console.log(` Loaded FAISS index with ${this.documentCount} chunks`);
649
+ }
650
+ catch (e) {
651
+ console.log(` FAISS index not available, using keyword search (${this.documentCount} chunks)`);
652
+ }
653
+ }
654
+ else {
655
+ console.log(` Loaded ${this.documentCount} chunks (keyword search mode)`);
656
+ }
657
+ }
658
+ /**
659
+ * Build FAISS index from all accumulated metadata (call after all batches complete)
660
+ */
661
+ async finalizeIndex() {
662
+ const mainMetaPath = path.join(this.config.storePath, 'metadata.json');
663
+ const cachedIndexPath = path.join(this.config.storePath, 'index.faiss');
664
+ const indexStatePath = path.join(this.config.storePath, 'index-state.json');
665
+ if (!fs.existsSync(mainMetaPath)) {
666
+ console.warn('No metadata found to finalize');
667
+ return;
668
+ }
669
+ const metaData = JSON.parse(fs.readFileSync(mainMetaPath, 'utf-8'));
670
+ this.metadata = new Map(Object.entries(metaData).map(([k, v]) => [parseInt(k), v]));
671
+ this.documentCount = this.metadata.size;
672
+ console.log(` Finalizing index with ${this.documentCount} chunks...`);
673
+ if (!faiss || this.documentCount === 0) {
674
+ console.log(' Using keyword search mode (FAISS not available or no chunks)');
675
+ return;
676
+ }
677
+ // Regenerate embeddings for FAISS index
678
+ const chunks = Array.from(this.metadata.values());
679
+ console.log(` Generating embeddings for final index...`);
680
+ const embeddings = await this.generateEmbeddings(chunks);
681
+ // Build FAISS index
682
+ this.index = new faiss.IndexFlatIP(this.embeddingDimension);
683
+ const normalizedEmbeddings = [];
684
+ for (let i = 0; i < embeddings.length; i++) {
685
+ normalizedEmbeddings.push(this.normalizeVector(embeddings[i]));
686
+ }
687
+ try {
688
+ const flatEmbeddings = normalizedEmbeddings.flat();
689
+ this.index.add(flatEmbeddings);
690
+ this.index.write(cachedIndexPath);
691
+ const currentCommit = await this.getCurrentCommitHash();
692
+ this.indexState = {
693
+ commitHash: currentCommit,
694
+ indexedAt: new Date().toISOString(),
695
+ fileCount: new Set(chunks.map(c => c.filePath)).size,
696
+ chunkCount: this.documentCount
697
+ };
698
+ fs.writeFileSync(indexStatePath, JSON.stringify(this.indexState, null, 2), 'utf-8');
699
+ console.log(` ✓ Finalized FAISS index with ${this.documentCount} chunks`);
700
+ }
701
+ catch (err) {
702
+ console.warn(` FAISS indexing failed, using keyword search: ${err}`);
703
+ }
704
+ }
705
+ /**
706
+ * Prioritize chunks for indexing when maxChunks limit is set.
707
+ * Prioritizes:
708
+ * 1. Core source directories (src/, lib/, app/)
709
+ * 2. Entry points and config files
710
+ * 3. Non-test files over test files
711
+ * 4. Smaller chunks (more complete code units)
712
+ */
713
+ prioritizeChunks(chunks, maxChunks) {
714
+ // Score each chunk by priority
715
+ const scored = chunks.map(chunk => {
716
+ let score = 0;
717
+ const fp = chunk.filePath.toLowerCase();
718
+ // Prioritize core source directories
719
+ if (fp.startsWith('src/') || fp.startsWith('lib/') || fp.startsWith('app/')) {
720
+ score += 100;
721
+ }
722
+ // Entry points and important files
723
+ if (fp.includes('index.') || fp.includes('main.') || fp.includes('app.')) {
724
+ score += 50;
725
+ }
726
+ // Config files are important for understanding architecture
727
+ if (fp.includes('config') || fp.endsWith('.json') || fp.endsWith('.yaml')) {
728
+ score += 30;
729
+ }
730
+ // Deprioritize test files
731
+ if (this.isTestFile(chunk.filePath)) {
732
+ score -= 50;
733
+ }
734
+ // Deprioritize vendor/generated
735
+ if (fp.includes('vendor/') || fp.includes('generated/') || fp.includes('.min.')) {
736
+ score -= 100;
737
+ }
738
+ // Prefer smaller chunks (more likely to be complete logical units)
739
+ const chunkSize = chunk.content.length;
740
+ if (chunkSize < 1000)
741
+ score += 20;
742
+ else if (chunkSize > 3000)
743
+ score -= 10;
744
+ return { chunk, score };
745
+ });
746
+ // Sort by score descending and take top maxChunks
747
+ scored.sort((a, b) => b.score - a.score);
748
+ return scored.slice(0, maxChunks).map(s => s.chunk);
749
+ }
445
750
  }
446
751
  //# sourceMappingURL=index.js.map