gitnexus 1.6.6-rc.67 → 1.6.6-rc.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,6 +86,22 @@ export declare class WikiGenerator {
86
86
  private ensureHTMLViewer;
87
87
  private fullGeneration;
88
88
  private buildModuleTree;
89
+ /**
90
+ * Run grouping in batches when the full file list exceeds GROUPING_TOKEN_BUDGET.
91
+ */
92
+ private batchedGrouping;
93
+ /**
94
+ * Partition files into batches that fit within GROUPING_TOKEN_BUDGET.
95
+ * Groups by top-level directory for semantic coherence.
96
+ */
97
+ private batchFilesForGrouping;
98
+ private estimateGroupingPromptTokens;
99
+ private trimSymbolsToFit;
100
+ /**
101
+ * Merge partial groupings from multiple batches. Module names that slugify to the same
102
+ * value are merged under the first-seen name; duplicate file paths are dropped (first-seen wins).
103
+ */
104
+ private mergeGroupings;
89
105
  /**
90
106
  * Parse LLM grouping response. Validates all files are assigned.
91
107
  */
@@ -22,6 +22,7 @@ import { GROUPING_SYSTEM_PROMPT, GROUPING_USER_PROMPT, MODULE_SYSTEM_PROMPT, MOD
22
22
  import { shouldIgnorePath } from '../../config/ignore-service.js';
23
23
  // ─── Constants ────────────────────────────────────────────────────────
24
24
  const DEFAULT_MAX_TOKENS_PER_MODULE = 30_000;
25
+ const GROUPING_TOKEN_BUDGET = 100_000;
25
26
  const WIKI_DIR = 'wiki';
26
27
  // ─── Generator Class ──────────────────────────────────────────────────
27
28
  export class WikiGenerator {
@@ -345,11 +346,18 @@ export class WikiGenerator {
345
346
  FILE_LIST: fileList,
346
347
  DIRECTORY_TREE: dirTree,
347
348
  });
348
- // Grouping is a structured-data phase (JSON output), not documentation.
349
- // Do NOT apply buildSystemPrompt here — a language instruction would risk
350
- // translating module-name keys, breaking slug stability and JSON parsing.
351
- const response = await this.invokeLLM(prompt, GROUPING_SYSTEM_PROMPT, this.streamOpts('Grouping files', 15, 13));
352
- const grouping = this.parseGroupingResponse(response.content, files);
349
+ const promptTokens = estimateTokens(prompt);
350
+ let grouping;
351
+ if (promptTokens <= GROUPING_TOKEN_BUDGET) {
352
+ // Grouping is a structured-data phase (JSON output), not documentation.
353
+ // Do NOT apply buildSystemPrompt here — a language instruction would risk
354
+ // translating module-name keys, breaking slug stability and JSON parsing.
355
+ const response = await this.invokeLLM(prompt, GROUPING_SYSTEM_PROMPT, this.streamOpts('Grouping files', 15, 13));
356
+ grouping = this.parseGroupingResponse(response.content, files);
357
+ }
358
+ else {
359
+ grouping = await this.batchedGrouping(files);
360
+ }
353
361
  // Convert to tree nodes
354
362
  const tree = [];
355
363
  for (const [moduleName, modulePaths] of Object.entries(grouping)) {
@@ -374,6 +382,171 @@ export class WikiGenerator {
374
382
  this.onProgress('grouping', 28, `Created ${tree.length} modules`);
375
383
  return tree;
376
384
  }
385
+ /**
386
+ * Run grouping in batches when the full file list exceeds GROUPING_TOKEN_BUDGET.
387
+ */
388
+ async batchedGrouping(files) {
389
+ const batches = this.batchFilesForGrouping(files);
390
+ const partials = [];
391
+ for (let i = 0; i < batches.length; i++) {
392
+ const batch = batches[i];
393
+ this.onProgress('grouping', 15 + Math.round(((i + 1) / batches.length) * 13), `Grouping batch ${i + 1}/${batches.length} (LLM)...`);
394
+ const batchFileList = formatFileListForGrouping(batch);
395
+ const batchDirTree = formatDirectoryTree(batch.map((f) => f.filePath));
396
+ const batchPrompt = fillTemplate(GROUPING_USER_PROMPT, {
397
+ FILE_LIST: batchFileList,
398
+ DIRECTORY_TREE: batchDirTree,
399
+ });
400
+ try {
401
+ const batchStart = 15 + Math.round((i / batches.length) * 13);
402
+ const batchRange = Math.max(1, Math.round(13 / batches.length));
403
+ const response = await this.invokeLLM(batchPrompt, GROUPING_SYSTEM_PROMPT, this.streamOpts(`Grouping batch ${i + 1}/${batches.length}`, batchStart, batchRange));
404
+ partials.push(this.parseGroupingResponse(response.content, batch));
405
+ }
406
+ catch {
407
+ this.onProgress('grouping', 15, `Batch ${i + 1} failed, falling back to directory grouping`);
408
+ return this.fallbackGrouping(files);
409
+ }
410
+ }
411
+ const merged = this.mergeGroupings(partials);
412
+ const assignedFiles = new Set(Object.values(merged).flat());
413
+ const unassigned = files.map((f) => f.filePath).filter((fp) => !assignedFiles.has(fp));
414
+ if (unassigned.length > 0) {
415
+ merged['Other'] = [...(merged['Other'] ?? []), ...unassigned];
416
+ }
417
+ return Object.keys(merged).length > 0 ? merged : this.fallbackGrouping(files);
418
+ }
419
+ /**
420
+ * Partition files into batches that fit within GROUPING_TOKEN_BUDGET.
421
+ * Groups by top-level directory for semantic coherence.
422
+ */
423
+ batchFilesForGrouping(files) {
424
+ if (files.length === 0)
425
+ return [];
426
+ const dirGroups = new Map();
427
+ for (const f of files) {
428
+ const parts = f.filePath.replace(/\\/g, '/').split('/');
429
+ const topDir = parts.length > 1 ? parts[0] : 'Root';
430
+ let group = dirGroups.get(topDir);
431
+ if (!group) {
432
+ group = [];
433
+ dirGroups.set(topDir, group);
434
+ }
435
+ group.push(f);
436
+ }
437
+ const batches = [];
438
+ let currentBatch = [];
439
+ for (const dirFiles of dirGroups.values()) {
440
+ const dirPromptSize = this.estimateGroupingPromptTokens(dirFiles);
441
+ if (dirPromptSize > GROUPING_TOKEN_BUDGET) {
442
+ if (currentBatch.length > 0) {
443
+ batches.push(currentBatch);
444
+ currentBatch = [];
445
+ }
446
+ // Sub-batch this large directory greedily: grow each chunk until one more file would exceed the token budget
447
+ for (let i = 0; i < dirFiles.length;) {
448
+ const subBatch = [];
449
+ while (i < dirFiles.length) {
450
+ subBatch.push(dirFiles[i]);
451
+ i++;
452
+ if (this.estimateGroupingPromptTokens(subBatch) > GROUPING_TOKEN_BUDGET &&
453
+ subBatch.length > 1) {
454
+ subBatch.pop();
455
+ i--;
456
+ break;
457
+ }
458
+ }
459
+ if (subBatch.length === 1 &&
460
+ this.estimateGroupingPromptTokens(subBatch) > GROUPING_TOKEN_BUDGET) {
461
+ subBatch[0] = this.trimSymbolsToFit(subBatch[0]);
462
+ }
463
+ batches.push(subBatch);
464
+ }
465
+ continue;
466
+ }
467
+ const candidateBatch = [...currentBatch, ...dirFiles];
468
+ if (this.estimateGroupingPromptTokens(candidateBatch) > GROUPING_TOKEN_BUDGET) {
469
+ if (currentBatch.length > 0) {
470
+ batches.push(currentBatch);
471
+ }
472
+ currentBatch = dirFiles;
473
+ }
474
+ else {
475
+ currentBatch = candidateBatch;
476
+ }
477
+ }
478
+ if (currentBatch.length > 0) {
479
+ batches.push(currentBatch);
480
+ }
481
+ return batches;
482
+ }
483
+ estimateGroupingPromptTokens(files) {
484
+ const fileList = formatFileListForGrouping(files);
485
+ const dirTree = formatDirectoryTree(files.map((f) => f.filePath));
486
+ const prompt = fillTemplate(GROUPING_USER_PROMPT, {
487
+ FILE_LIST: fileList,
488
+ DIRECTORY_TREE: dirTree,
489
+ });
490
+ return estimateTokens(prompt);
491
+ }
492
+ trimSymbolsToFit(file) {
493
+ const symbols = file.symbols;
494
+ let lo = 0;
495
+ let hi = symbols.length;
496
+ while (lo < hi) {
497
+ const mid = (lo + hi + 1) >>> 1;
498
+ const candidate = {
499
+ filePath: file.filePath,
500
+ symbols: [
501
+ ...symbols.slice(0, mid),
502
+ { name: `... and ${symbols.length - mid} more`, type: 'truncated' },
503
+ ],
504
+ };
505
+ if (this.estimateGroupingPromptTokens([candidate]) <= GROUPING_TOKEN_BUDGET) {
506
+ lo = mid;
507
+ }
508
+ else {
509
+ hi = mid - 1;
510
+ }
511
+ }
512
+ if (lo >= symbols.length)
513
+ return file;
514
+ return {
515
+ filePath: file.filePath,
516
+ symbols: lo > 0
517
+ ? [
518
+ ...symbols.slice(0, lo),
519
+ { name: `... and ${symbols.length - lo} more`, type: 'truncated' },
520
+ ]
521
+ : [{ name: 'no exports (truncated)', type: 'truncated' }],
522
+ };
523
+ }
524
+ /**
525
+ * Merge partial groupings from multiple batches. Same module name across
526
+ * batches gets file lists concatenated. Deduplicates (first-seen wins).
527
+ */
528
+ mergeGroupings(partials) {
529
+ const merged = {};
530
+ const seen = new Set();
531
+ const slugToCanonical = new Map();
532
+ for (const partial of partials) {
533
+ for (const [mod, paths] of Object.entries(partial)) {
534
+ const slug = this.slugify(mod);
535
+ const canonical = slugToCanonical.get(slug) ?? mod;
536
+ if (!slugToCanonical.has(slug))
537
+ slugToCanonical.set(slug, mod);
538
+ for (const fp of paths) {
539
+ if (!seen.has(fp)) {
540
+ seen.add(fp);
541
+ if (!merged[canonical])
542
+ merged[canonical] = [];
543
+ merged[canonical].push(fp);
544
+ }
545
+ }
546
+ }
547
+ }
548
+ return merged;
549
+ }
377
550
  /**
378
551
  * Parse LLM grouping response. Validates all files are assigned.
379
552
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.6-rc.67",
3
+ "version": "1.6.6-rc.68",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",