gitnexus 1.6.6-rc.67 → 1.6.6-rc.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/wiki/generator.d.ts +16 -0
- package/dist/core/wiki/generator.js +178 -5
- package/package.json +1 -1
|
@@ -86,6 +86,22 @@ export declare class WikiGenerator {
|
|
|
86
86
|
private ensureHTMLViewer;
|
|
87
87
|
private fullGeneration;
|
|
88
88
|
private buildModuleTree;
|
|
89
|
+
/**
|
|
90
|
+
* Run grouping in batches when the full file list exceeds GROUPING_TOKEN_BUDGET.
|
|
91
|
+
*/
|
|
92
|
+
private batchedGrouping;
|
|
93
|
+
/**
|
|
94
|
+
* Partition files into batches that fit within GROUPING_TOKEN_BUDGET.
|
|
95
|
+
* Groups by top-level directory for semantic coherence.
|
|
96
|
+
*/
|
|
97
|
+
private batchFilesForGrouping;
|
|
98
|
+
private estimateGroupingPromptTokens;
|
|
99
|
+
private trimSymbolsToFit;
|
|
100
|
+
/**
|
|
101
|
+
* Merge partial groupings from multiple batches. Same module name across
|
|
102
|
+
* batches gets file lists concatenated. Deduplicates (first-seen wins).
|
|
103
|
+
*/
|
|
104
|
+
private mergeGroupings;
|
|
89
105
|
/**
|
|
90
106
|
* Parse LLM grouping response. Validates all files are assigned.
|
|
91
107
|
*/
|
|
@@ -22,6 +22,7 @@ import { GROUPING_SYSTEM_PROMPT, GROUPING_USER_PROMPT, MODULE_SYSTEM_PROMPT, MOD
|
|
|
22
22
|
import { shouldIgnorePath } from '../../config/ignore-service.js';
|
|
23
23
|
// ─── Constants ────────────────────────────────────────────────────────
|
|
24
24
|
const DEFAULT_MAX_TOKENS_PER_MODULE = 30_000;
|
|
25
|
+
const GROUPING_TOKEN_BUDGET = 100_000;
|
|
25
26
|
const WIKI_DIR = 'wiki';
|
|
26
27
|
// ─── Generator Class ──────────────────────────────────────────────────
|
|
27
28
|
export class WikiGenerator {
|
|
@@ -345,11 +346,18 @@ export class WikiGenerator {
|
|
|
345
346
|
FILE_LIST: fileList,
|
|
346
347
|
DIRECTORY_TREE: dirTree,
|
|
347
348
|
});
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
349
|
+
const promptTokens = estimateTokens(prompt);
|
|
350
|
+
let grouping;
|
|
351
|
+
if (promptTokens <= GROUPING_TOKEN_BUDGET) {
|
|
352
|
+
// Grouping is a structured-data phase (JSON output), not documentation.
|
|
353
|
+
// Do NOT apply buildSystemPrompt here — a language instruction would risk
|
|
354
|
+
// translating module-name keys, breaking slug stability and JSON parsing.
|
|
355
|
+
const response = await this.invokeLLM(prompt, GROUPING_SYSTEM_PROMPT, this.streamOpts('Grouping files', 15, 13));
|
|
356
|
+
grouping = this.parseGroupingResponse(response.content, files);
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
grouping = await this.batchedGrouping(files);
|
|
360
|
+
}
|
|
353
361
|
// Convert to tree nodes
|
|
354
362
|
const tree = [];
|
|
355
363
|
for (const [moduleName, modulePaths] of Object.entries(grouping)) {
|
|
@@ -374,6 +382,171 @@ export class WikiGenerator {
|
|
|
374
382
|
this.onProgress('grouping', 28, `Created ${tree.length} modules`);
|
|
375
383
|
return tree;
|
|
376
384
|
}
|
|
385
|
+
/**
|
|
386
|
+
* Run grouping in batches when the full file list exceeds GROUPING_TOKEN_BUDGET.
|
|
387
|
+
*/
|
|
388
|
+
async batchedGrouping(files) {
|
|
389
|
+
const batches = this.batchFilesForGrouping(files);
|
|
390
|
+
const partials = [];
|
|
391
|
+
for (let i = 0; i < batches.length; i++) {
|
|
392
|
+
const batch = batches[i];
|
|
393
|
+
this.onProgress('grouping', 15 + Math.round(((i + 1) / batches.length) * 13), `Grouping batch ${i + 1}/${batches.length} (LLM)...`);
|
|
394
|
+
const batchFileList = formatFileListForGrouping(batch);
|
|
395
|
+
const batchDirTree = formatDirectoryTree(batch.map((f) => f.filePath));
|
|
396
|
+
const batchPrompt = fillTemplate(GROUPING_USER_PROMPT, {
|
|
397
|
+
FILE_LIST: batchFileList,
|
|
398
|
+
DIRECTORY_TREE: batchDirTree,
|
|
399
|
+
});
|
|
400
|
+
try {
|
|
401
|
+
const batchStart = 15 + Math.round((i / batches.length) * 13);
|
|
402
|
+
const batchRange = Math.max(1, Math.round(13 / batches.length));
|
|
403
|
+
const response = await this.invokeLLM(batchPrompt, GROUPING_SYSTEM_PROMPT, this.streamOpts(`Grouping batch ${i + 1}/${batches.length}`, batchStart, batchRange));
|
|
404
|
+
partials.push(this.parseGroupingResponse(response.content, batch));
|
|
405
|
+
}
|
|
406
|
+
catch {
|
|
407
|
+
this.onProgress('grouping', 15, `Batch ${i + 1} failed, falling back to directory grouping`);
|
|
408
|
+
return this.fallbackGrouping(files);
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
const merged = this.mergeGroupings(partials);
|
|
412
|
+
const assignedFiles = new Set(Object.values(merged).flat());
|
|
413
|
+
const unassigned = files.map((f) => f.filePath).filter((fp) => !assignedFiles.has(fp));
|
|
414
|
+
if (unassigned.length > 0) {
|
|
415
|
+
merged['Other'] = [...(merged['Other'] ?? []), ...unassigned];
|
|
416
|
+
}
|
|
417
|
+
return Object.keys(merged).length > 0 ? merged : this.fallbackGrouping(files);
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Partition files into batches that fit within GROUPING_TOKEN_BUDGET.
|
|
421
|
+
* Groups by top-level directory for semantic coherence.
|
|
422
|
+
*/
|
|
423
|
+
batchFilesForGrouping(files) {
|
|
424
|
+
if (files.length === 0)
|
|
425
|
+
return [];
|
|
426
|
+
const dirGroups = new Map();
|
|
427
|
+
for (const f of files) {
|
|
428
|
+
const parts = f.filePath.replace(/\\/g, '/').split('/');
|
|
429
|
+
const topDir = parts.length > 1 ? parts[0] : 'Root';
|
|
430
|
+
let group = dirGroups.get(topDir);
|
|
431
|
+
if (!group) {
|
|
432
|
+
group = [];
|
|
433
|
+
dirGroups.set(topDir, group);
|
|
434
|
+
}
|
|
435
|
+
group.push(f);
|
|
436
|
+
}
|
|
437
|
+
const batches = [];
|
|
438
|
+
let currentBatch = [];
|
|
439
|
+
for (const dirFiles of dirGroups.values()) {
|
|
440
|
+
const dirPromptSize = this.estimateGroupingPromptTokens(dirFiles);
|
|
441
|
+
if (dirPromptSize > GROUPING_TOKEN_BUDGET) {
|
|
442
|
+
if (currentBatch.length > 0) {
|
|
443
|
+
batches.push(currentBatch);
|
|
444
|
+
currentBatch = [];
|
|
445
|
+
}
|
|
446
|
+
// Sub-batch this large directory by fixed chunks
|
|
447
|
+
for (let i = 0; i < dirFiles.length;) {
|
|
448
|
+
const subBatch = [];
|
|
449
|
+
while (i < dirFiles.length) {
|
|
450
|
+
subBatch.push(dirFiles[i]);
|
|
451
|
+
i++;
|
|
452
|
+
if (this.estimateGroupingPromptTokens(subBatch) > GROUPING_TOKEN_BUDGET &&
|
|
453
|
+
subBatch.length > 1) {
|
|
454
|
+
subBatch.pop();
|
|
455
|
+
i--;
|
|
456
|
+
break;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
if (subBatch.length === 1 &&
|
|
460
|
+
this.estimateGroupingPromptTokens(subBatch) > GROUPING_TOKEN_BUDGET) {
|
|
461
|
+
subBatch[0] = this.trimSymbolsToFit(subBatch[0]);
|
|
462
|
+
}
|
|
463
|
+
batches.push(subBatch);
|
|
464
|
+
}
|
|
465
|
+
continue;
|
|
466
|
+
}
|
|
467
|
+
const candidateBatch = [...currentBatch, ...dirFiles];
|
|
468
|
+
if (this.estimateGroupingPromptTokens(candidateBatch) > GROUPING_TOKEN_BUDGET) {
|
|
469
|
+
if (currentBatch.length > 0) {
|
|
470
|
+
batches.push(currentBatch);
|
|
471
|
+
}
|
|
472
|
+
currentBatch = dirFiles;
|
|
473
|
+
}
|
|
474
|
+
else {
|
|
475
|
+
currentBatch = candidateBatch;
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
if (currentBatch.length > 0) {
|
|
479
|
+
batches.push(currentBatch);
|
|
480
|
+
}
|
|
481
|
+
return batches;
|
|
482
|
+
}
|
|
483
|
+
estimateGroupingPromptTokens(files) {
|
|
484
|
+
const fileList = formatFileListForGrouping(files);
|
|
485
|
+
const dirTree = formatDirectoryTree(files.map((f) => f.filePath));
|
|
486
|
+
const prompt = fillTemplate(GROUPING_USER_PROMPT, {
|
|
487
|
+
FILE_LIST: fileList,
|
|
488
|
+
DIRECTORY_TREE: dirTree,
|
|
489
|
+
});
|
|
490
|
+
return estimateTokens(prompt);
|
|
491
|
+
}
|
|
492
|
+
trimSymbolsToFit(file) {
|
|
493
|
+
const symbols = file.symbols;
|
|
494
|
+
let lo = 0;
|
|
495
|
+
let hi = symbols.length;
|
|
496
|
+
while (lo < hi) {
|
|
497
|
+
const mid = (lo + hi + 1) >>> 1;
|
|
498
|
+
const candidate = {
|
|
499
|
+
filePath: file.filePath,
|
|
500
|
+
symbols: [
|
|
501
|
+
...symbols.slice(0, mid),
|
|
502
|
+
{ name: `... and ${symbols.length - mid} more`, type: 'truncated' },
|
|
503
|
+
],
|
|
504
|
+
};
|
|
505
|
+
if (this.estimateGroupingPromptTokens([candidate]) <= GROUPING_TOKEN_BUDGET) {
|
|
506
|
+
lo = mid;
|
|
507
|
+
}
|
|
508
|
+
else {
|
|
509
|
+
hi = mid - 1;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
if (lo >= symbols.length)
|
|
513
|
+
return file;
|
|
514
|
+
return {
|
|
515
|
+
filePath: file.filePath,
|
|
516
|
+
symbols: lo > 0
|
|
517
|
+
? [
|
|
518
|
+
...symbols.slice(0, lo),
|
|
519
|
+
{ name: `... and ${symbols.length - lo} more`, type: 'truncated' },
|
|
520
|
+
]
|
|
521
|
+
: [{ name: 'no exports (truncated)', type: 'truncated' }],
|
|
522
|
+
};
|
|
523
|
+
}
|
|
524
|
+
/**
|
|
525
|
+
* Merge partial groupings from multiple batches. Same module name across
|
|
526
|
+
* batches gets file lists concatenated. Deduplicates (first-seen wins).
|
|
527
|
+
*/
|
|
528
|
+
mergeGroupings(partials) {
|
|
529
|
+
const merged = {};
|
|
530
|
+
const seen = new Set();
|
|
531
|
+
const slugToCanonical = new Map();
|
|
532
|
+
for (const partial of partials) {
|
|
533
|
+
for (const [mod, paths] of Object.entries(partial)) {
|
|
534
|
+
const slug = this.slugify(mod);
|
|
535
|
+
const canonical = slugToCanonical.get(slug) ?? mod;
|
|
536
|
+
if (!slugToCanonical.has(slug))
|
|
537
|
+
slugToCanonical.set(slug, mod);
|
|
538
|
+
for (const fp of paths) {
|
|
539
|
+
if (!seen.has(fp)) {
|
|
540
|
+
seen.add(fp);
|
|
541
|
+
if (!merged[canonical])
|
|
542
|
+
merged[canonical] = [];
|
|
543
|
+
merged[canonical].push(fp);
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
return merged;
|
|
549
|
+
}
|
|
377
550
|
/**
|
|
378
551
|
* Parse LLM grouping response. Validates all files are assigned.
|
|
379
552
|
*/
|
package/package.json
CHANGED