nexus-prime 7.2.0 → 7.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/architects/config.js +6 -1
- package/dist/cli/doctor-storage.d.ts +19 -0
- package/dist/cli/doctor-storage.js +72 -3
- package/dist/cli/install-wizard.d.ts +15 -0
- package/dist/cli/install-wizard.js +50 -32
- package/dist/cli.js +10 -53
- package/dist/dashboard/routes/health.js +20 -0
- package/dist/dashboard/server.js +1 -1
- package/dist/engines/ngram-index.d.ts +37 -3
- package/dist/engines/ngram-index.js +209 -23
- package/dist/engines/orchestrator/types.d.ts +5 -0
- package/dist/engines/orchestrator/types.js +64 -2
- package/dist/engines/orchestrator.js +29 -4
- package/dist/engines/runtime-hygiene.d.ts +4 -0
- package/dist/engines/runtime-hygiene.js +45 -1
- package/dist/install/claude-code-hooks.d.ts +33 -0
- package/dist/install/claude-code-hooks.js +96 -0
- package/dist/install/manifest.d.ts +29 -0
- package/dist/install/manifest.js +44 -1
- package/dist/install/state-locator.d.ts +11 -0
- package/dist/install/state-locator.js +30 -0
- package/dist/licensing/enforcement.js +13 -1
- package/dist/licensing/index.d.ts +1 -1
- package/dist/licensing/index.js +1 -1
- package/dist/licensing/license-manager.d.ts +12 -0
- package/dist/licensing/license-manager.js +91 -4
- package/dist/licensing/types.d.ts +5 -1
- package/dist/licensing/upgrade-prompts.d.ts +10 -0
- package/dist/licensing/upgrade-prompts.js +23 -0
- package/dist/licensing/web-auth.d.ts +4 -1
- package/dist/licensing/web-auth.js +34 -9
- package/dist/synapse/config.js +15 -11
- package/package.json +2 -2
|
@@ -40,6 +40,26 @@ function safeStatSize(filePath) {
|
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
42
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
43
|
+
// SQLite WAL footprint helpers — used by quota guard, rotation, and cleanup.
|
|
44
|
+
// SQLite databases are db + db-wal + db-shm. Any size accounting that ignores
|
|
45
|
+
// wal/shm misses the bug that turned ngram-index.db-wal into 84GB on disk.
|
|
46
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
47
|
+
const NGRAM_DEFAULT_WAL_LIMIT_BYTES = 64 * 1024 * 1024; // 64 MB
|
|
48
|
+
const NGRAM_DEFAULT_FOOTPRINT_BYTES = 512 * 1024 * 1024; // 512 MB
|
|
49
|
+
const NGRAM_DEFAULT_CHECKPOINT_INTERVAL_MS = 30_000;
|
|
50
|
+
const NGRAM_DEFAULT_CHECKPOINT_DOC_COUNT = 200;
|
|
51
|
+
export function getNgramWalPath(dbPath) {
|
|
52
|
+
return `${dbPath}-wal`;
|
|
53
|
+
}
|
|
54
|
+
export function getNgramShmPath(dbPath) {
|
|
55
|
+
return `${dbPath}-shm`;
|
|
56
|
+
}
|
|
57
|
+
export function getNgramFootprintBytes(dbPath) {
|
|
58
|
+
return safeStatSize(dbPath)
|
|
59
|
+
+ safeStatSize(getNgramWalPath(dbPath))
|
|
60
|
+
+ safeStatSize(getNgramShmPath(dbPath));
|
|
61
|
+
}
|
|
62
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
43
63
|
// Character pair frequency weights (precomputed from common code patterns)
|
|
44
64
|
// Rarer pairs get higher weights for sparse n-gram extraction.
|
|
45
65
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -157,6 +177,10 @@ export class NgramIndex {
|
|
|
157
177
|
deleteStmt;
|
|
158
178
|
lookupStmt;
|
|
159
179
|
docExistsStmt;
|
|
180
|
+
// Write-side checkpoint accounting (Phase 2): bound WAL growth on hot indexers.
|
|
181
|
+
writesSinceCheckpoint = 0;
|
|
182
|
+
lastCheckpointAt = 0;
|
|
183
|
+
quotaSkipNoticeShown = false;
|
|
160
184
|
// Search analytics
|
|
161
185
|
_searchStats = {
|
|
162
186
|
totalQueries: 0,
|
|
@@ -190,36 +214,69 @@ export class NgramIndex {
|
|
|
190
214
|
}
|
|
191
215
|
}
|
|
192
216
|
rotateOversizeDbIfNeeded() {
|
|
217
|
+
// Count the full SQLite footprint (db + wal + shm). The 84GB regression
|
|
218
|
+
// happened because a 32MB db file had a 84GB -wal sibling that this
|
|
219
|
+
// routine never inspected.
|
|
193
220
|
const rotateBytes = readEnvBytes('NEXUS_NGRAM_ROTATE_BYTES', 1024 * 1024 * 1024); // 1GB default
|
|
194
|
-
const
|
|
195
|
-
|
|
221
|
+
const dbBytes = safeStatSize(this.dbPath);
|
|
222
|
+
const footprint = getNgramFootprintBytes(this.dbPath);
|
|
223
|
+
if (footprint <= 0 || footprint < rotateBytes)
|
|
196
224
|
return;
|
|
197
|
-
// Keep only the most recent oversize archive to bound disk usage.
|
|
225
|
+
// Keep only the most recent oversize archive to bound disk usage. When the
|
|
226
|
+
// operator opts out of archiving (ARCHIVE_OVERSIZE=0), drop the leftovers
|
|
227
|
+
// outright instead of growing a backlog of multi-GB carcasses.
|
|
198
228
|
const dir = path.dirname(this.dbPath);
|
|
199
229
|
const base = path.basename(this.dbPath);
|
|
230
|
+
const archiveEnabled = process.env.NEXUS_NGRAM_ARCHIVE_OVERSIZE !== '0';
|
|
200
231
|
const existing = fs.existsSync(dir)
|
|
201
232
|
? fs.readdirSync(dir).filter((entry) => entry.startsWith(`${base}.oversize.`)).sort().reverse()
|
|
202
233
|
: [];
|
|
203
|
-
|
|
234
|
+
const keepCount = archiveEnabled ? 1 : 0;
|
|
235
|
+
for (const entry of existing.slice(keepCount)) {
|
|
204
236
|
try {
|
|
205
237
|
fs.rmSync(path.join(dir, entry), { force: true });
|
|
206
238
|
}
|
|
207
239
|
catch { /* best effort */ }
|
|
208
240
|
}
|
|
209
|
-
const
|
|
241
|
+
const stamp = Date.now();
|
|
242
|
+
const rotatedPath = `${this.dbPath}.oversize.${stamp}`;
|
|
243
|
+
const removeSibling = (suffix) => {
|
|
244
|
+
try {
|
|
245
|
+
fs.rmSync(`${this.dbPath}${suffix}`, { force: true });
|
|
246
|
+
}
|
|
247
|
+
catch { /* best effort */ }
|
|
248
|
+
};
|
|
210
249
|
try {
|
|
211
|
-
|
|
212
|
-
|
|
250
|
+
if (archiveEnabled) {
|
|
251
|
+
fs.renameSync(this.dbPath, rotatedPath);
|
|
252
|
+
}
|
|
253
|
+
else {
|
|
254
|
+
fs.rmSync(this.dbPath, { force: true });
|
|
255
|
+
}
|
|
256
|
+
// db is gone; the WAL/SHM siblings are stale and unsafe to keep — drop
|
|
257
|
+
// them so SQLite never re-attaches them on the next open.
|
|
258
|
+
removeSibling('-wal');
|
|
259
|
+
removeSibling('-shm');
|
|
260
|
+
logNgramNoticeOnce(`ngram:rotated:${this.dbPath}:${stamp}`, `[NgramIndex] rotated oversized DB (footprint=${Math.round(footprint / 1024 / 1024)}MB db=${Math.round(dbBytes / 1024 / 1024)}MB threshold=${Math.round(rotateBytes / 1024 / 1024)}MB archive=${archiveEnabled ? 'on' : 'off'}) db=${this.dbPath}${archiveEnabled ? ` rotated=${rotatedPath}` : ''}`);
|
|
213
261
|
}
|
|
214
262
|
catch (err) {
|
|
215
263
|
logNgramNoticeOnce(`ngram:rotate-failed:${this.dbPath}`, `[NgramIndex] oversize rotation failed; continuing with existing DB db=${this.dbPath} err=${String(err?.message ?? err)}`);
|
|
216
264
|
}
|
|
217
265
|
}
|
|
218
266
|
initSchema() {
|
|
267
|
+
const walLimitBytes = readEnvBytes('NEXUS_NGRAM_WAL_LIMIT_BYTES', NGRAM_DEFAULT_WAL_LIMIT_BYTES);
|
|
219
268
|
this.db.pragma('journal_mode = WAL');
|
|
220
269
|
this.db.pragma('synchronous = NORMAL');
|
|
221
270
|
this.db.pragma('cache_size = -16000');
|
|
222
271
|
this.db.pragma('busy_timeout = 5000');
|
|
272
|
+
// Bound WAL growth: autocheckpoint every N pages and hard-cap journal size
|
|
273
|
+
// so it never balloons (the 84GB regression). temp_store=MEMORY avoids
|
|
274
|
+
// spilling sort/temp tables into another on-disk file. mmap_size=0
|
|
275
|
+
// disables the OS-mapped read cache that can keep pages alive on small VMs.
|
|
276
|
+
this.db.pragma('wal_autocheckpoint = 1000');
|
|
277
|
+
this.db.pragma(`journal_size_limit = ${walLimitBytes}`);
|
|
278
|
+
this.db.pragma('temp_store = MEMORY');
|
|
279
|
+
this.db.pragma('mmap_size = 0');
|
|
223
280
|
this.db.exec(`
|
|
224
281
|
CREATE TABLE IF NOT EXISTS ngram_postings (
|
|
225
282
|
ngram_hash INTEGER NOT NULL,
|
|
@@ -288,8 +345,62 @@ export class NgramIndex {
|
|
|
288
345
|
}
|
|
289
346
|
}
|
|
290
347
|
// ── Document Management ─────────────────────────────────────────────────
|
|
348
|
+
/** Run an opportunistic WAL checkpoint. PASSIVE first, escalate to TRUNCATE
|
|
349
|
+
* if the WAL is still over its configured limit. Returns the WAL byte size
|
|
350
|
+
* observed after the attempt so callers can report progress. */
|
|
351
|
+
checkpointIfNeeded(reason) {
|
|
352
|
+
const walLimit = readEnvBytes('NEXUS_NGRAM_WAL_LIMIT_BYTES', NGRAM_DEFAULT_WAL_LIMIT_BYTES);
|
|
353
|
+
try {
|
|
354
|
+
this.db.exec('PRAGMA wal_checkpoint(PASSIVE)');
|
|
355
|
+
}
|
|
356
|
+
catch {
|
|
357
|
+
// best effort
|
|
358
|
+
}
|
|
359
|
+
let walBytes = this.getWalBytes();
|
|
360
|
+
if (walBytes > walLimit || reason !== 'periodic') {
|
|
361
|
+
try {
|
|
362
|
+
this.db.exec('PRAGMA wal_checkpoint(TRUNCATE)');
|
|
363
|
+
}
|
|
364
|
+
catch {
|
|
365
|
+
// best effort
|
|
366
|
+
}
|
|
367
|
+
walBytes = this.getWalBytes();
|
|
368
|
+
}
|
|
369
|
+
this.writesSinceCheckpoint = 0;
|
|
370
|
+
this.lastCheckpointAt = Date.now();
|
|
371
|
+
return walBytes;
|
|
372
|
+
}
|
|
373
|
+
/** Footprint quota check before a write. Returns true if the write should
|
|
374
|
+
* proceed; false if it should be skipped. Throws when NEXUS_NGRAM_STRICT_QUOTA=1
|
|
375
|
+
* so callers can surface hard quota failures during tests/CI. */
|
|
376
|
+
allowWrite() {
|
|
377
|
+
const walLimit = readEnvBytes('NEXUS_NGRAM_WAL_LIMIT_BYTES', NGRAM_DEFAULT_WAL_LIMIT_BYTES);
|
|
378
|
+
const maxFootprint = readEnvBytes('NEXUS_NGRAM_MAX_FOOTPRINT_BYTES', NGRAM_DEFAULT_FOOTPRINT_BYTES);
|
|
379
|
+
const strict = process.env.NEXUS_NGRAM_STRICT_QUOTA === '1';
|
|
380
|
+
let walBytes = this.getWalBytes();
|
|
381
|
+
if (walBytes > walLimit) {
|
|
382
|
+
walBytes = this.checkpointIfNeeded('quota');
|
|
383
|
+
}
|
|
384
|
+
const footprint = this.getSqliteFootprintBytes();
|
|
385
|
+
if (footprint > maxFootprint) {
|
|
386
|
+
const msg = `[NgramIndex] footprint quota exceeded — db=${this.dbPath} footprint=${footprint} cap=${maxFootprint} wal=${walBytes}`;
|
|
387
|
+
if (strict) {
|
|
388
|
+
throw new Error(msg);
|
|
389
|
+
}
|
|
390
|
+
if (!this.quotaSkipNoticeShown) {
|
|
391
|
+
this.quotaSkipNoticeShown = true;
|
|
392
|
+
logNgramNoticeOnce(`ngram:quota-skip:${this.dbPath}`, msg);
|
|
393
|
+
}
|
|
394
|
+
return false;
|
|
395
|
+
}
|
|
396
|
+
// Recovered — re-arm the warning so a later breach prints again.
|
|
397
|
+
this.quotaSkipNoticeShown = false;
|
|
398
|
+
return true;
|
|
399
|
+
}
|
|
291
400
|
/** Index a document's text content */
|
|
292
401
|
addDocument(docId, text) {
|
|
402
|
+
if (!this.allowWrite())
|
|
403
|
+
return;
|
|
293
404
|
// Remove existing postings for this doc (idempotent)
|
|
294
405
|
this.deleteStmt.run(docId);
|
|
295
406
|
const trigrams = extractTrigrams(text);
|
|
@@ -319,6 +430,19 @@ export class NgramIndex {
|
|
|
319
430
|
`).run(docId, text.length, Date.now());
|
|
320
431
|
});
|
|
321
432
|
insertMany();
|
|
433
|
+
this.maybePeriodicCheckpoint();
|
|
434
|
+
}
|
|
435
|
+
/** Trigger a passive checkpoint when either the doc or time interval threshold
|
|
436
|
+
* is hit. Cheap on a healthy DB, prevents WAL growth on bulk indexing. */
|
|
437
|
+
maybePeriodicCheckpoint() {
|
|
438
|
+
this.writesSinceCheckpoint += 1;
|
|
439
|
+
const intervalMs = readEnvBytes('NEXUS_NGRAM_CHECKPOINT_INTERVAL_MS', NGRAM_DEFAULT_CHECKPOINT_INTERVAL_MS);
|
|
440
|
+
const docThreshold = readEnvBytes('NEXUS_NGRAM_CHECKPOINT_DOCS', NGRAM_DEFAULT_CHECKPOINT_DOC_COUNT);
|
|
441
|
+
const dueByDocs = this.writesSinceCheckpoint >= docThreshold;
|
|
442
|
+
const dueByTime = Date.now() - this.lastCheckpointAt > intervalMs;
|
|
443
|
+
if (!dueByDocs && !dueByTime)
|
|
444
|
+
return;
|
|
445
|
+
this.checkpointIfNeeded('periodic');
|
|
322
446
|
}
|
|
323
447
|
/** Remove a document from the index */
|
|
324
448
|
removeDocument(docId) {
|
|
@@ -337,6 +461,27 @@ export class NgramIndex {
|
|
|
337
461
|
const row = this.db.prepare('SELECT COUNT(*) as cnt FROM ngram_docs').get();
|
|
338
462
|
return row.cnt;
|
|
339
463
|
}
|
|
464
|
+
// ── Footprint accessors ─────────────────────────────────────────────────
|
|
465
|
+
/** Path to the underlying SQLite database file. */
|
|
466
|
+
getDbPath() {
|
|
467
|
+
return this.dbPath;
|
|
468
|
+
}
|
|
469
|
+
/** Bytes occupied by the .db file alone. */
|
|
470
|
+
getDbBytes() {
|
|
471
|
+
return safeStatSize(this.dbPath);
|
|
472
|
+
}
|
|
473
|
+
/** Bytes occupied by the .db-wal file. */
|
|
474
|
+
getWalBytes() {
|
|
475
|
+
return safeStatSize(getNgramWalPath(this.dbPath));
|
|
476
|
+
}
|
|
477
|
+
/** Bytes occupied by the .db-shm file. */
|
|
478
|
+
getShmBytes() {
|
|
479
|
+
return safeStatSize(getNgramShmPath(this.dbPath));
|
|
480
|
+
}
|
|
481
|
+
/** Total SQLite footprint = db + wal + shm. Use this for quota checks. */
|
|
482
|
+
getSqliteFootprintBytes() {
|
|
483
|
+
return getNgramFootprintBytes(this.dbPath);
|
|
484
|
+
}
|
|
340
485
|
// ── Search ──────────────────────────────────────────────────────────────
|
|
341
486
|
/**
|
|
342
487
|
* Search for documents matching a text query.
|
|
@@ -522,15 +667,24 @@ export class NgramIndex {
|
|
|
522
667
|
}
|
|
523
668
|
/**
|
|
524
669
|
* Operator-focused maintenance for the on-disk ngram DB.
|
|
525
|
-
* - Bounds runaway DB growth via rotation (default >= 1GB)
|
|
526
|
-
*
|
|
670
|
+
* - Bounds runaway DB growth via rotation (default >= 1GB), counting the
|
|
671
|
+
* full SQLite footprint (db + wal + shm) so a runaway WAL triggers it.
|
|
672
|
+
* - Vacuums only when safe (<= vacuumMaxBytes) and either forced or dirty.
|
|
527
673
|
*/
|
|
528
674
|
maintainBounded(options = {}) {
|
|
529
|
-
const
|
|
675
|
+
const dbBytes = safeStatSize(this.dbPath);
|
|
676
|
+
const footprint = this.getSqliteFootprintBytes();
|
|
530
677
|
const rotateBytes = readEnvBytes('NEXUS_NGRAM_ROTATE_BYTES', 1024 * 1024 * 1024);
|
|
531
678
|
const vacuumMaxBytes = readEnvBytes('NEXUS_NGRAM_VACUUM_MAX_BYTES', 256 * 1024 * 1024);
|
|
532
|
-
if (
|
|
679
|
+
if (footprint >= rotateBytes && footprint > 0) {
|
|
680
|
+
const archiveEnabled = process.env.NEXUS_NGRAM_ARCHIVE_OVERSIZE !== '0';
|
|
533
681
|
const rotatedPath = `${this.dbPath}.oversize.${Date.now()}`;
|
|
682
|
+
const removeSibling = (suffix) => {
|
|
683
|
+
try {
|
|
684
|
+
fs.rmSync(`${this.dbPath}${suffix}`, { force: true });
|
|
685
|
+
}
|
|
686
|
+
catch { /* best effort */ }
|
|
687
|
+
};
|
|
534
688
|
try {
|
|
535
689
|
try {
|
|
536
690
|
this.db.exec('PRAGMA wal_checkpoint(TRUNCATE)');
|
|
@@ -540,7 +694,14 @@ export class NgramIndex {
|
|
|
540
694
|
this.db.close();
|
|
541
695
|
}
|
|
542
696
|
catch { /* ignore */ }
|
|
543
|
-
|
|
697
|
+
if (archiveEnabled) {
|
|
698
|
+
fs.renameSync(this.dbPath, rotatedPath);
|
|
699
|
+
}
|
|
700
|
+
else {
|
|
701
|
+
fs.rmSync(this.dbPath, { force: true });
|
|
702
|
+
}
|
|
703
|
+
removeSibling('-wal');
|
|
704
|
+
removeSibling('-shm');
|
|
544
705
|
}
|
|
545
706
|
catch (err) {
|
|
546
707
|
// Re-open if close succeeded but rename failed
|
|
@@ -553,13 +714,16 @@ export class NgramIndex {
|
|
|
553
714
|
return {
|
|
554
715
|
action: 'none',
|
|
555
716
|
dbPath: this.dbPath,
|
|
556
|
-
sizeBytes,
|
|
717
|
+
sizeBytes: footprint,
|
|
557
718
|
reason: `rotate failed: ${String(err?.message ?? err)}`,
|
|
558
719
|
};
|
|
559
720
|
}
|
|
560
721
|
// Recreate a fresh DB
|
|
561
722
|
this.knownHashes.clear();
|
|
562
723
|
this.storageDirty = false;
|
|
724
|
+
this.writesSinceCheckpoint = 0;
|
|
725
|
+
this.lastCheckpointAt = Date.now();
|
|
726
|
+
this.quotaSkipNoticeShown = false;
|
|
563
727
|
this.db = new Database(this.dbPath);
|
|
564
728
|
this.initSchema();
|
|
565
729
|
this.prepareStatements();
|
|
@@ -567,26 +731,26 @@ export class NgramIndex {
|
|
|
567
731
|
return {
|
|
568
732
|
action: 'rotated',
|
|
569
733
|
dbPath: this.dbPath,
|
|
570
|
-
sizeBytes,
|
|
571
|
-
previousPath: rotatedPath,
|
|
572
|
-
reason: `
|
|
734
|
+
sizeBytes: footprint,
|
|
735
|
+
previousPath: archiveEnabled ? rotatedPath : undefined,
|
|
736
|
+
reason: `footprint exceeded rotate threshold (db=${dbBytes}, footprint=${footprint}, threshold=${rotateBytes})`,
|
|
573
737
|
};
|
|
574
738
|
}
|
|
575
|
-
if (
|
|
739
|
+
if (dbBytes > 0 && dbBytes <= vacuumMaxBytes && (options.force || this.storageDirty)) {
|
|
576
740
|
this.optimizeStorage(true);
|
|
577
741
|
return {
|
|
578
742
|
action: 'vacuum',
|
|
579
743
|
dbPath: this.dbPath,
|
|
580
|
-
sizeBytes,
|
|
744
|
+
sizeBytes: dbBytes,
|
|
581
745
|
reason: options.force ? 'forced vacuum' : 'storageDirty vacuum',
|
|
582
746
|
};
|
|
583
747
|
}
|
|
584
748
|
return {
|
|
585
749
|
action: 'none',
|
|
586
750
|
dbPath: this.dbPath,
|
|
587
|
-
sizeBytes,
|
|
588
|
-
reason:
|
|
589
|
-
? `skip vacuum: db too large (${
|
|
751
|
+
sizeBytes: footprint,
|
|
752
|
+
reason: dbBytes > vacuumMaxBytes
|
|
753
|
+
? `skip vacuum: db too large (${dbBytes} > ${vacuumMaxBytes})`
|
|
590
754
|
: 'no maintenance needed',
|
|
591
755
|
};
|
|
592
756
|
}
|
|
@@ -607,9 +771,31 @@ export class NgramIndex {
|
|
|
607
771
|
dbSizeBytes,
|
|
608
772
|
};
|
|
609
773
|
}
|
|
610
|
-
/** Close the database connection
|
|
774
|
+
/** Close the database connection.
|
|
775
|
+
*
|
|
776
|
+
* Always truncates the WAL so the .db-wal sibling can never outgrow the
|
|
777
|
+
* configured cap on shutdown — the failure mode that produced the 84GB
|
|
778
|
+
* WAL on a user machine. VACUUM is only run when the operator has both a
|
|
779
|
+
* small DB and explicitly opts in via NEXUS_NGRAM_VACUUM_ON_CLOSE=1, since
|
|
780
|
+
* blind VACUUM on multi-GB DBs blocks shutdown indefinitely. */
|
|
611
781
|
close() {
|
|
612
|
-
|
|
782
|
+
try {
|
|
783
|
+
this.db.exec('PRAGMA wal_checkpoint(TRUNCATE)');
|
|
784
|
+
}
|
|
785
|
+
catch {
|
|
786
|
+
// best effort — never block shutdown on checkpoint failure
|
|
787
|
+
}
|
|
788
|
+
if (process.env.NEXUS_NGRAM_VACUUM_ON_CLOSE === '1') {
|
|
789
|
+
const vacuumMaxBytes = readEnvBytes('NEXUS_NGRAM_VACUUM_MAX_BYTES', 256 * 1024 * 1024);
|
|
790
|
+
const dbBytes = safeStatSize(this.dbPath);
|
|
791
|
+
if (this.storageDirty && dbBytes > 0 && dbBytes <= vacuumMaxBytes) {
|
|
792
|
+
try {
|
|
793
|
+
this.db.exec('VACUUM');
|
|
794
|
+
this.storageDirty = false;
|
|
795
|
+
}
|
|
796
|
+
catch { /* best effort */ }
|
|
797
|
+
}
|
|
798
|
+
}
|
|
613
799
|
this.db.close();
|
|
614
800
|
}
|
|
615
801
|
}
|
|
@@ -267,4 +267,9 @@ export declare const MAX_DISCOVERED_FILES = 32;
|
|
|
267
267
|
export declare const ABSOLUTE_SECONDARY_MIN = 0.3;
|
|
268
268
|
export declare const DISCOVERY_EXTENSIONS: Set<string>;
|
|
269
269
|
export declare const DISCOVERY_IGNORES: Set<string>;
|
|
270
|
+
export declare const DISCOVERY_FILENAME_SKIPS: Set<string>;
|
|
271
|
+
export declare const DISCOVERY_BINARY_EXTENSIONS: Set<string>;
|
|
270
272
|
export declare const REPO_SEARCH_HEAD_BYTES = 2000;
|
|
273
|
+
/** Default per-file byte cap for the ngram indexer. Override with
|
|
274
|
+
* NEXUS_NGRAM_MAX_FILE_BYTES. Files above this are skipped (not truncated). */
|
|
275
|
+
export declare const DISCOVERY_DEFAULT_MAX_FILE_BYTES = 256000;
|
|
@@ -7,6 +7,68 @@
|
|
|
7
7
|
export const MAX_AUTONOMY_HISTORY = 24;
|
|
8
8
|
export const MAX_DISCOVERED_FILES = 32;
|
|
9
9
|
export const ABSOLUTE_SECONDARY_MIN = 0.3;
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
// Source-like file extensions worth indexing. Covers JS/TS, Python, Go, Rust,
|
|
11
|
+
// JVM langs, native, web, configs and prose. NEXUS_NGRAM_INDEX_ALL=1 disables
|
|
12
|
+
// the allowlist (useful for diagnostics on non-mainstream repos).
|
|
13
|
+
export const DISCOVERY_EXTENSIONS = new Set([
|
|
14
|
+
'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
|
|
15
|
+
'.py', '.go', '.rs', '.java', '.kt', '.swift',
|
|
16
|
+
'.cpp', '.cc', '.c', '.h', '.hpp', '.cs',
|
|
17
|
+
'.rb', '.php',
|
|
18
|
+
'.md', '.mdx',
|
|
19
|
+
'.json', '.yaml', '.yml', '.toml',
|
|
20
|
+
'.sql', '.sh',
|
|
21
|
+
'.css', '.scss', '.html',
|
|
22
|
+
]);
|
|
23
|
+
// Directories the indexer must skip — generated artefacts, dependency caches,
|
|
24
|
+
// virtualenvs, build outputs, and Nexus's own working dirs.
|
|
25
|
+
export const DISCOVERY_IGNORES = new Set([
|
|
26
|
+
'.git',
|
|
27
|
+
'node_modules',
|
|
28
|
+
'dist',
|
|
29
|
+
'build',
|
|
30
|
+
'out',
|
|
31
|
+
'coverage',
|
|
32
|
+
'.next',
|
|
33
|
+
'.turbo',
|
|
34
|
+
'.playwright-cli',
|
|
35
|
+
'.cache',
|
|
36
|
+
'vendor',
|
|
37
|
+
'target',
|
|
38
|
+
'.venv',
|
|
39
|
+
'__pycache__',
|
|
40
|
+
'tmp',
|
|
41
|
+
'.agents',
|
|
42
|
+
'.agent',
|
|
43
|
+
]);
|
|
44
|
+
// Specific filenames to skip even when they match an allowed extension.
|
|
45
|
+
// Lockfiles and large generated manifests blow up trigram indexes without
|
|
46
|
+
// any retrieval value.
|
|
47
|
+
export const DISCOVERY_FILENAME_SKIPS = new Set([
|
|
48
|
+
'package-lock.json',
|
|
49
|
+
'pnpm-lock.yaml',
|
|
50
|
+
'yarn.lock',
|
|
51
|
+
'bun.lockb',
|
|
52
|
+
'composer.lock',
|
|
53
|
+
'poetry.lock',
|
|
54
|
+
'Pipfile.lock',
|
|
55
|
+
'Cargo.lock',
|
|
56
|
+
'Gemfile.lock',
|
|
57
|
+
'go.sum',
|
|
58
|
+
]);
|
|
59
|
+
// Binary / media / archive extensions never to read into the indexer even
|
|
60
|
+
// when the user passes NEXUS_NGRAM_INDEX_ALL=1 — these contain no useful
|
|
61
|
+
// trigrams and inflate the WAL when accidentally included.
|
|
62
|
+
export const DISCOVERY_BINARY_EXTENSIONS = new Set([
|
|
63
|
+
'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.tiff',
|
|
64
|
+
'.mp3', '.mp4', '.mov', '.avi', '.mkv', '.webm', '.wav', '.flac', '.ogg',
|
|
65
|
+
'.zip', '.tar', '.gz', '.tgz', '.bz2', '.xz', '.7z', '.rar',
|
|
66
|
+
'.pdf', '.psd', '.ai',
|
|
67
|
+
'.so', '.dll', '.dylib', '.a', '.o', '.node', '.exe', '.bin',
|
|
68
|
+
'.wasm', '.class', '.jar', '.pyc', '.pyo',
|
|
69
|
+
'.lockb',
|
|
70
|
+
]);
|
|
12
71
|
export const REPO_SEARCH_HEAD_BYTES = 2_000;
|
|
72
|
+
/** Default per-file byte cap for the ngram indexer. Override with
|
|
73
|
+
* NEXUS_NGRAM_MAX_FILE_BYTES. Files above this are skipped (not truncated). */
|
|
74
|
+
export const DISCOVERY_DEFAULT_MAX_FILE_BYTES = 256_000;
|
|
@@ -38,7 +38,7 @@ import { computeFilePriors } from './priors/file-priors.js';
|
|
|
38
38
|
import { GhostPass, createSubAgentRuntime, } from '../phantom/index.js';
|
|
39
39
|
// ─── Types and constants (extracted to orchestrator/types.ts) ─────────────────
|
|
40
40
|
export * from './orchestrator/types.js';
|
|
41
|
-
import { MAX_AUTONOMY_HISTORY, MAX_DISCOVERED_FILES, DISCOVERY_EXTENSIONS, DISCOVERY_IGNORES, } from './orchestrator/types.js';
|
|
41
|
+
import { MAX_AUTONOMY_HISTORY, MAX_DISCOVERED_FILES, DISCOVERY_EXTENSIONS, DISCOVERY_IGNORES, DISCOVERY_FILENAME_SKIPS, DISCOVERY_BINARY_EXTENSIONS, DISCOVERY_DEFAULT_MAX_FILE_BYTES, } from './orchestrator/types.js';
|
|
42
42
|
// ─── Scoring / planning utilities (extracted to orchestrator/scoring.ts) ───────
|
|
43
43
|
import { dedupeStrings, shortLabel, extractKeywords, scoreText, scorePath, decomposeTask as decomposeTaskFn, classifyIntent, buildTaskGraph, buildWorkerPlan, decideWorkers, determineMode, toSourceAwareTokenBudget, toRagCandidateStatus, toRagUsageSummary, buildArtifactOutcome, mergeArtifactOutcomeHistory, getArtifactHistoryRecord, getArtifactHistoryScore, deriveArtifactMemoryClass, deriveArtifactFreshness, buildRepoSearchDocument, readRepoSearchHead, extractRepoSearchTerms, } from './orchestrator/scoring.js';
|
|
44
44
|
import { categoryFilter, emitFunnelStage } from './orchestrator/funnel.js';
|
|
@@ -2103,14 +2103,33 @@ export class OrchestratorEngine {
|
|
|
2103
2103
|
// Cheap extension check first (no I/O), then bounded-parallel stat for
|
|
2104
2104
|
// the size + isFile gate. Replaces a per-file synchronous statSync that
|
|
2105
2105
|
// ran inside .filter() loops on the hot path. CLAUDE.md §2 hard rule #1.
|
|
2106
|
-
|
|
2106
|
+
//
|
|
2107
|
+
// Indexing discipline: skip lockfiles and binary/media extensions even
|
|
2108
|
+
// when NEXUS_NGRAM_INDEX_ALL=1 — these never produce useful trigrams and
|
|
2109
|
+
// bloat the WAL on big monorepos.
|
|
2110
|
+
const indexAll = process.env.NEXUS_NGRAM_INDEX_ALL === '1';
|
|
2111
|
+
const rawCap = Number(process.env.NEXUS_NGRAM_MAX_FILE_BYTES);
|
|
2112
|
+
const maxFileBytes = Number.isFinite(rawCap) && rawCap > 0
|
|
2113
|
+
? Math.floor(rawCap)
|
|
2114
|
+
: DISCOVERY_DEFAULT_MAX_FILE_BYTES;
|
|
2115
|
+
const candidates = filePaths.filter((f) => {
|
|
2116
|
+
const base = path.basename(f);
|
|
2117
|
+
if (DISCOVERY_FILENAME_SKIPS.has(base))
|
|
2118
|
+
return false;
|
|
2119
|
+
const ext = path.extname(f).toLowerCase();
|
|
2120
|
+
if (DISCOVERY_BINARY_EXTENSIONS.has(ext))
|
|
2121
|
+
return false;
|
|
2122
|
+
if (indexAll)
|
|
2123
|
+
return true;
|
|
2124
|
+
return DISCOVERY_EXTENSIONS.has(ext);
|
|
2125
|
+
});
|
|
2107
2126
|
if (candidates.length === 0)
|
|
2108
2127
|
return [];
|
|
2109
2128
|
const limit = pLimit(8);
|
|
2110
2129
|
const results = await Promise.all(candidates.map((f) => limit(async () => {
|
|
2111
2130
|
try {
|
|
2112
2131
|
const stat = await fs.promises.stat(f);
|
|
2113
|
-
return stat.isFile() && stat.size <=
|
|
2132
|
+
return stat.isFile() && stat.size <= maxFileBytes ? f : null;
|
|
2114
2133
|
}
|
|
2115
2134
|
catch {
|
|
2116
2135
|
return null;
|
|
@@ -2202,6 +2221,7 @@ export class OrchestratorEngine {
|
|
|
2202
2221
|
// we don't need a separate stat per entry, with bounded parallel reads
|
|
2203
2222
|
// via pLimit(8). Size filtering is done in a single second pass through
|
|
2204
2223
|
// filterDiscoverableFiles. CLAUDE.md §2 hard rule #1.
|
|
2224
|
+
const walkIndexAll = process.env.NEXUS_NGRAM_INDEX_ALL === '1';
|
|
2205
2225
|
const limit = pLimit(8);
|
|
2206
2226
|
const candidates = [];
|
|
2207
2227
|
let queue = [root];
|
|
@@ -2228,7 +2248,12 @@ export class OrchestratorEngine {
|
|
|
2228
2248
|
}
|
|
2229
2249
|
if (!entry.isFile())
|
|
2230
2250
|
continue;
|
|
2231
|
-
if (
|
|
2251
|
+
if (DISCOVERY_FILENAME_SKIPS.has(entry.name))
|
|
2252
|
+
continue;
|
|
2253
|
+
const ext = path.extname(entry.name).toLowerCase();
|
|
2254
|
+
if (DISCOVERY_BINARY_EXTENSIONS.has(ext))
|
|
2255
|
+
continue;
|
|
2256
|
+
if (!walkIndexAll && !DISCOVERY_EXTENSIONS.has(ext))
|
|
2232
2257
|
continue;
|
|
2233
2258
|
candidates.push(fullPath);
|
|
2234
2259
|
}
|
|
@@ -12,6 +12,10 @@ export interface HygieneReport {
|
|
|
12
12
|
orphanWorktreesRemoved: number;
|
|
13
13
|
/** Runs dirs swept due to budgets. */
|
|
14
14
|
boundedRunsSweeps: number;
|
|
15
|
+
/** Orphan ngram-index sidecar files removed (-wal/-shm without .db). */
|
|
16
|
+
ngramSidecarOrphansRemoved: number;
|
|
17
|
+
/** ngram-index oversize archives pruned beyond keep=1. */
|
|
18
|
+
ngramArchivesPruned: number;
|
|
15
19
|
}
|
|
16
20
|
export declare function runStartupHygiene(input: {
|
|
17
21
|
repoRoot: string;
|
|
@@ -6,7 +6,7 @@ import { SessionDNAManager } from './session-dna.js';
|
|
|
6
6
|
import { clearBootstrapReceipt, readBootstrapReceipt } from './bootstrap/bootstrap-registry.js';
|
|
7
7
|
import { doctorGitWorktrees } from './worktree-health.js';
|
|
8
8
|
import { sweepDirectory, sweepOrphanWorktrees } from '../install/fs-purge.js';
|
|
9
|
-
import { getRuntimeTmpRoots, getWorktreeRoots } from '../install/state-locator.js';
|
|
9
|
+
import { enumerateNgramArchives, getRuntimeTmpRoots, getWorktreeRoots } from '../install/state-locator.js';
|
|
10
10
|
import { resolveWorktreeBudget, resolveRunsBudget } from '../cli/cleanup.js';
|
|
11
11
|
function isOlderThan(target, maxAgeMs) {
|
|
12
12
|
try {
|
|
@@ -140,6 +140,7 @@ export async function runStartupHygiene(input) {
|
|
|
140
140
|
// best-effort
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
|
+
const { ngramSidecarOrphansRemoved, ngramArchivesPruned } = pruneNgramArtifacts(stateDir);
|
|
143
144
|
return {
|
|
144
145
|
mode,
|
|
145
146
|
cleanedRuntimeRegistryEntries: Math.max(0, registryBefore - registryAfter),
|
|
@@ -150,5 +151,48 @@ export async function runStartupHygiene(input) {
|
|
|
150
151
|
boundedWorktreeSweeps,
|
|
151
152
|
orphanWorktreesRemoved,
|
|
152
153
|
boundedRunsSweeps,
|
|
154
|
+
ngramSidecarOrphansRemoved,
|
|
155
|
+
ngramArchivesPruned,
|
|
153
156
|
};
|
|
154
157
|
}
|
|
158
|
+
/**
|
|
159
|
+
* Best-effort ngram cleanup at startup:
|
|
160
|
+
* - delete orphan `*.db-wal` / `*.db-shm` when the matching `.db` is missing
|
|
161
|
+
* - keep at most one `ngram-index.db.oversize.*` archive
|
|
162
|
+
* The live DB is left alone — NgramIndex itself enforces WAL bounds via
|
|
163
|
+
* journal_size_limit + autocheckpoint when it opens.
|
|
164
|
+
*/
|
|
165
|
+
function pruneNgramArtifacts(stateDir) {
|
|
166
|
+
let ngramSidecarOrphansRemoved = 0;
|
|
167
|
+
let ngramArchivesPruned = 0;
|
|
168
|
+
if (!fs.existsSync(stateDir)) {
|
|
169
|
+
return { ngramSidecarOrphansRemoved, ngramArchivesPruned };
|
|
170
|
+
}
|
|
171
|
+
// Orphan sidecars: foo.db-wal / foo.db-shm with no foo.db.
|
|
172
|
+
try {
|
|
173
|
+
for (const entry of fs.readdirSync(stateDir)) {
|
|
174
|
+
if (!entry.endsWith('.db-wal') && !entry.endsWith('.db-shm'))
|
|
175
|
+
continue;
|
|
176
|
+
const dbBase = entry.replace(/-(?:wal|shm)$/, '');
|
|
177
|
+
if (fs.existsSync(path.join(stateDir, dbBase)))
|
|
178
|
+
continue;
|
|
179
|
+
if (safeUnlink(path.join(stateDir, entry)))
|
|
180
|
+
ngramSidecarOrphansRemoved += 1;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
catch {
|
|
184
|
+
// best-effort
|
|
185
|
+
}
|
|
186
|
+
// Archive cap: keep newest, drop the rest.
|
|
187
|
+
try {
|
|
188
|
+
const archives = enumerateNgramArchives(stateDir).sort((a, b) => b.modifiedAt - a.modifiedAt);
|
|
189
|
+
for (const archive of archives.slice(1)) {
|
|
190
|
+
if (safeUnlink(archive.path))
|
|
191
|
+
ngramArchivesPruned += 1;
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
catch {
|
|
195
|
+
// best-effort
|
|
196
|
+
}
|
|
197
|
+
return { ngramSidecarOrphansRemoved, ngramArchivesPruned };
|
|
198
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/** Marker that identifies a hook entry as Nexus-owned. */
|
|
2
|
+
export declare const NEXUS_HOOK_COMMAND_MARKER = "nexus-prime hook";
|
|
3
|
+
interface HookCommand {
|
|
4
|
+
type: 'command';
|
|
5
|
+
command: string;
|
|
6
|
+
timeout?: number;
|
|
7
|
+
}
|
|
8
|
+
interface HookEntry {
|
|
9
|
+
matcher?: string;
|
|
10
|
+
hooks: HookCommand[];
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Canonical Nexus hook spec. Edit here, not in callers.
|
|
14
|
+
* Timeouts protect Claude Code from a wedged Nexus process — leave them set.
|
|
15
|
+
*/
|
|
16
|
+
export declare function getNexusHookSpec(): Record<string, HookEntry[]>;
|
|
17
|
+
export interface NexusHookWriteResult {
|
|
18
|
+
/** True when the file would be written (or was written). */
|
|
19
|
+
changed: boolean;
|
|
20
|
+
/** Hook event names that ended up with Nexus entries. */
|
|
21
|
+
events: string[];
|
|
22
|
+
/** Number of stale Nexus entries removed before re-adding. */
|
|
23
|
+
staleRemoved: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Idempotent writer for the Nexus hook block. Returns metadata so callers can
|
|
27
|
+
* report status (`installed | unchanged | dry-run`) without re-implementing
|
|
28
|
+
* the merge logic.
|
|
29
|
+
*/
|
|
30
|
+
export declare function writeNexusClaudeCodeHooks(settingsPath: string, options?: {
|
|
31
|
+
dryRun?: boolean;
|
|
32
|
+
}): NexusHookWriteResult;
|
|
33
|
+
export {};
|