@karmaniverous/jeeves-watcher 0.15.1 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/jeeves-watcher/index.js +171 -25
- package/dist/index.d.ts +72 -7
- package/dist/index.js +172 -26
- package/package.json +1 -1
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { createRequire } from 'node:module';
|
|
3
3
|
import { Command } from '@commander-js/extra-typings';
|
|
4
|
-
import { mkdirSync, existsSync, readFileSync, writeFileSync, statSync, readdirSync } from 'node:fs';
|
|
4
|
+
import { mkdirSync, existsSync, readFileSync, writeFileSync, rmSync, renameSync, statSync, readdirSync } from 'node:fs';
|
|
5
5
|
import { join, dirname, resolve, relative, extname, basename, isAbsolute } from 'node:path';
|
|
6
6
|
import Database from 'better-sqlite3';
|
|
7
7
|
import { z, ZodError } from 'zod';
|
|
8
8
|
import Ajv from 'ajv';
|
|
9
9
|
import addFormats from 'ajv-formats';
|
|
10
10
|
import picomatch from 'picomatch';
|
|
11
|
+
import { deserialize, serialize } from 'node:v8';
|
|
11
12
|
import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
|
|
12
13
|
import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
13
14
|
import ignore from 'ignore';
|
|
@@ -207,21 +208,32 @@ class ContentHashCache {
|
|
|
207
208
|
* @module enrichment/EnrichmentStore
|
|
208
209
|
* SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
|
|
209
210
|
*/
|
|
211
|
+
const BUSY_TIMEOUT_MS = 5000;
|
|
210
212
|
/**
|
|
211
213
|
* SQLite-backed enrichment metadata store.
|
|
212
214
|
*/
|
|
213
215
|
class EnrichmentStore {
|
|
214
216
|
db;
|
|
217
|
+
logger;
|
|
215
218
|
/**
|
|
216
219
|
* Create or open the enrichment store.
|
|
217
220
|
*
|
|
218
221
|
* @param stateDir - Directory for the SQLite database file.
|
|
219
222
|
*/
|
|
220
|
-
constructor(stateDir) {
|
|
223
|
+
constructor(stateDir, logger) {
|
|
224
|
+
this.logger = logger;
|
|
221
225
|
mkdirSync(stateDir, { recursive: true });
|
|
222
226
|
const dbPath = join(stateDir, 'enrichments.sqlite');
|
|
223
227
|
this.db = new Database(dbPath);
|
|
224
228
|
this.db.pragma('journal_mode = WAL');
|
|
229
|
+
this.db.pragma('busy_timeout = ' + BUSY_TIMEOUT_MS.toString());
|
|
230
|
+
const [checkpointStatus] = this.db.pragma('wal_checkpoint(TRUNCATE)');
|
|
231
|
+
if (checkpointStatus && checkpointStatus.busy > 0) {
|
|
232
|
+
// EnrichmentStore is expected to be single-writer. If we see a busy WAL
|
|
233
|
+
// checkpoint at startup, it's most likely from an unclean shutdown where
|
|
234
|
+
// the OS hasn't yet released file handles.
|
|
235
|
+
this.logger?.warn({ checkpointStatus }, 'WAL checkpoint busy at startup; OS may still be releasing file handles');
|
|
236
|
+
}
|
|
225
237
|
this.db.exec(`
|
|
226
238
|
CREATE TABLE IF NOT EXISTS enrichments (
|
|
227
239
|
path TEXT PRIMARY KEY,
|
|
@@ -578,6 +590,92 @@ class VirtualRuleStore {
|
|
|
578
590
|
}
|
|
579
591
|
}
|
|
580
592
|
|
|
593
|
+
/**
|
|
594
|
+
* @module util/BinaryFileStore
|
|
595
|
+
* Binary-backed read/modify/write store with in-memory caching and debounced flush.
|
|
596
|
+
*
|
|
597
|
+
* Persists a single JS object to disk using V8 structured clone serialization.
|
|
598
|
+
* I/O: synchronous fs read/write.
|
|
599
|
+
*/
|
|
600
|
+
/**
|
|
601
|
+
* Base class for binary file stores.
|
|
602
|
+
*
|
|
603
|
+
* @typeParam T - The stored data structure.
|
|
604
|
+
*/
|
|
605
|
+
class BinaryFileStore {
|
|
606
|
+
/** Path to the binary file on disk. */
|
|
607
|
+
filePath;
|
|
608
|
+
/** In-memory cache of the file contents, or `null` if not yet loaded. */
|
|
609
|
+
cache = null;
|
|
610
|
+
/** Logger instance for warnings and diagnostics. */
|
|
611
|
+
logger;
|
|
612
|
+
flushDebounceMs;
|
|
613
|
+
flushTimer;
|
|
614
|
+
dirty = false;
|
|
615
|
+
constructor(options) {
|
|
616
|
+
this.filePath = options.filePath;
|
|
617
|
+
this.logger = options.logger;
|
|
618
|
+
this.flushDebounceMs = options.flushDebounceMs ?? 5000;
|
|
619
|
+
mkdirSync(dirname(this.filePath), { recursive: true });
|
|
620
|
+
}
|
|
621
|
+
/** Load from disk into cache if not already loaded. */
|
|
622
|
+
load() {
|
|
623
|
+
if (this.cache)
|
|
624
|
+
return this.cache;
|
|
625
|
+
try {
|
|
626
|
+
if (existsSync(this.filePath)) {
|
|
627
|
+
const raw = readFileSync(this.filePath);
|
|
628
|
+
this.cache = deserialize(raw);
|
|
629
|
+
}
|
|
630
|
+
else {
|
|
631
|
+
this.cache = this.createEmpty();
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
catch (error) {
|
|
635
|
+
this.logger.warn({ filePath: this.filePath, err: error }, 'Failed to read binary store file, starting fresh');
|
|
636
|
+
this.cache = this.createEmpty();
|
|
637
|
+
}
|
|
638
|
+
return this.cache;
|
|
639
|
+
}
|
|
640
|
+
/**
|
|
641
|
+
* Mark the store dirty and schedule a debounced flush.
|
|
642
|
+
*/
|
|
643
|
+
markDirty() {
|
|
644
|
+
this.dirty = true;
|
|
645
|
+
if (this.flushTimer)
|
|
646
|
+
return;
|
|
647
|
+
this.flushTimer = setTimeout(() => {
|
|
648
|
+
this.flushTimer = undefined;
|
|
649
|
+
this.flush();
|
|
650
|
+
}, this.flushDebounceMs);
|
|
651
|
+
}
|
|
652
|
+
/**
|
|
653
|
+
* Flush cache to disk if dirty.
|
|
654
|
+
*
|
|
655
|
+
* Uses an atomic write (tmp + rename) to avoid partial files.
|
|
656
|
+
*/
|
|
657
|
+
flush() {
|
|
658
|
+
if (!this.dirty)
|
|
659
|
+
return;
|
|
660
|
+
this.stopAutoFlush();
|
|
661
|
+
const value = this.cache ?? this.createEmpty();
|
|
662
|
+
const tmpPath = `${this.filePath}.tmp`;
|
|
663
|
+
const payload = serialize(value);
|
|
664
|
+
writeFileSync(tmpPath, payload);
|
|
665
|
+
// renameSync does not reliably overwrite on Windows. Remove target first.
|
|
666
|
+
rmSync(this.filePath, { force: true });
|
|
667
|
+
renameSync(tmpPath, this.filePath);
|
|
668
|
+
this.dirty = false;
|
|
669
|
+
}
|
|
670
|
+
/** Stop any pending scheduled flush. Does not flush automatically. */
|
|
671
|
+
stopAutoFlush() {
|
|
672
|
+
if (this.flushTimer) {
|
|
673
|
+
clearTimeout(this.flushTimer);
|
|
674
|
+
this.flushTimer = undefined;
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
|
|
581
679
|
/**
|
|
582
680
|
* @module values/ValuesManager
|
|
583
681
|
* Manages per-rule distinct metadata value tracking. Persists to disk with in-memory caching and sorted deduplication.
|
|
@@ -585,9 +683,9 @@ class VirtualRuleStore {
|
|
|
585
683
|
/**
|
|
586
684
|
* Manages a persistent values.json file tracking distinct metadata values per rule.
|
|
587
685
|
*/
|
|
588
|
-
class ValuesManager extends
|
|
686
|
+
class ValuesManager extends BinaryFileStore {
|
|
589
687
|
constructor(stateDir, logger) {
|
|
590
|
-
super({ filePath: join(stateDir, 'values.
|
|
688
|
+
super({ filePath: join(stateDir, 'values.v8'), logger });
|
|
591
689
|
}
|
|
592
690
|
createEmpty() {
|
|
593
691
|
return {};
|
|
@@ -602,6 +700,7 @@ class ValuesManager extends JsonFileStore {
|
|
|
602
700
|
const index = this.load();
|
|
603
701
|
index[ruleName] ??= {};
|
|
604
702
|
const ruleValues = index[ruleName];
|
|
703
|
+
let changed = false;
|
|
605
704
|
for (const [key, value] of Object.entries(metadata)) {
|
|
606
705
|
// Decompose arrays into individual trackable elements so that
|
|
607
706
|
// array-typed fields (e.g. domains: ["email"]) are indexed.
|
|
@@ -619,15 +718,18 @@ class ValuesManager extends JsonFileStore {
|
|
|
619
718
|
}
|
|
620
719
|
return typeof a < typeof b ? -1 : 1;
|
|
621
720
|
});
|
|
721
|
+
changed = true;
|
|
622
722
|
}
|
|
623
723
|
}
|
|
624
724
|
}
|
|
625
|
-
|
|
725
|
+
if (changed)
|
|
726
|
+
this.markDirty();
|
|
626
727
|
}
|
|
627
728
|
/** Wipe all values (called on full reindex start). */
|
|
628
729
|
clearAll() {
|
|
629
730
|
this.cache = {};
|
|
630
|
-
this.
|
|
731
|
+
this.markDirty();
|
|
732
|
+
this.flush();
|
|
631
733
|
this.logger.debug('All values cleared');
|
|
632
734
|
}
|
|
633
735
|
/** Get all current values. */
|
|
@@ -1361,9 +1463,19 @@ async function executeReindex(deps, scope, path, dryRun = false) {
|
|
|
1361
1463
|
// Compute plan before starting async work
|
|
1362
1464
|
let plan;
|
|
1363
1465
|
if (scope === 'prune') {
|
|
1364
|
-
|
|
1466
|
+
deps.queue?.pause();
|
|
1467
|
+
await deps.queue?.drain();
|
|
1468
|
+
let pruneResult;
|
|
1469
|
+
try {
|
|
1470
|
+
pruneResult = await computePrunePlan(deps);
|
|
1471
|
+
}
|
|
1472
|
+
catch (err) {
|
|
1473
|
+
deps.queue?.resume();
|
|
1474
|
+
throw err;
|
|
1475
|
+
}
|
|
1365
1476
|
plan = pruneResult.plan;
|
|
1366
1477
|
if (dryRun) {
|
|
1478
|
+
deps.queue?.resume();
|
|
1367
1479
|
return { filesProcessed: 0, durationMs: 0, errors: 0, plan };
|
|
1368
1480
|
}
|
|
1369
1481
|
// Execute prune
|
|
@@ -1410,6 +1522,9 @@ async function executeReindex(deps, scope, path, dryRun = false) {
|
|
|
1410
1522
|
}
|
|
1411
1523
|
return { filesProcessed: 0, durationMs, errors: 1, plan };
|
|
1412
1524
|
}
|
|
1525
|
+
finally {
|
|
1526
|
+
deps.queue?.resume();
|
|
1527
|
+
}
|
|
1413
1528
|
}
|
|
1414
1529
|
// Non-prune scopes: compute plan from file lists
|
|
1415
1530
|
let fileList;
|
|
@@ -3908,6 +4023,7 @@ function createConfigReindexHandler(deps) {
|
|
|
3908
4023
|
issuesManager: deps.issuesManager,
|
|
3909
4024
|
gitignoreFilter: deps.gitignoreFilter,
|
|
3910
4025
|
vectorStore: deps.vectorStore,
|
|
4026
|
+
queue: deps.queue,
|
|
3911
4027
|
};
|
|
3912
4028
|
// Pass path for 'path' and 'rules' scopes
|
|
3913
4029
|
const pathParam = validScope === 'path' || validScope === 'rules'
|
|
@@ -4726,19 +4842,20 @@ function createWalkHandler(deps) {
|
|
|
4726
4842
|
message: 'The "globs" field is required and must be a non-empty string array.',
|
|
4727
4843
|
});
|
|
4728
4844
|
}
|
|
4729
|
-
|
|
4845
|
+
const fileSystemWatcher = deps.getFileSystemWatcher?.();
|
|
4846
|
+
if (!fileSystemWatcher) {
|
|
4730
4847
|
return await reply.status(503).send({
|
|
4731
4848
|
error: 'Watcher unavailable',
|
|
4732
4849
|
message: 'Filesystem watcher is not initialized.',
|
|
4733
4850
|
});
|
|
4734
4851
|
}
|
|
4735
|
-
if (!
|
|
4852
|
+
if (!fileSystemWatcher.isReady) {
|
|
4736
4853
|
return await reply.status(503).send({
|
|
4737
4854
|
error: 'Scan in progress',
|
|
4738
4855
|
message: 'Initial filesystem scan is still active. Try again after scan completes.',
|
|
4739
4856
|
});
|
|
4740
4857
|
}
|
|
4741
|
-
const watchedFiles =
|
|
4858
|
+
const watchedFiles = fileSystemWatcher.getWatchedFiles();
|
|
4742
4859
|
const normGlobs = globs.map((g) => normalizeSlashes(g));
|
|
4743
4860
|
const matchGlobs = picomatch(normGlobs, { dot: true, nocase: true });
|
|
4744
4861
|
const paths = watchedFiles.filter((f) => matchGlobs(normalizeSlashes(f)));
|
|
@@ -4944,8 +5061,9 @@ class ReindexTracker {
|
|
|
4944
5061
|
* @returns A configured Fastify instance.
|
|
4945
5062
|
*/
|
|
4946
5063
|
function createApiServer(options) {
|
|
4947
|
-
const { processor, vectorStore, embeddingProvider, logger, config, issuesManager, valuesManager, configPath, helperIntrospection, virtualRuleStore, gitignoreFilter, version, initialScanTracker, } = options;
|
|
5064
|
+
const { processor, vectorStore, embeddingProvider, queue, logger, config, issuesManager, valuesManager, configPath, helperIntrospection, virtualRuleStore, gitignoreFilter, version, initialScanTracker, } = options;
|
|
4948
5065
|
const getConfig = options.getConfig ?? (() => config);
|
|
5066
|
+
const getFileSystemWatcher = options.getFileSystemWatcher ?? (() => options.fileSystemWatcher);
|
|
4949
5067
|
const reindexTracker = options.reindexTracker ?? new ReindexTracker();
|
|
4950
5068
|
const app = Fastify({ logger: false });
|
|
4951
5069
|
const triggerReindex = (scope) => {
|
|
@@ -4962,6 +5080,7 @@ function createApiServer(options) {
|
|
|
4962
5080
|
issuesManager,
|
|
4963
5081
|
gitignoreFilter,
|
|
4964
5082
|
vectorStore,
|
|
5083
|
+
queue,
|
|
4965
5084
|
}, scope);
|
|
4966
5085
|
};
|
|
4967
5086
|
const cacheTtlMs = config.api?.cacheTtlMs ?? 30000;
|
|
@@ -4994,7 +5113,7 @@ function createApiServer(options) {
|
|
|
4994
5113
|
}));
|
|
4995
5114
|
app.post('/walk', createWalkHandler({
|
|
4996
5115
|
getWatchPaths: () => getConfig().watch.paths,
|
|
4997
|
-
|
|
5116
|
+
getFileSystemWatcher,
|
|
4998
5117
|
logger,
|
|
4999
5118
|
}));
|
|
5000
5119
|
app.post('/search', createSearchHandler({
|
|
@@ -5022,6 +5141,7 @@ function createApiServer(options) {
|
|
|
5022
5141
|
issuesManager,
|
|
5023
5142
|
gitignoreFilter,
|
|
5024
5143
|
vectorStore,
|
|
5144
|
+
queue,
|
|
5025
5145
|
}));
|
|
5026
5146
|
app.get('/issues', withCache(cacheTtlMs, createIssuesHandler({ issuesManager })));
|
|
5027
5147
|
app.get('/config/schema', withCache(cacheTtlMs, createConfigSchemaHandler()));
|
|
@@ -6154,6 +6274,7 @@ class EventQueue {
|
|
|
6154
6274
|
concurrency;
|
|
6155
6275
|
rateLimitPerMinute;
|
|
6156
6276
|
started = false;
|
|
6277
|
+
paused = false;
|
|
6157
6278
|
active = 0;
|
|
6158
6279
|
debounceTimers = new Map();
|
|
6159
6280
|
latestByKey = new Map();
|
|
@@ -6203,6 +6324,19 @@ class EventQueue {
|
|
|
6203
6324
|
this.started = true;
|
|
6204
6325
|
this.pump();
|
|
6205
6326
|
}
|
|
6327
|
+
/**
|
|
6328
|
+
* Pause processing events. Events can still be enqueued but will not be processed.
|
|
6329
|
+
*/
|
|
6330
|
+
pause() {
|
|
6331
|
+
this.paused = true;
|
|
6332
|
+
}
|
|
6333
|
+
/**
|
|
6334
|
+
* Resume processing events.
|
|
6335
|
+
*/
|
|
6336
|
+
resume() {
|
|
6337
|
+
this.paused = false;
|
|
6338
|
+
this.pump();
|
|
6339
|
+
}
|
|
6206
6340
|
/**
|
|
6207
6341
|
* Wait for the queue to become idle (no pending debounces, no queued items, no active work).
|
|
6208
6342
|
*
|
|
@@ -6242,7 +6376,7 @@ class EventQueue {
|
|
|
6242
6376
|
return this.normalQueue.shift() ?? this.lowQueue.shift();
|
|
6243
6377
|
}
|
|
6244
6378
|
pump() {
|
|
6245
|
-
if (!this.started)
|
|
6379
|
+
if (!this.started || this.paused)
|
|
6246
6380
|
return;
|
|
6247
6381
|
while (this.active < this.concurrency) {
|
|
6248
6382
|
const item = this.nextItem();
|
|
@@ -6555,6 +6689,7 @@ async function* scrollCollection(client, collectionName, filter, limit = 100) {
|
|
|
6555
6689
|
*/
|
|
6556
6690
|
class VectorStoreClient {
|
|
6557
6691
|
client;
|
|
6692
|
+
writeClient;
|
|
6558
6693
|
clientConfig;
|
|
6559
6694
|
collectionName;
|
|
6560
6695
|
dims;
|
|
@@ -6570,6 +6705,7 @@ class VectorStoreClient {
|
|
|
6570
6705
|
constructor(config, dimensions, logger) {
|
|
6571
6706
|
this.clientConfig = { url: config.url, apiKey: config.apiKey };
|
|
6572
6707
|
this.client = this.createClient();
|
|
6708
|
+
this.writeClient = this.client;
|
|
6573
6709
|
this.collectionName = config.collectionName;
|
|
6574
6710
|
this.dims = dimensions;
|
|
6575
6711
|
this.log = getLogger(logger);
|
|
@@ -6626,7 +6762,7 @@ class VectorStoreClient {
|
|
|
6626
6762
|
if (attempt > 1) {
|
|
6627
6763
|
this.log.warn({ attempt, operation: `qdrant.${operation}` }, `Retrying Qdrant ${operation}`);
|
|
6628
6764
|
}
|
|
6629
|
-
await fn();
|
|
6765
|
+
await fn(attempt);
|
|
6630
6766
|
}, {
|
|
6631
6767
|
attempts: 5,
|
|
6632
6768
|
baseDelayMs: 500,
|
|
@@ -6642,21 +6778,27 @@ class VectorStoreClient {
|
|
|
6642
6778
|
},
|
|
6643
6779
|
});
|
|
6644
6780
|
}
|
|
6781
|
+
getWriteClient(attempt) {
|
|
6782
|
+
if (attempt > 1) {
|
|
6783
|
+
this.pinoLogger?.info('Created fresh Qdrant client for retry');
|
|
6784
|
+
this.writeClient = this.createClient();
|
|
6785
|
+
}
|
|
6786
|
+
return this.writeClient;
|
|
6787
|
+
}
|
|
6645
6788
|
/**
|
|
6646
6789
|
* Upsert points into the collection.
|
|
6647
6790
|
*
|
|
6648
|
-
* Uses
|
|
6649
|
-
*
|
|
6650
|
-
* server, causing ECONNRESET on reuse.
|
|
6791
|
+
* Uses the shared client. On retry (after ECONNRESET from stale connections),
|
|
6792
|
+
* creates a fresh client to recover.
|
|
6651
6793
|
*
|
|
6652
6794
|
* @param points - The points to upsert.
|
|
6653
6795
|
*/
|
|
6654
6796
|
async upsert(points) {
|
|
6655
6797
|
if (points.length === 0)
|
|
6656
6798
|
return;
|
|
6657
|
-
await this.retryOperation('upsert', async () => {
|
|
6658
|
-
const
|
|
6659
|
-
await
|
|
6799
|
+
await this.retryOperation('upsert', async (attempt) => {
|
|
6800
|
+
const client = this.getWriteClient(attempt);
|
|
6801
|
+
await client.upsert(this.collectionName, {
|
|
6660
6802
|
wait: true,
|
|
6661
6803
|
points: points.map((p) => ({
|
|
6662
6804
|
id: p.id,
|
|
@@ -6669,16 +6811,16 @@ class VectorStoreClient {
|
|
|
6669
6811
|
/**
|
|
6670
6812
|
* Delete points by their IDs.
|
|
6671
6813
|
*
|
|
6672
|
-
* Uses
|
|
6814
|
+
* Uses the shared client. On retry, creates a fresh client to recover.
|
|
6673
6815
|
*
|
|
6674
6816
|
* @param ids - The point IDs to delete.
|
|
6675
6817
|
*/
|
|
6676
6818
|
async delete(ids) {
|
|
6677
6819
|
if (ids.length === 0)
|
|
6678
6820
|
return;
|
|
6679
|
-
await this.retryOperation('delete', async () => {
|
|
6680
|
-
const
|
|
6681
|
-
await
|
|
6821
|
+
await this.retryOperation('delete', async (attempt) => {
|
|
6822
|
+
const client = this.getWriteClient(attempt);
|
|
6823
|
+
await client.delete(this.collectionName, {
|
|
6682
6824
|
wait: true,
|
|
6683
6825
|
points: ids,
|
|
6684
6826
|
});
|
|
@@ -7546,7 +7688,7 @@ class JeevesWatcher {
|
|
|
7546
7688
|
const stateDir = this.config.stateDir ?? '.jeeves-metadata';
|
|
7547
7689
|
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
7548
7690
|
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
7549
|
-
this.enrichmentStore = new EnrichmentStore(stateDir);
|
|
7691
|
+
this.enrichmentStore = new EnrichmentStore(stateDir, logger);
|
|
7550
7692
|
const enrichmentStore = this.enrichmentStore;
|
|
7551
7693
|
this.contentHashCache = new ContentHashCache();
|
|
7552
7694
|
const contentHashCache = this.contentHashCache;
|
|
@@ -7598,6 +7740,9 @@ class JeevesWatcher {
|
|
|
7598
7740
|
this.logger?.warn({ timeoutMs: timeout }, 'Queue drain timeout hit, forcing shutdown');
|
|
7599
7741
|
}
|
|
7600
7742
|
}
|
|
7743
|
+
// Flush in-memory state caches before shutdown.
|
|
7744
|
+
this.valuesManager?.stopAutoFlush();
|
|
7745
|
+
this.valuesManager?.flush();
|
|
7601
7746
|
this.enrichmentStore?.close();
|
|
7602
7747
|
if (this.server) {
|
|
7603
7748
|
await this.server.close();
|
|
@@ -7622,6 +7767,7 @@ class JeevesWatcher {
|
|
|
7622
7767
|
version: this.version,
|
|
7623
7768
|
initialScanTracker: this.initialScanTracker,
|
|
7624
7769
|
fileSystemWatcher: this.watcher,
|
|
7770
|
+
getFileSystemWatcher: () => this.watcher,
|
|
7625
7771
|
enrichmentStore: this.enrichmentStore,
|
|
7626
7772
|
});
|
|
7627
7773
|
await server.listen({
|
package/dist/index.d.ts
CHANGED
|
@@ -300,6 +300,7 @@ declare function createEmbeddingProvider(config: EmbeddingConfig, logger?: pino.
|
|
|
300
300
|
* @module enrichment/EnrichmentStore
|
|
301
301
|
* SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
|
|
302
302
|
*/
|
|
303
|
+
|
|
303
304
|
/**
|
|
304
305
|
* Interface for enrichment metadata persistence.
|
|
305
306
|
*/
|
|
@@ -322,12 +323,13 @@ interface EnrichmentStoreInterface {
|
|
|
322
323
|
*/
|
|
323
324
|
declare class EnrichmentStore implements EnrichmentStoreInterface {
|
|
324
325
|
private readonly db;
|
|
326
|
+
private readonly logger?;
|
|
325
327
|
/**
|
|
326
328
|
* Create or open the enrichment store.
|
|
327
329
|
*
|
|
328
330
|
* @param stateDir - Directory for the SQLite database file.
|
|
329
331
|
*/
|
|
330
|
-
constructor(stateDir: string);
|
|
332
|
+
constructor(stateDir: string, logger?: pino.Logger);
|
|
331
333
|
get(path: string): Record<string, unknown> | null;
|
|
332
334
|
set(path: string, metadata: Record<string, unknown>): void;
|
|
333
335
|
delete(path: string): void;
|
|
@@ -806,6 +808,57 @@ declare class VirtualRuleStore {
|
|
|
806
808
|
get size(): number;
|
|
807
809
|
}
|
|
808
810
|
|
|
811
|
+
/**
|
|
812
|
+
* @module util/BinaryFileStore
|
|
813
|
+
* Binary-backed read/modify/write store with in-memory caching and debounced flush.
|
|
814
|
+
*
|
|
815
|
+
* Persists a single JS object to disk using V8 structured clone serialization.
|
|
816
|
+
* I/O: synchronous fs read/write.
|
|
817
|
+
*/
|
|
818
|
+
|
|
819
|
+
/** Options for {@link BinaryFileStore}. */
|
|
820
|
+
interface BinaryFileStoreOptions {
|
|
821
|
+
/** Path to the binary file on disk. */
|
|
822
|
+
filePath: string;
|
|
823
|
+
/** Logger for warnings. */
|
|
824
|
+
logger: pino.Logger;
|
|
825
|
+
/** Debounce interval in ms for flushing dirty state. Default: 5000. */
|
|
826
|
+
flushDebounceMs?: number;
|
|
827
|
+
}
|
|
828
|
+
/**
|
|
829
|
+
* Base class for binary file stores.
|
|
830
|
+
*
|
|
831
|
+
* @typeParam T - The stored data structure.
|
|
832
|
+
*/
|
|
833
|
+
declare abstract class BinaryFileStore<T> {
|
|
834
|
+
/** Path to the binary file on disk. */
|
|
835
|
+
protected readonly filePath: string;
|
|
836
|
+
/** In-memory cache of the file contents, or `null` if not yet loaded. */
|
|
837
|
+
protected cache: T | null;
|
|
838
|
+
/** Logger instance for warnings and diagnostics. */
|
|
839
|
+
protected readonly logger: pino.Logger;
|
|
840
|
+
private readonly flushDebounceMs;
|
|
841
|
+
private flushTimer;
|
|
842
|
+
private dirty;
|
|
843
|
+
protected constructor(options: BinaryFileStoreOptions);
|
|
844
|
+
/** Create an empty default value when file is missing or unreadable. */
|
|
845
|
+
protected abstract createEmpty(): T;
|
|
846
|
+
/** Load from disk into cache if not already loaded. */
|
|
847
|
+
protected load(): T;
|
|
848
|
+
/**
|
|
849
|
+
* Mark the store dirty and schedule a debounced flush.
|
|
850
|
+
*/
|
|
851
|
+
protected markDirty(): void;
|
|
852
|
+
/**
|
|
853
|
+
* Flush cache to disk if dirty.
|
|
854
|
+
*
|
|
855
|
+
* Uses an atomic write (tmp + rename) to avoid partial files.
|
|
856
|
+
*/
|
|
857
|
+
flush(): void;
|
|
858
|
+
/** Stop any pending scheduled flush. Does not flush automatically. */
|
|
859
|
+
stopAutoFlush(): void;
|
|
860
|
+
}
|
|
861
|
+
|
|
809
862
|
/**
|
|
810
863
|
* @module values/ValuesManager
|
|
811
864
|
* Manages per-rule distinct metadata value tracking. Persists to disk with in-memory caching and sorted deduplication.
|
|
@@ -816,7 +869,7 @@ type ValuesIndex = Record<string, Record<string, unknown[]>>;
|
|
|
816
869
|
/**
|
|
817
870
|
* Manages a persistent values.json file tracking distinct metadata values per rule.
|
|
818
871
|
*/
|
|
819
|
-
declare class ValuesManager extends
|
|
872
|
+
declare class ValuesManager extends BinaryFileStore<ValuesIndex> {
|
|
820
873
|
constructor(stateDir: string, logger: pino.Logger);
|
|
821
874
|
protected createEmpty(): ValuesIndex;
|
|
822
875
|
/** Check if a value is a trackable primitive (string, number, boolean). */
|
|
@@ -997,6 +1050,7 @@ interface VectorStore {
|
|
|
997
1050
|
*/
|
|
998
1051
|
declare class VectorStoreClient implements VectorStore {
|
|
999
1052
|
private readonly client;
|
|
1053
|
+
private writeClient;
|
|
1000
1054
|
private readonly clientConfig;
|
|
1001
1055
|
private readonly collectionName;
|
|
1002
1056
|
private readonly dims;
|
|
@@ -1037,12 +1091,12 @@ declare class VectorStoreClient implements VectorStore {
|
|
|
1037
1091
|
* @param fn - Async function to retry.
|
|
1038
1092
|
*/
|
|
1039
1093
|
private retryOperation;
|
|
1094
|
+
private getWriteClient;
|
|
1040
1095
|
/**
|
|
1041
1096
|
* Upsert points into the collection.
|
|
1042
1097
|
*
|
|
1043
|
-
* Uses
|
|
1044
|
-
*
|
|
1045
|
-
* server, causing ECONNRESET on reuse.
|
|
1098
|
+
* Uses the shared client. On retry (after ECONNRESET from stale connections),
|
|
1099
|
+
* creates a fresh client to recover.
|
|
1046
1100
|
*
|
|
1047
1101
|
* @param points - The points to upsert.
|
|
1048
1102
|
*/
|
|
@@ -1050,7 +1104,7 @@ declare class VectorStoreClient implements VectorStore {
|
|
|
1050
1104
|
/**
|
|
1051
1105
|
* Delete points by their IDs.
|
|
1052
1106
|
*
|
|
1053
|
-
* Uses
|
|
1107
|
+
* Uses the shared client. On retry, creates a fresh client to recover.
|
|
1054
1108
|
*
|
|
1055
1109
|
* @param ids - The point IDs to delete.
|
|
1056
1110
|
*/
|
|
@@ -1360,6 +1414,7 @@ declare class EventQueue {
|
|
|
1360
1414
|
private readonly concurrency;
|
|
1361
1415
|
private readonly rateLimitPerMinute?;
|
|
1362
1416
|
private started;
|
|
1417
|
+
private paused;
|
|
1363
1418
|
private active;
|
|
1364
1419
|
private readonly debounceTimers;
|
|
1365
1420
|
private readonly latestByKey;
|
|
@@ -1385,6 +1440,14 @@ declare class EventQueue {
|
|
|
1385
1440
|
* Start processing events.
|
|
1386
1441
|
*/
|
|
1387
1442
|
process(): void;
|
|
1443
|
+
/**
|
|
1444
|
+
* Pause processing events. Events can still be enqueued but will not be processed.
|
|
1445
|
+
*/
|
|
1446
|
+
pause(): void;
|
|
1447
|
+
/**
|
|
1448
|
+
* Resume processing events.
|
|
1449
|
+
*/
|
|
1450
|
+
resume(): void;
|
|
1388
1451
|
/**
|
|
1389
1452
|
* Wait for the queue to become idle (no pending debounces, no queued items, no active work).
|
|
1390
1453
|
*
|
|
@@ -1663,6 +1726,8 @@ interface ApiServerOptions {
|
|
|
1663
1726
|
initialScanTracker?: InitialScanTracker;
|
|
1664
1727
|
/** Filesystem watcher instance for /walk endpoint (in-memory file list). */
|
|
1665
1728
|
fileSystemWatcher?: FileSystemWatcher;
|
|
1729
|
+
/** Getter for live filesystem watcher access after hot-reload rebuilds. */
|
|
1730
|
+
getFileSystemWatcher?: () => FileSystemWatcher | undefined;
|
|
1666
1731
|
/** Optional enrichment store for persisted enrichment metadata. */
|
|
1667
1732
|
enrichmentStore?: EnrichmentStoreInterface;
|
|
1668
1733
|
}
|
|
@@ -1836,5 +1901,5 @@ declare function contentHash(text: string): string;
|
|
|
1836
1901
|
*/
|
|
1837
1902
|
declare function pointId(filePath: string, chunkIndex?: number): string;
|
|
1838
1903
|
|
|
1839
|
-
export { DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };
|
|
1904
|
+
export { ContentHashCache, DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };
|
|
1840
1905
|
export type { AllHelpersIntrospection, ApiConfig, ApiServerOptions, ApplyRulesOptions, ApplyRulesResult, CollectionInfo, CompiledRule, CompiledTemplate, ConfigWatchConfig, DocumentProcessorDeps, DocumentProcessorInterface, EmbeddingConfig, EmbeddingProvider, EnrichmentStoreInterface, EventQueueOptions, ExtractedText, Extractor, FileAttributes, FileSystemWatcherOptions, HelperModuleIntrospection, InferenceRule, InitialScanStatus, IssueRecord, IssuesFile, JeevesWatcherConfig, JeevesWatcherConfigInput, JeevesWatcherFactories, JeevesWatcherRuntimeOptions, LoggingConfig, PayloadFieldSchema, ProcessFn, ProcessorConfig, ProviderFactory, ReindexStatus, RenderResult, RuleLogger, ScrollPageResult, ScrolledPoint, SearchResult, SystemHealthOptions, ValuesIndex, VectorPoint, VectorStore, VectorStoreConfig, WatchConfig, WatchEvent };
|
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@ import { join, dirname, resolve, relative, extname, basename, isAbsolute } from
|
|
|
2
2
|
import Fastify from 'fastify';
|
|
3
3
|
import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
|
|
4
4
|
import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
5
|
-
import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
5
|
+
import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSync, rmSync, renameSync } from 'node:fs';
|
|
6
6
|
import ignore from 'ignore';
|
|
7
7
|
import picomatch from 'picomatch';
|
|
8
8
|
import { z, ZodError } from 'zod';
|
|
@@ -22,6 +22,7 @@ import yaml from 'js-yaml';
|
|
|
22
22
|
import crypto from 'crypto';
|
|
23
23
|
import { packageDirectorySync } from 'package-directory';
|
|
24
24
|
import Database from 'better-sqlite3';
|
|
25
|
+
import { deserialize, serialize } from 'node:v8';
|
|
25
26
|
import chokidar from 'chokidar';
|
|
26
27
|
import { cosmiconfig } from 'cosmiconfig';
|
|
27
28
|
import https from 'node:https';
|
|
@@ -753,9 +754,19 @@ async function executeReindex(deps, scope, path, dryRun = false) {
|
|
|
753
754
|
// Compute plan before starting async work
|
|
754
755
|
let plan;
|
|
755
756
|
if (scope === 'prune') {
|
|
756
|
-
|
|
757
|
+
deps.queue?.pause();
|
|
758
|
+
await deps.queue?.drain();
|
|
759
|
+
let pruneResult;
|
|
760
|
+
try {
|
|
761
|
+
pruneResult = await computePrunePlan(deps);
|
|
762
|
+
}
|
|
763
|
+
catch (err) {
|
|
764
|
+
deps.queue?.resume();
|
|
765
|
+
throw err;
|
|
766
|
+
}
|
|
757
767
|
plan = pruneResult.plan;
|
|
758
768
|
if (dryRun) {
|
|
769
|
+
deps.queue?.resume();
|
|
759
770
|
return { filesProcessed: 0, durationMs: 0, errors: 0, plan };
|
|
760
771
|
}
|
|
761
772
|
// Execute prune
|
|
@@ -802,6 +813,9 @@ async function executeReindex(deps, scope, path, dryRun = false) {
|
|
|
802
813
|
}
|
|
803
814
|
return { filesProcessed: 0, durationMs, errors: 1, plan };
|
|
804
815
|
}
|
|
816
|
+
finally {
|
|
817
|
+
deps.queue?.resume();
|
|
818
|
+
}
|
|
805
819
|
}
|
|
806
820
|
// Non-prune scopes: compute plan from file lists
|
|
807
821
|
let fileList;
|
|
@@ -1986,6 +2000,7 @@ function createConfigReindexHandler(deps) {
|
|
|
1986
2000
|
issuesManager: deps.issuesManager,
|
|
1987
2001
|
gitignoreFilter: deps.gitignoreFilter,
|
|
1988
2002
|
vectorStore: deps.vectorStore,
|
|
2003
|
+
queue: deps.queue,
|
|
1989
2004
|
};
|
|
1990
2005
|
// Pass path for 'path' and 'rules' scopes
|
|
1991
2006
|
const pathParam = validScope === 'path' || validScope === 'rules'
|
|
@@ -3853,19 +3868,20 @@ function createWalkHandler(deps) {
|
|
|
3853
3868
|
message: 'The "globs" field is required and must be a non-empty string array.',
|
|
3854
3869
|
});
|
|
3855
3870
|
}
|
|
3856
|
-
|
|
3871
|
+
const fileSystemWatcher = deps.getFileSystemWatcher?.();
|
|
3872
|
+
if (!fileSystemWatcher) {
|
|
3857
3873
|
return await reply.status(503).send({
|
|
3858
3874
|
error: 'Watcher unavailable',
|
|
3859
3875
|
message: 'Filesystem watcher is not initialized.',
|
|
3860
3876
|
});
|
|
3861
3877
|
}
|
|
3862
|
-
if (!
|
|
3878
|
+
if (!fileSystemWatcher.isReady) {
|
|
3863
3879
|
return await reply.status(503).send({
|
|
3864
3880
|
error: 'Scan in progress',
|
|
3865
3881
|
message: 'Initial filesystem scan is still active. Try again after scan completes.',
|
|
3866
3882
|
});
|
|
3867
3883
|
}
|
|
3868
|
-
const watchedFiles =
|
|
3884
|
+
const watchedFiles = fileSystemWatcher.getWatchedFiles();
|
|
3869
3885
|
const normGlobs = globs.map((g) => normalizeSlashes(g));
|
|
3870
3886
|
const matchGlobs = picomatch(normGlobs, { dot: true, nocase: true });
|
|
3871
3887
|
const paths = watchedFiles.filter((f) => matchGlobs(normalizeSlashes(f)));
|
|
@@ -4446,8 +4462,9 @@ class InitialScanTracker {
|
|
|
4446
4462
|
* @returns A configured Fastify instance.
|
|
4447
4463
|
*/
|
|
4448
4464
|
function createApiServer(options) {
|
|
4449
|
-
const { processor, vectorStore, embeddingProvider, logger, config, issuesManager, valuesManager, configPath, helperIntrospection, virtualRuleStore, gitignoreFilter, version, initialScanTracker, } = options;
|
|
4465
|
+
const { processor, vectorStore, embeddingProvider, queue, logger, config, issuesManager, valuesManager, configPath, helperIntrospection, virtualRuleStore, gitignoreFilter, version, initialScanTracker, } = options;
|
|
4450
4466
|
const getConfig = options.getConfig ?? (() => config);
|
|
4467
|
+
const getFileSystemWatcher = options.getFileSystemWatcher ?? (() => options.fileSystemWatcher);
|
|
4451
4468
|
const reindexTracker = options.reindexTracker ?? new ReindexTracker();
|
|
4452
4469
|
const app = Fastify({ logger: false });
|
|
4453
4470
|
const triggerReindex = (scope) => {
|
|
@@ -4464,6 +4481,7 @@ function createApiServer(options) {
|
|
|
4464
4481
|
issuesManager,
|
|
4465
4482
|
gitignoreFilter,
|
|
4466
4483
|
vectorStore,
|
|
4484
|
+
queue,
|
|
4467
4485
|
}, scope);
|
|
4468
4486
|
};
|
|
4469
4487
|
const cacheTtlMs = config.api?.cacheTtlMs ?? 30000;
|
|
@@ -4496,7 +4514,7 @@ function createApiServer(options) {
|
|
|
4496
4514
|
}));
|
|
4497
4515
|
app.post('/walk', createWalkHandler({
|
|
4498
4516
|
getWatchPaths: () => getConfig().watch.paths,
|
|
4499
|
-
|
|
4517
|
+
getFileSystemWatcher,
|
|
4500
4518
|
logger,
|
|
4501
4519
|
}));
|
|
4502
4520
|
app.post('/search', createSearchHandler({
|
|
@@ -4524,6 +4542,7 @@ function createApiServer(options) {
|
|
|
4524
4542
|
issuesManager,
|
|
4525
4543
|
gitignoreFilter,
|
|
4526
4544
|
vectorStore,
|
|
4545
|
+
queue,
|
|
4527
4546
|
}));
|
|
4528
4547
|
app.get('/issues', withCache(cacheTtlMs, createIssuesHandler({ issuesManager })));
|
|
4529
4548
|
app.get('/config/schema', withCache(cacheTtlMs, createConfigSchemaHandler()));
|
|
@@ -4692,21 +4711,32 @@ class ContentHashCache {
|
|
|
4692
4711
|
* @module enrichment/EnrichmentStore
|
|
4693
4712
|
* SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
|
|
4694
4713
|
*/
|
|
4714
|
+
const BUSY_TIMEOUT_MS = 5000;
|
|
4695
4715
|
/**
|
|
4696
4716
|
* SQLite-backed enrichment metadata store.
|
|
4697
4717
|
*/
|
|
4698
4718
|
class EnrichmentStore {
|
|
4699
4719
|
db;
|
|
4720
|
+
logger;
|
|
4700
4721
|
/**
|
|
4701
4722
|
* Create or open the enrichment store.
|
|
4702
4723
|
*
|
|
4703
4724
|
* @param stateDir - Directory for the SQLite database file.
|
|
4704
4725
|
*/
|
|
4705
|
-
constructor(stateDir) {
|
|
4726
|
+
constructor(stateDir, logger) {
|
|
4727
|
+
this.logger = logger;
|
|
4706
4728
|
mkdirSync(stateDir, { recursive: true });
|
|
4707
4729
|
const dbPath = join(stateDir, 'enrichments.sqlite');
|
|
4708
4730
|
this.db = new Database(dbPath);
|
|
4709
4731
|
this.db.pragma('journal_mode = WAL');
|
|
4732
|
+
this.db.pragma('busy_timeout = ' + BUSY_TIMEOUT_MS.toString());
|
|
4733
|
+
const [checkpointStatus] = this.db.pragma('wal_checkpoint(TRUNCATE)');
|
|
4734
|
+
if (checkpointStatus && checkpointStatus.busy > 0) {
|
|
4735
|
+
// EnrichmentStore is expected to be single-writer. If we see a busy WAL
|
|
4736
|
+
// checkpoint at startup, it's most likely from an unclean shutdown where
|
|
4737
|
+
// the OS hasn't yet released file handles.
|
|
4738
|
+
this.logger?.warn({ checkpointStatus }, 'WAL checkpoint busy at startup; OS may still be releasing file handles');
|
|
4739
|
+
}
|
|
4710
4740
|
this.db.exec(`
|
|
4711
4741
|
CREATE TABLE IF NOT EXISTS enrichments (
|
|
4712
4742
|
path TEXT PRIMARY KEY,
|
|
@@ -4926,6 +4956,92 @@ const issueRecordSchema = z.object({
|
|
|
4926
4956
|
timestamp: z.union([z.number(), z.string()]),
|
|
4927
4957
|
});
|
|
4928
4958
|
|
|
4959
|
+
/**
|
|
4960
|
+
* @module util/BinaryFileStore
|
|
4961
|
+
* Binary-backed read/modify/write store with in-memory caching and debounced flush.
|
|
4962
|
+
*
|
|
4963
|
+
* Persists a single JS object to disk using V8 structured clone serialization.
|
|
4964
|
+
* I/O: synchronous fs read/write.
|
|
4965
|
+
*/
|
|
4966
|
+
/**
 * Base class for binary file stores.
 *
 * Keeps a single value in memory, persists it to `filePath` with V8
 * serialization (`serialize` / `deserialize` from `node:v8`) and batches
 * writes behind a flush timer. Subclasses must implement `createEmpty()`,
 * which supplies the initial value when no readable file exists.
 *
 * @typeParam T - The stored data structure.
 */
class BinaryFileStore {
    /** Path to the binary file on disk. */
    filePath;
    /** In-memory cache of the file contents, or `null` if not yet loaded. */
    cache = null;
    /** Logger instance for warnings and diagnostics. */
    logger;
    /** Delay in milliseconds between the first unflushed change and the scheduled flush. */
    flushDebounceMs;
    /** Handle of the pending flush timer, or `undefined` when none is scheduled. */
    flushTimer;
    /** Whether the cache holds changes that have not been written to disk yet. */
    dirty = false;
    /**
     * @param options - Store options.
     * @param options.filePath - Destination file; its parent directory is created if missing.
     * @param options.logger - Logger used for warnings.
     * @param options.flushDebounceMs - Flush delay in milliseconds (defaults to 5000).
     */
    constructor(options) {
        this.filePath = options.filePath;
        this.logger = options.logger;
        this.flushDebounceMs = options.flushDebounceMs ?? 5000;
        mkdirSync(dirname(this.filePath), { recursive: true });
    }
    /**
     * Load from disk into cache if not already loaded.
     *
     * A missing file yields `createEmpty()`. A file that cannot be read or
     * deserialized is logged as a warning and also replaced by `createEmpty()`.
     *
     * @returns The cached value.
     */
    load() {
        // Compare against null/undefined rather than truthiness so that a
        // legitimately falsy cached value is not reloaded on every call.
        if (this.cache != null)
            return this.cache;
        try {
            if (existsSync(this.filePath)) {
                const raw = readFileSync(this.filePath);
                this.cache = deserialize(raw);
            }
            else {
                this.cache = this.createEmpty();
            }
        }
        catch (error) {
            this.logger.warn({ filePath: this.filePath, err: error }, 'Failed to read binary store file, starting fresh');
            this.cache = this.createEmpty();
        }
        return this.cache;
    }
    /**
     * Mark the store dirty and schedule a flush.
     *
     * If a flush is already scheduled the existing timer is kept, so the
     * write happens `flushDebounceMs` after the first unflushed change.
     */
    markDirty() {
        this.dirty = true;
        if (this.flushTimer)
            return;
        this.flushTimer = setTimeout(() => {
            this.flushTimer = undefined;
            try {
                this.flush();
            }
            catch (error) {
                // An exception escaping a timer callback becomes an uncaught
                // exception. The store stays dirty, so the next markDirty() or
                // explicit flush() retries the write.
                this.logger.warn({ filePath: this.filePath, err: error }, 'Failed to flush binary store file');
            }
        }, this.flushDebounceMs);
    }
    /**
     * Flush cache to disk if dirty.
     *
     * Writes to `<filePath>.tmp` and renames it over the target so the target
     * is never left partially written. Cancels any pending scheduled flush.
     *
     * @throws Any serialization or filesystem error; the store stays dirty in that case.
     */
    flush() {
        if (!this.dirty)
            return;
        this.stopAutoFlush();
        const value = this.cache ?? this.createEmpty();
        const tmpPath = `${this.filePath}.tmp`;
        try {
            const payload = serialize(value);
            writeFileSync(tmpPath, payload);
            try {
                // Renaming over the existing file keeps the previous contents
                // in place until the new file is complete.
                renameSync(tmpPath, this.filePath);
            }
            catch (error) {
                // Some platforms (notably Windows) can refuse to replace an
                // existing target. Only then fall back to remove + rename,
                // which briefly leaves no file at filePath.
                const replaceRefused = ['EPERM', 'EEXIST', 'EACCES', 'EBUSY'].includes(error?.code);
                if (!replaceRefused)
                    throw error;
                rmSync(this.filePath, { force: true });
                renameSync(tmpPath, this.filePath);
            }
        }
        catch (error) {
            try {
                // Best-effort cleanup of a stale temp file.
                rmSync(tmpPath, { force: true });
            }
            catch {
                // Deliberately ignored: the original failure is rethrown below.
            }
            throw error;
        }
        this.dirty = false;
    }
    /** Stop any pending scheduled flush. Does not flush automatically. */
    stopAutoFlush() {
        if (this.flushTimer) {
            clearTimeout(this.flushTimer);
            this.flushTimer = undefined;
        }
    }
}
|
|
5044
|
+
|
|
4929
5045
|
/**
|
|
4930
5046
|
* @module values/ValuesManager
|
|
4931
5047
|
* Manages per-rule distinct metadata value tracking. Persists to disk with in-memory caching and sorted deduplication.
|
|
@@ -4933,9 +5049,9 @@ const issueRecordSchema = z.object({
|
|
|
4933
5049
|
/**
|
|
4934
5050
|
* Manages a persistent values.json file tracking distinct metadata values per rule.
|
|
4935
5051
|
*/
|
|
4936
|
-
class ValuesManager extends
|
|
5052
|
+
class ValuesManager extends BinaryFileStore {
|
|
4937
5053
|
constructor(stateDir, logger) {
|
|
4938
|
-
super({ filePath: join(stateDir, 'values.
|
|
5054
|
+
super({ filePath: join(stateDir, 'values.v8'), logger });
|
|
4939
5055
|
}
|
|
4940
5056
|
createEmpty() {
|
|
4941
5057
|
return {};
|
|
@@ -4950,6 +5066,7 @@ class ValuesManager extends JsonFileStore {
|
|
|
4950
5066
|
const index = this.load();
|
|
4951
5067
|
index[ruleName] ??= {};
|
|
4952
5068
|
const ruleValues = index[ruleName];
|
|
5069
|
+
let changed = false;
|
|
4953
5070
|
for (const [key, value] of Object.entries(metadata)) {
|
|
4954
5071
|
// Decompose arrays into individual trackable elements so that
|
|
4955
5072
|
// array-typed fields (e.g. domains: ["email"]) are indexed.
|
|
@@ -4967,15 +5084,18 @@ class ValuesManager extends JsonFileStore {
|
|
|
4967
5084
|
}
|
|
4968
5085
|
return typeof a < typeof b ? -1 : 1;
|
|
4969
5086
|
});
|
|
5087
|
+
changed = true;
|
|
4970
5088
|
}
|
|
4971
5089
|
}
|
|
4972
5090
|
}
|
|
4973
|
-
|
|
5091
|
+
if (changed)
|
|
5092
|
+
this.markDirty();
|
|
4974
5093
|
}
|
|
4975
5094
|
/** Wipe all values (called on full reindex start). */
|
|
4976
5095
|
clearAll() {
|
|
4977
5096
|
this.cache = {};
|
|
4978
|
-
this.
|
|
5097
|
+
this.markDirty();
|
|
5098
|
+
this.flush();
|
|
4979
5099
|
this.logger.debug('All values cleared');
|
|
4980
5100
|
}
|
|
4981
5101
|
/** Get all current values. */
|
|
@@ -6132,6 +6252,7 @@ class EventQueue {
|
|
|
6132
6252
|
concurrency;
|
|
6133
6253
|
rateLimitPerMinute;
|
|
6134
6254
|
started = false;
|
|
6255
|
+
paused = false;
|
|
6135
6256
|
active = 0;
|
|
6136
6257
|
debounceTimers = new Map();
|
|
6137
6258
|
latestByKey = new Map();
|
|
@@ -6181,6 +6302,19 @@ class EventQueue {
|
|
|
6181
6302
|
this.started = true;
|
|
6182
6303
|
this.pump();
|
|
6183
6304
|
}
|
|
6305
|
+
/**
|
|
6306
|
+
* Pause processing events. Events can still be enqueued but will not be processed.
* Only sets the `paused` flag, which makes `pump()` return before it takes any
* item from the queues; the flag stays set until `resume()` is called.
|
|
6307
|
+
*/
|
|
6308
|
+
pause() {
|
|
6309
|
+
this.paused = true;
|
|
6310
|
+
}
|
|
6311
|
+
/**
|
|
6312
|
+
* Resume processing events.
* Clears the `paused` flag and immediately calls `pump()` so that items queued
* while paused are picked up (`pump()` still returns early if the queue has
* not been started).
|
|
6313
|
+
*/
|
|
6314
|
+
resume() {
|
|
6315
|
+
this.paused = false;
|
|
6316
|
+
this.pump();
|
|
6317
|
+
}
|
|
6184
6318
|
/**
|
|
6185
6319
|
* Wait for the queue to become idle (no pending debounces, no queued items, no active work).
|
|
6186
6320
|
*
|
|
@@ -6220,7 +6354,7 @@ class EventQueue {
|
|
|
6220
6354
|
return this.normalQueue.shift() ?? this.lowQueue.shift();
|
|
6221
6355
|
}
|
|
6222
6356
|
pump() {
|
|
6223
|
-
if (!this.started)
|
|
6357
|
+
if (!this.started || this.paused)
|
|
6224
6358
|
return;
|
|
6225
6359
|
while (this.active < this.concurrency) {
|
|
6226
6360
|
const item = this.nextItem();
|
|
@@ -6533,6 +6667,7 @@ async function* scrollCollection(client, collectionName, filter, limit = 100) {
|
|
|
6533
6667
|
*/
|
|
6534
6668
|
class VectorStoreClient {
|
|
6535
6669
|
client;
|
|
6670
|
+
writeClient;
|
|
6536
6671
|
clientConfig;
|
|
6537
6672
|
collectionName;
|
|
6538
6673
|
dims;
|
|
@@ -6548,6 +6683,7 @@ class VectorStoreClient {
|
|
|
6548
6683
|
constructor(config, dimensions, logger) {
|
|
6549
6684
|
this.clientConfig = { url: config.url, apiKey: config.apiKey };
|
|
6550
6685
|
this.client = this.createClient();
|
|
6686
|
+
this.writeClient = this.client;
|
|
6551
6687
|
this.collectionName = config.collectionName;
|
|
6552
6688
|
this.dims = dimensions;
|
|
6553
6689
|
this.log = getLogger(logger);
|
|
@@ -6604,7 +6740,7 @@ class VectorStoreClient {
|
|
|
6604
6740
|
if (attempt > 1) {
|
|
6605
6741
|
this.log.warn({ attempt, operation: `qdrant.${operation}` }, `Retrying Qdrant ${operation}`);
|
|
6606
6742
|
}
|
|
6607
|
-
await fn();
|
|
6743
|
+
await fn(attempt);
|
|
6608
6744
|
}, {
|
|
6609
6745
|
attempts: 5,
|
|
6610
6746
|
baseDelayMs: 500,
|
|
@@ -6620,21 +6756,27 @@ class VectorStoreClient {
|
|
|
6620
6756
|
},
|
|
6621
6757
|
});
|
|
6622
6758
|
}
|
|
6759
|
+
getWriteClient(attempt) {
|
|
6760
|
+
if (attempt > 1) {
|
|
6761
|
+
this.pinoLogger?.info('Created fresh Qdrant client for retry');
|
|
6762
|
+
this.writeClient = this.createClient();
|
|
6763
|
+
}
|
|
6764
|
+
return this.writeClient;
|
|
6765
|
+
}
|
|
6623
6766
|
/**
|
|
6624
6767
|
* Upsert points into the collection.
|
|
6625
6768
|
*
|
|
6626
|
-
* Uses
|
|
6627
|
-
*
|
|
6628
|
-
* server, causing ECONNRESET on reuse.
|
|
6769
|
+
* Uses the shared client. On retry (after ECONNRESET from stale connections),
|
|
6770
|
+
* creates a fresh client to recover.
|
|
6629
6771
|
*
|
|
6630
6772
|
* @param points - The points to upsert.
|
|
6631
6773
|
*/
|
|
6632
6774
|
async upsert(points) {
|
|
6633
6775
|
if (points.length === 0)
|
|
6634
6776
|
return;
|
|
6635
|
-
await this.retryOperation('upsert', async () => {
|
|
6636
|
-
const
|
|
6637
|
-
await
|
|
6777
|
+
await this.retryOperation('upsert', async (attempt) => {
|
|
6778
|
+
const client = this.getWriteClient(attempt);
|
|
6779
|
+
await client.upsert(this.collectionName, {
|
|
6638
6780
|
wait: true,
|
|
6639
6781
|
points: points.map((p) => ({
|
|
6640
6782
|
id: p.id,
|
|
@@ -6647,16 +6789,16 @@ class VectorStoreClient {
|
|
|
6647
6789
|
/**
|
|
6648
6790
|
* Delete points by their IDs.
|
|
6649
6791
|
*
|
|
6650
|
-
* Uses
|
|
6792
|
+
* Uses the shared client. On retry, creates a fresh client to recover.
|
|
6651
6793
|
*
|
|
6652
6794
|
* @param ids - The point IDs to delete.
|
|
6653
6795
|
*/
|
|
6654
6796
|
async delete(ids) {
|
|
6655
6797
|
if (ids.length === 0)
|
|
6656
6798
|
return;
|
|
6657
|
-
await this.retryOperation('delete', async () => {
|
|
6658
|
-
const
|
|
6659
|
-
await
|
|
6799
|
+
await this.retryOperation('delete', async (attempt) => {
|
|
6800
|
+
const client = this.getWriteClient(attempt);
|
|
6801
|
+
await client.delete(this.collectionName, {
|
|
6660
6802
|
wait: true,
|
|
6661
6803
|
points: ids,
|
|
6662
6804
|
});
|
|
@@ -7524,7 +7666,7 @@ class JeevesWatcher {
|
|
|
7524
7666
|
const stateDir = this.config.stateDir ?? '.jeeves-metadata';
|
|
7525
7667
|
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
7526
7668
|
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
7527
|
-
this.enrichmentStore = new EnrichmentStore(stateDir);
|
|
7669
|
+
this.enrichmentStore = new EnrichmentStore(stateDir, logger);
|
|
7528
7670
|
const enrichmentStore = this.enrichmentStore;
|
|
7529
7671
|
this.contentHashCache = new ContentHashCache();
|
|
7530
7672
|
const contentHashCache = this.contentHashCache;
|
|
@@ -7576,6 +7718,9 @@ class JeevesWatcher {
|
|
|
7576
7718
|
this.logger?.warn({ timeoutMs: timeout }, 'Queue drain timeout hit, forcing shutdown');
|
|
7577
7719
|
}
|
|
7578
7720
|
}
|
|
7721
|
+
// Flush in-memory state caches before shutdown.
|
|
7722
|
+
this.valuesManager?.stopAutoFlush();
|
|
7723
|
+
this.valuesManager?.flush();
|
|
7579
7724
|
this.enrichmentStore?.close();
|
|
7580
7725
|
if (this.server) {
|
|
7581
7726
|
await this.server.close();
|
|
@@ -7600,6 +7745,7 @@ class JeevesWatcher {
|
|
|
7600
7745
|
version: this.version,
|
|
7601
7746
|
initialScanTracker: this.initialScanTracker,
|
|
7602
7747
|
fileSystemWatcher: this.watcher,
|
|
7748
|
+
getFileSystemWatcher: () => this.watcher,
|
|
7603
7749
|
enrichmentStore: this.enrichmentStore,
|
|
7604
7750
|
});
|
|
7605
7751
|
await server.listen({
|
|
@@ -7662,4 +7808,4 @@ class JeevesWatcher {
|
|
|
7662
7808
|
}
|
|
7663
7809
|
}
|
|
7664
7810
|
|
|
7665
|
-
export { DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };
|
|
7811
|
+
export { ContentHashCache, DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };
|
package/package.json
CHANGED