@karmaniverous/jeeves-watcher 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +185 -154
- package/dist/cli/jeeves-watcher/index.js +563 -128
- package/dist/index.d.ts +156 -41
- package/dist/index.js +563 -128
- package/package.json +3 -1
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { join, dirname, resolve, relative, extname, basename, isAbsolute } from 'node:path';
|
|
2
2
|
import Fastify from 'fastify';
|
|
3
|
-
import { readdir, stat, writeFile,
|
|
3
|
+
import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
|
|
4
4
|
import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
5
5
|
import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
6
6
|
import ignore from 'ignore';
|
|
@@ -19,12 +19,13 @@ import { toMarkdown } from 'mdast-util-to-markdown';
|
|
|
19
19
|
import rehypeParse from 'rehype-parse';
|
|
20
20
|
import { unified } from 'unified';
|
|
21
21
|
import yaml from 'js-yaml';
|
|
22
|
-
import { createHash } from 'node:crypto';
|
|
23
22
|
import crypto from 'crypto';
|
|
24
23
|
import { packageDirectorySync } from 'package-directory';
|
|
24
|
+
import Database from 'better-sqlite3';
|
|
25
25
|
import chokidar from 'chokidar';
|
|
26
26
|
import { cosmiconfig } from 'cosmiconfig';
|
|
27
27
|
import https from 'node:https';
|
|
28
|
+
import { createHash } from 'node:crypto';
|
|
28
29
|
import pino from 'pino';
|
|
29
30
|
import { v5 } from 'uuid';
|
|
30
31
|
import * as cheerio from 'cheerio';
|
|
@@ -995,6 +996,24 @@ const watchConfigSchema = z.object({
|
|
|
995
996
|
.boolean()
|
|
996
997
|
.optional()
|
|
997
998
|
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
999
|
+
/** Move detection configuration for correlating unlink+add as file moves. */
|
|
1000
|
+
moveDetection: z
|
|
1001
|
+
.object({
|
|
1002
|
+
/** Enable move correlation. Default: true. */
|
|
1003
|
+
enabled: z
|
|
1004
|
+
.boolean()
|
|
1005
|
+
.default(true)
|
|
1006
|
+
.describe('Enable move detection via content hash correlation.'),
|
|
1007
|
+
/** Buffer time in ms for holding unlink events before treating as deletes. Default: 2000. */
|
|
1008
|
+
bufferMs: z
|
|
1009
|
+
.number()
|
|
1010
|
+
.int()
|
|
1011
|
+
.min(100)
|
|
1012
|
+
.default(2000)
|
|
1013
|
+
.describe('How long (ms) to buffer unlink events before treating as deletes.'),
|
|
1014
|
+
})
|
|
1015
|
+
.optional()
|
|
1016
|
+
.describe('Move detection: correlate unlink+add events as file moves to avoid re-embedding.'),
|
|
998
1017
|
});
|
|
999
1018
|
/**
|
|
1000
1019
|
* Configuration watch settings.
|
|
@@ -1303,18 +1322,13 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
1303
1322
|
embedding: embeddingConfigSchema.describe('Embedding model configuration.'),
|
|
1304
1323
|
/** Vector store configuration. */
|
|
1305
1324
|
vectorStore: vectorStoreConfigSchema.describe('Qdrant vector store configuration.'),
|
|
1306
|
-
/** Directory for persisted metadata. */
|
|
1307
|
-
metadataDir: z
|
|
1308
|
-
.string()
|
|
1309
|
-
.optional()
|
|
1310
|
-
.describe('Directory for persisted metadata sidecar files.'),
|
|
1311
1325
|
/** API server configuration. */
|
|
1312
1326
|
api: apiConfigSchema.optional().describe('API server configuration.'),
|
|
1313
|
-
/** Directory for persistent state files (issues.json, values.json).
|
|
1327
|
+
/** Directory for persistent state files (issues.json, values.json, enrichments.sqlite). */
|
|
1314
1328
|
stateDir: z
|
|
1315
1329
|
.string()
|
|
1316
1330
|
.optional()
|
|
1317
|
-
.describe('Directory for persistent state files (issues.json, values.json). Defaults to
|
|
1331
|
+
.describe('Directory for persistent state files (issues.json, values.json, enrichments.sqlite). Defaults to .jeeves-metadata.'),
|
|
1318
1332
|
/** Rules for inferring metadata from document properties (inline objects or file paths). */
|
|
1319
1333
|
inferenceRules: z
|
|
1320
1334
|
.array(z.union([inferenceRuleSchema, z.string()]))
|
|
@@ -3514,97 +3528,6 @@ function createPointsDeleteHandler(deps) {
|
|
|
3514
3528
|
}, deps.logger, 'PointsDelete');
|
|
3515
3529
|
}
|
|
3516
3530
|
|
|
3517
|
-
/**
|
|
3518
|
-
* @module util/normalizePath
|
|
3519
|
-
* Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
|
|
3520
|
-
*/
|
|
3521
|
-
/**
|
|
3522
|
-
* Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
|
|
3523
|
-
*
|
|
3524
|
-
* @param filePath - The original file path.
|
|
3525
|
-
* @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
|
|
3526
|
-
* @returns The normalized path string.
|
|
3527
|
-
*/
|
|
3528
|
-
function normalizePath(filePath, stripDriveLetter = false) {
|
|
3529
|
-
let result = filePath.replace(/\\/g, '/').toLowerCase();
|
|
3530
|
-
if (stripDriveLetter) {
|
|
3531
|
-
result = result.replace(/^([a-z]):/, (_m, letter) => letter);
|
|
3532
|
-
}
|
|
3533
|
-
return result;
|
|
3534
|
-
}
|
|
3535
|
-
|
|
3536
|
-
/**
|
|
3537
|
-
* @module metadata/metadata
|
|
3538
|
-
* Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
|
|
3539
|
-
*/
|
|
3540
|
-
/**
|
|
3541
|
-
* Derive a deterministic `.meta.json` path for a given file.
|
|
3542
|
-
*
|
|
3543
|
-
* @param filePath - The watched file path.
|
|
3544
|
-
* @param metadataDir - The root metadata directory.
|
|
3545
|
-
* @returns The full path to the metadata file.
|
|
3546
|
-
*/
|
|
3547
|
-
function metadataPath(filePath, metadataDir) {
|
|
3548
|
-
const normalised = normalizePath(filePath, true);
|
|
3549
|
-
const hash = createHash('sha256').update(normalised, 'utf8').digest('hex');
|
|
3550
|
-
return join(metadataDir, `${hash}.meta.json`);
|
|
3551
|
-
}
|
|
3552
|
-
/**
|
|
3553
|
-
* Read persisted metadata for a file.
|
|
3554
|
-
*
|
|
3555
|
-
* @param filePath - The watched file path.
|
|
3556
|
-
* @param metadataDir - The root metadata directory.
|
|
3557
|
-
* @returns The parsed metadata object, or `null` if not found.
|
|
3558
|
-
*/
|
|
3559
|
-
async function readMetadata(filePath, metadataDir) {
|
|
3560
|
-
try {
|
|
3561
|
-
const raw = await readFile(metadataPath(filePath, metadataDir), 'utf8');
|
|
3562
|
-
return JSON.parse(raw);
|
|
3563
|
-
}
|
|
3564
|
-
catch {
|
|
3565
|
-
return null;
|
|
3566
|
-
}
|
|
3567
|
-
}
|
|
3568
|
-
/**
|
|
3569
|
-
* Write metadata for a file.
|
|
3570
|
-
*
|
|
3571
|
-
* @param filePath - The watched file path.
|
|
3572
|
-
* @param metadataDir - The root metadata directory.
|
|
3573
|
-
* @param metadata - The metadata to persist.
|
|
3574
|
-
*/
|
|
3575
|
-
async function writeMetadata(filePath, metadataDir, metadata) {
|
|
3576
|
-
const dest = metadataPath(filePath, metadataDir);
|
|
3577
|
-
await mkdir(dirname(dest), { recursive: true });
|
|
3578
|
-
await writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
|
|
3579
|
-
}
|
|
3580
|
-
/**
|
|
3581
|
-
* Delete metadata for a file.
|
|
3582
|
-
*
|
|
3583
|
-
* @param filePath - The watched file path.
|
|
3584
|
-
* @param metadataDir - The root metadata directory.
|
|
3585
|
-
*/
|
|
3586
|
-
async function deleteMetadata(filePath, metadataDir) {
|
|
3587
|
-
try {
|
|
3588
|
-
await rm(metadataPath(filePath, metadataDir));
|
|
3589
|
-
}
|
|
3590
|
-
catch {
|
|
3591
|
-
// Ignore if file doesn't exist.
|
|
3592
|
-
}
|
|
3593
|
-
}
|
|
3594
|
-
|
|
3595
|
-
/**
|
|
3596
|
-
* @module metadata/constants
|
|
3597
|
-
* Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
|
|
3598
|
-
*/
|
|
3599
|
-
/** Keys managed by the indexing pipeline (not user enrichment). */
|
|
3600
|
-
const SYSTEM_METADATA_KEYS = [
|
|
3601
|
-
'file_path',
|
|
3602
|
-
'chunk_index',
|
|
3603
|
-
'total_chunks',
|
|
3604
|
-
'content_hash',
|
|
3605
|
-
'chunk_text',
|
|
3606
|
-
];
|
|
3607
|
-
|
|
3608
3531
|
/**
|
|
3609
3532
|
* @module processor/payloadFields
|
|
3610
3533
|
* Constants for Qdrant payload field names used across the processing pipeline.
|
|
@@ -3627,10 +3550,18 @@ const FIELD_MODIFIED_AT = 'modified_at';
|
|
|
3627
3550
|
const FIELD_LINE_START = 'line_start';
|
|
3628
3551
|
/** Qdrant payload field: 1-indexed line number where this chunk ends in the source file. */
|
|
3629
3552
|
const FIELD_LINE_END = 'line_end';
|
|
3553
|
+
/** Keys managed by the indexing pipeline (not user enrichment). */
|
|
3554
|
+
const SYSTEM_METADATA_KEYS = [
|
|
3555
|
+
FIELD_FILE_PATH,
|
|
3556
|
+
FIELD_CHUNK_INDEX,
|
|
3557
|
+
FIELD_TOTAL_CHUNKS,
|
|
3558
|
+
FIELD_CONTENT_HASH,
|
|
3559
|
+
FIELD_CHUNK_TEXT,
|
|
3560
|
+
];
|
|
3630
3561
|
|
|
3631
3562
|
/**
|
|
3632
3563
|
* @module api/handlers/rebuildMetadata
|
|
3633
|
-
* Fastify route handler for POST /rebuild-metadata.
|
|
3564
|
+
* Fastify route handler for POST /rebuild-metadata. Rebuilds enrichment store from vector store payloads.
|
|
3634
3565
|
*/
|
|
3635
3566
|
/**
|
|
3636
3567
|
* Create handler for POST /rebuild-metadata.
|
|
@@ -3639,7 +3570,6 @@ const FIELD_LINE_END = 'line_end';
|
|
|
3639
3570
|
*/
|
|
3640
3571
|
function createRebuildMetadataHandler(deps) {
|
|
3641
3572
|
return wrapHandler(async (_request, reply) => {
|
|
3642
|
-
const metadataDir = deps.metadataDir ?? '.jeeves-metadata';
|
|
3643
3573
|
const systemKeys = [...SYSTEM_METADATA_KEYS];
|
|
3644
3574
|
for await (const point of deps.vectorStore.scroll()) {
|
|
3645
3575
|
const payload = point.payload;
|
|
@@ -3647,7 +3577,7 @@ function createRebuildMetadataHandler(deps) {
|
|
|
3647
3577
|
if (typeof filePath !== 'string' || filePath.length === 0)
|
|
3648
3578
|
continue;
|
|
3649
3579
|
const enrichment = omit(payload, systemKeys);
|
|
3650
|
-
|
|
3580
|
+
deps.enrichmentStore?.set(filePath, enrichment);
|
|
3651
3581
|
}
|
|
3652
3582
|
return await reply.status(200).send({ ok: true });
|
|
3653
3583
|
}, deps.logger, 'Rebuild metadata');
|
|
@@ -4147,7 +4077,6 @@ async function buildTemplateEngineAndCustomMapLib(config, configDir) {
|
|
|
4147
4077
|
*/
|
|
4148
4078
|
function createProcessorConfig(config, configDir, customMapLib) {
|
|
4149
4079
|
return {
|
|
4150
|
-
metadataDir: config.metadataDir ?? '.jeeves-metadata',
|
|
4151
4080
|
chunkSize: config.embedding.chunkSize,
|
|
4152
4081
|
chunkOverlap: config.embedding.chunkOverlap,
|
|
4153
4082
|
maps: resolveMapsConfig(config.maps),
|
|
@@ -4159,7 +4088,7 @@ function createProcessorConfig(config, configDir, customMapLib) {
|
|
|
4159
4088
|
/**
|
|
4160
4089
|
* Create file system watcher with gitignore filtering.
|
|
4161
4090
|
*/
|
|
4162
|
-
function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker) {
|
|
4091
|
+
function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker, contentHashCache) {
|
|
4163
4092
|
const respectGitignore = config.watch.respectGitignore ?? true;
|
|
4164
4093
|
const gitignoreFilter = respectGitignore
|
|
4165
4094
|
? new GitignoreFilter(config.watch.paths)
|
|
@@ -4170,6 +4099,7 @@ function createWatcher(config, factories, queue, processor, logger, runtimeOptio
|
|
|
4170
4099
|
onFatalError: runtimeOptions.onFatalError,
|
|
4171
4100
|
gitignoreFilter,
|
|
4172
4101
|
initialScanTracker,
|
|
4102
|
+
contentHashCache,
|
|
4173
4103
|
});
|
|
4174
4104
|
return { watcher, gitignoreFilter };
|
|
4175
4105
|
}
|
|
@@ -4506,7 +4436,7 @@ function createApiServer(options) {
|
|
|
4506
4436
|
hybridConfig,
|
|
4507
4437
|
}));
|
|
4508
4438
|
app.post('/rebuild-metadata', createRebuildMetadataHandler({
|
|
4509
|
-
|
|
4439
|
+
enrichmentStore: options.enrichmentStore,
|
|
4510
4440
|
vectorStore,
|
|
4511
4441
|
logger,
|
|
4512
4442
|
}));
|
|
@@ -4580,6 +4510,218 @@ function createApiServer(options) {
|
|
|
4580
4510
|
return app;
|
|
4581
4511
|
}
|
|
4582
4512
|
|
|
4513
|
+
/**
|
|
4514
|
+
* @module util/normalizePath
|
|
4515
|
+
* Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
|
|
4516
|
+
*/
|
|
4517
|
+
/**
|
|
4518
|
+
* Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
|
|
4519
|
+
*
|
|
4520
|
+
* @param filePath - The original file path.
|
|
4521
|
+
* @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
|
|
4522
|
+
* @returns The normalized path string.
|
|
4523
|
+
*/
|
|
4524
|
+
function normalizePath(filePath, stripDriveLetter = false) {
|
|
4525
|
+
let result = filePath.replace(/\\/g, '/').toLowerCase();
|
|
4526
|
+
if (stripDriveLetter) {
|
|
4527
|
+
result = result.replace(/^([a-z]):/, (_m, letter) => letter);
|
|
4528
|
+
}
|
|
4529
|
+
return result;
|
|
4530
|
+
}
|
|
4531
|
+
|
|
4532
|
+
/**
|
|
4533
|
+
* @module cache/ContentHashCache
|
|
4534
|
+
* In-memory cache mapping normalized file paths to content hashes.
|
|
4535
|
+
* Supports reverse lookup by hash for move correlation.
|
|
4536
|
+
*/
|
|
4537
|
+
/**
|
|
4538
|
+
* In-memory content hash cache for move detection.
|
|
4539
|
+
*
|
|
4540
|
+
* Maps normalized file paths to SHA-256 content hashes.
|
|
4541
|
+
* Supports reverse lookup (hash → paths) for correlating
|
|
4542
|
+
* unlink+add events as file moves.
|
|
4543
|
+
*/
|
|
4544
|
+
class ContentHashCache {
|
|
4545
|
+
pathToHash = new Map();
|
|
4546
|
+
hashToPaths = new Map();
|
|
4547
|
+
/**
|
|
4548
|
+
* Store or update the content hash for a file path.
|
|
4549
|
+
*
|
|
4550
|
+
* @param filePath - The file path (will be normalized).
|
|
4551
|
+
* @param hash - The SHA-256 content hash.
|
|
4552
|
+
*/
|
|
4553
|
+
set(filePath, hash) {
|
|
4554
|
+
const normalized = normalizePath(filePath, true);
|
|
4555
|
+
const oldHash = this.pathToHash.get(normalized);
|
|
4556
|
+
// Remove from old hash index if hash changed
|
|
4557
|
+
if (oldHash !== undefined && oldHash !== hash) {
|
|
4558
|
+
const oldPaths = this.hashToPaths.get(oldHash);
|
|
4559
|
+
if (oldPaths) {
|
|
4560
|
+
oldPaths.delete(normalized);
|
|
4561
|
+
if (oldPaths.size === 0)
|
|
4562
|
+
this.hashToPaths.delete(oldHash);
|
|
4563
|
+
}
|
|
4564
|
+
}
|
|
4565
|
+
this.pathToHash.set(normalized, hash);
|
|
4566
|
+
let paths = this.hashToPaths.get(hash);
|
|
4567
|
+
if (!paths) {
|
|
4568
|
+
paths = new Set();
|
|
4569
|
+
this.hashToPaths.set(hash, paths);
|
|
4570
|
+
}
|
|
4571
|
+
paths.add(normalized);
|
|
4572
|
+
}
|
|
4573
|
+
/**
|
|
4574
|
+
* Get the content hash for a file path.
|
|
4575
|
+
*
|
|
4576
|
+
* @param filePath - The file path (will be normalized).
|
|
4577
|
+
* @returns The content hash, or undefined if not cached.
|
|
4578
|
+
*/
|
|
4579
|
+
get(filePath) {
|
|
4580
|
+
return this.pathToHash.get(normalizePath(filePath, true));
|
|
4581
|
+
}
|
|
4582
|
+
/**
|
|
4583
|
+
* Remove a file path from the cache.
|
|
4584
|
+
*
|
|
4585
|
+
* @param filePath - The file path (will be normalized).
|
|
4586
|
+
*/
|
|
4587
|
+
delete(filePath) {
|
|
4588
|
+
const normalized = normalizePath(filePath, true);
|
|
4589
|
+
const hash = this.pathToHash.get(normalized);
|
|
4590
|
+
if (hash === undefined)
|
|
4591
|
+
return;
|
|
4592
|
+
this.pathToHash.delete(normalized);
|
|
4593
|
+
const paths = this.hashToPaths.get(hash);
|
|
4594
|
+
if (paths) {
|
|
4595
|
+
paths.delete(normalized);
|
|
4596
|
+
if (paths.size === 0)
|
|
4597
|
+
this.hashToPaths.delete(hash);
|
|
4598
|
+
}
|
|
4599
|
+
}
|
|
4600
|
+
/**
|
|
4601
|
+
* Reverse lookup: get all file paths with a given content hash.
|
|
4602
|
+
*
|
|
4603
|
+
* @param hash - The content hash to look up.
|
|
4604
|
+
* @returns Array of normalized file paths with that hash.
|
|
4605
|
+
*/
|
|
4606
|
+
getByHash(hash) {
|
|
4607
|
+
const paths = this.hashToPaths.get(hash);
|
|
4608
|
+
return paths ? [...paths] : [];
|
|
4609
|
+
}
|
|
4610
|
+
/** Number of cached entries. */
|
|
4611
|
+
get size() {
|
|
4612
|
+
return this.pathToHash.size;
|
|
4613
|
+
}
|
|
4614
|
+
}
|
|
4615
|
+
|
|
4616
|
+
/**
|
|
4617
|
+
* @module enrichment/EnrichmentStore
|
|
4618
|
+
* SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
|
|
4619
|
+
*/
|
|
4620
|
+
/**
|
|
4621
|
+
* SQLite-backed enrichment metadata store.
|
|
4622
|
+
*/
|
|
4623
|
+
class EnrichmentStore {
|
|
4624
|
+
db;
|
|
4625
|
+
/**
|
|
4626
|
+
* Create or open the enrichment store.
|
|
4627
|
+
*
|
|
4628
|
+
* @param stateDir - Directory for the SQLite database file.
|
|
4629
|
+
*/
|
|
4630
|
+
constructor(stateDir) {
|
|
4631
|
+
mkdirSync(stateDir, { recursive: true });
|
|
4632
|
+
const dbPath = join(stateDir, 'enrichments.sqlite');
|
|
4633
|
+
this.db = new Database(dbPath);
|
|
4634
|
+
this.db.pragma('journal_mode = WAL');
|
|
4635
|
+
this.db.exec(`
|
|
4636
|
+
CREATE TABLE IF NOT EXISTS enrichments (
|
|
4637
|
+
path TEXT PRIMARY KEY,
|
|
4638
|
+
metadata TEXT NOT NULL,
|
|
4639
|
+
created_at TEXT NOT NULL,
|
|
4640
|
+
updated_at TEXT NOT NULL
|
|
4641
|
+
)
|
|
4642
|
+
`);
|
|
4643
|
+
}
|
|
4644
|
+
get(path) {
|
|
4645
|
+
const normalized = normalizePath(path);
|
|
4646
|
+
const row = this.db
|
|
4647
|
+
.prepare('SELECT metadata FROM enrichments WHERE path = ?')
|
|
4648
|
+
.get(normalized);
|
|
4649
|
+
if (!row)
|
|
4650
|
+
return null;
|
|
4651
|
+
return JSON.parse(row.metadata);
|
|
4652
|
+
}
|
|
4653
|
+
set(path, metadata) {
|
|
4654
|
+
const normalized = normalizePath(path);
|
|
4655
|
+
const now = new Date().toISOString();
|
|
4656
|
+
const existing = this.get(path);
|
|
4657
|
+
const merged = existing ? { ...existing, ...metadata } : metadata;
|
|
4658
|
+
const json = JSON.stringify(merged);
|
|
4659
|
+
if (existing) {
|
|
4660
|
+
this.db
|
|
4661
|
+
.prepare('UPDATE enrichments SET metadata = ?, updated_at = ? WHERE path = ?')
|
|
4662
|
+
.run(json, now, normalized);
|
|
4663
|
+
}
|
|
4664
|
+
else {
|
|
4665
|
+
this.db
|
|
4666
|
+
.prepare('INSERT INTO enrichments (path, metadata, created_at, updated_at) VALUES (?, ?, ?, ?)')
|
|
4667
|
+
.run(normalized, json, now, now);
|
|
4668
|
+
}
|
|
4669
|
+
}
|
|
4670
|
+
delete(path) {
|
|
4671
|
+
const normalized = normalizePath(path);
|
|
4672
|
+
this.db.prepare('DELETE FROM enrichments WHERE path = ?').run(normalized);
|
|
4673
|
+
}
|
|
4674
|
+
move(oldPath, newPath) {
|
|
4675
|
+
const normalizedOld = normalizePath(oldPath);
|
|
4676
|
+
const normalizedNew = normalizePath(newPath);
|
|
4677
|
+
const now = new Date().toISOString();
|
|
4678
|
+
this.db
|
|
4679
|
+
.prepare('UPDATE enrichments SET path = ?, updated_at = ? WHERE path = ?')
|
|
4680
|
+
.run(normalizedNew, now, normalizedOld);
|
|
4681
|
+
}
|
|
4682
|
+
list() {
|
|
4683
|
+
const rows = this.db
|
|
4684
|
+
.prepare('SELECT path FROM enrichments ORDER BY path')
|
|
4685
|
+
.all();
|
|
4686
|
+
return rows.map((r) => r.path);
|
|
4687
|
+
}
|
|
4688
|
+
close() {
|
|
4689
|
+
this.db.close();
|
|
4690
|
+
}
|
|
4691
|
+
}
|
|
4692
|
+
|
|
4693
|
+
/**
|
|
4694
|
+
* @module enrichment/merge
|
|
4695
|
+
* Composable merge for inferred + enrichment metadata. Scalars: enrichment overwrites. Arrays: union + deduplicate. No I/O.
|
|
4696
|
+
*/
|
|
4697
|
+
/**
|
|
4698
|
+
* Merge enrichment metadata into inferred metadata with composable semantics.
|
|
4699
|
+
*
|
|
4700
|
+
* - Scalar fields: enrichment value overwrites inferred value.
|
|
4701
|
+
* - Array fields: union merge with deduplication (enrichment values appended).
|
|
4702
|
+
*
|
|
4703
|
+
* @param inferred - Metadata derived from inference rules.
|
|
4704
|
+
* @param enrichment - Human/agent-provided enrichment metadata.
|
|
4705
|
+
* @returns Merged metadata.
|
|
4706
|
+
*/
|
|
4707
|
+
function mergeEnrichment(inferred, enrichment) {
|
|
4708
|
+
const result = { ...inferred };
|
|
4709
|
+
for (const [key, enrichValue] of Object.entries(enrichment)) {
|
|
4710
|
+
const inferredValue = result[key];
|
|
4711
|
+
if (Array.isArray(inferredValue) && Array.isArray(enrichValue)) {
|
|
4712
|
+
const combined = [
|
|
4713
|
+
...inferredValue,
|
|
4714
|
+
...enrichValue,
|
|
4715
|
+
];
|
|
4716
|
+
result[key] = [...new Set(combined)];
|
|
4717
|
+
}
|
|
4718
|
+
else {
|
|
4719
|
+
result[key] = enrichValue;
|
|
4720
|
+
}
|
|
4721
|
+
}
|
|
4722
|
+
return result;
|
|
4723
|
+
}
|
|
4724
|
+
|
|
4583
4725
|
/**
|
|
4584
4726
|
* @module util/JsonFileStore
|
|
4585
4727
|
* Small base class for JSON-backed read/modify/write stores with in-memory caching.
|
|
@@ -4825,7 +4967,7 @@ class ConfigWatcher {
|
|
|
4825
4967
|
*/
|
|
4826
4968
|
/** Default root-level config values. */
|
|
4827
4969
|
const ROOT_DEFAULTS = {
|
|
4828
|
-
|
|
4970
|
+
stateDir: '.jeeves-metadata',
|
|
4829
4971
|
shutdownTimeoutMs: 10000,
|
|
4830
4972
|
};
|
|
4831
4973
|
/** Default configWatch values. */
|
|
@@ -5197,7 +5339,7 @@ function createLogger(config) {
|
|
|
5197
5339
|
|
|
5198
5340
|
/**
|
|
5199
5341
|
* @module hash
|
|
5200
|
-
* Provides SHA-256 content hashing. Pure
|
|
5342
|
+
* Provides SHA-256 content hashing. Pure functions: text hash and file hash. File hash does I/O.
|
|
5201
5343
|
*/
|
|
5202
5344
|
/**
|
|
5203
5345
|
* Compute a SHA-256 hex digest of the given text.
|
|
@@ -5208,6 +5350,16 @@ function createLogger(config) {
|
|
|
5208
5350
|
function contentHash(text) {
|
|
5209
5351
|
return createHash('sha256').update(text, 'utf8').digest('hex');
|
|
5210
5352
|
}
|
|
5353
|
+
/**
|
|
5354
|
+
* Compute a SHA-256 hex digest of a file's raw bytes.
|
|
5355
|
+
*
|
|
5356
|
+
* @param filePath - Path to the file.
|
|
5357
|
+
* @returns The hex-encoded SHA-256 hash.
|
|
5358
|
+
*/
|
|
5359
|
+
async function fileHash(filePath) {
|
|
5360
|
+
const buffer = await readFile(filePath);
|
|
5361
|
+
return createHash('sha256').update(buffer).digest('hex');
|
|
5362
|
+
}
|
|
5211
5363
|
|
|
5212
5364
|
/**
|
|
5213
5365
|
* @module pointId
|
|
@@ -5369,7 +5521,7 @@ async function extractText(filePath, extension, additionalExtractors) {
|
|
|
5369
5521
|
|
|
5370
5522
|
/**
|
|
5371
5523
|
* @module processor/buildMetadata
|
|
5372
|
-
* Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text,
|
|
5524
|
+
* Builds merged metadata from file content, inference rules, and enrichment store. I/O: reads files, extracts text, queries SQLite enrichment.
|
|
5373
5525
|
*/
|
|
5374
5526
|
/**
|
|
5375
5527
|
* Build merged metadata for a file by applying inference rules and merging with enrichment metadata.
|
|
@@ -5378,7 +5530,7 @@ async function extractText(filePath, extension, additionalExtractors) {
|
|
|
5378
5530
|
* @returns The merged metadata and intermediate data.
|
|
5379
5531
|
*/
|
|
5380
5532
|
async function buildMergedMetadata(options) {
|
|
5381
|
-
const { filePath, compiledRules,
|
|
5533
|
+
const { filePath, compiledRules, enrichmentStore, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
|
|
5382
5534
|
const ext = extname(filePath);
|
|
5383
5535
|
const stats = await stat(filePath);
|
|
5384
5536
|
// 1. Extract text and structured data
|
|
@@ -5393,12 +5545,11 @@ async function buildMergedMetadata(options) {
|
|
|
5393
5545
|
customMapLib,
|
|
5394
5546
|
globalSchemas,
|
|
5395
5547
|
});
|
|
5396
|
-
// 3. Read enrichment metadata (merge
|
|
5397
|
-
const enrichment =
|
|
5398
|
-
const metadata =
|
|
5399
|
-
|
|
5400
|
-
...
|
|
5401
|
-
};
|
|
5548
|
+
// 3. Read enrichment metadata from store (composable merge)
|
|
5549
|
+
const enrichment = enrichmentStore?.get(filePath) ?? null;
|
|
5550
|
+
const metadata = enrichment
|
|
5551
|
+
? mergeEnrichment(inferred, enrichment)
|
|
5552
|
+
: { ...inferred };
|
|
5402
5553
|
return {
|
|
5403
5554
|
inferred,
|
|
5404
5555
|
enrichment,
|
|
@@ -5587,22 +5738,26 @@ class DocumentProcessor {
|
|
|
5587
5738
|
compiledRules;
|
|
5588
5739
|
logger;
|
|
5589
5740
|
templateEngine;
|
|
5741
|
+
enrichmentStore;
|
|
5590
5742
|
issuesManager;
|
|
5591
5743
|
valuesManager;
|
|
5744
|
+
contentHashCache;
|
|
5592
5745
|
/**
|
|
5593
5746
|
* Create a new DocumentProcessor.
|
|
5594
5747
|
*
|
|
5595
5748
|
* @param deps - The processor dependencies.
|
|
5596
5749
|
*/
|
|
5597
|
-
constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, issuesManager, valuesManager, }) {
|
|
5750
|
+
constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, enrichmentStore, issuesManager, valuesManager, contentHashCache, }) {
|
|
5598
5751
|
this.config = config;
|
|
5599
5752
|
this.embeddingProvider = embeddingProvider;
|
|
5600
5753
|
this.vectorStore = vectorStore;
|
|
5601
5754
|
this.compiledRules = compiledRules;
|
|
5602
5755
|
this.logger = logger;
|
|
5603
5756
|
this.templateEngine = templateEngine;
|
|
5757
|
+
this.enrichmentStore = enrichmentStore;
|
|
5604
5758
|
this.issuesManager = issuesManager;
|
|
5605
5759
|
this.valuesManager = valuesManager;
|
|
5760
|
+
this.contentHashCache = contentHashCache;
|
|
5606
5761
|
}
|
|
5607
5762
|
/**
|
|
5608
5763
|
* Build merged metadata for a file and add matched_rules.
|
|
@@ -5611,7 +5766,7 @@ class DocumentProcessor {
|
|
|
5611
5766
|
const result = await buildMergedMetadata({
|
|
5612
5767
|
filePath,
|
|
5613
5768
|
compiledRules: this.compiledRules,
|
|
5614
|
-
|
|
5769
|
+
enrichmentStore: this.enrichmentStore,
|
|
5615
5770
|
maps: this.config.maps,
|
|
5616
5771
|
logger: this.logger,
|
|
5617
5772
|
templateEngine: this.templateEngine,
|
|
@@ -5658,6 +5813,9 @@ class DocumentProcessor {
|
|
|
5658
5813
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
5659
5814
|
return;
|
|
5660
5815
|
}
|
|
5816
|
+
// Compute file-level hash for move correlation cache.
|
|
5817
|
+
const rawHash = await fileHash(filePath);
|
|
5818
|
+
this.contentHashCache?.set(filePath, rawHash);
|
|
5661
5819
|
const hash = contentHash(textToEmbed);
|
|
5662
5820
|
const baseId = pointId(filePath, 0);
|
|
5663
5821
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
@@ -5694,12 +5852,13 @@ class DocumentProcessor {
|
|
|
5694
5852
|
const totalChunks = getChunkCount(existingPayload);
|
|
5695
5853
|
const ids = chunkIds(filePath, totalChunks);
|
|
5696
5854
|
await this.vectorStore.delete(ids);
|
|
5697
|
-
|
|
5855
|
+
this.enrichmentStore?.delete(filePath);
|
|
5856
|
+
this.contentHashCache?.delete(filePath);
|
|
5698
5857
|
this.logger.info({ filePath }, 'File deleted from index');
|
|
5699
5858
|
}, undefined);
|
|
5700
5859
|
}
|
|
5701
5860
|
/**
|
|
5702
|
-
* Process a metadata update: merge
|
|
5861
|
+
* Process a metadata update: merge into enrichment store, update Qdrant payloads (no re-embed).
|
|
5703
5862
|
*
|
|
5704
5863
|
* @param filePath - The file whose metadata to update.
|
|
5705
5864
|
* @param metadata - The new metadata to merge.
|
|
@@ -5707,9 +5866,8 @@ class DocumentProcessor {
|
|
|
5707
5866
|
*/
|
|
5708
5867
|
async processMetadataUpdate(filePath, metadata) {
|
|
5709
5868
|
return this.withFileErrorHandling(filePath, 'Failed to update metadata', async () => {
|
|
5710
|
-
|
|
5711
|
-
const merged =
|
|
5712
|
-
await writeMetadata(filePath, this.config.metadataDir, merged);
|
|
5869
|
+
this.enrichmentStore?.set(filePath, metadata);
|
|
5870
|
+
const merged = this.enrichmentStore?.get(filePath) ?? metadata;
|
|
5713
5871
|
const baseId = pointId(filePath, 0);
|
|
5714
5872
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
5715
5873
|
if (!existingPayload)
|
|
@@ -5771,6 +5929,56 @@ class DocumentProcessor {
|
|
|
5771
5929
|
transformed: renderedContent !== null,
|
|
5772
5930
|
};
|
|
5773
5931
|
}
|
|
5932
|
+
/**
|
|
5933
|
+
* Move a file's vector points from old path to new path without re-embedding.
|
|
5934
|
+
* Re-applies inference rules against the new path.
|
|
5935
|
+
*
|
|
5936
|
+
* @param oldPath - The original file path.
|
|
5937
|
+
* @param newPath - The new file path.
|
|
5938
|
+
*/
|
|
5939
|
+
async moveFile(oldPath, newPath) {
|
|
5940
|
+
await this.withFileErrorHandling(newPath, 'Failed to move file', async () => {
|
|
5941
|
+
const baseId = pointId(oldPath, 0);
|
|
5942
|
+
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
5943
|
+
const totalChunks = getChunkCount(existingPayload);
|
|
5944
|
+
const oldIds = chunkIds(oldPath, totalChunks);
|
|
5945
|
+
const oldPoints = await this.vectorStore.getPointsWithVectors(oldIds);
|
|
5946
|
+
if (oldPoints.length === 0) {
|
|
5947
|
+
this.logger.warn({ oldPath, newPath }, 'No points found for move');
|
|
5948
|
+
return;
|
|
5949
|
+
}
|
|
5950
|
+
// Build new metadata from inference rules against the new path.
|
|
5951
|
+
const { metadataWithRules, matchedRules, metadata } = await this.buildMetadataWithRules(newPath);
|
|
5952
|
+
// Create new points with updated IDs and file_path payload.
|
|
5953
|
+
const newPoints = oldPoints.map((pt, i) => ({
|
|
5954
|
+
id: pointId(newPath, i),
|
|
5955
|
+
vector: pt.vector,
|
|
5956
|
+
payload: {
|
|
5957
|
+
...pt.payload,
|
|
5958
|
+
...metadataWithRules,
|
|
5959
|
+
[FIELD_FILE_PATH]: normalizeSlashes(newPath),
|
|
5960
|
+
},
|
|
5961
|
+
}));
|
|
5962
|
+
await this.vectorStore.upsert(newPoints);
|
|
5963
|
+
await this.vectorStore.delete(oldIds);
|
|
5964
|
+
// Migrate enrichment and clear old issues.
|
|
5965
|
+
this.enrichmentStore?.move(oldPath, newPath);
|
|
5966
|
+
this.issuesManager?.clear(oldPath);
|
|
5967
|
+
// Update values index for the new path's matched rules.
|
|
5968
|
+
if (this.valuesManager) {
|
|
5969
|
+
for (const ruleName of matchedRules) {
|
|
5970
|
+
this.valuesManager.update(ruleName, metadata);
|
|
5971
|
+
}
|
|
5972
|
+
}
|
|
5973
|
+
// Update content hash cache.
|
|
5974
|
+
const oldHash = this.contentHashCache?.get(oldPath);
|
|
5975
|
+
if (oldHash) {
|
|
5976
|
+
this.contentHashCache?.set(newPath, oldHash);
|
|
5977
|
+
}
|
|
5978
|
+
this.contentHashCache?.delete(oldPath);
|
|
5979
|
+
this.logger.info({ oldPath, newPath, chunks: oldPoints.length }, 'File moved in index');
|
|
5980
|
+
}, undefined);
|
|
5981
|
+
}
|
|
5774
5982
|
/**
|
|
5775
5983
|
* Update compiled inference rules, template engine, and custom map lib.
|
|
5776
5984
|
*
|
|
@@ -6429,6 +6637,33 @@ class VectorStoreClient {
|
|
|
6429
6637
|
async hybridSearch(vector, queryText, limit, textWeight, filter) {
|
|
6430
6638
|
return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
|
|
6431
6639
|
}
|
|
6640
|
+
/**
|
|
6641
|
+
* Retrieve points with their vectors by ID.
|
|
6642
|
+
*
|
|
6643
|
+
* @param ids - The point IDs to retrieve.
|
|
6644
|
+
* @returns Points with vectors and payloads; missing IDs are omitted.
|
|
6645
|
+
*/
|
|
6646
|
+
async getPointsWithVectors(ids) {
|
|
6647
|
+
if (ids.length === 0)
|
|
6648
|
+
return [];
|
|
6649
|
+
try {
|
|
6650
|
+
const results = await this.client.retrieve(this.collectionName, {
|
|
6651
|
+
ids,
|
|
6652
|
+
with_payload: true,
|
|
6653
|
+
with_vector: true,
|
|
6654
|
+
});
|
|
6655
|
+
return results
|
|
6656
|
+
.filter((r) => r.vector != null)
|
|
6657
|
+
.map((r) => ({
|
|
6658
|
+
id: String(r.id),
|
|
6659
|
+
vector: r.vector,
|
|
6660
|
+
payload: r.payload,
|
|
6661
|
+
}));
|
|
6662
|
+
}
|
|
6663
|
+
catch {
|
|
6664
|
+
return [];
|
|
6665
|
+
}
|
|
6666
|
+
}
|
|
6432
6667
|
/**
|
|
6433
6668
|
* Scroll one page of points matching a filter.
|
|
6434
6669
|
*
|
|
@@ -6636,6 +6871,163 @@ function resolveIgnored(ignored) {
|
|
|
6636
6871
|
});
|
|
6637
6872
|
}
|
|
6638
6873
|
|
|
6874
|
+
/**
 * @module watcher/MoveCorrelator
 * Correlates unlink+add events as file moves using content hash matching.
 * Buffers unlink events and matches against subsequent add events.
 */
/**
 * Correlates unlink+add file system events as moves using content hash matching.
 *
 * When move detection is disabled, events pass straight through
 * (unlink -> onDelete, add -> onCreate).
 */
class MoveCorrelator {
    enabled;
    bufferMs;
    cache;
    logger;
    onMove;
    onDelete;
    onCreate;
    /** Buffered unlinks indexed by content hash (FIFO per hash). */
    buffer = new Map();
    /** Track unlink burst rate per parent directory for bulk mode. */
    burstCounters = new Map();
    /** Threshold: if N+ unlinks from same parent in burstWindowMs, extend buffer. */
    static BURST_THRESHOLD = 5;
    static BURST_WINDOW_MS = 500;
    static BURST_MULTIPLIER = 3;
    /**
     * @param options - Correlator configuration:
     *   `enabled` (move detection on/off), `bufferMs` (correlation window),
     *   `contentHashCache` (path -> content hash lookup), `logger`, and the
     *   `onMove`/`onDelete`/`onCreate` event callbacks.
     */
    constructor(options) {
        this.enabled = options.enabled;
        this.bufferMs = options.bufferMs;
        this.cache = options.contentHashCache;
        this.logger = options.logger;
        this.onMove = options.onMove;
        this.onDelete = options.onDelete;
        this.onCreate = options.onCreate;
    }
    /**
     * Handle an unlink event. Buffers the event for correlation.
     *
     * Without a cached content hash for the path, no correlation is possible,
     * so the event is emitted immediately as a delete.
     *
     * @param path - The removed file path.
     */
    handleUnlink(path) {
        if (!this.enabled) {
            this.onDelete(path);
            return;
        }
        const hash = this.cache.get(path);
        if (!hash) {
            this.logger.debug({ path }, 'No cached hash for unlinked file, treating as delete');
            this.onDelete(path);
            return;
        }
        const timeoutMs = this.getEffectiveTimeout(path);
        // If no matching add arrives within the window, the unlink expires
        // into a plain delete.
        const timer = setTimeout(() => {
            this.expireUnlink(hash, path);
        }, timeoutMs);
        const entry = {
            path,
            hash,
            timestamp: Date.now(),
            timer,
        };
        let entries = this.buffer.get(hash);
        if (!entries) {
            entries = [];
            this.buffer.set(hash, entries);
        }
        entries.push(entry);
        this.logger.debug({ path, hash: hash.slice(0, 12), timeoutMs }, 'Buffered unlink for move correlation');
    }
    /**
     * Handle an add event. Checks buffer for matching unlink (move detection).
     *
     * If the new file's content hash cannot be computed, the event falls back
     * to a plain create.
     *
     * @param path - The added file path.
     */
    async handleAdd(path) {
        if (!this.enabled) {
            this.onCreate(path);
            return;
        }
        let hash;
        try {
            hash = await fileHash(path);
        }
        catch {
            this.onCreate(path);
            return;
        }
        const entries = this.buffer.get(hash);
        if (entries && entries.length > 0) {
            // FIFO: consume oldest matching unlink
            const matched = entries.shift();
            clearTimeout(matched.timer);
            if (entries.length === 0)
                this.buffer.delete(hash);
            this.logger.info({ oldPath: matched.path, newPath: path }, 'Move detected');
            this.onMove(matched.path, path);
        }
        else {
            this.onCreate(path);
        }
    }
    /**
     * Flush all buffered unlinks as deletes. Call on shutdown.
     */
    flush() {
        for (const [, entries] of this.buffer) {
            for (const entry of entries) {
                clearTimeout(entry.timer);
                this.onDelete(entry.path);
            }
        }
        this.buffer.clear();
        this.burstCounters.clear();
    }
    /** Number of currently buffered unlink events. */
    get pendingCount() {
        let count = 0;
        for (const [, entries] of this.buffer) {
            count += entries.length;
        }
        return count;
    }
    /**
     * Get effective timeout, applying burst detection for bulk moves.
     *
     * When BURST_THRESHOLD or more unlinks hit the same parent directory
     * within BURST_WINDOW_MS, the buffer window is extended by
     * BURST_MULTIPLIER to give bulk moves time to complete.
     */
    getEffectiveTimeout(path) {
        const parentDir = dirname(path);
        const now = Date.now();
        // Lazily evict counters whose burst window has elapsed so the map does
        // not grow without bound across a long-lived watch session (previously
        // stale directory entries were only cleared by flush()). An expired
        // counter would have been reset on next use anyway, so behavior for
        // the current path is unchanged.
        for (const [dir, stale] of this.burstCounters) {
            if (now - stale.firstTs > MoveCorrelator.BURST_WINDOW_MS) {
                this.burstCounters.delete(dir);
            }
        }
        let counter = this.burstCounters.get(parentDir);
        if (!counter) {
            counter = { count: 0, firstTs: now };
            this.burstCounters.set(parentDir, counter);
        }
        counter.count++;
        if (counter.count >= MoveCorrelator.BURST_THRESHOLD) {
            return this.bufferMs * MoveCorrelator.BURST_MULTIPLIER;
        }
        return this.bufferMs;
    }
    /**
     * Handle a buffered unlink timeout — emit as delete.
     */
    expireUnlink(hash, path) {
        const entries = this.buffer.get(hash);
        if (entries) {
            const idx = entries.findIndex((e) => e.path === path);
            if (idx >= 0) {
                entries.splice(idx, 1);
                if (entries.length === 0)
                    this.buffer.delete(hash);
            }
        }
        this.logger.debug({ path, hash: hash.slice(0, 12) }, 'Buffered unlink expired, treating as delete');
        this.onDelete(path);
    }
}
|
|
7030
|
+
|
|
6639
7031
|
/**
|
|
6640
7032
|
* @module watcher
|
|
6641
7033
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -6651,6 +7043,8 @@ class FileSystemWatcher {
|
|
|
6651
7043
|
health;
|
|
6652
7044
|
gitignoreFilter;
|
|
6653
7045
|
initialScanTracker;
|
|
7046
|
+
contentHashCache;
|
|
7047
|
+
moveCorrelator;
|
|
6654
7048
|
globMatches;
|
|
6655
7049
|
watcher;
|
|
6656
7050
|
/**
|
|
@@ -6669,6 +7063,7 @@ class FileSystemWatcher {
|
|
|
6669
7063
|
this.logger = logger;
|
|
6670
7064
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
6671
7065
|
this.initialScanTracker = options.initialScanTracker;
|
|
7066
|
+
this.contentHashCache = options.contentHashCache;
|
|
6672
7067
|
this.globMatches = () => true;
|
|
6673
7068
|
const healthOptions = {
|
|
6674
7069
|
maxRetries: options.maxRetries,
|
|
@@ -6714,6 +7109,26 @@ class FileSystemWatcher {
|
|
|
6714
7109
|
}
|
|
6715
7110
|
}
|
|
6716
7111
|
};
|
|
7112
|
+
// Create move correlator if move detection is configured and cache is available.
|
|
7113
|
+
const moveConfig = this.config.moveDetection;
|
|
7114
|
+
if (moveConfig?.enabled && this.contentHashCache) {
|
|
7115
|
+
this.moveCorrelator = new MoveCorrelator({
|
|
7116
|
+
enabled: true,
|
|
7117
|
+
bufferMs: moveConfig.bufferMs,
|
|
7118
|
+
contentHashCache: this.contentHashCache,
|
|
7119
|
+
logger: this.logger,
|
|
7120
|
+
onMove: (oldPath, newPath) => {
|
|
7121
|
+
this.queue.enqueue({ type: 'move', path: newPath, oldPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.moveFile(oldPath, newPath)));
|
|
7122
|
+
},
|
|
7123
|
+
onDelete: (deletedPath) => {
|
|
7124
|
+
this.queue.enqueue({ type: 'delete', path: deletedPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(deletedPath)));
|
|
7125
|
+
},
|
|
7126
|
+
onCreate: (createdPath) => {
|
|
7127
|
+
this.queue.enqueue({ type: 'create', path: createdPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(createdPath)));
|
|
7128
|
+
},
|
|
7129
|
+
});
|
|
7130
|
+
}
|
|
7131
|
+
const correlator = this.moveCorrelator;
|
|
6717
7132
|
this.watcher = chokidar.watch(roots, {
|
|
6718
7133
|
ignored,
|
|
6719
7134
|
usePolling: this.config.usePolling,
|
|
@@ -6747,7 +7162,12 @@ class FileSystemWatcher {
|
|
|
6747
7162
|
this.initialScanTracker?.incrementEnqueued();
|
|
6748
7163
|
}
|
|
6749
7164
|
this.logger.debug({ path }, 'File added');
|
|
6750
|
-
|
|
7165
|
+
if (correlator && initialScanComplete) {
|
|
7166
|
+
void correlator.handleAdd(path);
|
|
7167
|
+
}
|
|
7168
|
+
else {
|
|
7169
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
7170
|
+
}
|
|
6751
7171
|
});
|
|
6752
7172
|
this.watcher.on('change', (path) => {
|
|
6753
7173
|
this.handleGitignoreChange(path);
|
|
@@ -6767,7 +7187,12 @@ class FileSystemWatcher {
|
|
|
6767
7187
|
if (this.isGitignored(path))
|
|
6768
7188
|
return;
|
|
6769
7189
|
this.logger.debug({ path }, 'File removed');
|
|
6770
|
-
|
|
7190
|
+
if (correlator) {
|
|
7191
|
+
correlator.handleUnlink(path);
|
|
7192
|
+
}
|
|
7193
|
+
else {
|
|
7194
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
7195
|
+
}
|
|
6771
7196
|
});
|
|
6772
7197
|
this.watcher.on('ready', () => {
|
|
6773
7198
|
initialScanComplete = true;
|
|
@@ -6812,6 +7237,7 @@ class FileSystemWatcher {
|
|
|
6812
7237
|
* Stop the filesystem watcher.
|
|
6813
7238
|
*/
|
|
6814
7239
|
async stop() {
|
|
7240
|
+
this.moveCorrelator?.flush();
|
|
6815
7241
|
if (this.watcher) {
|
|
6816
7242
|
await this.watcher.close();
|
|
6817
7243
|
this.watcher = undefined;
|
|
@@ -6945,6 +7371,8 @@ class JeevesWatcher {
|
|
|
6945
7371
|
vectorStore;
|
|
6946
7372
|
embeddingProvider;
|
|
6947
7373
|
gitignoreFilter;
|
|
7374
|
+
enrichmentStore;
|
|
7375
|
+
contentHashCache;
|
|
6948
7376
|
initialScanTracker;
|
|
6949
7377
|
version;
|
|
6950
7378
|
/** Create a new JeevesWatcher instance. */
|
|
@@ -6985,9 +7413,13 @@ class JeevesWatcher {
|
|
|
6985
7413
|
const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
|
|
6986
7414
|
this.helperIntrospection = await introspectHelpers(this.config, configDir);
|
|
6987
7415
|
const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
|
|
6988
|
-
const stateDir = this.config.stateDir ??
|
|
7416
|
+
const stateDir = this.config.stateDir ?? '.jeeves-metadata';
|
|
6989
7417
|
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
6990
7418
|
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
7419
|
+
this.enrichmentStore = new EnrichmentStore(stateDir);
|
|
7420
|
+
const enrichmentStore = this.enrichmentStore;
|
|
7421
|
+
this.contentHashCache = new ContentHashCache();
|
|
7422
|
+
const contentHashCache = this.contentHashCache;
|
|
6991
7423
|
const processor = this.factories.createDocumentProcessor({
|
|
6992
7424
|
config: processorConfig,
|
|
6993
7425
|
embeddingProvider,
|
|
@@ -6995,8 +7427,10 @@ class JeevesWatcher {
|
|
|
6995
7427
|
compiledRules,
|
|
6996
7428
|
logger,
|
|
6997
7429
|
templateEngine,
|
|
7430
|
+
enrichmentStore,
|
|
6998
7431
|
issuesManager: this.issuesManager,
|
|
6999
7432
|
valuesManager: this.valuesManager,
|
|
7433
|
+
contentHashCache,
|
|
7000
7434
|
});
|
|
7001
7435
|
this.processor = processor;
|
|
7002
7436
|
this.queue = this.factories.createEventQueue({
|
|
@@ -7004,7 +7438,7 @@ class JeevesWatcher {
|
|
|
7004
7438
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
7005
7439
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
7006
7440
|
});
|
|
7007
|
-
const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker);
|
|
7441
|
+
const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker, contentHashCache);
|
|
7008
7442
|
this.watcher = watcher;
|
|
7009
7443
|
this.gitignoreFilter = gitignoreFilter;
|
|
7010
7444
|
this.server = await this.startApiServer();
|
|
@@ -7056,6 +7490,7 @@ class JeevesWatcher {
|
|
|
7056
7490
|
version: this.version,
|
|
7057
7491
|
initialScanTracker: this.initialScanTracker,
|
|
7058
7492
|
fileSystemWatcher: this.watcher,
|
|
7493
|
+
enrichmentStore: this.enrichmentStore,
|
|
7059
7494
|
});
|
|
7060
7495
|
await server.listen({
|
|
7061
7496
|
host: this.config.api?.host ?? '127.0.0.1',
|
|
@@ -7121,4 +7556,4 @@ class JeevesWatcher {
|
|
|
7121
7556
|
}
|
|
7122
7557
|
}
|
|
7123
7558
|
|
|
7124
|
-
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger,
|
|
7559
|
+
export { DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };
|