@karmaniverous/jeeves-watcher 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { join, dirname, resolve, relative, extname, basename, isAbsolute } from 'node:path';
2
2
  import Fastify from 'fastify';
3
- import { readdir, stat, writeFile, rm, readFile, mkdir } from 'node:fs/promises';
3
+ import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
4
4
  import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
5
5
  import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSync } from 'node:fs';
6
6
  import ignore from 'ignore';
@@ -19,12 +19,13 @@ import { toMarkdown } from 'mdast-util-to-markdown';
19
19
  import rehypeParse from 'rehype-parse';
20
20
  import { unified } from 'unified';
21
21
  import yaml from 'js-yaml';
22
- import { createHash } from 'node:crypto';
23
22
  import crypto from 'crypto';
24
23
  import { packageDirectorySync } from 'package-directory';
24
+ import Database from 'better-sqlite3';
25
25
  import chokidar from 'chokidar';
26
26
  import { cosmiconfig } from 'cosmiconfig';
27
27
  import https from 'node:https';
28
+ import { createHash } from 'node:crypto';
28
29
  import pino from 'pino';
29
30
  import { v5 } from 'uuid';
30
31
  import * as cheerio from 'cheerio';
@@ -995,6 +996,24 @@ const watchConfigSchema = z.object({
995
996
  .boolean()
996
997
  .optional()
997
998
  .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
999
+ /** Move detection configuration for correlating unlink+add as file moves. */
1000
+ moveDetection: z
1001
+ .object({
1002
+ /** Enable move correlation. Default: true. */
1003
+ enabled: z
1004
+ .boolean()
1005
+ .default(true)
1006
+ .describe('Enable move detection via content hash correlation.'),
1007
+ /** Buffer time in ms for holding unlink events before treating as deletes. Default: 2000. */
1008
+ bufferMs: z
1009
+ .number()
1010
+ .int()
1011
+ .min(100)
1012
+ .default(2000)
1013
+ .describe('How long (ms) to buffer unlink events before treating as deletes.'),
1014
+ })
1015
+ .optional()
1016
+ .describe('Move detection: correlate unlink+add events as file moves to avoid re-embedding.'),
998
1017
  });
999
1018
  /**
1000
1019
  * Configuration watch settings.
@@ -1303,18 +1322,13 @@ const jeevesWatcherConfigSchema = z.object({
1303
1322
  embedding: embeddingConfigSchema.describe('Embedding model configuration.'),
1304
1323
  /** Vector store configuration. */
1305
1324
  vectorStore: vectorStoreConfigSchema.describe('Qdrant vector store configuration.'),
1306
- /** Directory for persisted metadata. */
1307
- metadataDir: z
1308
- .string()
1309
- .optional()
1310
- .describe('Directory for persisted metadata sidecar files.'),
1311
1325
  /** API server configuration. */
1312
1326
  api: apiConfigSchema.optional().describe('API server configuration.'),
1313
- /** Directory for persistent state files (issues.json, values.json). Defaults to metadataDir. */
1327
+ /** Directory for persistent state files (issues.json, values.json, enrichments.sqlite). */
1314
1328
  stateDir: z
1315
1329
  .string()
1316
1330
  .optional()
1317
- .describe('Directory for persistent state files (issues.json, values.json). Defaults to metadataDir.'),
1331
+ .describe('Directory for persistent state files (issues.json, values.json, enrichments.sqlite). Defaults to .jeeves-metadata.'),
1318
1332
  /** Rules for inferring metadata from document properties (inline objects or file paths). */
1319
1333
  inferenceRules: z
1320
1334
  .array(z.union([inferenceRuleSchema, z.string()]))
@@ -3514,97 +3528,6 @@ function createPointsDeleteHandler(deps) {
3514
3528
  }, deps.logger, 'PointsDelete');
3515
3529
  }
3516
3530
 
3517
- /**
3518
- * @module util/normalizePath
3519
- * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
3520
- */
3521
- /**
3522
- * Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
3523
- *
3524
- * @param filePath - The original file path.
3525
- * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
3526
- * @returns The normalized path string.
3527
- */
3528
- function normalizePath(filePath, stripDriveLetter = false) {
3529
- let result = filePath.replace(/\\/g, '/').toLowerCase();
3530
- if (stripDriveLetter) {
3531
- result = result.replace(/^([a-z]):/, (_m, letter) => letter);
3532
- }
3533
- return result;
3534
- }
3535
-
3536
- /**
3537
- * @module metadata/metadata
3538
- * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
3539
- */
3540
- /**
3541
- * Derive a deterministic `.meta.json` path for a given file.
3542
- *
3543
- * @param filePath - The watched file path.
3544
- * @param metadataDir - The root metadata directory.
3545
- * @returns The full path to the metadata file.
3546
- */
3547
- function metadataPath(filePath, metadataDir) {
3548
- const normalised = normalizePath(filePath, true);
3549
- const hash = createHash('sha256').update(normalised, 'utf8').digest('hex');
3550
- return join(metadataDir, `${hash}.meta.json`);
3551
- }
3552
- /**
3553
- * Read persisted metadata for a file.
3554
- *
3555
- * @param filePath - The watched file path.
3556
- * @param metadataDir - The root metadata directory.
3557
- * @returns The parsed metadata object, or `null` if not found.
3558
- */
3559
- async function readMetadata(filePath, metadataDir) {
3560
- try {
3561
- const raw = await readFile(metadataPath(filePath, metadataDir), 'utf8');
3562
- return JSON.parse(raw);
3563
- }
3564
- catch {
3565
- return null;
3566
- }
3567
- }
3568
- /**
3569
- * Write metadata for a file.
3570
- *
3571
- * @param filePath - The watched file path.
3572
- * @param metadataDir - The root metadata directory.
3573
- * @param metadata - The metadata to persist.
3574
- */
3575
- async function writeMetadata(filePath, metadataDir, metadata) {
3576
- const dest = metadataPath(filePath, metadataDir);
3577
- await mkdir(dirname(dest), { recursive: true });
3578
- await writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
3579
- }
3580
- /**
3581
- * Delete metadata for a file.
3582
- *
3583
- * @param filePath - The watched file path.
3584
- * @param metadataDir - The root metadata directory.
3585
- */
3586
- async function deleteMetadata(filePath, metadataDir) {
3587
- try {
3588
- await rm(metadataPath(filePath, metadataDir));
3589
- }
3590
- catch {
3591
- // Ignore if file doesn't exist.
3592
- }
3593
- }
3594
-
3595
- /**
3596
- * @module metadata/constants
3597
- * Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
3598
- */
3599
- /** Keys managed by the indexing pipeline (not user enrichment). */
3600
- const SYSTEM_METADATA_KEYS = [
3601
- 'file_path',
3602
- 'chunk_index',
3603
- 'total_chunks',
3604
- 'content_hash',
3605
- 'chunk_text',
3606
- ];
3607
-
3608
3531
  /**
3609
3532
  * @module processor/payloadFields
3610
3533
  * Constants for Qdrant payload field names used across the processing pipeline.
@@ -3627,10 +3550,18 @@ const FIELD_MODIFIED_AT = 'modified_at';
3627
3550
  const FIELD_LINE_START = 'line_start';
3628
3551
  /** Qdrant payload field: 1-indexed line number where this chunk ends in the source file. */
3629
3552
  const FIELD_LINE_END = 'line_end';
3553
/**
 * Keys managed by the indexing pipeline (not user enrichment).
 *
 * Frozen because this array is a shared module-level constant: consumers that
 * need a mutable list must copy it (e.g. `[...SYSTEM_METADATA_KEYS]`).
 */
const SYSTEM_METADATA_KEYS = Object.freeze([
    FIELD_FILE_PATH,
    FIELD_CHUNK_INDEX,
    FIELD_TOTAL_CHUNKS,
    FIELD_CONTENT_HASH,
    FIELD_CHUNK_TEXT,
]);
3630
3561
 
3631
3562
  /**
3632
3563
  * @module api/handlers/rebuildMetadata
3633
- * Fastify route handler for POST /rebuild-metadata. Recreates enrichment metadata files from vector store payloads.
3564
+ * Fastify route handler for POST /rebuild-metadata. Rebuilds enrichment store from vector store payloads.
3634
3565
  */
3635
3566
  /**
3636
3567
  * Create handler for POST /rebuild-metadata.
@@ -3639,7 +3570,6 @@ const FIELD_LINE_END = 'line_end';
3639
3570
  */
3640
3571
  function createRebuildMetadataHandler(deps) {
3641
3572
  return wrapHandler(async (_request, reply) => {
3642
- const metadataDir = deps.metadataDir ?? '.jeeves-metadata';
3643
3573
  const systemKeys = [...SYSTEM_METADATA_KEYS];
3644
3574
  for await (const point of deps.vectorStore.scroll()) {
3645
3575
  const payload = point.payload;
@@ -3647,7 +3577,7 @@ function createRebuildMetadataHandler(deps) {
3647
3577
  if (typeof filePath !== 'string' || filePath.length === 0)
3648
3578
  continue;
3649
3579
  const enrichment = omit(payload, systemKeys);
3650
- await writeMetadata(filePath, metadataDir, enrichment);
3580
+ deps.enrichmentStore?.set(filePath, enrichment);
3651
3581
  }
3652
3582
  return await reply.status(200).send({ ok: true });
3653
3583
  }, deps.logger, 'Rebuild metadata');
@@ -4147,7 +4077,6 @@ async function buildTemplateEngineAndCustomMapLib(config, configDir) {
4147
4077
  */
4148
4078
  function createProcessorConfig(config, configDir, customMapLib) {
4149
4079
  return {
4150
- metadataDir: config.metadataDir ?? '.jeeves-metadata',
4151
4080
  chunkSize: config.embedding.chunkSize,
4152
4081
  chunkOverlap: config.embedding.chunkOverlap,
4153
4082
  maps: resolveMapsConfig(config.maps),
@@ -4159,7 +4088,7 @@ function createProcessorConfig(config, configDir, customMapLib) {
4159
4088
  /**
4160
4089
  * Create file system watcher with gitignore filtering.
4161
4090
  */
4162
- function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker) {
4091
+ function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker, contentHashCache) {
4163
4092
  const respectGitignore = config.watch.respectGitignore ?? true;
4164
4093
  const gitignoreFilter = respectGitignore
4165
4094
  ? new GitignoreFilter(config.watch.paths)
@@ -4170,6 +4099,7 @@ function createWatcher(config, factories, queue, processor, logger, runtimeOptio
4170
4099
  onFatalError: runtimeOptions.onFatalError,
4171
4100
  gitignoreFilter,
4172
4101
  initialScanTracker,
4102
+ contentHashCache,
4173
4103
  });
4174
4104
  return { watcher, gitignoreFilter };
4175
4105
  }
@@ -4506,7 +4436,7 @@ function createApiServer(options) {
4506
4436
  hybridConfig,
4507
4437
  }));
4508
4438
  app.post('/rebuild-metadata', createRebuildMetadataHandler({
4509
- metadataDir: config.metadataDir,
4439
+ enrichmentStore: options.enrichmentStore,
4510
4440
  vectorStore,
4511
4441
  logger,
4512
4442
  }));
@@ -4580,6 +4510,218 @@ function createApiServer(options) {
4580
4510
  return app;
4581
4511
  }
4582
4512
 
4513
+ /**
4514
+ * @module util/normalizePath
4515
+ * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
4516
+ */
4517
/**
 * Canonicalise a file path so that equivalent spellings map to the same key.
 *
 * Backslashes become forward slashes and the whole string is lowercased. When
 * `stripDriveLetter` is set, the colon following a leading drive letter is
 * dropped (e.g. `C:/x` becomes `c/x`).
 *
 * @param filePath - The original file path.
 * @param stripDriveLetter - Whether to strip the colon from a leading drive letter.
 * @returns The normalized path string.
 */
function normalizePath(filePath, stripDriveLetter = false) {
    const unified = filePath.split('\\').join('/').toLowerCase();
    if (!stripDriveLetter) {
        return unified;
    }
    // Only a single leading letter followed by ':' counts as a drive prefix.
    const hasDrivePrefix = /^[a-z]:/.test(unified);
    return hasDrivePrefix ? `${unified[0]}${unified.slice(2)}` : unified;
}
4531
+
4532
+ /**
4533
+ * @module cache/ContentHashCache
4534
+ * In-memory cache mapping normalized file paths to content hashes.
4535
+ * Supports reverse lookup by hash for move correlation.
4536
+ */
4537
/**
 * In-memory, two-way index between file paths and content hashes.
 *
 * Paths are normalized (lowercase, forward slashes, drive colon stripped)
 * before being used as keys. The forward map answers "what hash does this
 * path have?"; the reverse map answers "which paths share this hash?", which
 * is what move correlation needs.
 */
class ContentHashCache {
    pathToHash = new Map();
    hashToPaths = new Map();
    /**
     * Store or update the content hash for a file path.
     *
     * @param filePath - The file path (will be normalized).
     * @param hash - The content hash to associate with the path.
     */
    set(filePath, hash) {
        const key = normalizePath(filePath, true);
        const previous = this.pathToHash.get(key);
        // A changed hash must leave the bucket of the hash it used to have.
        if (previous !== undefined && previous !== hash) {
            this.#unindex(previous, key);
        }
        this.pathToHash.set(key, hash);
        const bucket = this.hashToPaths.get(hash);
        if (bucket) {
            bucket.add(key);
        }
        else {
            this.hashToPaths.set(hash, new Set([key]));
        }
    }
    /**
     * Get the content hash for a file path.
     *
     * @param filePath - The file path (will be normalized).
     * @returns The content hash, or undefined if not cached.
     */
    get(filePath) {
        const key = normalizePath(filePath, true);
        return this.pathToHash.get(key);
    }
    /**
     * Remove a file path from the cache. Unknown paths are ignored.
     *
     * @param filePath - The file path (will be normalized).
     */
    delete(filePath) {
        const key = normalizePath(filePath, true);
        const hash = this.pathToHash.get(key);
        if (hash === undefined) {
            return;
        }
        this.pathToHash.delete(key);
        this.#unindex(hash, key);
    }
    /**
     * Reverse lookup: get all file paths with a given content hash.
     *
     * @param hash - The content hash to look up.
     * @returns A fresh array of normalized file paths with that hash.
     */
    getByHash(hash) {
        return Array.from(this.hashToPaths.get(hash) ?? []);
    }
    /** Number of cached entries. */
    get size() {
        return this.pathToHash.size;
    }
    /** Drop `key` from the bucket of `hash`, discarding the bucket once empty. */
    #unindex(hash, key) {
        const bucket = this.hashToPaths.get(hash);
        if (!bucket) {
            return;
        }
        bucket.delete(key);
        if (bucket.size === 0) {
            this.hashToPaths.delete(hash);
        }
    }
}
4615
+
4616
+ /**
4617
+ * @module enrichment/EnrichmentStore
4618
+ * SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
4619
+ */
4620
/**
 * SQLite-backed enrichment metadata store.
 *
 * Keeps one row per normalized file path in `<stateDir>/enrichments.sqlite`.
 * The `metadata` column holds a JSON-serialized object; `created_at` and
 * `updated_at` hold ISO-8601 timestamps.
 */
class EnrichmentStore {
    db;
    /**
     * Create or open the enrichment store.
     *
     * Creates `stateDir` if needed, opens the database in WAL journal mode and
     * ensures the `enrichments` table exists.
     *
     * @param stateDir - Directory for the SQLite database file.
     */
    constructor(stateDir) {
        mkdirSync(stateDir, { recursive: true });
        const dbPath = join(stateDir, 'enrichments.sqlite');
        this.db = new Database(dbPath);
        this.db.pragma('journal_mode = WAL');
        this.db.exec(`
            CREATE TABLE IF NOT EXISTS enrichments (
                path TEXT PRIMARY KEY,
                metadata TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
        `);
    }
    /**
     * Read the enrichment metadata stored for a path.
     *
     * @param path - The file path (will be normalized).
     * @returns The parsed metadata, or `null` when no row exists.
     */
    get(path) {
        const normalized = normalizePath(path);
        const row = this.db
            .prepare('SELECT metadata FROM enrichments WHERE path = ?')
            .get(normalized);
        if (!row)
            return null;
        return JSON.parse(row.metadata);
    }
    /**
     * Shallow-merge `metadata` into whatever is stored for `path` and persist it.
     *
     * The write is a single UPSERT, so the insert-vs-update decision is made by
     * SQLite from the primary key rather than from the truthiness of the parsed
     * JSON (a stored `null` previously caused a failing INSERT). `created_at`
     * is only written when the row is first created.
     *
     * @param path - The file path (will be normalized).
     * @param metadata - Metadata keys to set; they overwrite stored keys of the same name.
     */
    set(path, metadata) {
        const normalized = normalizePath(path);
        const now = new Date().toISOString();
        const existing = this.get(path);
        const merged = existing ? { ...existing, ...metadata } : metadata;
        const json = JSON.stringify(merged);
        this.db
            .prepare(`
                INSERT INTO enrichments (path, metadata, created_at, updated_at)
                VALUES (?, ?, ?, ?)
                ON CONFLICT(path) DO UPDATE SET
                    metadata = excluded.metadata,
                    updated_at = excluded.updated_at
            `)
            .run(normalized, json, now, now);
    }
    /**
     * Remove the enrichment row for a path, if any.
     *
     * @param path - The file path (will be normalized).
     */
    delete(path) {
        const normalized = normalizePath(path);
        this.db.prepare('DELETE FROM enrichments WHERE path = ?').run(normalized);
    }
    /**
     * Re-key the enrichment row of `oldPath` to `newPath`.
     *
     * Uses `UPDATE OR REPLACE` so that a row already stored under the
     * destination path is replaced instead of aborting the move with a
     * PRIMARY KEY constraint error. Does nothing when `oldPath` has no row.
     *
     * @param oldPath - The path the row is currently stored under.
     * @param newPath - The path the row should be stored under.
     */
    move(oldPath, newPath) {
        const normalizedOld = normalizePath(oldPath);
        const normalizedNew = normalizePath(newPath);
        const now = new Date().toISOString();
        this.db
            .prepare('UPDATE OR REPLACE enrichments SET path = ?, updated_at = ? WHERE path = ?')
            .run(normalizedNew, now, normalizedOld);
    }
    /**
     * List every stored path.
     *
     * @returns Normalized paths in ascending order.
     */
    list() {
        const rows = this.db
            .prepare('SELECT path FROM enrichments ORDER BY path')
            .all();
        return rows.map((r) => r.path);
    }
    /** Close the underlying database handle. */
    close() {
        this.db.close();
    }
}
4692
+
4693
+ /**
4694
+ * @module enrichment/merge
4695
+ * Composable merge for inferred + enrichment metadata. Scalars: enrichment overwrites. Arrays: union + deduplicate. No I/O.
4696
+ */
4697
/**
 * Combine inferred metadata with enrichment metadata.
 *
 * For every key present in `enrichment`:
 * - when both sides hold arrays, the result is the inferred items followed by
 *   the enrichment items, with duplicates removed;
 * - otherwise the enrichment value replaces the inferred one.
 *
 * Keys only present in `inferred` are carried over. Neither input is mutated.
 *
 * @param inferred - Metadata derived from inference rules.
 * @param enrichment - Human/agent-provided enrichment metadata.
 * @returns Merged metadata.
 */
function mergeEnrichment(inferred, enrichment) {
    const merged = { ...inferred };
    for (const field of Object.keys(enrichment)) {
        const incoming = enrichment[field];
        const current = merged[field];
        if (!Array.isArray(current) || !Array.isArray(incoming)) {
            merged[field] = incoming;
            continue;
        }
        // Set insertion order keeps inferred items first, then new enrichment items.
        const union = new Set(current);
        for (const item of incoming) {
            union.add(item);
        }
        merged[field] = Array.from(union);
    }
    return merged;
}
4724
+
4583
4725
  /**
4584
4726
  * @module util/JsonFileStore
4585
4727
  * Small base class for JSON-backed read/modify/write stores with in-memory caching.
@@ -4825,7 +4967,7 @@ class ConfigWatcher {
4825
4967
  */
4826
4968
  /** Default root-level config values. */
4827
4969
  const ROOT_DEFAULTS = {
4828
- metadataDir: '.jeeves-watcher',
4970
+ stateDir: '.jeeves-metadata',
4829
4971
  shutdownTimeoutMs: 10000,
4830
4972
  };
4831
4973
  /** Default configWatch values. */
@@ -5197,7 +5339,7 @@ function createLogger(config) {
5197
5339
 
5198
5340
  /**
5199
5341
  * @module hash
5200
- * Provides SHA-256 content hashing. Pure function: given text string, returns hex digest. No I/O or side effects.
5342
+ * Provides SHA-256 content hashing. Pure functions: text hash and file hash. File hash does I/O.
5201
5343
  */
5202
5344
  /**
5203
5345
  * Compute a SHA-256 hex digest of the given text.
@@ -5208,6 +5350,16 @@ function createLogger(config) {
5208
5350
  function contentHash(text) {
5209
5351
  return createHash('sha256').update(text, 'utf8').digest('hex');
5210
5352
  }
5353
/**
 * Hash the raw bytes of a file with SHA-256.
 *
 * The file is read fully into memory before hashing. Read errors (for example
 * a missing file) reject the returned promise.
 *
 * @param filePath - Path to the file.
 * @returns The hex-encoded SHA-256 hash.
 */
async function fileHash(filePath) {
    const bytes = await readFile(filePath);
    const hasher = createHash('sha256');
    hasher.update(bytes);
    return hasher.digest('hex');
}
5211
5363
 
5212
5364
  /**
5213
5365
  * @module pointId
@@ -5369,7 +5521,7 @@ async function extractText(filePath, extension, additionalExtractors) {
5369
5521
 
5370
5522
  /**
5371
5523
  * @module processor/buildMetadata
5372
- * Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
5524
+ * Builds merged metadata from file content, inference rules, and enrichment store. I/O: reads files, extracts text, queries SQLite enrichment.
5373
5525
  */
5374
5526
  /**
5375
5527
  * Build merged metadata for a file by applying inference rules and merging with enrichment metadata.
@@ -5378,7 +5530,7 @@ async function extractText(filePath, extension, additionalExtractors) {
5378
5530
  * @returns The merged metadata and intermediate data.
5379
5531
  */
5380
5532
  async function buildMergedMetadata(options) {
5381
- const { filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
5533
+ const { filePath, compiledRules, enrichmentStore, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
5382
5534
  const ext = extname(filePath);
5383
5535
  const stats = await stat(filePath);
5384
5536
  // 1. Extract text and structured data
@@ -5393,12 +5545,11 @@ async function buildMergedMetadata(options) {
5393
5545
  customMapLib,
5394
5546
  globalSchemas,
5395
5547
  });
5396
- // 3. Read enrichment metadata (merge, enrichment wins)
5397
- const enrichment = await readMetadata(filePath, metadataDir);
5398
- const metadata = {
5399
- ...inferred,
5400
- ...(enrichment ?? {}),
5401
- };
5548
+ // 3. Read enrichment metadata from store (composable merge)
5549
+ const enrichment = enrichmentStore?.get(filePath) ?? null;
5550
+ const metadata = enrichment
5551
+ ? mergeEnrichment(inferred, enrichment)
5552
+ : { ...inferred };
5402
5553
  return {
5403
5554
  inferred,
5404
5555
  enrichment,
@@ -5587,22 +5738,26 @@ class DocumentProcessor {
5587
5738
  compiledRules;
5588
5739
  logger;
5589
5740
  templateEngine;
5741
+ enrichmentStore;
5590
5742
  issuesManager;
5591
5743
  valuesManager;
5744
+ contentHashCache;
5592
5745
  /**
5593
5746
  * Create a new DocumentProcessor.
5594
5747
  *
5595
5748
  * @param deps - The processor dependencies.
5596
5749
  */
5597
- constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, issuesManager, valuesManager, }) {
5750
+ constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, enrichmentStore, issuesManager, valuesManager, contentHashCache, }) {
5598
5751
  this.config = config;
5599
5752
  this.embeddingProvider = embeddingProvider;
5600
5753
  this.vectorStore = vectorStore;
5601
5754
  this.compiledRules = compiledRules;
5602
5755
  this.logger = logger;
5603
5756
  this.templateEngine = templateEngine;
5757
+ this.enrichmentStore = enrichmentStore;
5604
5758
  this.issuesManager = issuesManager;
5605
5759
  this.valuesManager = valuesManager;
5760
+ this.contentHashCache = contentHashCache;
5606
5761
  }
5607
5762
  /**
5608
5763
  * Build merged metadata for a file and add matched_rules.
@@ -5611,7 +5766,7 @@ class DocumentProcessor {
5611
5766
  const result = await buildMergedMetadata({
5612
5767
  filePath,
5613
5768
  compiledRules: this.compiledRules,
5614
- metadataDir: this.config.metadataDir,
5769
+ enrichmentStore: this.enrichmentStore,
5615
5770
  maps: this.config.maps,
5616
5771
  logger: this.logger,
5617
5772
  templateEngine: this.templateEngine,
@@ -5658,6 +5813,9 @@ class DocumentProcessor {
5658
5813
  this.logger.debug({ filePath }, 'Skipping empty file');
5659
5814
  return;
5660
5815
  }
5816
+ // Compute file-level hash for move correlation cache.
5817
+ const rawHash = await fileHash(filePath);
5818
+ this.contentHashCache?.set(filePath, rawHash);
5661
5819
  const hash = contentHash(textToEmbed);
5662
5820
  const baseId = pointId(filePath, 0);
5663
5821
  const existingPayload = await this.vectorStore.getPayload(baseId);
@@ -5694,12 +5852,13 @@ class DocumentProcessor {
5694
5852
  const totalChunks = getChunkCount(existingPayload);
5695
5853
  const ids = chunkIds(filePath, totalChunks);
5696
5854
  await this.vectorStore.delete(ids);
5697
- await deleteMetadata(filePath, this.config.metadataDir);
5855
+ this.enrichmentStore?.delete(filePath);
5856
+ this.contentHashCache?.delete(filePath);
5698
5857
  this.logger.info({ filePath }, 'File deleted from index');
5699
5858
  }, undefined);
5700
5859
  }
5701
5860
  /**
5702
- * Process a metadata update: merge metadata, write to disk, update Qdrant payloads (no re-embed).
5861
+ * Process a metadata update: merge into enrichment store, update Qdrant payloads (no re-embed).
5703
5862
  *
5704
5863
  * @param filePath - The file whose metadata to update.
5705
5864
  * @param metadata - The new metadata to merge.
@@ -5707,9 +5866,8 @@ class DocumentProcessor {
5707
5866
  */
5708
5867
  async processMetadataUpdate(filePath, metadata) {
5709
5868
  return this.withFileErrorHandling(filePath, 'Failed to update metadata', async () => {
5710
- const existing = (await readMetadata(filePath, this.config.metadataDir)) ?? {};
5711
- const merged = { ...existing, ...metadata };
5712
- await writeMetadata(filePath, this.config.metadataDir, merged);
5869
+ this.enrichmentStore?.set(filePath, metadata);
5870
+ const merged = this.enrichmentStore?.get(filePath) ?? metadata;
5713
5871
  const baseId = pointId(filePath, 0);
5714
5872
  const existingPayload = await this.vectorStore.getPayload(baseId);
5715
5873
  if (!existingPayload)
@@ -5771,6 +5929,56 @@ class DocumentProcessor {
5771
5929
  transformed: renderedContent !== null,
5772
5930
  };
5773
5931
  }
5932
+ /**
5933
+ * Move a file's vector points from old path to new path without re-embedding.
5934
+ * Re-applies inference rules against the new path.
5935
+ *
5936
+ * @param oldPath - The original file path.
5937
+ * @param newPath - The new file path.
5938
+ */
5939
+ async moveFile(oldPath, newPath) {
5940
+ await this.withFileErrorHandling(newPath, 'Failed to move file', async () => {
5941
+ const baseId = pointId(oldPath, 0);
5942
+ const existingPayload = await this.vectorStore.getPayload(baseId);
5943
+ const totalChunks = getChunkCount(existingPayload);
5944
+ const oldIds = chunkIds(oldPath, totalChunks);
5945
+ const oldPoints = await this.vectorStore.getPointsWithVectors(oldIds);
5946
+ if (oldPoints.length === 0) {
5947
+ this.logger.warn({ oldPath, newPath }, 'No points found for move');
5948
+ return;
5949
+ }
5950
+ // Build new metadata from inference rules against the new path.
5951
+ const { metadataWithRules, matchedRules, metadata } = await this.buildMetadataWithRules(newPath);
5952
+ // Create new points with updated IDs and file_path payload.
5953
+ const newPoints = oldPoints.map((pt, i) => ({
5954
+ id: pointId(newPath, i),
5955
+ vector: pt.vector,
5956
+ payload: {
5957
+ ...pt.payload,
5958
+ ...metadataWithRules,
5959
+ [FIELD_FILE_PATH]: normalizeSlashes(newPath),
5960
+ },
5961
+ }));
5962
+ await this.vectorStore.upsert(newPoints);
5963
+ await this.vectorStore.delete(oldIds);
5964
+ // Migrate enrichment and clear old issues.
5965
+ this.enrichmentStore?.move(oldPath, newPath);
5966
+ this.issuesManager?.clear(oldPath);
5967
+ // Update values index for the new path's matched rules.
5968
+ if (this.valuesManager) {
5969
+ for (const ruleName of matchedRules) {
5970
+ this.valuesManager.update(ruleName, metadata);
5971
+ }
5972
+ }
5973
+ // Update content hash cache.
5974
+ const oldHash = this.contentHashCache?.get(oldPath);
5975
+ if (oldHash) {
5976
+ this.contentHashCache?.set(newPath, oldHash);
5977
+ }
5978
+ this.contentHashCache?.delete(oldPath);
5979
+ this.logger.info({ oldPath, newPath, chunks: oldPoints.length }, 'File moved in index');
5980
+ }, undefined);
5981
+ }
5774
5982
  /**
5775
5983
  * Update compiled inference rules, template engine, and custom map lib.
5776
5984
  *
@@ -6429,6 +6637,33 @@ class VectorStoreClient {
6429
6637
  async hybridSearch(vector, queryText, limit, textWeight, filter) {
6430
6638
  return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
6431
6639
  }
6640
+ /**
6641
+ * Retrieve points with their vectors by ID.
6642
+ *
6643
+ * @param ids - The point IDs to retrieve.
6644
+ * @returns Points with vectors and payloads; missing IDs are omitted.
6645
+ */
6646
+ async getPointsWithVectors(ids) {
6647
+ if (ids.length === 0)
6648
+ return [];
6649
+ try {
6650
+ const results = await this.client.retrieve(this.collectionName, {
6651
+ ids,
6652
+ with_payload: true,
6653
+ with_vector: true,
6654
+ });
6655
+ return results
6656
+ .filter((r) => r.vector != null)
6657
+ .map((r) => ({
6658
+ id: String(r.id),
6659
+ vector: r.vector,
6660
+ payload: r.payload,
6661
+ }));
6662
+ }
6663
+ catch {
6664
+ return [];
6665
+ }
6666
+ }
6432
6667
  /**
6433
6668
  * Scroll one page of points matching a filter.
6434
6669
  *
@@ -6636,6 +6871,163 @@ function resolveIgnored(ignored) {
6636
6871
  });
6637
6872
  }
6638
6873
 
6874
+ /**
6875
+ * @module watcher/MoveCorrelator
6876
+ * Correlates unlink+add events as file moves using content hash matching.
6877
+ * Buffers unlink events and matches against subsequent add events.
6878
+ */
6879
/**
 * Correlates unlink+add file system events as moves using content hash matching.
 *
 * An unlink whose path has a cached content hash is held in a buffer for a
 * limited time. If an add with the same file hash arrives while it is held,
 * the pair is reported through `onMove`; otherwise the unlink is reported
 * through `onDelete` once its timer fires.
 *
 * When move detection is disabled, events pass straight through.
 */
class MoveCorrelator {
    enabled;
    bufferMs;
    cache;
    logger;
    onMove;
    onDelete;
    onCreate;
    /** Buffered unlinks indexed by content hash (FIFO per hash). */
    buffer = new Map();
    /** Track unlink burst rate per parent directory for bulk mode. */
    burstCounters = new Map();
    /** Threshold: if N+ unlinks from same parent in burstWindowMs, extend buffer. */
    static BURST_THRESHOLD = 5;
    static BURST_WINDOW_MS = 500;
    static BURST_MULTIPLIER = 3;
    /**
     * @param options - `enabled`, `bufferMs`, `contentHashCache`, `logger` and the
     *   `onMove(oldPath, newPath)`, `onDelete(path)`, `onCreate(path)` callbacks.
     */
    constructor(options) {
        this.enabled = options.enabled;
        this.bufferMs = options.bufferMs;
        this.cache = options.contentHashCache;
        this.logger = options.logger;
        this.onMove = options.onMove;
        this.onDelete = options.onDelete;
        this.onCreate = options.onCreate;
    }
    /**
     * Handle an unlink event. Buffers the event for correlation.
     *
     * Paths without a cached hash cannot be correlated and are reported as
     * deletes immediately.
     *
     * @param path - The removed file path.
     */
    handleUnlink(path) {
        if (!this.enabled) {
            this.onDelete(path);
            return;
        }
        const hash = this.cache.get(path);
        if (!hash) {
            this.logger.debug({ path }, 'No cached hash for unlinked file, treating as delete');
            this.onDelete(path);
            return;
        }
        const timeoutMs = this.getEffectiveTimeout(path);
        const timer = setTimeout(() => {
            this.expireUnlink(hash, path);
        }, timeoutMs);
        const entry = {
            path,
            hash,
            timestamp: Date.now(),
            timer,
        };
        let entries = this.buffer.get(hash);
        if (!entries) {
            entries = [];
            this.buffer.set(hash, entries);
        }
        entries.push(entry);
        this.logger.debug({ path, hash: hash.slice(0, 12), timeoutMs }, 'Buffered unlink for move correlation');
    }
    /**
     * Handle an add event. Checks buffer for matching unlink (move detection).
     *
     * If the file cannot be hashed, the event is reported as a create. If the
     * matching buffered unlink is for the very same path, the file was removed
     * and re-added in place: the pending delete is cancelled and the event is
     * reported as a create rather than as a move onto itself.
     *
     * @param path - The added file path.
     */
    async handleAdd(path) {
        if (!this.enabled) {
            this.onCreate(path);
            return;
        }
        let hash;
        try {
            hash = await fileHash(path);
        }
        catch (err) {
            this.logger.debug({ path, err }, 'Could not hash added file, treating as create');
            this.onCreate(path);
            return;
        }
        const entries = this.buffer.get(hash);
        if (!entries || entries.length === 0) {
            this.onCreate(path);
            return;
        }
        // Prefer an unlink of the same path (in-place re-add); otherwise FIFO:
        // consume the oldest matching unlink.
        const samePathIdx = entries.findIndex((e) => e.path === path);
        const [matched] = entries.splice(samePathIdx >= 0 ? samePathIdx : 0, 1);
        clearTimeout(matched.timer);
        if (entries.length === 0)
            this.buffer.delete(hash);
        if (samePathIdx >= 0) {
            this.logger.debug({ path, hash: hash.slice(0, 12) }, 'File re-added at same path, cancelling buffered unlink');
            this.onCreate(path);
            return;
        }
        this.logger.info({ oldPath: matched.path, newPath: path }, 'Move detected');
        this.onMove(matched.path, path);
    }
    /**
     * Flush all buffered unlinks as deletes. Call on shutdown.
     */
    flush() {
        for (const [, entries] of this.buffer) {
            for (const entry of entries) {
                clearTimeout(entry.timer);
                this.onDelete(entry.path);
            }
        }
        this.buffer.clear();
        this.burstCounters.clear();
    }
    /** Number of currently buffered unlink events. */
    get pendingCount() {
        let count = 0;
        for (const [, entries] of this.buffer) {
            count += entries.length;
        }
        return count;
    }
    /**
     * Get effective timeout, applying burst detection for bulk moves.
     *
     * Counts unlinks per parent directory inside a window of BURST_WINDOW_MS.
     * From the BURST_THRESHOLD-th unlink of a window onwards the buffer time is
     * multiplied by BURST_MULTIPLIER.
     *
     * @param path - The removed file path.
     * @returns The buffer time in milliseconds to use for this unlink.
     */
    getEffectiveTimeout(path) {
        const parentDir = dirname(path);
        const now = Date.now();
        let counter = this.burstCounters.get(parentDir);
        if (!counter || now - counter.firstTs > MoveCorrelator.BURST_WINDOW_MS) {
            // Starting a new window: drop every expired window so the map does
            // not keep one entry per directory ever seen.
            for (const [dir, stale] of this.burstCounters) {
                if (now - stale.firstTs > MoveCorrelator.BURST_WINDOW_MS) {
                    this.burstCounters.delete(dir);
                }
            }
            counter = { count: 0, firstTs: now };
            this.burstCounters.set(parentDir, counter);
        }
        counter.count++;
        if (counter.count >= MoveCorrelator.BURST_THRESHOLD) {
            return this.bufferMs * MoveCorrelator.BURST_MULTIPLIER;
        }
        return this.bufferMs;
    }
    /**
     * Handle a buffered unlink timeout — emit as delete.
     *
     * Does nothing when the entry is no longer buffered (already matched as a
     * move or flushed), so a path is never reported as deleted twice.
     *
     * @param hash - The content hash the unlink was buffered under.
     * @param path - The removed file path.
     */
    expireUnlink(hash, path) {
        const entries = this.buffer.get(hash);
        const idx = entries ? entries.findIndex((e) => e.path === path) : -1;
        if (idx < 0) {
            return;
        }
        entries.splice(idx, 1);
        if (entries.length === 0)
            this.buffer.delete(hash);
        this.logger.debug({ path, hash: hash.slice(0, 12) }, 'Buffered unlink expired, treating as delete');
        this.onDelete(path);
    }
}
7030
+
6639
7031
  /**
6640
7032
  * @module watcher
6641
7033
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -6651,6 +7043,8 @@ class FileSystemWatcher {
6651
7043
  health;
6652
7044
  gitignoreFilter;
6653
7045
  initialScanTracker;
7046
+ contentHashCache;
7047
+ moveCorrelator;
6654
7048
  globMatches;
6655
7049
  watcher;
6656
7050
  /**
@@ -6669,6 +7063,7 @@ class FileSystemWatcher {
6669
7063
  this.logger = logger;
6670
7064
  this.gitignoreFilter = options.gitignoreFilter;
6671
7065
  this.initialScanTracker = options.initialScanTracker;
7066
+ this.contentHashCache = options.contentHashCache;
6672
7067
  this.globMatches = () => true;
6673
7068
  const healthOptions = {
6674
7069
  maxRetries: options.maxRetries,
@@ -6714,6 +7109,26 @@ class FileSystemWatcher {
6714
7109
  }
6715
7110
  }
6716
7111
  };
7112
+ // Create move correlator if move detection is configured and cache is available.
7113
+ const moveConfig = this.config.moveDetection;
7114
+ if (moveConfig?.enabled && this.contentHashCache) {
7115
+ this.moveCorrelator = new MoveCorrelator({
7116
+ enabled: true,
7117
+ bufferMs: moveConfig.bufferMs,
7118
+ contentHashCache: this.contentHashCache,
7119
+ logger: this.logger,
7120
+ onMove: (oldPath, newPath) => {
7121
+ this.queue.enqueue({ type: 'move', path: newPath, oldPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.moveFile(oldPath, newPath)));
7122
+ },
7123
+ onDelete: (deletedPath) => {
7124
+ this.queue.enqueue({ type: 'delete', path: deletedPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(deletedPath)));
7125
+ },
7126
+ onCreate: (createdPath) => {
7127
+ this.queue.enqueue({ type: 'create', path: createdPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(createdPath)));
7128
+ },
7129
+ });
7130
+ }
7131
+ const correlator = this.moveCorrelator;
6717
7132
  this.watcher = chokidar.watch(roots, {
6718
7133
  ignored,
6719
7134
  usePolling: this.config.usePolling,
@@ -6747,7 +7162,12 @@ class FileSystemWatcher {
6747
7162
  this.initialScanTracker?.incrementEnqueued();
6748
7163
  }
6749
7164
  this.logger.debug({ path }, 'File added');
6750
- this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
7165
+ if (correlator && initialScanComplete) {
7166
+ void correlator.handleAdd(path);
7167
+ }
7168
+ else {
7169
+ this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
7170
+ }
6751
7171
  });
6752
7172
  this.watcher.on('change', (path) => {
6753
7173
  this.handleGitignoreChange(path);
@@ -6767,7 +7187,12 @@ class FileSystemWatcher {
6767
7187
  if (this.isGitignored(path))
6768
7188
  return;
6769
7189
  this.logger.debug({ path }, 'File removed');
6770
- this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
7190
+ if (correlator) {
7191
+ correlator.handleUnlink(path);
7192
+ }
7193
+ else {
7194
+ this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
7195
+ }
6771
7196
  });
6772
7197
  this.watcher.on('ready', () => {
6773
7198
  initialScanComplete = true;
@@ -6812,6 +7237,7 @@ class FileSystemWatcher {
6812
7237
  * Stop the filesystem watcher.
6813
7238
  */
6814
7239
  async stop() {
7240
+ this.moveCorrelator?.flush();
6815
7241
  if (this.watcher) {
6816
7242
  await this.watcher.close();
6817
7243
  this.watcher = undefined;
@@ -6945,6 +7371,8 @@ class JeevesWatcher {
6945
7371
  vectorStore;
6946
7372
  embeddingProvider;
6947
7373
  gitignoreFilter;
7374
+ enrichmentStore;
7375
+ contentHashCache;
6948
7376
  initialScanTracker;
6949
7377
  version;
6950
7378
  /** Create a new JeevesWatcher instance. */
@@ -6985,9 +7413,13 @@ class JeevesWatcher {
6985
7413
  const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
6986
7414
  this.helperIntrospection = await introspectHelpers(this.config, configDir);
6987
7415
  const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
6988
- const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
7416
+ const stateDir = this.config.stateDir ?? '.jeeves-metadata';
6989
7417
  this.issuesManager = new IssuesManager(stateDir, logger);
6990
7418
  this.valuesManager = new ValuesManager(stateDir, logger);
7419
+ this.enrichmentStore = new EnrichmentStore(stateDir);
7420
+ const enrichmentStore = this.enrichmentStore;
7421
+ this.contentHashCache = new ContentHashCache();
7422
+ const contentHashCache = this.contentHashCache;
6991
7423
  const processor = this.factories.createDocumentProcessor({
6992
7424
  config: processorConfig,
6993
7425
  embeddingProvider,
@@ -6995,8 +7427,10 @@ class JeevesWatcher {
6995
7427
  compiledRules,
6996
7428
  logger,
6997
7429
  templateEngine,
7430
+ enrichmentStore,
6998
7431
  issuesManager: this.issuesManager,
6999
7432
  valuesManager: this.valuesManager,
7433
+ contentHashCache,
7000
7434
  });
7001
7435
  this.processor = processor;
7002
7436
  this.queue = this.factories.createEventQueue({
@@ -7004,7 +7438,7 @@ class JeevesWatcher {
7004
7438
  concurrency: this.config.embedding.concurrency ?? 5,
7005
7439
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
7006
7440
  });
7007
- const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker);
7441
+ const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker, contentHashCache);
7008
7442
  this.watcher = watcher;
7009
7443
  this.gitignoreFilter = gitignoreFilter;
7010
7444
  this.server = await this.startApiServer();
@@ -7056,6 +7490,7 @@ class JeevesWatcher {
7056
7490
  version: this.version,
7057
7491
  initialScanTracker: this.initialScanTracker,
7058
7492
  fileSystemWatcher: this.watcher,
7493
+ enrichmentStore: this.enrichmentStore,
7059
7494
  });
7060
7495
  await server.listen({
7061
7496
  host: this.config.api?.host ?? '127.0.0.1',
@@ -7121,4 +7556,4 @@ class JeevesWatcher {
7121
7556
  }
7122
7557
  }
7123
7558
 
7124
- export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
7559
+ export { DocumentProcessor, EnrichmentStore, EventQueue, FileSystemWatcher, GitignoreFilter, InitialScanTracker, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, VirtualRuleStore, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, mergeEnrichment, pointId, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema };