@karmaniverous/jeeves-watcher 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,11 @@ import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSy
5
5
  import { join, dirname, resolve, relative, extname, basename, isAbsolute } from 'node:path';
6
6
  import { pathToFileURL, fileURLToPath } from 'node:url';
7
7
  import { packageDirectorySync } from 'package-directory';
8
- import { readdir, stat, writeFile, rm, readFile, mkdir } from 'node:fs/promises';
8
+ import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
9
9
  import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
10
10
  import ignore from 'ignore';
11
11
  import picomatch from 'picomatch';
12
+ import Database from 'better-sqlite3';
12
13
  import { z, ZodError } from 'zod';
13
14
  import Ajv from 'ajv';
14
15
  import addFormats from 'ajv-formats';
@@ -24,10 +25,10 @@ import { toMarkdown } from 'mdast-util-to-markdown';
24
25
  import rehypeParse from 'rehype-parse';
25
26
  import { unified } from 'unified';
26
27
  import yaml from 'js-yaml';
27
- import { createHash } from 'node:crypto';
28
28
  import crypto from 'crypto';
29
29
  import { cosmiconfig } from 'cosmiconfig';
30
30
  import https from 'node:https';
31
+ import { createHash } from 'node:crypto';
31
32
  import pino from 'pino';
32
33
  import { v5 } from 'uuid';
33
34
  import * as cheerio from 'cheerio';
@@ -1019,6 +1020,218 @@ class InitialScanTracker {
1019
1020
  }
1020
1021
  }
1021
1022
 
1023
+ /**
1024
+ * @module util/normalizePath
1025
+ * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
1026
+ */
1027
+ /**
1028
+ * Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
1029
+ *
1030
+ * @param filePath - The original file path.
1031
+ * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
1032
+ * @returns The normalized path string.
1033
+ */
1034
+ function normalizePath(filePath, stripDriveLetter = false) {
1035
+ let result = filePath.replace(/\\/g, '/').toLowerCase();
1036
+ if (stripDriveLetter) {
1037
+ result = result.replace(/^([a-z]):/, (_m, letter) => letter);
1038
+ }
1039
+ return result;
1040
+ }
1041
+
1042
+ /**
1043
+ * @module cache/ContentHashCache
1044
+ * In-memory cache mapping normalized file paths to content hashes.
1045
+ * Supports reverse lookup by hash for move correlation.
1046
+ */
1047
+ /**
1048
+ * In-memory content hash cache for move detection.
1049
+ *
1050
+ * Maps normalized file paths to SHA-256 content hashes.
1051
+ * Supports reverse lookup (hash → paths) for correlating
1052
+ * unlink+add events as file moves.
1053
+ */
1054
+ class ContentHashCache {
1055
+ pathToHash = new Map();
1056
+ hashToPaths = new Map();
1057
+ /**
1058
+ * Store or update the content hash for a file path.
1059
+ *
1060
+ * @param filePath - The file path (will be normalized).
1061
+ * @param hash - The SHA-256 content hash.
1062
+ */
1063
+ set(filePath, hash) {
1064
+ const normalized = normalizePath(filePath, true);
1065
+ const oldHash = this.pathToHash.get(normalized);
1066
+ // Remove from old hash index if hash changed
1067
+ if (oldHash !== undefined && oldHash !== hash) {
1068
+ const oldPaths = this.hashToPaths.get(oldHash);
1069
+ if (oldPaths) {
1070
+ oldPaths.delete(normalized);
1071
+ if (oldPaths.size === 0)
1072
+ this.hashToPaths.delete(oldHash);
1073
+ }
1074
+ }
1075
+ this.pathToHash.set(normalized, hash);
1076
+ let paths = this.hashToPaths.get(hash);
1077
+ if (!paths) {
1078
+ paths = new Set();
1079
+ this.hashToPaths.set(hash, paths);
1080
+ }
1081
+ paths.add(normalized);
1082
+ }
1083
+ /**
1084
+ * Get the content hash for a file path.
1085
+ *
1086
+ * @param filePath - The file path (will be normalized).
1087
+ * @returns The content hash, or undefined if not cached.
1088
+ */
1089
+ get(filePath) {
1090
+ return this.pathToHash.get(normalizePath(filePath, true));
1091
+ }
1092
+ /**
1093
+ * Remove a file path from the cache.
1094
+ *
1095
+ * @param filePath - The file path (will be normalized).
1096
+ */
1097
+ delete(filePath) {
1098
+ const normalized = normalizePath(filePath, true);
1099
+ const hash = this.pathToHash.get(normalized);
1100
+ if (hash === undefined)
1101
+ return;
1102
+ this.pathToHash.delete(normalized);
1103
+ const paths = this.hashToPaths.get(hash);
1104
+ if (paths) {
1105
+ paths.delete(normalized);
1106
+ if (paths.size === 0)
1107
+ this.hashToPaths.delete(hash);
1108
+ }
1109
+ }
1110
+ /**
1111
+ * Reverse lookup: get all file paths with a given content hash.
1112
+ *
1113
+ * @param hash - The content hash to look up.
1114
+ * @returns Array of normalized file paths with that hash.
1115
+ */
1116
+ getByHash(hash) {
1117
+ const paths = this.hashToPaths.get(hash);
1118
+ return paths ? [...paths] : [];
1119
+ }
1120
+ /** Number of cached entries. */
1121
+ get size() {
1122
+ return this.pathToHash.size;
1123
+ }
1124
+ }
1125
+
1126
+ /**
1127
+ * @module enrichment/EnrichmentStore
1128
+ * SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
1129
+ */
1130
+ /**
1131
+ * SQLite-backed enrichment metadata store.
1132
+ */
1133
+ class EnrichmentStore {
1134
+ db;
1135
+ /**
1136
+ * Create or open the enrichment store.
1137
+ *
1138
+ * @param stateDir - Directory for the SQLite database file.
1139
+ */
1140
+ constructor(stateDir) {
1141
+ mkdirSync(stateDir, { recursive: true });
1142
+ const dbPath = join(stateDir, 'enrichments.sqlite');
1143
+ this.db = new Database(dbPath);
1144
+ this.db.pragma('journal_mode = WAL');
1145
+ this.db.exec(`
1146
+ CREATE TABLE IF NOT EXISTS enrichments (
1147
+ path TEXT PRIMARY KEY,
1148
+ metadata TEXT NOT NULL,
1149
+ created_at TEXT NOT NULL,
1150
+ updated_at TEXT NOT NULL
1151
+ )
1152
+ `);
1153
+ }
1154
+ get(path) {
1155
+ const normalized = normalizePath(path);
1156
+ const row = this.db
1157
+ .prepare('SELECT metadata FROM enrichments WHERE path = ?')
1158
+ .get(normalized);
1159
+ if (!row)
1160
+ return null;
1161
+ return JSON.parse(row.metadata);
1162
+ }
1163
+ set(path, metadata) {
1164
+ const normalized = normalizePath(path);
1165
+ const now = new Date().toISOString();
1166
+ const existing = this.get(path);
1167
+ const merged = existing ? { ...existing, ...metadata } : metadata;
1168
+ const json = JSON.stringify(merged);
1169
+ if (existing) {
1170
+ this.db
1171
+ .prepare('UPDATE enrichments SET metadata = ?, updated_at = ? WHERE path = ?')
1172
+ .run(json, now, normalized);
1173
+ }
1174
+ else {
1175
+ this.db
1176
+ .prepare('INSERT INTO enrichments (path, metadata, created_at, updated_at) VALUES (?, ?, ?, ?)')
1177
+ .run(normalized, json, now, now);
1178
+ }
1179
+ }
1180
+ delete(path) {
1181
+ const normalized = normalizePath(path);
1182
+ this.db.prepare('DELETE FROM enrichments WHERE path = ?').run(normalized);
1183
+ }
1184
+ move(oldPath, newPath) {
1185
+ const normalizedOld = normalizePath(oldPath);
1186
+ const normalizedNew = normalizePath(newPath);
1187
+ const now = new Date().toISOString();
1188
+ this.db
1189
+ .prepare('UPDATE enrichments SET path = ?, updated_at = ? WHERE path = ?')
1190
+ .run(normalizedNew, now, normalizedOld);
1191
+ }
1192
+ list() {
1193
+ const rows = this.db
1194
+ .prepare('SELECT path FROM enrichments ORDER BY path')
1195
+ .all();
1196
+ return rows.map((r) => r.path);
1197
+ }
1198
+ close() {
1199
+ this.db.close();
1200
+ }
1201
+ }
1202
+
1203
+ /**
1204
+ * @module enrichment/merge
1205
+ * Composable merge for inferred + enrichment metadata. Scalars: enrichment overwrites. Arrays: union + deduplicate. No I/O.
1206
+ */
1207
+ /**
1208
+ * Merge enrichment metadata into inferred metadata with composable semantics.
1209
+ *
1210
+ * - Scalar fields: enrichment value overwrites inferred value.
1211
+ * - Array fields: union merge with deduplication (enrichment values appended).
1212
+ *
1213
+ * @param inferred - Metadata derived from inference rules.
1214
+ * @param enrichment - Human/agent-provided enrichment metadata.
1215
+ * @returns Merged metadata.
1216
+ */
1217
+ function mergeEnrichment(inferred, enrichment) {
1218
+ const result = { ...inferred };
1219
+ for (const [key, enrichValue] of Object.entries(enrichment)) {
1220
+ const inferredValue = result[key];
1221
+ if (Array.isArray(inferredValue) && Array.isArray(enrichValue)) {
1222
+ const combined = [
1223
+ ...inferredValue,
1224
+ ...enrichValue,
1225
+ ];
1226
+ result[key] = [...new Set(combined)];
1227
+ }
1228
+ else {
1229
+ result[key] = enrichValue;
1230
+ }
1231
+ }
1232
+ return result;
1233
+ }
1234
+
1022
1235
  /**
1023
1236
  * @module util/JsonFileStore
1024
1237
  * Small base class for JSON-backed read/modify/write stores with in-memory caching.
@@ -1438,6 +1651,24 @@ const watchConfigSchema = z.object({
1438
1651
  .boolean()
1439
1652
  .optional()
1440
1653
  .describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
1654
+ /** Move detection configuration for correlating unlink+add as file moves. */
1655
+ moveDetection: z
1656
+ .object({
1657
+ /** Enable move correlation. Default: true. */
1658
+ enabled: z
1659
+ .boolean()
1660
+ .default(true)
1661
+ .describe('Enable move detection via content hash correlation.'),
1662
+ /** Buffer time in ms for holding unlink events before treating as deletes. Default: 2000. */
1663
+ bufferMs: z
1664
+ .number()
1665
+ .int()
1666
+ .min(100)
1667
+ .default(2000)
1668
+ .describe('How long (ms) to buffer unlink events before treating as deletes.'),
1669
+ })
1670
+ .optional()
1671
+ .describe('Move detection: correlate unlink+add events as file moves to avoid re-embedding.'),
1441
1672
  });
1442
1673
  /**
1443
1674
  * Configuration watch settings.
@@ -1746,18 +1977,13 @@ const jeevesWatcherConfigSchema = z.object({
1746
1977
  embedding: embeddingConfigSchema.describe('Embedding model configuration.'),
1747
1978
  /** Vector store configuration. */
1748
1979
  vectorStore: vectorStoreConfigSchema.describe('Qdrant vector store configuration.'),
1749
- /** Directory for persisted metadata. */
1750
- metadataDir: z
1751
- .string()
1752
- .optional()
1753
- .describe('Directory for persisted metadata sidecar files.'),
1754
1980
  /** API server configuration. */
1755
1981
  api: apiConfigSchema.optional().describe('API server configuration.'),
1756
- /** Directory for persistent state files (issues.json, values.json). Defaults to metadataDir. */
1982
+ /** Directory for persistent state files (issues.json, values.json, enrichments.sqlite). */
1757
1983
  stateDir: z
1758
1984
  .string()
1759
1985
  .optional()
1760
- .describe('Directory for persistent state files (issues.json, values.json). Defaults to metadataDir.'),
1986
+ .describe('Directory for persistent state files (issues.json, values.json, enrichments.sqlite). Defaults to .jeeves-metadata.'),
1761
1987
  /** Rules for inferring metadata from document properties (inline objects or file paths). */
1762
1988
  inferenceRules: z
1763
1989
  .array(z.union([inferenceRuleSchema, z.string()]))
@@ -3892,97 +4118,6 @@ function createPointsDeleteHandler(deps) {
3892
4118
  }, deps.logger, 'PointsDelete');
3893
4119
  }
3894
4120
 
3895
- /**
3896
- * @module util/normalizePath
3897
- * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
3898
- */
3899
- /**
3900
- * Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
3901
- *
3902
- * @param filePath - The original file path.
3903
- * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
3904
- * @returns The normalized path string.
3905
- */
3906
- function normalizePath(filePath, stripDriveLetter = false) {
3907
- let result = filePath.replace(/\\/g, '/').toLowerCase();
3908
- if (stripDriveLetter) {
3909
- result = result.replace(/^([a-z]):/, (_m, letter) => letter);
3910
- }
3911
- return result;
3912
- }
3913
-
3914
- /**
3915
- * @module metadata/metadata
3916
- * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
3917
- */
3918
- /**
3919
- * Derive a deterministic `.meta.json` path for a given file.
3920
- *
3921
- * @param filePath - The watched file path.
3922
- * @param metadataDir - The root metadata directory.
3923
- * @returns The full path to the metadata file.
3924
- */
3925
- function metadataPath(filePath, metadataDir) {
3926
- const normalised = normalizePath(filePath, true);
3927
- const hash = createHash('sha256').update(normalised, 'utf8').digest('hex');
3928
- return join(metadataDir, `${hash}.meta.json`);
3929
- }
3930
- /**
3931
- * Read persisted metadata for a file.
3932
- *
3933
- * @param filePath - The watched file path.
3934
- * @param metadataDir - The root metadata directory.
3935
- * @returns The parsed metadata object, or `null` if not found.
3936
- */
3937
- async function readMetadata(filePath, metadataDir) {
3938
- try {
3939
- const raw = await readFile(metadataPath(filePath, metadataDir), 'utf8');
3940
- return JSON.parse(raw);
3941
- }
3942
- catch {
3943
- return null;
3944
- }
3945
- }
3946
- /**
3947
- * Write metadata for a file.
3948
- *
3949
- * @param filePath - The watched file path.
3950
- * @param metadataDir - The root metadata directory.
3951
- * @param metadata - The metadata to persist.
3952
- */
3953
- async function writeMetadata(filePath, metadataDir, metadata) {
3954
- const dest = metadataPath(filePath, metadataDir);
3955
- await mkdir(dirname(dest), { recursive: true });
3956
- await writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
3957
- }
3958
- /**
3959
- * Delete metadata for a file.
3960
- *
3961
- * @param filePath - The watched file path.
3962
- * @param metadataDir - The root metadata directory.
3963
- */
3964
- async function deleteMetadata(filePath, metadataDir) {
3965
- try {
3966
- await rm(metadataPath(filePath, metadataDir));
3967
- }
3968
- catch {
3969
- // Ignore if file doesn't exist.
3970
- }
3971
- }
3972
-
3973
- /**
3974
- * @module metadata/constants
3975
- * Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
3976
- */
3977
- /** Keys managed by the indexing pipeline (not user enrichment). */
3978
- const SYSTEM_METADATA_KEYS = [
3979
- 'file_path',
3980
- 'chunk_index',
3981
- 'total_chunks',
3982
- 'content_hash',
3983
- 'chunk_text',
3984
- ];
3985
-
3986
4121
  /**
3987
4122
  * @module processor/payloadFields
3988
4123
  * Constants for Qdrant payload field names used across the processing pipeline.
@@ -4005,10 +4140,18 @@ const FIELD_MODIFIED_AT = 'modified_at';
4005
4140
  const FIELD_LINE_START = 'line_start';
4006
4141
  /** Qdrant payload field: 1-indexed line number where this chunk ends in the source file. */
4007
4142
  const FIELD_LINE_END = 'line_end';
4143
+ /** Keys managed by the indexing pipeline (not user enrichment). */
4144
+ const SYSTEM_METADATA_KEYS = [
4145
+ FIELD_FILE_PATH,
4146
+ FIELD_CHUNK_INDEX,
4147
+ FIELD_TOTAL_CHUNKS,
4148
+ FIELD_CONTENT_HASH,
4149
+ FIELD_CHUNK_TEXT,
4150
+ ];
4008
4151
 
4009
4152
  /**
4010
4153
  * @module api/handlers/rebuildMetadata
4011
- * Fastify route handler for POST /rebuild-metadata. Recreates enrichment metadata files from vector store payloads.
4154
+ * Fastify route handler for POST /rebuild-metadata. Rebuilds enrichment store from vector store payloads.
4012
4155
  */
4013
4156
  /**
4014
4157
  * Create handler for POST /rebuild-metadata.
@@ -4017,7 +4160,6 @@ const FIELD_LINE_END = 'line_end';
4017
4160
  */
4018
4161
  function createRebuildMetadataHandler(deps) {
4019
4162
  return wrapHandler(async (_request, reply) => {
4020
- const metadataDir = deps.metadataDir ?? '.jeeves-metadata';
4021
4163
  const systemKeys = [...SYSTEM_METADATA_KEYS];
4022
4164
  for await (const point of deps.vectorStore.scroll()) {
4023
4165
  const payload = point.payload;
@@ -4025,7 +4167,7 @@ function createRebuildMetadataHandler(deps) {
4025
4167
  if (typeof filePath !== 'string' || filePath.length === 0)
4026
4168
  continue;
4027
4169
  const enrichment = omit(payload, systemKeys);
4028
- await writeMetadata(filePath, metadataDir, enrichment);
4170
+ deps.enrichmentStore?.set(filePath, enrichment);
4029
4171
  }
4030
4172
  return await reply.status(200).send({ ok: true });
4031
4173
  }, deps.logger, 'Rebuild metadata');
@@ -4525,7 +4667,6 @@ async function buildTemplateEngineAndCustomMapLib(config, configDir) {
4525
4667
  */
4526
4668
  function createProcessorConfig(config, configDir, customMapLib) {
4527
4669
  return {
4528
- metadataDir: config.metadataDir ?? '.jeeves-metadata',
4529
4670
  chunkSize: config.embedding.chunkSize,
4530
4671
  chunkOverlap: config.embedding.chunkOverlap,
4531
4672
  maps: resolveMapsConfig(config.maps),
@@ -4537,7 +4678,7 @@ function createProcessorConfig(config, configDir, customMapLib) {
4537
4678
  /**
4538
4679
  * Create file system watcher with gitignore filtering.
4539
4680
  */
4540
- function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker) {
4681
+ function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker, contentHashCache) {
4541
4682
  const respectGitignore = config.watch.respectGitignore ?? true;
4542
4683
  const gitignoreFilter = respectGitignore
4543
4684
  ? new GitignoreFilter(config.watch.paths)
@@ -4548,6 +4689,7 @@ function createWatcher(config, factories, queue, processor, logger, runtimeOptio
4548
4689
  onFatalError: runtimeOptions.onFatalError,
4549
4690
  gitignoreFilter,
4550
4691
  initialScanTracker,
4692
+ contentHashCache,
4551
4693
  });
4552
4694
  return { watcher, gitignoreFilter };
4553
4695
  }
@@ -4748,7 +4890,7 @@ function createApiServer(options) {
4748
4890
  hybridConfig,
4749
4891
  }));
4750
4892
  app.post('/rebuild-metadata', createRebuildMetadataHandler({
4751
- metadataDir: config.metadataDir,
4893
+ enrichmentStore: options.enrichmentStore,
4752
4894
  vectorStore,
4753
4895
  logger,
4754
4896
  }));
@@ -4828,7 +4970,7 @@ function createApiServer(options) {
4828
4970
  */
4829
4971
  /** Default root-level config values. */
4830
4972
  const ROOT_DEFAULTS = {
4831
- metadataDir: '.jeeves-watcher',
4973
+ stateDir: '.jeeves-metadata',
4832
4974
  shutdownTimeoutMs: 10000,
4833
4975
  };
4834
4976
  /** Default configWatch values. */
@@ -4879,7 +5021,7 @@ const INIT_CONFIG_TEMPLATE = {
4879
5021
  url: 'http://127.0.0.1:6333',
4880
5022
  collectionName: 'jeeves-watcher',
4881
5023
  },
4882
- metadataDir: ROOT_DEFAULTS.metadataDir,
5024
+ stateDir: ROOT_DEFAULTS.stateDir,
4883
5025
  api: API_DEFAULTS,
4884
5026
  logging: LOGGING_DEFAULTS,
4885
5027
  };
@@ -5221,7 +5363,7 @@ function createLogger(config) {
5221
5363
 
5222
5364
  /**
5223
5365
  * @module hash
5224
- * Provides SHA-256 content hashing. Pure function: given text string, returns hex digest. No I/O or side effects.
5366
+ * Provides SHA-256 content hashing. Pure functions: text hash and file hash. File hash does I/O.
5225
5367
  */
5226
5368
  /**
5227
5369
  * Compute a SHA-256 hex digest of the given text.
@@ -5232,6 +5374,16 @@ function createLogger(config) {
5232
5374
  function contentHash(text) {
5233
5375
  return createHash('sha256').update(text, 'utf8').digest('hex');
5234
5376
  }
5377
+ /**
5378
+ * Compute a SHA-256 hex digest of a file's raw bytes.
5379
+ *
5380
+ * @param filePath - Path to the file.
5381
+ * @returns The hex-encoded SHA-256 hash.
5382
+ */
5383
+ async function fileHash(filePath) {
5384
+ const buffer = await readFile(filePath);
5385
+ return createHash('sha256').update(buffer).digest('hex');
5386
+ }
5235
5387
 
5236
5388
  /**
5237
5389
  * @module pointId
@@ -5391,7 +5543,7 @@ async function extractText(filePath, extension, additionalExtractors) {
5391
5543
 
5392
5544
  /**
5393
5545
  * @module processor/buildMetadata
5394
- * Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
5546
+ * Builds merged metadata from file content, inference rules, and enrichment store. I/O: reads files, extracts text, queries SQLite enrichment.
5395
5547
  */
5396
5548
  /**
5397
5549
  * Build merged metadata for a file by applying inference rules and merging with enrichment metadata.
@@ -5400,7 +5552,7 @@ async function extractText(filePath, extension, additionalExtractors) {
5400
5552
  * @returns The merged metadata and intermediate data.
5401
5553
  */
5402
5554
  async function buildMergedMetadata(options) {
5403
- const { filePath, compiledRules, metadataDir, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
5555
+ const { filePath, compiledRules, enrichmentStore, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
5404
5556
  const ext = extname(filePath);
5405
5557
  const stats = await stat(filePath);
5406
5558
  // 1. Extract text and structured data
@@ -5415,12 +5567,11 @@ async function buildMergedMetadata(options) {
5415
5567
  customMapLib,
5416
5568
  globalSchemas,
5417
5569
  });
5418
- // 3. Read enrichment metadata (merge, enrichment wins)
5419
- const enrichment = await readMetadata(filePath, metadataDir);
5420
- const metadata = {
5421
- ...inferred,
5422
- ...(enrichment ?? {}),
5423
- };
5570
+ // 3. Read enrichment metadata from store (composable merge)
5571
+ const enrichment = enrichmentStore?.get(filePath) ?? null;
5572
+ const metadata = enrichment
5573
+ ? mergeEnrichment(inferred, enrichment)
5574
+ : { ...inferred };
5424
5575
  return {
5425
5576
  inferred,
5426
5577
  enrichment,
@@ -5609,22 +5760,26 @@ class DocumentProcessor {
5609
5760
  compiledRules;
5610
5761
  logger;
5611
5762
  templateEngine;
5763
+ enrichmentStore;
5612
5764
  issuesManager;
5613
5765
  valuesManager;
5766
+ contentHashCache;
5614
5767
  /**
5615
5768
  * Create a new DocumentProcessor.
5616
5769
  *
5617
5770
  * @param deps - The processor dependencies.
5618
5771
  */
5619
- constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, issuesManager, valuesManager, }) {
5772
+ constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, enrichmentStore, issuesManager, valuesManager, contentHashCache, }) {
5620
5773
  this.config = config;
5621
5774
  this.embeddingProvider = embeddingProvider;
5622
5775
  this.vectorStore = vectorStore;
5623
5776
  this.compiledRules = compiledRules;
5624
5777
  this.logger = logger;
5625
5778
  this.templateEngine = templateEngine;
5779
+ this.enrichmentStore = enrichmentStore;
5626
5780
  this.issuesManager = issuesManager;
5627
5781
  this.valuesManager = valuesManager;
5782
+ this.contentHashCache = contentHashCache;
5628
5783
  }
5629
5784
  /**
5630
5785
  * Build merged metadata for a file and add matched_rules.
@@ -5633,7 +5788,7 @@ class DocumentProcessor {
5633
5788
  const result = await buildMergedMetadata({
5634
5789
  filePath,
5635
5790
  compiledRules: this.compiledRules,
5636
- metadataDir: this.config.metadataDir,
5791
+ enrichmentStore: this.enrichmentStore,
5637
5792
  maps: this.config.maps,
5638
5793
  logger: this.logger,
5639
5794
  templateEngine: this.templateEngine,
@@ -5680,6 +5835,9 @@ class DocumentProcessor {
5680
5835
  this.logger.debug({ filePath }, 'Skipping empty file');
5681
5836
  return;
5682
5837
  }
5838
+ // Compute file-level hash for move correlation cache.
5839
+ const rawHash = await fileHash(filePath);
5840
+ this.contentHashCache?.set(filePath, rawHash);
5683
5841
  const hash = contentHash(textToEmbed);
5684
5842
  const baseId = pointId(filePath, 0);
5685
5843
  const existingPayload = await this.vectorStore.getPayload(baseId);
@@ -5716,12 +5874,13 @@ class DocumentProcessor {
5716
5874
  const totalChunks = getChunkCount(existingPayload);
5717
5875
  const ids = chunkIds(filePath, totalChunks);
5718
5876
  await this.vectorStore.delete(ids);
5719
- await deleteMetadata(filePath, this.config.metadataDir);
5877
+ this.enrichmentStore?.delete(filePath);
5878
+ this.contentHashCache?.delete(filePath);
5720
5879
  this.logger.info({ filePath }, 'File deleted from index');
5721
5880
  }, undefined);
5722
5881
  }
5723
5882
  /**
5724
- * Process a metadata update: merge metadata, write to disk, update Qdrant payloads (no re-embed).
5883
+ * Process a metadata update: merge into enrichment store, update Qdrant payloads (no re-embed).
5725
5884
  *
5726
5885
  * @param filePath - The file whose metadata to update.
5727
5886
  * @param metadata - The new metadata to merge.
@@ -5729,9 +5888,8 @@ class DocumentProcessor {
5729
5888
  */
5730
5889
  async processMetadataUpdate(filePath, metadata) {
5731
5890
  return this.withFileErrorHandling(filePath, 'Failed to update metadata', async () => {
5732
- const existing = (await readMetadata(filePath, this.config.metadataDir)) ?? {};
5733
- const merged = { ...existing, ...metadata };
5734
- await writeMetadata(filePath, this.config.metadataDir, merged);
5891
+ this.enrichmentStore?.set(filePath, metadata);
5892
+ const merged = this.enrichmentStore?.get(filePath) ?? metadata;
5735
5893
  const baseId = pointId(filePath, 0);
5736
5894
  const existingPayload = await this.vectorStore.getPayload(baseId);
5737
5895
  if (!existingPayload)
@@ -5793,6 +5951,56 @@ class DocumentProcessor {
5793
5951
  transformed: renderedContent !== null,
5794
5952
  };
5795
5953
  }
5954
+ /**
5955
+ * Move a file's vector points from old path to new path without re-embedding.
5956
+ * Re-applies inference rules against the new path.
5957
+ *
5958
+ * @param oldPath - The original file path.
5959
+ * @param newPath - The new file path.
5960
+ */
5961
+ async moveFile(oldPath, newPath) {
5962
+ await this.withFileErrorHandling(newPath, 'Failed to move file', async () => {
5963
+ const baseId = pointId(oldPath, 0);
5964
+ const existingPayload = await this.vectorStore.getPayload(baseId);
5965
+ const totalChunks = getChunkCount(existingPayload);
5966
+ const oldIds = chunkIds(oldPath, totalChunks);
5967
+ const oldPoints = await this.vectorStore.getPointsWithVectors(oldIds);
5968
+ if (oldPoints.length === 0) {
5969
+ this.logger.warn({ oldPath, newPath }, 'No points found for move');
5970
+ return;
5971
+ }
5972
+ // Build new metadata from inference rules against the new path.
5973
+ const { metadataWithRules, matchedRules, metadata } = await this.buildMetadataWithRules(newPath);
5974
+ // Create new points with updated IDs and file_path payload.
5975
+ const newPoints = oldPoints.map((pt, i) => ({
5976
+ id: pointId(newPath, i),
5977
+ vector: pt.vector,
5978
+ payload: {
5979
+ ...pt.payload,
5980
+ ...metadataWithRules,
5981
+ [FIELD_FILE_PATH]: normalizeSlashes(newPath),
5982
+ },
5983
+ }));
5984
+ await this.vectorStore.upsert(newPoints);
5985
+ await this.vectorStore.delete(oldIds);
5986
+ // Migrate enrichment and clear old issues.
5987
+ this.enrichmentStore?.move(oldPath, newPath);
5988
+ this.issuesManager?.clear(oldPath);
5989
+ // Update values index for the new path's matched rules.
5990
+ if (this.valuesManager) {
5991
+ for (const ruleName of matchedRules) {
5992
+ this.valuesManager.update(ruleName, metadata);
5993
+ }
5994
+ }
5995
+ // Update content hash cache.
5996
+ const oldHash = this.contentHashCache?.get(oldPath);
5997
+ if (oldHash) {
5998
+ this.contentHashCache?.set(newPath, oldHash);
5999
+ }
6000
+ this.contentHashCache?.delete(oldPath);
6001
+ this.logger.info({ oldPath, newPath, chunks: oldPoints.length }, 'File moved in index');
6002
+ }, undefined);
6003
+ }
5796
6004
  /**
5797
6005
  * Update compiled inference rules, template engine, and custom map lib.
5798
6006
  *
@@ -6451,6 +6659,33 @@ class VectorStoreClient {
6451
6659
  async hybridSearch(vector, queryText, limit, textWeight, filter) {
6452
6660
  return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
6453
6661
  }
6662
+ /**
6663
+ * Retrieve points with their vectors by ID.
6664
+ *
6665
+ * @param ids - The point IDs to retrieve.
6666
+ * @returns Points with vectors and payloads; missing IDs are omitted.
6667
+ */
6668
+ async getPointsWithVectors(ids) {
6669
+ if (ids.length === 0)
6670
+ return [];
6671
+ try {
6672
+ const results = await this.client.retrieve(this.collectionName, {
6673
+ ids,
6674
+ with_payload: true,
6675
+ with_vector: true,
6676
+ });
6677
+ return results
6678
+ .filter((r) => r.vector != null)
6679
+ .map((r) => ({
6680
+ id: String(r.id),
6681
+ vector: r.vector,
6682
+ payload: r.payload,
6683
+ }));
6684
+ }
6685
+ catch {
6686
+ return [];
6687
+ }
6688
+ }
6454
6689
  /**
6455
6690
  * Scroll one page of points matching a filter.
6456
6691
  *
@@ -6658,6 +6893,163 @@ function resolveIgnored(ignored) {
6658
6893
  });
6659
6894
  }
6660
6895
 
6896
/**
 * @module watcher/MoveCorrelator
 * Correlates unlink+add events as file moves using content hash matching.
 * Buffers unlink events and matches against subsequent add events.
 */
/**
 * Correlates unlink+add file system events as moves using content hash matching.
 *
 * When move detection is disabled, events pass straight through.
 */
class MoveCorrelator {
    enabled;
    bufferMs;
    cache;
    logger;
    onMove;
    onDelete;
    onCreate;
    /** Buffered unlinks indexed by content hash (FIFO per hash). */
    buffer = new Map();
    /** Track unlink burst rate per parent directory for bulk mode. */
    burstCounters = new Map();
    /** Threshold: if N+ unlinks from same parent in burstWindowMs, extend buffer. */
    static BURST_THRESHOLD = 5;
    static BURST_WINDOW_MS = 500;
    static BURST_MULTIPLIER = 3;
    constructor(options) {
        const { enabled, bufferMs, contentHashCache, logger, onMove, onDelete, onCreate } = options;
        this.enabled = enabled;
        this.bufferMs = bufferMs;
        this.cache = contentHashCache;
        this.logger = logger;
        this.onMove = onMove;
        this.onDelete = onDelete;
        this.onCreate = onCreate;
    }
    /**
     * Handle an unlink event. Buffers the event for correlation.
     *
     * @param path - The removed file path.
     */
    handleUnlink(path) {
        // Pass-through when move detection is off.
        if (!this.enabled) {
            this.onDelete(path);
            return;
        }
        const contentHash = this.cache.get(path);
        // Without a cached hash there is nothing to correlate against.
        if (!contentHash) {
            this.logger.debug({ path }, 'No cached hash for unlinked file, treating as delete');
            this.onDelete(path);
            return;
        }
        // Burst-aware timeout: bulk moves get a longer correlation window.
        const timeoutMs = this.getEffectiveTimeout(path);
        const pending = {
            path,
            hash: contentHash,
            timestamp: Date.now(),
            timer: setTimeout(() => {
                this.expireUnlink(contentHash, path);
            }, timeoutMs),
        };
        const bucket = this.buffer.get(contentHash) ?? [];
        if (bucket.length === 0)
            this.buffer.set(contentHash, bucket);
        bucket.push(pending);
        this.logger.debug({ path, hash: contentHash.slice(0, 12), timeoutMs }, 'Buffered unlink for move correlation');
    }
    /**
     * Handle an add event. Checks buffer for matching unlink (move detection).
     *
     * @param path - The added file path.
     */
    async handleAdd(path) {
        // Pass-through when move detection is off.
        if (!this.enabled) {
            this.onCreate(path);
            return;
        }
        let contentHash;
        try {
            contentHash = await fileHash(path);
        }
        catch {
            // Unreadable file: cannot correlate, treat as a plain create.
            this.onCreate(path);
            return;
        }
        const bucket = this.buffer.get(contentHash);
        if (!bucket || bucket.length === 0) {
            this.onCreate(path);
            return;
        }
        // FIFO: consume oldest matching unlink
        const matched = bucket.shift();
        clearTimeout(matched.timer);
        if (bucket.length === 0)
            this.buffer.delete(contentHash);
        this.logger.info({ oldPath: matched.path, newPath: path }, 'Move detected');
        this.onMove(matched.path, path);
    }
    /**
     * Flush all buffered unlinks as deletes. Call on shutdown.
     */
    flush() {
        for (const bucket of this.buffer.values()) {
            for (const pending of bucket) {
                clearTimeout(pending.timer);
                this.onDelete(pending.path);
            }
        }
        this.buffer.clear();
        this.burstCounters.clear();
    }
    /** Number of currently buffered unlink events. */
    get pendingCount() {
        let total = 0;
        for (const bucket of this.buffer.values())
            total += bucket.length;
        return total;
    }
    /**
     * Get effective timeout, applying burst detection for bulk moves.
     */
    getEffectiveTimeout(path) {
        const parentDir = dirname(path);
        const now = Date.now();
        let counter = this.burstCounters.get(parentDir);
        // Start a new window when none exists or the previous one has aged out.
        const stale = !counter || now - counter.firstTs > MoveCorrelator.BURST_WINDOW_MS;
        if (stale) {
            counter = { count: 0, firstTs: now };
            this.burstCounters.set(parentDir, counter);
        }
        counter.count += 1;
        return counter.count >= MoveCorrelator.BURST_THRESHOLD
            ? this.bufferMs * MoveCorrelator.BURST_MULTIPLIER
            : this.bufferMs;
    }
    /**
     * Handle a buffered unlink timeout — emit as delete.
     */
    expireUnlink(hash, path) {
        const bucket = this.buffer.get(hash);
        if (bucket) {
            const position = bucket.findIndex((pending) => pending.path === path);
            if (position >= 0) {
                bucket.splice(position, 1);
                if (bucket.length === 0)
                    this.buffer.delete(hash);
            }
        }
        this.logger.debug({ path, hash: hash.slice(0, 12) }, 'Buffered unlink expired, treating as delete');
        this.onDelete(path);
    }
}
7052
+
6661
7053
  /**
6662
7054
  * @module watcher
6663
7055
  * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
@@ -6673,6 +7065,8 @@ class FileSystemWatcher {
6673
7065
  health;
6674
7066
  gitignoreFilter;
6675
7067
  initialScanTracker;
7068
+ contentHashCache;
7069
+ moveCorrelator;
6676
7070
  globMatches;
6677
7071
  watcher;
6678
7072
  /**
@@ -6691,6 +7085,7 @@ class FileSystemWatcher {
6691
7085
  this.logger = logger;
6692
7086
  this.gitignoreFilter = options.gitignoreFilter;
6693
7087
  this.initialScanTracker = options.initialScanTracker;
7088
+ this.contentHashCache = options.contentHashCache;
6694
7089
  this.globMatches = () => true;
6695
7090
  const healthOptions = {
6696
7091
  maxRetries: options.maxRetries,
@@ -6736,6 +7131,26 @@ class FileSystemWatcher {
6736
7131
  }
6737
7132
  }
6738
7133
  };
7134
+ // Create move correlator if move detection is configured and cache is available.
7135
+ const moveConfig = this.config.moveDetection;
7136
+ if (moveConfig?.enabled && this.contentHashCache) {
7137
+ this.moveCorrelator = new MoveCorrelator({
7138
+ enabled: true,
7139
+ bufferMs: moveConfig.bufferMs,
7140
+ contentHashCache: this.contentHashCache,
7141
+ logger: this.logger,
7142
+ onMove: (oldPath, newPath) => {
7143
+ this.queue.enqueue({ type: 'move', path: newPath, oldPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.moveFile(oldPath, newPath)));
7144
+ },
7145
+ onDelete: (deletedPath) => {
7146
+ this.queue.enqueue({ type: 'delete', path: deletedPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(deletedPath)));
7147
+ },
7148
+ onCreate: (createdPath) => {
7149
+ this.queue.enqueue({ type: 'create', path: createdPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(createdPath)));
7150
+ },
7151
+ });
7152
+ }
7153
+ const correlator = this.moveCorrelator;
6739
7154
  this.watcher = chokidar.watch(roots, {
6740
7155
  ignored,
6741
7156
  usePolling: this.config.usePolling,
@@ -6769,7 +7184,12 @@ class FileSystemWatcher {
6769
7184
  this.initialScanTracker?.incrementEnqueued();
6770
7185
  }
6771
7186
  this.logger.debug({ path }, 'File added');
6772
- this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
7187
+ if (correlator && initialScanComplete) {
7188
+ void correlator.handleAdd(path);
7189
+ }
7190
+ else {
7191
+ this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
7192
+ }
6773
7193
  });
6774
7194
  this.watcher.on('change', (path) => {
6775
7195
  this.handleGitignoreChange(path);
@@ -6789,7 +7209,12 @@ class FileSystemWatcher {
6789
7209
  if (this.isGitignored(path))
6790
7210
  return;
6791
7211
  this.logger.debug({ path }, 'File removed');
6792
- this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
7212
+ if (correlator) {
7213
+ correlator.handleUnlink(path);
7214
+ }
7215
+ else {
7216
+ this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
7217
+ }
6793
7218
  });
6794
7219
  this.watcher.on('ready', () => {
6795
7220
  initialScanComplete = true;
@@ -6834,6 +7259,7 @@ class FileSystemWatcher {
6834
7259
  * Stop the filesystem watcher.
6835
7260
  */
6836
7261
  async stop() {
7262
+ this.moveCorrelator?.flush();
6837
7263
  if (this.watcher) {
6838
7264
  await this.watcher.close();
6839
7265
  this.watcher = undefined;
@@ -6967,6 +7393,8 @@ class JeevesWatcher {
6967
7393
  vectorStore;
6968
7394
  embeddingProvider;
6969
7395
  gitignoreFilter;
7396
+ enrichmentStore;
7397
+ contentHashCache;
6970
7398
  initialScanTracker;
6971
7399
  version;
6972
7400
  /** Create a new JeevesWatcher instance. */
@@ -7007,9 +7435,13 @@ class JeevesWatcher {
7007
7435
  const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
7008
7436
  this.helperIntrospection = await introspectHelpers(this.config, configDir);
7009
7437
  const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
7010
- const stateDir = this.config.stateDir ?? this.config.metadataDir ?? '.jeeves-metadata';
7438
+ const stateDir = this.config.stateDir ?? '.jeeves-metadata';
7011
7439
  this.issuesManager = new IssuesManager(stateDir, logger);
7012
7440
  this.valuesManager = new ValuesManager(stateDir, logger);
7441
+ this.enrichmentStore = new EnrichmentStore(stateDir);
7442
+ const enrichmentStore = this.enrichmentStore;
7443
+ this.contentHashCache = new ContentHashCache();
7444
+ const contentHashCache = this.contentHashCache;
7013
7445
  const processor = this.factories.createDocumentProcessor({
7014
7446
  config: processorConfig,
7015
7447
  embeddingProvider,
@@ -7017,8 +7449,10 @@ class JeevesWatcher {
7017
7449
  compiledRules,
7018
7450
  logger,
7019
7451
  templateEngine,
7452
+ enrichmentStore,
7020
7453
  issuesManager: this.issuesManager,
7021
7454
  valuesManager: this.valuesManager,
7455
+ contentHashCache,
7022
7456
  });
7023
7457
  this.processor = processor;
7024
7458
  this.queue = this.factories.createEventQueue({
@@ -7026,7 +7460,7 @@ class JeevesWatcher {
7026
7460
  concurrency: this.config.embedding.concurrency ?? 5,
7027
7461
  rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
7028
7462
  });
7029
- const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker);
7463
+ const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker, contentHashCache);
7030
7464
  this.watcher = watcher;
7031
7465
  this.gitignoreFilter = gitignoreFilter;
7032
7466
  this.server = await this.startApiServer();
@@ -7078,6 +7512,7 @@ class JeevesWatcher {
7078
7512
  version: this.version,
7079
7513
  initialScanTracker: this.initialScanTracker,
7080
7514
  fileSystemWatcher: this.watcher,
7515
+ enrichmentStore: this.enrichmentStore,
7081
7516
  });
7082
7517
  await server.listen({
7083
7518
  host: this.config.api?.host ?? '127.0.0.1',