@karmaniverous/jeeves-watcher 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.schema.json +185 -154
- package/dist/cli/jeeves-watcher/index.js +563 -128
- package/dist/index.d.ts +156 -41
- package/dist/index.js +563 -128
- package/package.json +3 -1
|
@@ -5,10 +5,11 @@ import { existsSync, statSync, readFileSync, readdirSync, mkdirSync, writeFileSy
|
|
|
5
5
|
import { join, dirname, resolve, relative, extname, basename, isAbsolute } from 'node:path';
|
|
6
6
|
import { pathToFileURL, fileURLToPath } from 'node:url';
|
|
7
7
|
import { packageDirectorySync } from 'package-directory';
|
|
8
|
-
import { readdir, stat, writeFile,
|
|
8
|
+
import { readdir, stat, writeFile, readFile } from 'node:fs/promises';
|
|
9
9
|
import { parallel, capitalize, title, camel, snake, dash, isEqual, get, omit } from 'radash';
|
|
10
10
|
import ignore from 'ignore';
|
|
11
11
|
import picomatch from 'picomatch';
|
|
12
|
+
import Database from 'better-sqlite3';
|
|
12
13
|
import { z, ZodError } from 'zod';
|
|
13
14
|
import Ajv from 'ajv';
|
|
14
15
|
import addFormats from 'ajv-formats';
|
|
@@ -24,10 +25,10 @@ import { toMarkdown } from 'mdast-util-to-markdown';
|
|
|
24
25
|
import rehypeParse from 'rehype-parse';
|
|
25
26
|
import { unified } from 'unified';
|
|
26
27
|
import yaml from 'js-yaml';
|
|
27
|
-
import { createHash } from 'node:crypto';
|
|
28
28
|
import crypto from 'crypto';
|
|
29
29
|
import { cosmiconfig } from 'cosmiconfig';
|
|
30
30
|
import https from 'node:https';
|
|
31
|
+
import { createHash } from 'node:crypto';
|
|
31
32
|
import pino from 'pino';
|
|
32
33
|
import { v5 } from 'uuid';
|
|
33
34
|
import * as cheerio from 'cheerio';
|
|
@@ -1019,6 +1020,218 @@ class InitialScanTracker {
|
|
|
1019
1020
|
}
|
|
1020
1021
|
}
|
|
1021
1022
|
|
|
1023
|
+
/**
 * @module util/normalizePath
 * Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
 */
/**
 * Normalize a file path for use as a deterministic lookup key.
 *
 * Backslashes become forward slashes and the entire path is lowercased.
 * When requested, the colon after a leading drive letter is dropped
 * (e.g. `C:\Docs` → `c/docs`) so Windows and POSIX forms map alike.
 *
 * @param filePath - The original file path.
 * @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
 * @returns The normalized path string.
 */
function normalizePath(filePath, stripDriveLetter = false) {
  const lowered = filePath.replace(/\\/g, '/').toLowerCase();
  return stripDriveLetter
    ? lowered.replace(/^([a-z]):/, (_m, letter) => letter)
    : lowered;
}
|
|
1041
|
+
|
|
1042
|
+
/**
 * @module cache/ContentHashCache
 * In-memory cache mapping normalized file paths to content hashes.
 * Supports reverse lookup by hash for move correlation.
 */
/**
 * In-memory content hash cache for move detection.
 *
 * Maps normalized file paths to SHA-256 content hashes and keeps a
 * reverse index (hash → set of paths) so that unlink+add event pairs
 * can be correlated as file moves.
 */
class ContentHashCache {
  pathToHash = new Map();
  hashToPaths = new Map();
  /**
   * Remove a path from the reverse index bucket for a hash, pruning
   * the bucket entirely when it becomes empty.
   *
   * @param hash - The content hash whose bucket to update.
   * @param normalized - The normalized path to remove from that bucket.
   */
  #unindex(hash, normalized) {
    const bucket = this.hashToPaths.get(hash);
    if (!bucket) return;
    bucket.delete(normalized);
    if (bucket.size === 0) this.hashToPaths.delete(hash);
  }
  /**
   * Store or update the content hash for a file path.
   *
   * @param filePath - The file path (will be normalized).
   * @param hash - The SHA-256 content hash.
   */
  set(filePath, hash) {
    const key = normalizePath(filePath, true);
    const previous = this.pathToHash.get(key);
    // If the hash changed, drop the stale reverse-index entry first.
    if (previous !== undefined && previous !== hash) {
      this.#unindex(previous, key);
    }
    this.pathToHash.set(key, hash);
    let bucket = this.hashToPaths.get(hash);
    if (!bucket) {
      bucket = new Set();
      this.hashToPaths.set(hash, bucket);
    }
    bucket.add(key);
  }
  /**
   * Get the content hash for a file path.
   *
   * @param filePath - The file path (will be normalized).
   * @returns The content hash, or undefined if not cached.
   */
  get(filePath) {
    return this.pathToHash.get(normalizePath(filePath, true));
  }
  /**
   * Remove a file path from the cache.
   *
   * @param filePath - The file path (will be normalized).
   */
  delete(filePath) {
    const key = normalizePath(filePath, true);
    const hash = this.pathToHash.get(key);
    if (hash === undefined) return;
    this.pathToHash.delete(key);
    this.#unindex(hash, key);
  }
  /**
   * Reverse lookup: get all file paths with a given content hash.
   *
   * @param hash - The content hash to look up.
   * @returns Array of normalized file paths with that hash.
   */
  getByHash(hash) {
    return [...(this.hashToPaths.get(hash) ?? [])];
  }
  /** Number of cached entries. */
  get size() {
    return this.pathToHash.size;
  }
}
|
|
1125
|
+
|
|
1126
|
+
/**
 * @module enrichment/EnrichmentStore
 * SQLite-backed enrichment metadata store. Persists path-keyed metadata at stateDir/enrichments.sqlite. Atomic writes, supports move.
 */
/**
 * SQLite-backed enrichment metadata store.
 *
 * Rows are keyed by normalized path; metadata is stored as a JSON blob
 * with created/updated timestamps. WAL mode is enabled for durability
 * under concurrent readers.
 */
class EnrichmentStore {
  db;
  /**
   * Create or open the enrichment store.
   *
   * @param stateDir - Directory for the SQLite database file.
   */
  constructor(stateDir) {
    mkdirSync(stateDir, { recursive: true });
    this.db = new Database(join(stateDir, 'enrichments.sqlite'));
    this.db.pragma('journal_mode = WAL');
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS enrichments (
        path TEXT PRIMARY KEY,
        metadata TEXT NOT NULL,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `);
  }
  /**
   * Read the enrichment metadata for a path.
   *
   * @param path - The file path (will be normalized).
   * @returns The parsed metadata object, or null when absent.
   */
  get(path) {
    const row = this.db
      .prepare('SELECT metadata FROM enrichments WHERE path = ?')
      .get(normalizePath(path));
    return row ? JSON.parse(row.metadata) : null;
  }
  /**
   * Shallow-merge metadata into any existing row for the path, or
   * insert a fresh row when none exists.
   *
   * @param path - The file path (will be normalized).
   * @param metadata - Metadata keys to merge in (incoming keys win).
   */
  set(path, metadata) {
    const key = normalizePath(path);
    const timestamp = new Date().toISOString();
    const current = this.get(path);
    const payload = JSON.stringify(current ? { ...current, ...metadata } : metadata);
    if (current) {
      this.db
        .prepare('UPDATE enrichments SET metadata = ?, updated_at = ? WHERE path = ?')
        .run(payload, timestamp, key);
      return;
    }
    this.db
      .prepare('INSERT INTO enrichments (path, metadata, created_at, updated_at) VALUES (?, ?, ?, ?)')
      .run(key, payload, timestamp, timestamp);
  }
  /**
   * Remove the enrichment row for a path (no-op when absent).
   *
   * @param path - The file path (will be normalized).
   */
  delete(path) {
    this.db.prepare('DELETE FROM enrichments WHERE path = ?').run(normalizePath(path));
  }
  /**
   * Re-key an enrichment row from one path to another, refreshing
   * updated_at. No-op when the old path has no row.
   *
   * @param oldPath - The original file path (will be normalized).
   * @param newPath - The new file path (will be normalized).
   */
  move(oldPath, newPath) {
    const source = normalizePath(oldPath);
    const target = normalizePath(newPath);
    this.db
      .prepare('UPDATE enrichments SET path = ?, updated_at = ? WHERE path = ?')
      .run(target, new Date().toISOString(), source);
  }
  /**
   * List all enriched paths in sorted order.
   *
   * @returns Array of normalized paths.
   */
  list() {
    const rows = this.db
      .prepare('SELECT path FROM enrichments ORDER BY path')
      .all();
    return rows.map((row) => row.path);
  }
  /** Close the underlying database handle. */
  close() {
    this.db.close();
  }
}
|
|
1202
|
+
|
|
1203
|
+
/**
 * @module enrichment/merge
 * Composable merge for inferred + enrichment metadata. Scalars: enrichment overwrites. Arrays: union + deduplicate. No I/O.
 */
/**
 * Merge enrichment metadata into inferred metadata with composable semantics.
 *
 * - Scalar fields: enrichment value overwrites inferred value.
 * - Array fields: union merge with deduplication (enrichment values appended).
 *
 * @param inferred - Metadata derived from inference rules.
 * @param enrichment - Human/agent-provided enrichment metadata.
 * @returns Merged metadata.
 */
function mergeEnrichment(inferred, enrichment) {
  const merged = { ...inferred };
  for (const key of Object.keys(enrichment)) {
    const incoming = enrichment[key];
    const existing = merged[key];
    // Array-vs-array: union with inferred values first; anything else:
    // the enrichment value simply wins.
    merged[key] =
      Array.isArray(existing) && Array.isArray(incoming)
        ? [...new Set([...existing, ...incoming])]
        : incoming;
  }
  return merged;
}
|
|
1234
|
+
|
|
1022
1235
|
/**
|
|
1023
1236
|
* @module util/JsonFileStore
|
|
1024
1237
|
* Small base class for JSON-backed read/modify/write stores with in-memory caching.
|
|
@@ -1438,6 +1651,24 @@ const watchConfigSchema = z.object({
|
|
|
1438
1651
|
.boolean()
|
|
1439
1652
|
.optional()
|
|
1440
1653
|
.describe('Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. Default: true.'),
|
|
1654
|
+
/** Move detection configuration for correlating unlink+add as file moves. */
|
|
1655
|
+
moveDetection: z
|
|
1656
|
+
.object({
|
|
1657
|
+
/** Enable move correlation. Default: true. */
|
|
1658
|
+
enabled: z
|
|
1659
|
+
.boolean()
|
|
1660
|
+
.default(true)
|
|
1661
|
+
.describe('Enable move detection via content hash correlation.'),
|
|
1662
|
+
/** Buffer time in ms for holding unlink events before treating as deletes. Default: 2000. */
|
|
1663
|
+
bufferMs: z
|
|
1664
|
+
.number()
|
|
1665
|
+
.int()
|
|
1666
|
+
.min(100)
|
|
1667
|
+
.default(2000)
|
|
1668
|
+
.describe('How long (ms) to buffer unlink events before treating as deletes.'),
|
|
1669
|
+
})
|
|
1670
|
+
.optional()
|
|
1671
|
+
.describe('Move detection: correlate unlink+add events as file moves to avoid re-embedding.'),
|
|
1441
1672
|
});
|
|
1442
1673
|
/**
|
|
1443
1674
|
* Configuration watch settings.
|
|
@@ -1746,18 +1977,13 @@ const jeevesWatcherConfigSchema = z.object({
|
|
|
1746
1977
|
embedding: embeddingConfigSchema.describe('Embedding model configuration.'),
|
|
1747
1978
|
/** Vector store configuration. */
|
|
1748
1979
|
vectorStore: vectorStoreConfigSchema.describe('Qdrant vector store configuration.'),
|
|
1749
|
-
/** Directory for persisted metadata. */
|
|
1750
|
-
metadataDir: z
|
|
1751
|
-
.string()
|
|
1752
|
-
.optional()
|
|
1753
|
-
.describe('Directory for persisted metadata sidecar files.'),
|
|
1754
1980
|
/** API server configuration. */
|
|
1755
1981
|
api: apiConfigSchema.optional().describe('API server configuration.'),
|
|
1756
|
-
/** Directory for persistent state files (issues.json, values.json).
|
|
1982
|
+
/** Directory for persistent state files (issues.json, values.json, enrichments.sqlite). */
|
|
1757
1983
|
stateDir: z
|
|
1758
1984
|
.string()
|
|
1759
1985
|
.optional()
|
|
1760
|
-
.describe('Directory for persistent state files (issues.json, values.json). Defaults to
|
|
1986
|
+
.describe('Directory for persistent state files (issues.json, values.json, enrichments.sqlite). Defaults to .jeeves-metadata.'),
|
|
1761
1987
|
/** Rules for inferring metadata from document properties (inline objects or file paths). */
|
|
1762
1988
|
inferenceRules: z
|
|
1763
1989
|
.array(z.union([inferenceRuleSchema, z.string()]))
|
|
@@ -3892,97 +4118,6 @@ function createPointsDeleteHandler(deps) {
|
|
|
3892
4118
|
}, deps.logger, 'PointsDelete');
|
|
3893
4119
|
}
|
|
3894
4120
|
|
|
3895
|
-
/**
|
|
3896
|
-
* @module util/normalizePath
|
|
3897
|
-
* Normalizes file paths for deterministic mapping: lowercase, forward slashes, optional drive letter stripping.
|
|
3898
|
-
*/
|
|
3899
|
-
/**
|
|
3900
|
-
* Normalize a file path: lowercase, forward slashes, optionally strip drive letter colon.
|
|
3901
|
-
*
|
|
3902
|
-
* @param filePath - The original file path.
|
|
3903
|
-
* @param stripDriveLetter - Whether to strip the colon from a leading drive letter (e.g. `C:` → `c`).
|
|
3904
|
-
* @returns The normalized path string.
|
|
3905
|
-
*/
|
|
3906
|
-
function normalizePath(filePath, stripDriveLetter = false) {
|
|
3907
|
-
let result = filePath.replace(/\\/g, '/').toLowerCase();
|
|
3908
|
-
if (stripDriveLetter) {
|
|
3909
|
-
result = result.replace(/^([a-z]):/, (_m, letter) => letter);
|
|
3910
|
-
}
|
|
3911
|
-
return result;
|
|
3912
|
-
}
|
|
3913
|
-
|
|
3914
|
-
/**
|
|
3915
|
-
* @module metadata/metadata
|
|
3916
|
-
* Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
|
|
3917
|
-
*/
|
|
3918
|
-
/**
|
|
3919
|
-
* Derive a deterministic `.meta.json` path for a given file.
|
|
3920
|
-
*
|
|
3921
|
-
* @param filePath - The watched file path.
|
|
3922
|
-
* @param metadataDir - The root metadata directory.
|
|
3923
|
-
* @returns The full path to the metadata file.
|
|
3924
|
-
*/
|
|
3925
|
-
function metadataPath(filePath, metadataDir) {
|
|
3926
|
-
const normalised = normalizePath(filePath, true);
|
|
3927
|
-
const hash = createHash('sha256').update(normalised, 'utf8').digest('hex');
|
|
3928
|
-
return join(metadataDir, `${hash}.meta.json`);
|
|
3929
|
-
}
|
|
3930
|
-
/**
|
|
3931
|
-
* Read persisted metadata for a file.
|
|
3932
|
-
*
|
|
3933
|
-
* @param filePath - The watched file path.
|
|
3934
|
-
* @param metadataDir - The root metadata directory.
|
|
3935
|
-
* @returns The parsed metadata object, or `null` if not found.
|
|
3936
|
-
*/
|
|
3937
|
-
async function readMetadata(filePath, metadataDir) {
|
|
3938
|
-
try {
|
|
3939
|
-
const raw = await readFile(metadataPath(filePath, metadataDir), 'utf8');
|
|
3940
|
-
return JSON.parse(raw);
|
|
3941
|
-
}
|
|
3942
|
-
catch {
|
|
3943
|
-
return null;
|
|
3944
|
-
}
|
|
3945
|
-
}
|
|
3946
|
-
/**
|
|
3947
|
-
* Write metadata for a file.
|
|
3948
|
-
*
|
|
3949
|
-
* @param filePath - The watched file path.
|
|
3950
|
-
* @param metadataDir - The root metadata directory.
|
|
3951
|
-
* @param metadata - The metadata to persist.
|
|
3952
|
-
*/
|
|
3953
|
-
async function writeMetadata(filePath, metadataDir, metadata) {
|
|
3954
|
-
const dest = metadataPath(filePath, metadataDir);
|
|
3955
|
-
await mkdir(dirname(dest), { recursive: true });
|
|
3956
|
-
await writeFile(dest, JSON.stringify(metadata, null, 2), 'utf8');
|
|
3957
|
-
}
|
|
3958
|
-
/**
|
|
3959
|
-
* Delete metadata for a file.
|
|
3960
|
-
*
|
|
3961
|
-
* @param filePath - The watched file path.
|
|
3962
|
-
* @param metadataDir - The root metadata directory.
|
|
3963
|
-
*/
|
|
3964
|
-
async function deleteMetadata(filePath, metadataDir) {
|
|
3965
|
-
try {
|
|
3966
|
-
await rm(metadataPath(filePath, metadataDir));
|
|
3967
|
-
}
|
|
3968
|
-
catch {
|
|
3969
|
-
// Ignore if file doesn't exist.
|
|
3970
|
-
}
|
|
3971
|
-
}
|
|
3972
|
-
|
|
3973
|
-
/**
|
|
3974
|
-
* @module metadata/constants
|
|
3975
|
-
* Shared constants for metadata key classification. System keys are injected by the indexing pipeline, not user-provided.
|
|
3976
|
-
*/
|
|
3977
|
-
/** Keys managed by the indexing pipeline (not user enrichment). */
|
|
3978
|
-
const SYSTEM_METADATA_KEYS = [
|
|
3979
|
-
'file_path',
|
|
3980
|
-
'chunk_index',
|
|
3981
|
-
'total_chunks',
|
|
3982
|
-
'content_hash',
|
|
3983
|
-
'chunk_text',
|
|
3984
|
-
];
|
|
3985
|
-
|
|
3986
4121
|
/**
|
|
3987
4122
|
* @module processor/payloadFields
|
|
3988
4123
|
* Constants for Qdrant payload field names used across the processing pipeline.
|
|
@@ -4005,10 +4140,18 @@ const FIELD_MODIFIED_AT = 'modified_at';
|
|
|
4005
4140
|
const FIELD_LINE_START = 'line_start';
|
|
4006
4141
|
/** Qdrant payload field: 1-indexed line number where this chunk ends in the source file. */
|
|
4007
4142
|
const FIELD_LINE_END = 'line_end';
|
|
4143
|
+
/** Keys managed by the indexing pipeline (not user enrichment). */
// Consumers (e.g. the rebuild-metadata handler) omit these payload
// fields so only user/agent-provided enrichment remains.
const SYSTEM_METADATA_KEYS = [
  FIELD_FILE_PATH,
  FIELD_CHUNK_INDEX,
  FIELD_TOTAL_CHUNKS,
  FIELD_CONTENT_HASH,
  FIELD_CHUNK_TEXT,
];
|
|
4008
4151
|
|
|
4009
4152
|
/**
|
|
4010
4153
|
* @module api/handlers/rebuildMetadata
|
|
4011
|
-
* Fastify route handler for POST /rebuild-metadata.
|
|
4154
|
+
* Fastify route handler for POST /rebuild-metadata. Rebuilds enrichment store from vector store payloads.
|
|
4012
4155
|
*/
|
|
4013
4156
|
/**
|
|
4014
4157
|
* Create handler for POST /rebuild-metadata.
|
|
@@ -4017,7 +4160,6 @@ const FIELD_LINE_END = 'line_end';
|
|
|
4017
4160
|
*/
|
|
4018
4161
|
function createRebuildMetadataHandler(deps) {
|
|
4019
4162
|
return wrapHandler(async (_request, reply) => {
|
|
4020
|
-
const metadataDir = deps.metadataDir ?? '.jeeves-metadata';
|
|
4021
4163
|
const systemKeys = [...SYSTEM_METADATA_KEYS];
|
|
4022
4164
|
for await (const point of deps.vectorStore.scroll()) {
|
|
4023
4165
|
const payload = point.payload;
|
|
@@ -4025,7 +4167,7 @@ function createRebuildMetadataHandler(deps) {
|
|
|
4025
4167
|
if (typeof filePath !== 'string' || filePath.length === 0)
|
|
4026
4168
|
continue;
|
|
4027
4169
|
const enrichment = omit(payload, systemKeys);
|
|
4028
|
-
|
|
4170
|
+
deps.enrichmentStore?.set(filePath, enrichment);
|
|
4029
4171
|
}
|
|
4030
4172
|
return await reply.status(200).send({ ok: true });
|
|
4031
4173
|
}, deps.logger, 'Rebuild metadata');
|
|
@@ -4525,7 +4667,6 @@ async function buildTemplateEngineAndCustomMapLib(config, configDir) {
|
|
|
4525
4667
|
*/
|
|
4526
4668
|
function createProcessorConfig(config, configDir, customMapLib) {
|
|
4527
4669
|
return {
|
|
4528
|
-
metadataDir: config.metadataDir ?? '.jeeves-metadata',
|
|
4529
4670
|
chunkSize: config.embedding.chunkSize,
|
|
4530
4671
|
chunkOverlap: config.embedding.chunkOverlap,
|
|
4531
4672
|
maps: resolveMapsConfig(config.maps),
|
|
@@ -4537,7 +4678,7 @@ function createProcessorConfig(config, configDir, customMapLib) {
|
|
|
4537
4678
|
/**
|
|
4538
4679
|
* Create file system watcher with gitignore filtering.
|
|
4539
4680
|
*/
|
|
4540
|
-
function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker) {
|
|
4681
|
+
function createWatcher(config, factories, queue, processor, logger, runtimeOptions, initialScanTracker, contentHashCache) {
|
|
4541
4682
|
const respectGitignore = config.watch.respectGitignore ?? true;
|
|
4542
4683
|
const gitignoreFilter = respectGitignore
|
|
4543
4684
|
? new GitignoreFilter(config.watch.paths)
|
|
@@ -4548,6 +4689,7 @@ function createWatcher(config, factories, queue, processor, logger, runtimeOptio
|
|
|
4548
4689
|
onFatalError: runtimeOptions.onFatalError,
|
|
4549
4690
|
gitignoreFilter,
|
|
4550
4691
|
initialScanTracker,
|
|
4692
|
+
contentHashCache,
|
|
4551
4693
|
});
|
|
4552
4694
|
return { watcher, gitignoreFilter };
|
|
4553
4695
|
}
|
|
@@ -4748,7 +4890,7 @@ function createApiServer(options) {
|
|
|
4748
4890
|
hybridConfig,
|
|
4749
4891
|
}));
|
|
4750
4892
|
app.post('/rebuild-metadata', createRebuildMetadataHandler({
|
|
4751
|
-
|
|
4893
|
+
enrichmentStore: options.enrichmentStore,
|
|
4752
4894
|
vectorStore,
|
|
4753
4895
|
logger,
|
|
4754
4896
|
}));
|
|
@@ -4828,7 +4970,7 @@ function createApiServer(options) {
|
|
|
4828
4970
|
*/
|
|
4829
4971
|
/** Default root-level config values. */
|
|
4830
4972
|
const ROOT_DEFAULTS = {
|
|
4831
|
-
|
|
4973
|
+
stateDir: '.jeeves-metadata',
|
|
4832
4974
|
shutdownTimeoutMs: 10000,
|
|
4833
4975
|
};
|
|
4834
4976
|
/** Default configWatch values. */
|
|
@@ -4879,7 +5021,7 @@ const INIT_CONFIG_TEMPLATE = {
|
|
|
4879
5021
|
url: 'http://127.0.0.1:6333',
|
|
4880
5022
|
collectionName: 'jeeves-watcher',
|
|
4881
5023
|
},
|
|
4882
|
-
|
|
5024
|
+
stateDir: ROOT_DEFAULTS.stateDir,
|
|
4883
5025
|
api: API_DEFAULTS,
|
|
4884
5026
|
logging: LOGGING_DEFAULTS,
|
|
4885
5027
|
};
|
|
@@ -5221,7 +5363,7 @@ function createLogger(config) {
|
|
|
5221
5363
|
|
|
5222
5364
|
/**
|
|
5223
5365
|
* @module hash
|
|
5224
|
-
* Provides SHA-256 content hashing. Pure
|
|
5366
|
+
* Provides SHA-256 content hashing. Pure functions: text hash and file hash. File hash does I/O.
|
|
5225
5367
|
*/
|
|
5226
5368
|
/**
|
|
5227
5369
|
* Compute a SHA-256 hex digest of the given text.
|
|
@@ -5232,6 +5374,16 @@ function createLogger(config) {
|
|
|
5232
5374
|
function contentHash(text) {
|
|
5233
5375
|
return createHash('sha256').update(text, 'utf8').digest('hex');
|
|
5234
5376
|
}
|
|
5377
|
+
/**
 * Compute a SHA-256 hex digest of a file's raw bytes.
 *
 * Reads the whole file into memory before hashing (I/O).
 *
 * @param filePath - Path to the file.
 * @returns The hex-encoded SHA-256 hash.
 */
async function fileHash(filePath) {
  const bytes = await readFile(filePath);
  const digest = createHash('sha256');
  digest.update(bytes);
  return digest.digest('hex');
}
|
|
5235
5387
|
|
|
5236
5388
|
/**
|
|
5237
5389
|
* @module pointId
|
|
@@ -5391,7 +5543,7 @@ async function extractText(filePath, extension, additionalExtractors) {
|
|
|
5391
5543
|
|
|
5392
5544
|
/**
|
|
5393
5545
|
* @module processor/buildMetadata
|
|
5394
|
-
* Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text,
|
|
5546
|
+
* Builds merged metadata from file content, inference rules, and enrichment store. I/O: reads files, extracts text, queries SQLite enrichment.
|
|
5395
5547
|
*/
|
|
5396
5548
|
/**
|
|
5397
5549
|
* Build merged metadata for a file by applying inference rules and merging with enrichment metadata.
|
|
@@ -5400,7 +5552,7 @@ async function extractText(filePath, extension, additionalExtractors) {
|
|
|
5400
5552
|
* @returns The merged metadata and intermediate data.
|
|
5401
5553
|
*/
|
|
5402
5554
|
async function buildMergedMetadata(options) {
|
|
5403
|
-
const { filePath, compiledRules,
|
|
5555
|
+
const { filePath, compiledRules, enrichmentStore, maps, logger, templateEngine, configDir, customMapLib, globalSchemas, } = options;
|
|
5404
5556
|
const ext = extname(filePath);
|
|
5405
5557
|
const stats = await stat(filePath);
|
|
5406
5558
|
// 1. Extract text and structured data
|
|
@@ -5415,12 +5567,11 @@ async function buildMergedMetadata(options) {
|
|
|
5415
5567
|
customMapLib,
|
|
5416
5568
|
globalSchemas,
|
|
5417
5569
|
});
|
|
5418
|
-
// 3. Read enrichment metadata (merge
|
|
5419
|
-
const enrichment =
|
|
5420
|
-
const metadata =
|
|
5421
|
-
|
|
5422
|
-
...
|
|
5423
|
-
};
|
|
5570
|
+
// 3. Read enrichment metadata from store (composable merge)
|
|
5571
|
+
const enrichment = enrichmentStore?.get(filePath) ?? null;
|
|
5572
|
+
const metadata = enrichment
|
|
5573
|
+
? mergeEnrichment(inferred, enrichment)
|
|
5574
|
+
: { ...inferred };
|
|
5424
5575
|
return {
|
|
5425
5576
|
inferred,
|
|
5426
5577
|
enrichment,
|
|
@@ -5609,22 +5760,26 @@ class DocumentProcessor {
|
|
|
5609
5760
|
compiledRules;
|
|
5610
5761
|
logger;
|
|
5611
5762
|
templateEngine;
|
|
5763
|
+
enrichmentStore;
|
|
5612
5764
|
issuesManager;
|
|
5613
5765
|
valuesManager;
|
|
5766
|
+
contentHashCache;
|
|
5614
5767
|
/**
|
|
5615
5768
|
* Create a new DocumentProcessor.
|
|
5616
5769
|
*
|
|
5617
5770
|
* @param deps - The processor dependencies.
|
|
5618
5771
|
*/
|
|
5619
|
-
constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, issuesManager, valuesManager, }) {
|
|
5772
|
+
constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, enrichmentStore, issuesManager, valuesManager, contentHashCache, }) {
|
|
5620
5773
|
this.config = config;
|
|
5621
5774
|
this.embeddingProvider = embeddingProvider;
|
|
5622
5775
|
this.vectorStore = vectorStore;
|
|
5623
5776
|
this.compiledRules = compiledRules;
|
|
5624
5777
|
this.logger = logger;
|
|
5625
5778
|
this.templateEngine = templateEngine;
|
|
5779
|
+
this.enrichmentStore = enrichmentStore;
|
|
5626
5780
|
this.issuesManager = issuesManager;
|
|
5627
5781
|
this.valuesManager = valuesManager;
|
|
5782
|
+
this.contentHashCache = contentHashCache;
|
|
5628
5783
|
}
|
|
5629
5784
|
/**
|
|
5630
5785
|
* Build merged metadata for a file and add matched_rules.
|
|
@@ -5633,7 +5788,7 @@ class DocumentProcessor {
|
|
|
5633
5788
|
const result = await buildMergedMetadata({
|
|
5634
5789
|
filePath,
|
|
5635
5790
|
compiledRules: this.compiledRules,
|
|
5636
|
-
|
|
5791
|
+
enrichmentStore: this.enrichmentStore,
|
|
5637
5792
|
maps: this.config.maps,
|
|
5638
5793
|
logger: this.logger,
|
|
5639
5794
|
templateEngine: this.templateEngine,
|
|
@@ -5680,6 +5835,9 @@ class DocumentProcessor {
|
|
|
5680
5835
|
this.logger.debug({ filePath }, 'Skipping empty file');
|
|
5681
5836
|
return;
|
|
5682
5837
|
}
|
|
5838
|
+
// Compute file-level hash for move correlation cache.
|
|
5839
|
+
const rawHash = await fileHash(filePath);
|
|
5840
|
+
this.contentHashCache?.set(filePath, rawHash);
|
|
5683
5841
|
const hash = contentHash(textToEmbed);
|
|
5684
5842
|
const baseId = pointId(filePath, 0);
|
|
5685
5843
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
@@ -5716,12 +5874,13 @@ class DocumentProcessor {
|
|
|
5716
5874
|
const totalChunks = getChunkCount(existingPayload);
|
|
5717
5875
|
const ids = chunkIds(filePath, totalChunks);
|
|
5718
5876
|
await this.vectorStore.delete(ids);
|
|
5719
|
-
|
|
5877
|
+
this.enrichmentStore?.delete(filePath);
|
|
5878
|
+
this.contentHashCache?.delete(filePath);
|
|
5720
5879
|
this.logger.info({ filePath }, 'File deleted from index');
|
|
5721
5880
|
}, undefined);
|
|
5722
5881
|
}
|
|
5723
5882
|
/**
|
|
5724
|
-
* Process a metadata update: merge
|
|
5883
|
+
* Process a metadata update: merge into enrichment store, update Qdrant payloads (no re-embed).
|
|
5725
5884
|
*
|
|
5726
5885
|
* @param filePath - The file whose metadata to update.
|
|
5727
5886
|
* @param metadata - The new metadata to merge.
|
|
@@ -5729,9 +5888,8 @@ class DocumentProcessor {
|
|
|
5729
5888
|
*/
|
|
5730
5889
|
async processMetadataUpdate(filePath, metadata) {
|
|
5731
5890
|
return this.withFileErrorHandling(filePath, 'Failed to update metadata', async () => {
|
|
5732
|
-
|
|
5733
|
-
const merged =
|
|
5734
|
-
await writeMetadata(filePath, this.config.metadataDir, merged);
|
|
5891
|
+
this.enrichmentStore?.set(filePath, metadata);
|
|
5892
|
+
const merged = this.enrichmentStore?.get(filePath) ?? metadata;
|
|
5735
5893
|
const baseId = pointId(filePath, 0);
|
|
5736
5894
|
const existingPayload = await this.vectorStore.getPayload(baseId);
|
|
5737
5895
|
if (!existingPayload)
|
|
@@ -5793,6 +5951,56 @@ class DocumentProcessor {
|
|
|
5793
5951
|
transformed: renderedContent !== null,
|
|
5794
5952
|
};
|
|
5795
5953
|
}
|
|
5954
|
+
  /**
   * Move a file's vector points from old path to new path without re-embedding.
   * Re-applies inference rules against the new path.
   *
   * Ordering: new points are upserted before old IDs are deleted, then
   * enrichment is re-keyed, issues for the old path are cleared, the
   * values index is updated, and the content hash cache is migrated.
   *
   * @param oldPath - The original file path.
   * @param newPath - The new file path.
   */
  async moveFile(oldPath, newPath) {
    await this.withFileErrorHandling(newPath, 'Failed to move file', async () => {
      // Chunk count is read from the base (chunk 0) point's payload.
      const baseId = pointId(oldPath, 0);
      const existingPayload = await this.vectorStore.getPayload(baseId);
      const totalChunks = getChunkCount(existingPayload);
      const oldIds = chunkIds(oldPath, totalChunks);
      const oldPoints = await this.vectorStore.getPointsWithVectors(oldIds);
      if (oldPoints.length === 0) {
        // Nothing to migrate — warn rather than fail the move.
        this.logger.warn({ oldPath, newPath }, 'No points found for move');
        return;
      }
      // Build new metadata from inference rules against the new path.
      const { metadataWithRules, matchedRules, metadata } = await this.buildMetadataWithRules(newPath);
      // Create new points with updated IDs and file_path payload.
      // NOTE(review): assumes oldPoints[i] corresponds to chunk index i —
      // i.e. that getPointsWithVectors preserves the order of oldIds.
      // Confirm the retrieval preserves request order.
      const newPoints = oldPoints.map((pt, i) => ({
        id: pointId(newPath, i),
        vector: pt.vector,
        payload: {
          ...pt.payload,
          ...metadataWithRules,
          [FIELD_FILE_PATH]: normalizeSlashes(newPath),
        },
      }));
      await this.vectorStore.upsert(newPoints);
      await this.vectorStore.delete(oldIds);
      // Migrate enrichment and clear old issues.
      this.enrichmentStore?.move(oldPath, newPath);
      this.issuesManager?.clear(oldPath);
      // Update values index for the new path's matched rules.
      if (this.valuesManager) {
        for (const ruleName of matchedRules) {
          this.valuesManager.update(ruleName, metadata);
        }
      }
      // Update content hash cache: carry the old path's hash over to the
      // new path (when present), then drop the old entry.
      const oldHash = this.contentHashCache?.get(oldPath);
      if (oldHash) {
        this.contentHashCache?.set(newPath, oldHash);
      }
      this.contentHashCache?.delete(oldPath);
      this.logger.info({ oldPath, newPath, chunks: oldPoints.length }, 'File moved in index');
    }, undefined);
  }
|
|
5796
6004
|
/**
|
|
5797
6005
|
* Update compiled inference rules, template engine, and custom map lib.
|
|
5798
6006
|
*
|
|
@@ -6451,6 +6659,33 @@ class VectorStoreClient {
|
|
|
6451
6659
|
async hybridSearch(vector, queryText, limit, textWeight, filter) {
|
|
6452
6660
|
return hybridSearch(this.client, this.collectionName, vector, queryText, limit, textWeight, filter);
|
|
6453
6661
|
}
|
|
6662
|
+
  /**
   * Retrieve points with their vectors by ID.
   *
   * Points returned by the client without a vector are filtered out, so
   * the result may be shorter than `ids` even when every ID exists.
   *
   * @param ids - The point IDs to retrieve.
   * @returns Points with vectors and payloads; missing IDs are omitted.
   */
  async getPointsWithVectors(ids) {
    if (ids.length === 0)
      return [];
    try {
      const results = await this.client.retrieve(this.collectionName, {
        ids,
        with_payload: true,
        with_vector: true,
      });
      // NOTE(review): the returned order is whatever the client yields —
      // not guaranteed to match `ids`; callers relying on positional
      // correspondence should confirm.
      return results
        .filter((r) => r.vector != null)
        .map((r) => ({
          id: String(r.id),
          vector: r.vector,
          payload: r.payload,
        }));
    }
    catch {
      // Best-effort: any retrieval error is swallowed and reported as
      // "no points". NOTE(review): consider logging the error so real
      // failures are distinguishable from empty results.
      return [];
    }
  }
|
|
6454
6689
|
/**
|
|
6455
6690
|
* Scroll one page of points matching a filter.
|
|
6456
6691
|
*
|
|
@@ -6658,6 +6893,163 @@ function resolveIgnored(ignored) {
|
|
|
6658
6893
|
});
|
|
6659
6894
|
}
|
|
6660
6895
|
|
|
6896
|
+
/**
 * @module watcher/MoveCorrelator
 * Correlates unlink+add events as file moves using content hash matching.
 * Buffers unlink events and matches against subsequent add events.
 */
/**
 * Correlates unlink+add file system events as moves using content hash matching.
 *
 * An unlink whose content hash is cached is buffered for a short window; if an
 * add event with the same content hash arrives within that window, the pair is
 * reported as a move. Otherwise the buffered unlink expires into a plain
 * delete. When move detection is disabled, events pass straight through to the
 * create/delete callbacks.
 */
class MoveCorrelator {
    /** Whether move correlation is active; when false, events pass through. */
    enabled;
    /** Base buffering window (ms) before an unmatched unlink becomes a delete. */
    bufferMs;
    /** Content hash cache consulted for unlinked paths (path -> hash). */
    cache;
    logger;
    /** Callback invoked with (oldPath, newPath) when a move is detected. */
    onMove;
    /** Callback invoked when an unlink is confirmed as a delete. */
    onDelete;
    /** Callback invoked when an add has no matching buffered unlink. */
    onCreate;
    /** Buffered unlinks indexed by content hash (FIFO per hash). */
    buffer = new Map();
    /** Track unlink burst rate per parent directory for bulk mode. */
    burstCounters = new Map();
    /** Threshold: if N+ unlinks from same parent in burstWindowMs, extend buffer. */
    static BURST_THRESHOLD = 5;
    static BURST_WINDOW_MS = 500;
    static BURST_MULTIPLIER = 3;
    constructor(options) {
        this.enabled = options.enabled;
        this.bufferMs = options.bufferMs;
        this.cache = options.contentHashCache;
        this.logger = options.logger;
        this.onMove = options.onMove;
        this.onDelete = options.onDelete;
        this.onCreate = options.onCreate;
    }
    /**
     * Handle an unlink event. Buffers the event for correlation.
     *
     * Falls back to an immediate delete when correlation is disabled or when
     * no content hash is cached for the path (nothing to match against).
     *
     * @param path - The removed file path.
     */
    handleUnlink(path) {
        if (!this.enabled) {
            this.onDelete(path);
            return;
        }
        const hash = this.cache.get(path);
        if (!hash) {
            this.logger.debug({ path }, 'No cached hash for unlinked file, treating as delete');
            this.onDelete(path);
            return;
        }
        const timeoutMs = this.getEffectiveTimeout(path);
        // If no matching add consumes this entry first, the timer converts it
        // into a delete via expireUnlink.
        const timer = setTimeout(() => {
            this.expireUnlink(hash, path);
        }, timeoutMs);
        const entry = {
            path,
            hash,
            timestamp: Date.now(),
            timer,
        };
        let entries = this.buffer.get(hash);
        if (!entries) {
            entries = [];
            this.buffer.set(hash, entries);
        }
        entries.push(entry);
        this.logger.debug({ path, hash: hash.slice(0, 12), timeoutMs }, 'Buffered unlink for move correlation');
    }
    /**
     * Handle an add event. Checks buffer for matching unlink (move detection).
     *
     * Hashes the added file; if a buffered unlink with the same hash exists,
     * the oldest one is consumed and reported as a move. Hash failures (e.g.
     * the file vanished again before it could be read) degrade to a create.
     *
     * @param path - The added file path.
     */
    async handleAdd(path) {
        if (!this.enabled) {
            this.onCreate(path);
            return;
        }
        let hash;
        try {
            hash = await fileHash(path);
        }
        catch {
            // Could not read the file to hash it; treat as an ordinary create.
            this.onCreate(path);
            return;
        }
        const entries = this.buffer.get(hash);
        if (entries && entries.length > 0) {
            // FIFO: consume oldest matching unlink
            const matched = entries.shift();
            clearTimeout(matched.timer);
            if (entries.length === 0)
                this.buffer.delete(hash);
            this.logger.info({ oldPath: matched.path, newPath: path }, 'Move detected');
            this.onMove(matched.path, path);
        }
        else {
            this.onCreate(path);
        }
    }
    /**
     * Flush all buffered unlinks as deletes. Call on shutdown.
     */
    flush() {
        for (const [, entries] of this.buffer) {
            for (const entry of entries) {
                clearTimeout(entry.timer);
                this.onDelete(entry.path);
            }
        }
        this.buffer.clear();
        this.burstCounters.clear();
    }
    /** Number of currently buffered unlink events. */
    get pendingCount() {
        let count = 0;
        for (const [, entries] of this.buffer) {
            count += entries.length;
        }
        return count;
    }
    /**
     * Get effective timeout, applying burst detection for bulk moves.
     *
     * Also prunes burst counters whose window has elapsed so the
     * per-directory map cannot grow without bound in a long-running watcher.
     * An expired counter would have been reset on next lookup anyway, so
     * pruning does not change the returned timeout.
     *
     * @param path - The unlinked file path (burst tracking keys on its parent).
     * @returns The buffering window in milliseconds.
     */
    getEffectiveTimeout(path) {
        const parentDir = dirname(path);
        const now = Date.now();
        // Memory hygiene: drop counters whose burst window has elapsed.
        for (const [dir, stale] of this.burstCounters) {
            if (now - stale.firstTs > MoveCorrelator.BURST_WINDOW_MS) {
                this.burstCounters.delete(dir);
            }
        }
        let counter = this.burstCounters.get(parentDir);
        if (!counter) {
            counter = { count: 0, firstTs: now };
            this.burstCounters.set(parentDir, counter);
        }
        counter.count++;
        if (counter.count >= MoveCorrelator.BURST_THRESHOLD) {
            return this.bufferMs * MoveCorrelator.BURST_MULTIPLIER;
        }
        return this.bufferMs;
    }
    /**
     * Handle a buffered unlink timeout — emit as delete.
     *
     * Removes the expired entry from the buffer (if still present) and then
     * reports the path as deleted.
     */
    expireUnlink(hash, path) {
        const entries = this.buffer.get(hash);
        if (entries) {
            const idx = entries.findIndex((e) => e.path === path);
            if (idx >= 0) {
                entries.splice(idx, 1);
                if (entries.length === 0)
                    this.buffer.delete(hash);
            }
        }
        this.logger.debug({ path, hash: hash.slice(0, 12) }, 'Buffered unlink expired, treating as delete');
        this.onDelete(path);
    }
}
|
|
7052
|
+
|
|
6661
7053
|
/**
|
|
6662
7054
|
* @module watcher
|
|
6663
7055
|
* Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
|
|
@@ -6673,6 +7065,8 @@ class FileSystemWatcher {
|
|
|
6673
7065
|
health;
|
|
6674
7066
|
gitignoreFilter;
|
|
6675
7067
|
initialScanTracker;
|
|
7068
|
+
contentHashCache;
|
|
7069
|
+
moveCorrelator;
|
|
6676
7070
|
globMatches;
|
|
6677
7071
|
watcher;
|
|
6678
7072
|
/**
|
|
@@ -6691,6 +7085,7 @@ class FileSystemWatcher {
|
|
|
6691
7085
|
this.logger = logger;
|
|
6692
7086
|
this.gitignoreFilter = options.gitignoreFilter;
|
|
6693
7087
|
this.initialScanTracker = options.initialScanTracker;
|
|
7088
|
+
this.contentHashCache = options.contentHashCache;
|
|
6694
7089
|
this.globMatches = () => true;
|
|
6695
7090
|
const healthOptions = {
|
|
6696
7091
|
maxRetries: options.maxRetries,
|
|
@@ -6736,6 +7131,26 @@ class FileSystemWatcher {
|
|
|
6736
7131
|
}
|
|
6737
7132
|
}
|
|
6738
7133
|
};
|
|
7134
|
+
// Create move correlator if move detection is configured and cache is available.
|
|
7135
|
+
const moveConfig = this.config.moveDetection;
|
|
7136
|
+
if (moveConfig?.enabled && this.contentHashCache) {
|
|
7137
|
+
this.moveCorrelator = new MoveCorrelator({
|
|
7138
|
+
enabled: true,
|
|
7139
|
+
bufferMs: moveConfig.bufferMs,
|
|
7140
|
+
contentHashCache: this.contentHashCache,
|
|
7141
|
+
logger: this.logger,
|
|
7142
|
+
onMove: (oldPath, newPath) => {
|
|
7143
|
+
this.queue.enqueue({ type: 'move', path: newPath, oldPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.moveFile(oldPath, newPath)));
|
|
7144
|
+
},
|
|
7145
|
+
onDelete: (deletedPath) => {
|
|
7146
|
+
this.queue.enqueue({ type: 'delete', path: deletedPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(deletedPath)));
|
|
7147
|
+
},
|
|
7148
|
+
onCreate: (createdPath) => {
|
|
7149
|
+
this.queue.enqueue({ type: 'create', path: createdPath, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(createdPath)));
|
|
7150
|
+
},
|
|
7151
|
+
});
|
|
7152
|
+
}
|
|
7153
|
+
const correlator = this.moveCorrelator;
|
|
6739
7154
|
this.watcher = chokidar.watch(roots, {
|
|
6740
7155
|
ignored,
|
|
6741
7156
|
usePolling: this.config.usePolling,
|
|
@@ -6769,7 +7184,12 @@ class FileSystemWatcher {
|
|
|
6769
7184
|
this.initialScanTracker?.incrementEnqueued();
|
|
6770
7185
|
}
|
|
6771
7186
|
this.logger.debug({ path }, 'File added');
|
|
6772
|
-
|
|
7187
|
+
if (correlator && initialScanComplete) {
|
|
7188
|
+
void correlator.handleAdd(path);
|
|
7189
|
+
}
|
|
7190
|
+
else {
|
|
7191
|
+
this.queue.enqueue({ type: 'create', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.processFile(path)));
|
|
7192
|
+
}
|
|
6773
7193
|
});
|
|
6774
7194
|
this.watcher.on('change', (path) => {
|
|
6775
7195
|
this.handleGitignoreChange(path);
|
|
@@ -6789,7 +7209,12 @@ class FileSystemWatcher {
|
|
|
6789
7209
|
if (this.isGitignored(path))
|
|
6790
7210
|
return;
|
|
6791
7211
|
this.logger.debug({ path }, 'File removed');
|
|
6792
|
-
|
|
7212
|
+
if (correlator) {
|
|
7213
|
+
correlator.handleUnlink(path);
|
|
7214
|
+
}
|
|
7215
|
+
else {
|
|
7216
|
+
this.queue.enqueue({ type: 'delete', path, priority: 'normal' }, () => this.wrapProcessing(() => this.processor.deleteFile(path)));
|
|
7217
|
+
}
|
|
6793
7218
|
});
|
|
6794
7219
|
this.watcher.on('ready', () => {
|
|
6795
7220
|
initialScanComplete = true;
|
|
@@ -6834,6 +7259,7 @@ class FileSystemWatcher {
|
|
|
6834
7259
|
* Stop the filesystem watcher.
|
|
6835
7260
|
*/
|
|
6836
7261
|
async stop() {
|
|
7262
|
+
this.moveCorrelator?.flush();
|
|
6837
7263
|
if (this.watcher) {
|
|
6838
7264
|
await this.watcher.close();
|
|
6839
7265
|
this.watcher = undefined;
|
|
@@ -6967,6 +7393,8 @@ class JeevesWatcher {
|
|
|
6967
7393
|
vectorStore;
|
|
6968
7394
|
embeddingProvider;
|
|
6969
7395
|
gitignoreFilter;
|
|
7396
|
+
enrichmentStore;
|
|
7397
|
+
contentHashCache;
|
|
6970
7398
|
initialScanTracker;
|
|
6971
7399
|
version;
|
|
6972
7400
|
/** Create a new JeevesWatcher instance. */
|
|
@@ -7007,9 +7435,13 @@ class JeevesWatcher {
|
|
|
7007
7435
|
const { templateEngine, customMapLib } = await buildTemplateEngineAndCustomMapLib(this.config, configDir);
|
|
7008
7436
|
this.helperIntrospection = await introspectHelpers(this.config, configDir);
|
|
7009
7437
|
const processorConfig = createProcessorConfig(this.config, configDir, customMapLib);
|
|
7010
|
-
const stateDir = this.config.stateDir ??
|
|
7438
|
+
const stateDir = this.config.stateDir ?? '.jeeves-metadata';
|
|
7011
7439
|
this.issuesManager = new IssuesManager(stateDir, logger);
|
|
7012
7440
|
this.valuesManager = new ValuesManager(stateDir, logger);
|
|
7441
|
+
this.enrichmentStore = new EnrichmentStore(stateDir);
|
|
7442
|
+
const enrichmentStore = this.enrichmentStore;
|
|
7443
|
+
this.contentHashCache = new ContentHashCache();
|
|
7444
|
+
const contentHashCache = this.contentHashCache;
|
|
7013
7445
|
const processor = this.factories.createDocumentProcessor({
|
|
7014
7446
|
config: processorConfig,
|
|
7015
7447
|
embeddingProvider,
|
|
@@ -7017,8 +7449,10 @@ class JeevesWatcher {
|
|
|
7017
7449
|
compiledRules,
|
|
7018
7450
|
logger,
|
|
7019
7451
|
templateEngine,
|
|
7452
|
+
enrichmentStore,
|
|
7020
7453
|
issuesManager: this.issuesManager,
|
|
7021
7454
|
valuesManager: this.valuesManager,
|
|
7455
|
+
contentHashCache,
|
|
7022
7456
|
});
|
|
7023
7457
|
this.processor = processor;
|
|
7024
7458
|
this.queue = this.factories.createEventQueue({
|
|
@@ -7026,7 +7460,7 @@ class JeevesWatcher {
|
|
|
7026
7460
|
concurrency: this.config.embedding.concurrency ?? 5,
|
|
7027
7461
|
rateLimitPerMinute: this.config.embedding.rateLimitPerMinute,
|
|
7028
7462
|
});
|
|
7029
|
-
const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker);
|
|
7463
|
+
const { watcher, gitignoreFilter } = createWatcher(this.config, this.factories, this.queue, processor, logger, this.runtimeOptions, this.initialScanTracker, contentHashCache);
|
|
7030
7464
|
this.watcher = watcher;
|
|
7031
7465
|
this.gitignoreFilter = gitignoreFilter;
|
|
7032
7466
|
this.server = await this.startApiServer();
|
|
@@ -7078,6 +7512,7 @@ class JeevesWatcher {
|
|
|
7078
7512
|
version: this.version,
|
|
7079
7513
|
initialScanTracker: this.initialScanTracker,
|
|
7080
7514
|
fileSystemWatcher: this.watcher,
|
|
7515
|
+
enrichmentStore: this.enrichmentStore,
|
|
7081
7516
|
});
|
|
7082
7517
|
await server.listen({
|
|
7083
7518
|
host: this.config.api?.host ?? '127.0.0.1',
|