npm - @karmaniverous/jeeves-watcher - Versions diffs - 0.1.0 → 0.2.0 - Mend

@karmaniverous/jeeves-watcher 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +12 -16
package/config.schema.json +577 -0
package/dist/cjs/index.js +800 -340
package/dist/cli/jeeves-watcher/index.js +1130 -517
package/dist/index.d.ts +160 -103
package/dist/index.iife.js +796 -339
package/dist/index.iife.min.js +1 -1
package/dist/mjs/index.js +793 -341
package/package.json +28 -22

package/dist/mjs/index.js CHANGED Viewed

@@ -1,21 +1,28 @@
 import Fastify from 'fastify';
+import { omit, get } from 'radash';
 import { createHash } from 'node:crypto';
 import { readFile, mkdir, writeFile, rm, readdir, stat } from 'node:fs/promises';
 import { join, dirname, resolve, extname, basename } from 'node:path';
 import picomatch from 'picomatch';
 import chokidar from 'chokidar';
-import Ajv from 'ajv';
 import { cosmiconfig } from 'cosmiconfig';
+import { z, ZodError } from 'zod';
+import { jsonMapMapSchema, JsonMap } from '@karmaniverous/jsonmap';
 import { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';
 import pino from 'pino';
-import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
+import { v5 } from 'uuid';
 import * as cheerio from 'cheerio';
 import yaml from 'js-yaml';
 import mammoth from 'mammoth';
-import { v5 } from 'uuid';
+import Ajv from 'ajv';
 import addFormats from 'ajv-formats';
+import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import { QdrantClient } from '@qdrant/js-client-rest';
+/**
+ * @module metadata/metadata
+ * Persists file metadata as .meta.json. I/O: reads/writes/deletes metadata files under metadataDir. Path mapping via SHA-256 hash.
+ */
 /**
  * Normalise a file path for deterministic mapping: lowercase, forward slashes, strip leading drive letter colon.
  *
@@ -155,6 +162,30 @@ async function listFilesFromGlobs(patterns, ignored = []) {
     return Array.from(seen);
 }
+/**
+ * @module processAllFiles
+ *
+ * Shared helper for processing all files matching configured globs.
+ */
+/**
+ * Process all files from globs using the specified processor method.
+ *
+ * Files are resolved once via listFilesFromGlobs and then handled strictly one
+ * at a time. There is no per-file error handling: if the processor call for a
+ * file rejects, that rejection propagates to the caller and the remaining
+ * files are not processed.
+ *
+ * @param watchPaths - The glob patterns to match.
+ * @param ignoredPaths - The glob patterns to ignore.
+ * @param processor - The document processor instance.
+ * @param method - The processor method to call ('processFile' or 'processRulesUpdate').
+ * @returns The number of files matched by the globs (reached only when every file was processed without error).
+ */
+async function processAllFiles(watchPaths, ignoredPaths, processor, method) {
+    const files = await listFilesFromGlobs(watchPaths, ignoredPaths);
+    for (const file of files) {
+        // Sequential on purpose to avoid surprising load.
+        // Queue integration can come later.
+        // The method is looked up by name on the processor at call time.
+        await processor[method](file);
+    }
+    return files.length;
+}
 /**
  * Create the Fastify API server with all routes registered.
  *
@@ -195,15 +226,8 @@ function createApiServer(options) {
     });
     app.post('/reindex', async (_request, reply) => {
         try {
-            const files = await listFilesFromGlobs(options.config.watch.paths, options.config.watch.ignored);
-            for (const file of files) {
-                // Sequential on purpose to avoid surprising load.
-                // Queue integration can come later.
-                await processor.processFile(file);
-            }
-            return await reply
-                .status(200)
-                .send({ ok: true, filesIndexed: files.length });
+            const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
+            return await reply.status(200).send({ ok: true, filesIndexed: count });
         }
         catch (error) {
             logger.error({ error }, 'Reindex failed');
@@ -213,19 +237,21 @@ function createApiServer(options) {
     app.post('/rebuild-metadata', async (_request, reply) => {
         try {
             const metadataDir = options.config.metadataDir ?? '.jeeves-metadata';
+            const SYSTEM_KEYS = [
+                'file_path',
+                'chunk_index',
+                'total_chunks',
+                'content_hash',
+                'chunk_text',
+            ];
             for await (const point of vectorStore.scroll()) {
                 const payload = point.payload;
                 const filePath = payload['file_path'];
                 if (typeof filePath !== 'string' || filePath.length === 0)
                     continue;
                 // Persist only enrichment-ish fields, not chunking/index fields.
-                const rest = { ...payload };
-                delete rest.file_path;
-                delete rest.chunk_index;
-                delete rest.total_chunks;
-                delete rest.content_hash;
-                delete rest.chunk_text;
-                await writeMetadata(filePath, metadataDir, rest);
+                const enrichment = omit(payload, SYSTEM_KEYS);
+                await writeMetadata(filePath, metadataDir, enrichment);
             }
             return await reply.status(200).send({ ok: true });
         }
@@ -242,20 +268,13 @@ function createApiServer(options) {
                 try {
                     if (scope === 'rules') {
                         // Re-apply inference rules to all files, update Qdrant payloads (no re-embedding)
-                        const files = await listFilesFromGlobs(options.config.watch.paths, options.config.watch.ignored);
-                        for (const file of files) {
-                            // Use the new processRulesUpdate method
-                            await processor.processRulesUpdate(file);
-                        }
-                        logger.info({ scope, filesProcessed: files.length }, 'Config reindex (rules) completed');
+                        const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processRulesUpdate');
+                        logger.info({ scope, filesProcessed: count }, 'Config reindex (rules) completed');
                     }
                     else {
                         // Full reindex: re-extract, re-embed, re-upsert
-                        const files = await listFilesFromGlobs(options.config.watch.paths, options.config.watch.ignored);
-                        for (const file of files) {
-                            await processor.processFile(file);
-                        }
-                        logger.info({ scope, filesProcessed: files.length }, 'Config reindex (full) completed');
+                        const count = await processAllFiles(options.config.watch.paths, options.config.watch.ignored, processor, 'processFile');
+                        logger.info({ scope, filesProcessed: count }, 'Config reindex (full) completed');
                     }
                 }
                 catch (error) {
@@ -272,117 +291,249 @@ function createApiServer(options) {
     return app;
 }
-const MODULE_NAME = 'jeeves-watcher';
-/** JSON Schema for validating jeeves-watcher configuration. */
-const configSchema = {
-    type: 'object',
-    required: ['watch', 'embedding', 'vectorStore'],
-    properties: {
-        watch: {
-            type: 'object',
-            required: ['paths'],
-            properties: {
-                paths: { type: 'array', items: { type: 'string' }, minItems: 1 },
-                ignored: { type: 'array', items: { type: 'string' } },
-                pollIntervalMs: { type: 'number' },
-                usePolling: { type: 'boolean' },
-                debounceMs: { type: 'number' },
-                stabilityThresholdMs: { type: 'number' },
-            },
-            additionalProperties: false,
-        },
-        configWatch: {
-            type: 'object',
-            properties: {
-                enabled: { type: 'boolean' },
-                debounceMs: { type: 'number' },
-            },
-            additionalProperties: false,
-        },
-        embedding: {
-            type: 'object',
-            required: ['provider', 'model'],
-            properties: {
-                provider: { type: 'string' },
-                model: { type: 'string' },
-                chunkSize: { type: 'number' },
-                chunkOverlap: { type: 'number' },
-                dimensions: { type: 'number' },
-                apiKey: { type: 'string' },
-                rateLimitPerMinute: { type: 'number' },
-                concurrency: { type: 'number' },
-            },
-            additionalProperties: false,
-        },
-        vectorStore: {
-            type: 'object',
-            required: ['url', 'collectionName'],
-            properties: {
-                url: { type: 'string' },
-                collectionName: { type: 'string' },
-                apiKey: { type: 'string' },
-            },
-            additionalProperties: false,
-        },
-        metadataDir: { type: 'string' },
-        api: {
-            type: 'object',
-            properties: {
-                host: { type: 'string' },
-                port: { type: 'number' },
-            },
-            additionalProperties: false,
-        },
-        extractors: { type: 'object' },
-        inferenceRules: {
-            type: 'array',
-            items: {
-                type: 'object',
-                required: ['match', 'set'],
-                properties: {
-                    match: { type: 'object' },
-                    set: { type: 'object' },
-                },
-                additionalProperties: false,
-            },
-        },
-        logging: {
-            type: 'object',
-            properties: {
-                level: { type: 'string' },
-                file: { type: 'string' },
-            },
-            additionalProperties: false,
-        },
-        shutdownTimeoutMs: { type: 'number' },
-    },
-    additionalProperties: false,
-};
-const ajv = new Ajv({ allErrors: true });
-const validate = ajv.compile(configSchema);
-/** Default values for optional configuration fields. */
-const DEFAULTS = {
-    configWatch: { enabled: true, debounceMs: 1000 },
+/**
+ * @module config/defaults
+ * Default configuration values for jeeves-watcher. Pure data export, no I/O or side effects.
+ */
+/** Default root-level config values. */
+const ROOT_DEFAULTS = {
     metadataDir: '.jeeves-watcher',
-    api: { host: '127.0.0.1', port: 3100 },
-    logging: { level: 'info' },
     shutdownTimeoutMs: 10000,
 };
-/** Default values for watch configuration. */
+/**
+ * Default configWatch values.
+ * applyDefaults spreads the user's `configWatch` section over these, so any key the user sets wins.
+ */
+const CONFIG_WATCH_DEFAULTS = {
+    enabled: true,
+    debounceMs: 1000,
+};
+/**
+ * Default API values.
+ * applyDefaults spreads the user's `api` section over these, so any key the user sets wins.
+ */
+const API_DEFAULTS = {
+    host: '127.0.0.1',
+    port: 3456,
+};
+/**
+ * Default logging values.
+ * applyDefaults spreads the user's `logging` section over these; no default log file is set here.
+ */
+const LOGGING_DEFAULTS = {
+    level: 'info',
+};
+/** Default watch configuration. */
 const WATCH_DEFAULTS = {
     debounceMs: 300,
     stabilityThresholdMs: 500,
     usePolling: false,
     pollIntervalMs: 1000,
 };
-/** Default values for embedding configuration. */
+/** Default embedding configuration. */
 const EMBEDDING_DEFAULTS = {
     chunkSize: 1000,
     chunkOverlap: 200,
-    dimensions: 768,
+    dimensions: 3072,
     rateLimitPerMinute: 300,
     concurrency: 5,
 };
+/**
+ * Watch configuration for file system monitoring.
+ *
+ * Only `paths` is required (a non-empty array of strings). The polling and
+ * timing fields are optional here; applyDefaults fills them from
+ * WATCH_DEFAULTS when they are absent. `ignored` has no default.
+ */
+const watchConfigSchema = z.object({
+    /** Glob patterns to watch. */
+    paths: z
+        .array(z.string())
+        .min(1)
+        .describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),
+    /** Glob patterns to ignore. */
+    ignored: z
+        .array(z.string())
+        .optional()
+        .describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),
+    /** Polling interval in milliseconds. */
+    pollIntervalMs: z
+        .number()
+        .optional()
+        .describe('Polling interval in milliseconds when usePolling is enabled.'),
+    /** Whether to use polling instead of native watchers. */
+    usePolling: z
+        .boolean()
+        .optional()
+        .describe('Use polling instead of native file system events (for network drives).'),
+    /** Debounce delay in milliseconds for file change events. */
+    debounceMs: z
+        .number()
+        .optional()
+        .describe('Debounce delay in milliseconds for file change events.'),
+    /** Time in milliseconds a file must be stable before processing. */
+    stabilityThresholdMs: z
+        .number()
+        .optional()
+        .describe('Time in milliseconds a file must remain unchanged before processing.'),
+});
+/**
+ * Configuration watch settings.
+ *
+ * Both fields are optional; applyDefaults fills missing ones from
+ * CONFIG_WATCH_DEFAULTS.
+ */
+const configWatchConfigSchema = z.object({
+    /** Whether config file watching is enabled. */
+    enabled: z
+        .boolean()
+        .optional()
+        .describe('Enable automatic reloading when config file changes.'),
+    /** Debounce delay in milliseconds for config change events. */
+    debounceMs: z
+        .number()
+        .optional()
+        .describe('Debounce delay in milliseconds for config file change detection.'),
+});
+/**
+ * Embedding model configuration.
+ *
+ * `provider` and `model` carry schema-level defaults, so they may be omitted
+ * from the `embedding` section. The remaining fields are optional here;
+ * applyDefaults fills the numeric ones from EMBEDDING_DEFAULTS. `apiKey` has
+ * no default.
+ */
+const embeddingConfigSchema = z.object({
+    /** The embedding model provider. */
+    provider: z
+        .string()
+        .default('gemini')
+        .describe('Embedding provider name (e.g., "gemini", "openai").'),
+    /** The embedding model name. */
+    model: z
+        .string()
+        .default('gemini-embedding-001')
+        .describe('Embedding model identifier (e.g., "gemini-embedding-001", "text-embedding-3-small").'),
+    /** Maximum chunk size in characters for splitting. */
+    chunkSize: z
+        .number()
+        .optional()
+        .describe('Maximum chunk size in characters for text splitting.'),
+    /** Overlap between consecutive chunks in characters. */
+    chunkOverlap: z
+        .number()
+        .optional()
+        .describe('Character overlap between consecutive chunks.'),
+    /** Embedding vector dimensions. */
+    dimensions: z
+        .number()
+        .optional()
+        .describe('Embedding vector dimensions (must match model output).'),
+    /** API key for the embedding provider. */
+    apiKey: z
+        .string()
+        .optional()
+        .describe('API key for embedding provider (supports ${ENV_VAR} substitution).'),
+    /** Maximum embedding requests per minute. */
+    rateLimitPerMinute: z
+        .number()
+        .optional()
+        .describe('Maximum embedding API requests per minute (rate limiting).'),
+    /** Maximum concurrent embedding requests. */
+    concurrency: z
+        .number()
+        .optional()
+        .describe('Maximum concurrent embedding requests.'),
+});
+/**
+ * Vector store configuration for Qdrant.
+ *
+ * `url` and `collectionName` are required strings; `apiKey` is optional.
+ */
+const vectorStoreConfigSchema = z.object({
+    /** Qdrant server URL. */
+    url: z
+        .string()
+        .describe('Qdrant server URL (e.g., "http://localhost:6333").'),
+    /** Qdrant collection name. */
+    collectionName: z
+        .string()
+        .describe('Qdrant collection name for vector storage.'),
+    /** Qdrant API key. */
+    apiKey: z
+        .string()
+        .optional()
+        .describe('Qdrant API key for authentication (supports ${ENV_VAR} substitution).'),
+});
+/**
+ * API server configuration.
+ *
+ * Both fields are optional; applyDefaults fills missing ones from
+ * API_DEFAULTS.
+ */
+const apiConfigSchema = z.object({
+    /** Host to bind to. */
+    host: z
+        .string()
+        .optional()
+        .describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),
+    /** Port to listen on. */
+    port: z.number().optional().describe('Port for API server (e.g., 3456).'),
+});
+/**
+ * Logging configuration.
+ *
+ * Both fields are optional; applyDefaults fills a missing `level` from
+ * LOGGING_DEFAULTS.
+ */
+const loggingConfigSchema = z.object({
+    /**
+     * Log level. Validated only as a string: the level names listed in the
+     * description are not enforced by this schema.
+     */
+    level: z
+        .string()
+        .optional()
+        .describe('Logging level (trace, debug, info, warn, error, fatal).'),
+    /** Log file path. */
+    file: z
+        .string()
+        .optional()
+        .describe('Path to log file (logs to stdout if omitted).'),
+});
+/**
+ * An inference rule that enriches document metadata.
+ *
+ * `match` and `set` are required string-keyed records with unconstrained
+ * values; `map` is optional.
+ */
+const inferenceRuleSchema = z.object({
+    /** JSON Schema object to match against document metadata. */
+    match: z
+        .record(z.string(), z.unknown())
+        .describe('JSON Schema object to match against file attributes.'),
+    /** Metadata fields to set when the rule matches. */
+    set: z
+        .record(z.string(), z.unknown())
+        .describe('Metadata fields to set when match succeeds.'),
+    /**
+     * JsonMap transformation (inline or reference to named map).
+     * A string value is accepted as-is; this schema does not itself check
+     * that it names an entry in the top-level `maps` record.
+     */
+    map: z
+        .union([jsonMapMapSchema, z.string()])
+        .optional()
+        .describe('JsonMap transformation (inline definition or named map reference).'),
+});
+/**
+ * Top-level configuration for jeeves-watcher.
+ *
+ * `watch`, `embedding` and `vectorStore` are required sections; everything
+ * else is optional. loadConfig parses the discovered config with this schema
+ * and then passes the result to applyDefaults.
+ *
+ * NOTE(review): no `.strict()` is applied to this or the nested object
+ * schemas, so unrecognised keys are presumably dropped rather than rejected
+ * (zod's default object behaviour), whereas the Ajv schema this replaces used
+ * `additionalProperties: false` -- confirm against the zod version in use.
+ */
+const jeevesWatcherConfigSchema = z.object({
+    /** File system watch configuration. */
+    watch: watchConfigSchema.describe('File system watch configuration.'),
+    /** Configuration file watch settings. */
+    configWatch: configWatchConfigSchema
+        .optional()
+        .describe('Configuration file watch settings.'),
+    /** Embedding model configuration. */
+    embedding: embeddingConfigSchema.describe('Embedding model configuration.'),
+    /** Vector store configuration. */
+    vectorStore: vectorStoreConfigSchema.describe('Qdrant vector store configuration.'),
+    /** Directory for persisted metadata. */
+    metadataDir: z
+        .string()
+        .optional()
+        .describe('Directory for persisted metadata sidecar files.'),
+    /** API server configuration. */
+    api: apiConfigSchema.optional().describe('API server configuration.'),
+    /** Extractor configurations keyed by name. */
+    extractors: z
+        .record(z.string(), z.unknown())
+        .optional()
+        .describe('Extractor configurations keyed by name.'),
+    /** Rules for inferring metadata from document properties. */
+    inferenceRules: z
+        .array(inferenceRuleSchema)
+        .optional()
+        .describe('Rules for inferring metadata from file attributes.'),
+    /** Reusable named JsonMap transformations. */
+    maps: z
+        .record(z.string(), jsonMapMapSchema)
+        .optional()
+        .describe('Reusable named JsonMap transformations.'),
+    /** Logging configuration. */
+    logging: loggingConfigSchema.optional().describe('Logging configuration.'),
+    /** Timeout in milliseconds for graceful shutdown. */
+    shutdownTimeoutMs: z
+        .number()
+        .optional()
+        .describe('Timeout in milliseconds for graceful shutdown.'),
+});
+const MODULE_NAME = 'jeeves-watcher';
 /**
  * Merge sensible defaults into a loaded configuration.
  *
@@ -391,13 +542,13 @@ const EMBEDDING_DEFAULTS = {
  */
 function applyDefaults(raw) {
     return {
-        ...DEFAULTS,
+        ...ROOT_DEFAULTS,
         ...raw,
         watch: { ...WATCH_DEFAULTS, ...raw.watch },
-        configWatch: { ...DEFAULTS.configWatch, ...raw.configWatch },
+        configWatch: { ...CONFIG_WATCH_DEFAULTS, ...raw.configWatch },
         embedding: { ...EMBEDDING_DEFAULTS, ...raw.embedding },
-        api: { ...DEFAULTS.api, ...raw.api },
-        logging: { ...DEFAULTS.logging, ...raw.logging },
+        api: { ...API_DEFAULTS, ...raw.api },
+        logging: { ...LOGGING_DEFAULTS, ...raw.logging },
     };
 }
 /**
@@ -415,21 +566,89 @@ async function loadConfig(configPath) {
     if (!result || result.isEmpty) {
         throw new Error('No jeeves-watcher configuration found. Create a .jeeves-watcherrc or jeeves-watcher.config.{js,ts,json,yaml} file.');
     }
-    const raw = result.config;
-    if (!validate(raw)) {
-        const errors = validate.errors
-            ?.map((e) => {
-            const instancePath = 'instancePath' in e
-                ? e.instancePath
-                : undefined;
-            return `${instancePath ?? '/'}: ${e.message ?? 'unknown error'}`;
-        })
-            .join('; ');
-        throw new Error(`Invalid jeeves-watcher configuration: ${errors ?? 'unknown error'}`);
+    try {
+        const validated = jeevesWatcherConfigSchema.parse(result.config);
+        return applyDefaults(validated);
+    }
+    catch (error) {
+        if (error instanceof ZodError) {
+            const errors = error.issues
+                .map((issue) => `${issue.path.join('.')}: ${issue.message}`)
+                .join('; ');
+            throw new Error(`Invalid jeeves-watcher configuration: ${errors}`);
+        }
+        throw error;
     }
-    return applyDefaults(raw);
 }
+/**
+ * @module util/retry
+ * Small async retry helper with exponential backoff. Side effects: sleeps between attempts; can invoke onRetry callback for logging.
+ */
+/**
+ * Wait for a number of milliseconds, optionally cancellable through an AbortSignal.
+ *
+ * @param ms - Delay in milliseconds. Values <= 0 resolve immediately, without looking at `signal`.
+ * @param signal - Optional AbortSignal. If it is already aborted, or aborts while waiting, the promise rejects.
+ * @returns A promise that resolves (with no value) once the delay has elapsed.
+ * @throws Rejects with `Error('Retry sleep aborted')` when the signal aborts.
+ */
+function sleep(ms, signal) {
+    if (ms <= 0)
+        return Promise.resolve();
+    return new Promise((resolve, reject) => {
+        const timer = setTimeout(() => {
+            cleanup();
+            resolve();
+        }, ms);
+        const onAbort = () => {
+            cleanup();
+            reject(new Error('Retry sleep aborted'));
+        };
+        // Shared teardown for both outcomes: stop the timer and detach the abort listener.
+        // It is only invoked after this declaration has run, so the earlier references are safe.
+        const cleanup = () => {
+            clearTimeout(timer);
+            if (signal)
+                signal.removeEventListener('abort', onAbort);
+        };
+        if (signal) {
+            // Already aborted: reject right away rather than registering a listener.
+            if (signal.aborted) {
+                onAbort();
+                return;
+            }
+            signal.addEventListener('abort', onAbort, { once: true });
+        }
+    });
+}
+/**
+ * Compute the backoff delay to wait after a failed attempt.
+ *
+ * The base delay doubles with each attempt (attempt 1 gives baseDelayMs,
+ * attempt 2 gives 2 * baseDelayMs, ...) and is capped at maxDelayMs. Jitter is
+ * applied after the cap, so with jitter > 0 the result can exceed maxDelayMs
+ * by up to that fraction.
+ *
+ * @param attempt - 1-based number of the attempt that just failed; values below 1 are treated as 1.
+ * @param baseDelayMs - Delay for the first retry, in milliseconds.
+ * @param maxDelayMs - Cap applied to the exponential delay before jitter.
+ * @param jitter - Fraction of random extra delay; the delay is multiplied by a random factor in [1, 1 + jitter). 0 disables jitter.
+ * @returns The delay in milliseconds, rounded to the nearest integer.
+ */
+function computeDelayMs(attempt, baseDelayMs, maxDelayMs, jitter = 0) {
+    const exp = Math.max(0, attempt - 1);
+    const raw = Math.min(maxDelayMs, baseDelayMs * 2 ** exp);
+    const factor = jitter > 0 ? 1 + Math.random() * jitter : 1;
+    return Math.round(raw * factor);
+}
+/**
+ * Retry an async operation using exponential backoff.
+ *
+ * The operation is tried up to `options.attempts` times (at least once). After
+ * each failure except the last, `options.onRetry` is called (if provided) with
+ * `{ attempt, attempts, delayMs, error }` and the helper sleeps for the
+ * computed delay before trying again.
+ *
+ * NOTE(review): if `options.attempts` is undefined or not a number,
+ * `Math.max(1, ...)` evaluates to NaN, the loop body never runs and
+ * `undefined` is thrown -- callers must always pass a numeric `attempts`.
+ *
+ * @param fn - Operation to execute. Receives the 1-based attempt number.
+ * @param options - Retry policy: `attempts`, `baseDelayMs`, `maxDelayMs`, optional `jitter`, optional `onRetry` callback and optional `signal` used to abort the wait between attempts.
+ * @returns The operation result.
+ * @throws The error from the final failed attempt, or the abort error from the sleep if `options.signal` aborts while waiting.
+ */
+async function retry(fn, options) {
+    const attempts = Math.max(1, options.attempts);
+    let lastError;
+    for (let attempt = 1; attempt <= attempts; attempt++) {
+        try {
+            return await fn(attempt);
+        }
+        catch (error) {
+            lastError = error;
+            const isLast = attempt >= attempts;
+            // No delay and no onRetry notification after the final attempt; fall through to the throw below.
+            if (isLast)
+                break;
+            const delayMs = computeDelayMs(attempt, options.baseDelayMs, options.maxDelayMs, options.jitter);
+            options.onRetry?.({ attempt, attempts, delayMs, error });
+            await sleep(delayMs, options.signal);
+        }
+    }
+    throw lastError;
+}
+/**
+ * @module embedding
+ *
+ * Embedding provider abstractions and registry-backed factory.
+ */
 /**
  * Create a mock embedding provider that generates deterministic vectors from content hashes.
  *
@@ -457,10 +676,11 @@ function createMockProvider(dimensions) {
  * Create a Gemini embedding provider using the Google Generative AI SDK.
  *
  * @param config - The embedding configuration.
+ * @param logger - Optional pino logger for retry warnings.
  * @returns A Gemini {@link EmbeddingProvider}.
  * @throws If the API key is missing.
  */
-function createGeminiProvider(config) {
+function createGeminiProvider(config, logger) {
     if (!config.apiKey) {
         throw new Error('Gemini embedding provider requires config.embedding.apiKey');
     }
@@ -472,8 +692,43 @@ function createGeminiProvider(config) {
     return {
         dimensions,
         async embed(texts) {
-            // embedDocuments returns vectors for multiple texts
-            const vectors = await embedder.embedDocuments(texts);
+            const vectors = await retry(async (attempt) => {
+                if (attempt > 1) {
+                    const msg = {
+                        attempt,
+                        provider: 'gemini',
+                        model: config.model,
+                    };
+                    if (logger) {
+                        logger.warn(msg, 'Retrying embedding request');
+                    }
+                    else {
+                        console.warn(msg, 'Retrying embedding request');
+                    }
+                }
+                // embedDocuments returns vectors for multiple texts
+                return embedder.embedDocuments(texts);
+            }, {
+                attempts: 5,
+                baseDelayMs: 500,
+                maxDelayMs: 10_000,
+                jitter: 0.2,
+                onRetry: ({ attempt, delayMs, error }) => {
+                    const msg = {
+                        attempt,
+                        delayMs,
+                        provider: 'gemini',
+                        model: config.model,
+                        error,
+                    };
+                    if (logger) {
+                        logger.warn(msg, 'Embedding call failed; will retry');
+                    }
+                    else {
+                        console.warn(msg, 'Embedding call failed; will retry');
+                    }
+                },
+            });
             // Validate dimensions
             for (const vector of vectors) {
                 if (vector.length !== dimensions) {
@@ -484,25 +739,36 @@ function createGeminiProvider(config) {
         },
     };
 }
+/**
+ * Registry adapter for the mock provider: reads the dimensions from the
+ * embedding config and delegates to createMockProvider.
+ *
+ * Declares only `config`, so the logger that createEmbeddingProvider passes as
+ * a second argument is ignored.
+ *
+ * @param config - The embedding configuration.
+ * @returns A mock embedding provider with `config.dimensions` dimensions, or 768 when that is null/undefined.
+ */
+function createMockFromConfig(config) {
+    const dimensions = config.dimensions ?? 768;
+    return createMockProvider(dimensions);
+}
+/**
+ * Maps a provider name (the `provider` field of the embedding config) to the
+ * factory that builds it. createEmbeddingProvider looks the name up here and
+ * calls the factory with `(config, logger)`; unknown names make it throw.
+ */
+const embeddingProviderRegistry = new Map([
+    ['mock', createMockFromConfig],
+    ['gemini', createGeminiProvider],
+]);
 /**
  * Create an embedding provider based on the given configuration.
  *
+ * Each provider is responsible for its own default dimensions.
+ *
  * @param config - The embedding configuration.
+ * @param logger - Optional pino logger for retry warnings.
  * @returns An {@link EmbeddingProvider} instance.
  * @throws If the configured provider is not supported.
  */
-function createEmbeddingProvider(config) {
-    const dimensions = config.dimensions ?? 768;
-    switch (config.provider) {
-        case 'mock':
-            return createMockProvider(dimensions);
-        case 'gemini':
-            return createGeminiProvider(config);
-        default:
-            throw new Error(`Unsupported embedding provider: ${config.provider}`);
+function createEmbeddingProvider(config, logger) {
+    const factory = embeddingProviderRegistry.get(config.provider);
+    if (!factory) {
+        throw new Error(`Unsupported embedding provider: ${config.provider}`);
     }
+    return factory(config, logger);
 }
+/**
+ * @module logger
+ * Creates pino logger instances. I/O: optionally writes logs to file via pino/file transport. Defaults to stdout at info level.
+ */
 /**
  * Create a pino logger instance.
  *
@@ -521,6 +787,54 @@ function createLogger(config) {
     return pino({ level });
 }
+/**
+ * @module hash
+ * Provides SHA-256 content hashing. Pure function: given text string, returns hex digest. No I/O or side effects.
+ */
+/**
+ * Compute a SHA-256 hex digest of the given text.
+ *
+ * The text is fed to the hash as UTF-8.
+ *
+ * @param text - The input text to hash.
+ * @returns The hex-encoded SHA-256 hash.
+ */
+function contentHash(text) {
+    return createHash('sha256').update(text, 'utf8').digest('hex');
+}
+/**
+ * @module pointId
+ * Generates deterministic UUIDv5 point IDs for file paths and chunk indices. Pure function: normalizes paths, returns stable IDs. No I/O.
+ */
+/**
+ * Namespace UUID for jeeves-watcher point IDs.
+ * Passed to `v5` in pointId; because the IDs are derived from it, changing
+ * this value changes every generated point ID.
+ */
+const NAMESPACE = '6a6f686e-6761-4c74-ad6a-656576657321';
+/**
+ * Normalise a file path for deterministic point ID generation.
+ *
+ * Replaces every backslash with a forward slash and lowercases the whole
+ * string, so paths that differ only in separator style or letter case
+ * normalise to the same value. Nothing else (for example a drive-letter
+ * colon) is altered.
+ *
+ * @param filePath - The original file path.
+ * @returns The normalised path string.
+ */
+function normalisePath(filePath) {
+    return filePath.replace(/\\/g, '/').toLowerCase();
+}
+/**
+ * Generate a deterministic UUID v5 point ID for a file (and optional chunk index).
+ *
+ * The hashed key is the normalised path, followed by `#<chunkIndex>` when a
+ * chunk index is supplied.
+ *
+ * @param filePath - The file path.
+ * @param chunkIndex - Optional chunk index within the file.
+ * @returns A deterministic UUID v5 string.
+ */
+function pointId(filePath, chunkIndex) {
+    // Compared against undefined rather than tested for truthiness, so chunk index 0 still gets its own "#0" key.
+    const key = chunkIndex !== undefined
+        ? `${normalisePath(filePath)}#${String(chunkIndex)}`
+        : normalisePath(filePath);
+    return v5(key, NAMESPACE);
+}
+/**
+ * @module extractors
+ *
+ * Text extraction registry for supported file formats.
+ */
 /**
  * Extract YAML frontmatter from a Markdown document.
  *
@@ -566,6 +880,55 @@ function extractJsonText(obj) {
     }
     return JSON.stringify(obj);
 }
+/**
+ * Extract text from a Markdown file.
+ *
+ * Reads the file as UTF-8 and splits it with extractMarkdownFrontmatter.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text, frontmatter }`, where `text` is the body and `frontmatter` is whatever extractMarkdownFrontmatter returned for it.
+ */
+async function extractMarkdown(filePath) {
+    const raw = await readFile(filePath, 'utf8');
+    const { frontmatter, body } = extractMarkdownFrontmatter(raw);
+    return { text: body, frontmatter };
+}
+/**
+ * Extract text from a plain-text file.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text }` holding the file content read as UTF-8, unmodified.
+ */
+async function extractPlaintext(filePath) {
+    const raw = await readFile(filePath, 'utf8');
+    return { text: raw };
+}
+/**
+ * Extract text and structured data from a JSON file.
+ *
+ * Reads the file as UTF-8 and parses it; a parse error from JSON.parse is not
+ * caught here and propagates to the caller.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text, json }`, where `text` comes from extractJsonText and `json` is the parsed value only when it is a non-null, non-array object (otherwise undefined).
+ */
+async function extractJson(filePath) {
+    const raw = await readFile(filePath, 'utf8');
+    const parsed = JSON.parse(raw);
+    // Only plain-object documents are exposed as structured data; arrays, primitives and null are not.
+    const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
+        ? parsed
+        : undefined;
+    return { text: extractJsonText(parsed), json };
+}
+/**
+ * Extract text from a PDF file using `unpdf`.
+ *
+ * The `unpdf` module is imported dynamically inside this function rather than
+ * at the top of the file.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text }`; when unpdf yields an array of strings they are joined with a blank line between them.
+ */
+async function extractPdf(filePath) {
+    const buffer = await readFile(filePath);
+    const uint8Array = new Uint8Array(buffer);
+    const { extractText: extractPdfText } = await import('unpdf');
+    const { text } = await extractPdfText(uint8Array);
+    // unpdf returns an array of strings (one per page)
+    const content = Array.isArray(text) ? text.join('\n\n') : text;
+    return { text: content };
+}
+/**
+ * Extract raw text from a .docx file using mammoth.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text }` holding the `value` of mammoth's raw-text extraction result.
+ */
+async function extractDocx(filePath) {
+    const buffer = await readFile(filePath);
+    const result = await mammoth.extractRawText({ buffer });
+    return { text: result.value };
+}
+/**
+ * Extract visible text from an HTML file using cheerio.
+ *
+ * @param filePath - Path of the file to read.
+ * @returns `{ text }` holding the trimmed text content with script and style elements removed.
+ */
+async function extractHtml(filePath) {
+    const raw = await readFile(filePath, 'utf8');
+    const $ = cheerio.load(raw);
+    // Drop script and style elements so their contents do not end up in the text.
+    $('script, style').remove();
+    // Prefer the body text; fall back to the whole document's text when the body text is empty.
+    const text = $('body').text().trim() || $.text().trim();
+    return { text };
+}
+/**
+ * Maps a file extension (lowercase, with the leading dot) to the extractor
+ * that handles it. extractText lowercases the extension it is given before
+ * looking it up here.
+ */
+const extractorRegistry = new Map([
+    ['.md', extractMarkdown],
+    ['.markdown', extractMarkdown],
+    ['.txt', extractPlaintext],
+    ['.text', extractPlaintext],
+    ['.json', extractJson],
+    ['.pdf', extractPdf],
+    ['.docx', extractDocx],
+    ['.html', extractHtml],
+    ['.htm', extractHtml],
+]);
 /**
  * Extract text from a file based on extension.
  *
@@ -574,85 +937,11 @@ function extractJsonText(obj) {
  * @returns Extracted text and optional structured data.
  */
 async function extractText(filePath, extension) {
-    const ext = extension.toLowerCase();
-    if (ext === '.md' || ext === '.markdown') {
-        const raw = await readFile(filePath, 'utf8');
-        const { frontmatter, body } = extractMarkdownFrontmatter(raw);
-        return { text: body, frontmatter };
-    }
-    if (ext === '.txt' || ext === '.text') {
-        const raw = await readFile(filePath, 'utf8');
-        return { text: raw };
-    }
-    if (ext === '.json') {
-        const raw = await readFile(filePath, 'utf8');
-        const parsed = JSON.parse(raw);
-        const json = parsed && typeof parsed === 'object' && !Array.isArray(parsed)
-            ? parsed
-            : undefined;
-        return { text: extractJsonText(parsed), json };
-    }
-    if (ext === '.pdf') {
-        const buffer = await readFile(filePath);
-        const uint8Array = new Uint8Array(buffer);
-        const { extractText: extractPdfText } = await import('unpdf');
-        const { text } = await extractPdfText(uint8Array);
-        // unpdf returns an array of strings (one per page)
-        const content = Array.isArray(text) ? text.join('\n\n') : text;
-        return { text: content };
-    }
-    if (ext === '.docx') {
-        const buffer = await readFile(filePath);
-        const result = await mammoth.extractRawText({ buffer });
-        return { text: result.value };
-    }
-    if (ext === '.html' || ext === '.htm') {
-        const raw = await readFile(filePath, 'utf8');
-        const $ = cheerio.load(raw);
-        // Remove script and style elements
-        $('script, style').remove();
-        // Extract text content
-        const text = $('body').text().trim() || $.text().trim();
-        return { text };
-    }
+    const extractor = extractorRegistry.get(extension.toLowerCase());
+    if (extractor)
+        return extractor(filePath);
     // Default: treat as plaintext.
-    const raw = await readFile(filePath, 'utf8');
-    return { text: raw };
-}
-/**
- * Compute a SHA-256 hex digest of the given text.
- *
- * @param text - The input text to hash.
- * @returns The hex-encoded SHA-256 hash.
- */
-function contentHash(text) {
-    return createHash('sha256').update(text, 'utf8').digest('hex');
-}
-/** Namespace UUID for jeeves-watcher point IDs. */
-const NAMESPACE = '6a6f686e-6761-4c74-ad6a-656576657321';
-/**
- * Normalise a file path for deterministic point ID generation.
- *
- * @param filePath - The original file path.
- * @returns The normalised path string.
- */
-function normalisePath(filePath) {
-    return filePath.replace(/\\/g, '/').toLowerCase();
-}
-/**
- * Generate a deterministic UUID v5 point ID for a file (and optional chunk index).
- *
- * @param filePath - The file path.
- * @param chunkIndex - Optional chunk index within the file.
- * @returns A deterministic UUID v5 string.
- */
-function pointId(filePath, chunkIndex) {
-    const key = chunkIndex !== undefined
-        ? `${normalisePath(filePath)}#${String(chunkIndex)}`
-        : normalisePath(filePath);
-    return v5(key, NAMESPACE);
+    return extractPlaintext(filePath);
 }
 /**
@@ -725,13 +1014,7 @@ function resolveTemplateVars(value, attributes) {
     if (typeof value !== 'string')
         return value;
     return value.replace(/\$\{([^}]+)\}/g, (_match, varPath) => {
-        const parts = varPath.split('.');
-        let current = attributes;
-        for (const part of parts) {
-            if (current === null || current === undefined)
-                return '';
-            current = current[part];
-        }
+        const current = get(attributes, varPath);
         if (current === null || current === undefined)
             return '';
         return typeof current === 'string' ? current : JSON.stringify(current);
@@ -751,25 +1034,170 @@ function resolveSet(setObj, attributes) {
     }
     return result;
 }
+/**
+ * Create the lib object for JsonMap transformations.
+ *
+ * The helpers are thin wrappers over built-in string/array methods plus
+ * radash `get`:
+ * - `split(str, separator)`, `toLowerCase(str)`, `replace(str, search, replacement)`
+ *   delegate to the String method of the same name.
+ * - `slice(arr, start, end)`, `join(arr, separator)` delegate to the Array
+ *   method of the same name.
+ * - `get(obj, path)` delegates to radash `get`.
+ *
+ * A new object is built on every call.
+ *
+ * @returns The lib object.
+ */
+function createJsonMapLib() {
+    return {
+        split: (str, separator) => str.split(separator),
+        slice: (arr, start, end) => arr.slice(start, end),
+        join: (arr, separator) => arr.join(separator),
+        toLowerCase: (str) => str.toLowerCase(),
+        replace: (str, search, replacement) => str.replace(search, replacement),
+        get: (obj, path) => get(obj, path),
+    };
+}
 /**
  * Apply compiled inference rules to file attributes, returning merged metadata.
  *
  * Rules are evaluated in order; later rules override earlier ones.
+ * If a rule has a `map`, the JsonMap transformation is applied after `set` resolution,
+ * and map output overrides set output on conflict.
+ * A string `map` is looked up in `namedMaps`; an unknown name logs a warning and skips
+ * only the map step, so the rule's `set` output is still kept. A map transformation
+ * that throws, or that returns something other than a plain object, is logged as a
+ * warning and ignored rather than rethrown.
  *
  * @param compiledRules - The compiled rules to evaluate.
  * @param attributes - The file attributes to match against.
+ * @param namedMaps - Optional record of named JsonMap definitions.
+ * @param logger - Optional pino logger for warnings (falls back to console.warn).
  * @returns The merged metadata from all matching rules.
  */
-function applyRules(compiledRules, attributes) {
+async function applyRules(compiledRules, attributes, namedMaps, logger) {
+    // JsonMap's type definitions expect a generic JsonMapLib shape with unary functions.
+    // Our helper functions accept multiple args, which JsonMap supports at runtime.
+    const lib = createJsonMapLib();
     let merged = {};
+    const log = logger ?? console;
     for (const { rule, validate } of compiledRules) {
         if (validate(attributes)) {
-            merged = { ...merged, ...resolveSet(rule.set, attributes) };
+            // Apply set resolution
+            const setOutput = resolveSet(rule.set, attributes);
+            merged = { ...merged, ...setOutput };
+            // Apply map transformation if present
+            if (rule.map) {
+                let mapDef;
+                // Resolve map reference
+                if (typeof rule.map === 'string') {
+                    mapDef = namedMaps?.[rule.map];
+                    if (!mapDef) {
+                        log.warn(`Map reference "${rule.map}" not found in named maps. Skipping map transformation.`);
+                        // The set output was already merged above; only this rule's map step is skipped.
+                        continue;
+                    }
+                }
+                else {
+                    mapDef = rule.map;
+                }
+                // Execute JsonMap transformation
+                try {
+                    const jsonMap = new JsonMap(mapDef, lib);
+                    const mapOutput = await jsonMap.transform(attributes);
+                    // Only plain-object results are merged; arrays, primitives and null are rejected.
+                    if (mapOutput &&
+                        typeof mapOutput === 'object' &&
+                        !Array.isArray(mapOutput)) {
+                        merged = { ...merged, ...mapOutput };
+                    }
+                    else {
+                        log.warn(`JsonMap transformation did not return an object; skipping merge.`);
+                    }
+                }
+                catch (error) {
+                    // Best effort: a failing map must not prevent the remaining rules from applying.
+                    log.warn(`JsonMap transformation failed: ${error instanceof Error ? error.message : String(error)}`);
+                }
+            }
         }
     }
     return merged;
 }
+/**
+ * @module processor/buildMetadata
+ * Builds merged metadata from file content, inference rules, and enrichment. I/O: reads files, extracts text, loads enrichment .meta.json.
+ */
+/**
+ * Build merged metadata for a file by applying inference rules and merging with enrichment metadata.
+ *
+ * Steps, in order: stat the file, extract its text, build attributes from the
+ * path, stats and any extracted frontmatter/JSON, apply the inference rules,
+ * then read the enrichment metadata. Nothing is caught here, so a failure in
+ * any step rejects the returned promise.
+ *
+ * @param filePath - The file to process.
+ * @param compiledRules - The compiled inference rules.
+ * @param metadataDir - The metadata directory for enrichment files.
+ * @param maps - Optional named JsonMap definitions.
+ * @param logger - Optional logger for rule warnings.
+ * @returns An object with `inferred` (rule output), `enrichment` (result of
+ *   readMetadata, possibly nullish), `metadata` (inferred overlaid with
+ *   enrichment), `attributes`, and `extracted` (the extractText result).
+ */
+async function buildMergedMetadata(filePath, compiledRules, metadataDir, maps, logger) {
+    const ext = extname(filePath);
+    const stats = await stat(filePath);
+    // 1. Extract text and structured data
+    const extracted = await extractText(filePath, ext);
+    // 2. Build attributes + apply rules
+    const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
+    const inferred = await applyRules(compiledRules, attributes, maps, logger);
+    // 3. Read enrichment metadata (merge, enrichment wins)
+    const enrichment = await readMetadata(filePath, metadataDir);
+    // Spread order gives enrichment keys precedence over inferred keys.
+    const metadata = {
+        ...inferred,
+        ...(enrichment ?? {}),
+    };
+    return { inferred, enrichment, metadata, attributes, extracted };
+}
+/**
+ * @module processor/chunkIds
+ * Generates chunk point IDs from file paths and chunk indices. Extracts chunk counts from Qdrant payloads. Pure functions, no I/O.
+ */
+/**
+ * Generate an array of chunk IDs for a file.
+ *
+ * IDs are produced by pointId for chunk indices 0 through totalChunks - 1, in
+ * ascending order. A totalChunks of 0 or less yields an empty array.
+ *
+ * NOTE(review): totalChunks is not validated here; a non-finite value such as
+ * Infinity would keep the loop from terminating. Confirm callers pass a
+ * finite count.
+ *
+ * @param filePath - The file path.
+ * @param totalChunks - The total number of chunks.
+ * @returns An array of point IDs for each chunk.
+ */
+function chunkIds(filePath, totalChunks) {
+    const ids = [];
+    for (let i = 0; i < totalChunks; i++) {
+        ids.push(pointId(filePath, i));
+    }
+    return ids;
+}
+/**
+ * Extract the total chunk count from a payload, with a fallback.
+ *
+ * The fallback is returned when the payload is falsy or when its
+ * `total_chunks` value is not of type number.
+ *
+ * NOTE(review): any number is returned as-is, including 0, negative,
+ * fractional and NaN values; no range or integer check is applied. Confirm
+ * whether callers rely on that.
+ *
+ * @param payload - The Qdrant point payload (or null).
+ * @param fallback - The value returned when total_chunks is missing or not a number (default 1).
+ * @returns The total chunk count.
+ */
+function getChunkCount(payload, fallback = 1) {
+    if (!payload)
+        return fallback;
+    const count = payload['total_chunks'];
+    return typeof count === 'number' ? count : fallback;
+}
+/**
+ * @module processor/splitter
+ * Factory for LangChain text splitters. Returns MarkdownTextSplitter or RecursiveCharacterTextSplitter based on file extension. No I/O.
+ */
+/**
+ * Create the appropriate text splitter for the given file extension.
+ *
+ * The extension is compared case-insensitively: `.md` and `.markdown` get a
+ * MarkdownTextSplitter, every other extension gets a
+ * RecursiveCharacterTextSplitter. Both sizes are passed through unchanged, so
+ * any defaults must be applied by the caller.
+ *
+ * @param ext - File extension (including leading dot).
+ * @param chunkSize - Maximum chunk size in characters.
+ * @param chunkOverlap - Overlap between chunks in characters.
+ * @returns A text splitter instance.
+ */
+function createSplitter(ext, chunkSize, chunkOverlap) {
+    const lowerExt = ext.toLowerCase();
+    if (lowerExt === '.md' || lowerExt === '.markdown') {
+        return new MarkdownTextSplitter({ chunkSize, chunkOverlap });
+    }
+    return new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });
+}
+/**
+ * @module processor
+ *
+ * Core document processing pipeline. Handles extracting text, computing embeddings, syncing with vector store.
+ */
 /**
  * Core document processing pipeline.
  *
@@ -781,11 +1209,10 @@ class DocumentProcessor {
     vectorStore;
     compiledRules;
     logger;
-    metadataDir;
     /**
      * Create a new DocumentProcessor.
      *
-     * @param config - The application configuration.
+     * @param config - The processor configuration.
      * @param embeddingProvider - The embedding provider.
      * @param vectorStore - The vector store client.
      * @param compiledRules - The compiled inference rules.
@@ -797,7 +1224,6 @@ class DocumentProcessor {
         this.vectorStore = vectorStore;
         this.compiledRules = compiledRules;
         this.logger = logger;
-        this.metadataDir = config.metadataDir ?? '.jeeves-metadata';
     }
     /**
      * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
@@ -807,9 +1233,8 @@ class DocumentProcessor {
     async processFile(filePath) {
         try {
             const ext = extname(filePath);
-            const stats = await stat(filePath);
-            // 1. Extract text
-            const extracted = await extractText(filePath, ext);
+            // 1. Build merged metadata + extract text
+            const { metadata, extracted } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
             if (!extracted.text.trim()) {
                 this.logger.debug({ filePath }, 'Skipping empty file');
                 return;
@@ -822,26 +1247,15 @@ class DocumentProcessor {
                 this.logger.debug({ filePath }, 'Content unchanged, skipping');
                 return;
             }
-            const oldTotalChunks = typeof existingPayload?.['total_chunks'] === 'number'
-                ? existingPayload['total_chunks']
-                : 0;
-            // 3. Build attributes + apply rules → inferred metadata
-            const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
-            const inferred = applyRules(this.compiledRules, attributes);
-            // 4. Read enrichment metadata (merge, enrichment wins)
-            const enrichment = await readMetadata(filePath, this.metadataDir);
-            const metadata = {
-                ...inferred,
-                ...(enrichment ?? {}),
-            };
-            // 5. Chunk text
-            const chunkSize = this.config.embedding.chunkSize ?? 1000;
-            const chunkOverlap = this.config.embedding.chunkOverlap ?? 200;
-            const splitter = this.createSplitter(ext, chunkSize, chunkOverlap);
+            const oldTotalChunks = getChunkCount(existingPayload);
+            // 3. Chunk text
+            const chunkSize = this.config.chunkSize ?? 1000;
+            const chunkOverlap = this.config.chunkOverlap ?? 200;
+            const splitter = createSplitter(ext, chunkSize, chunkOverlap);
             const chunks = await splitter.splitText(extracted.text);
-            // 6. Embed all chunks
+            // 4. Embed all chunks
             const vectors = await this.embeddingProvider.embed(chunks);
-            // 7. Upsert all chunk points
+            // 5. Upsert all chunk points
             const points = chunks.map((chunk, i) => ({
                 id: pointId(filePath, i),
                 vector: vectors[i],
@@ -855,12 +1269,9 @@ class DocumentProcessor {
                 },
             }));
             await this.vectorStore.upsert(points);
-            // 8. Clean up orphaned chunks
+            // 6. Clean up orphaned chunks
             if (oldTotalChunks > chunks.length) {
-                const orphanIds = [];
-                for (let i = chunks.length; i < oldTotalChunks; i++) {
-                    orphanIds.push(pointId(filePath, i));
-                }
+                const orphanIds = chunkIds(filePath, oldTotalChunks).slice(chunks.length);
                 await this.vectorStore.delete(orphanIds);
             }
             this.logger.info({ filePath, chunks: chunks.length }, 'File processed successfully');
@@ -879,15 +1290,10 @@ class DocumentProcessor {
             // Get the existing payload to find total chunks
             const baseId = pointId(filePath, 0);
             const existingPayload = await this.vectorStore.getPayload(baseId);
-            const totalChunks = typeof existingPayload?.['total_chunks'] === 'number'
-                ? existingPayload['total_chunks']
-                : 1;
-            const ids = [];
-            for (let i = 0; i < totalChunks; i++) {
-                ids.push(pointId(filePath, i));
-            }
+            const totalChunks = getChunkCount(existingPayload);
+            const ids = chunkIds(filePath, totalChunks);
             await this.vectorStore.delete(ids);
-            await deleteMetadata(filePath, this.metadataDir);
+            await deleteMetadata(filePath, this.config.metadataDir);
             this.logger.info({ filePath }, 'File deleted from index');
         }
         catch (error) {
@@ -904,21 +1310,16 @@ class DocumentProcessor {
     async processMetadataUpdate(filePath, metadata) {
         try {
             // Read existing enrichment metadata and merge
-            const existing = (await readMetadata(filePath, this.metadataDir)) ?? {};
+            const existing = (await readMetadata(filePath, this.config.metadataDir)) ?? {};
             const merged = { ...existing, ...metadata };
-            await writeMetadata(filePath, this.metadataDir, merged);
+            await writeMetadata(filePath, this.config.metadataDir, merged);
             // Update all chunk payloads in Qdrant
             const baseId = pointId(filePath, 0);
             const existingPayload = await this.vectorStore.getPayload(baseId);
             if (!existingPayload)
                 return null;
-            const totalChunks = typeof existingPayload['total_chunks'] === 'number'
-                ? existingPayload['total_chunks']
-                : 1;
-            const ids = [];
-            for (let i = 0; i < totalChunks; i++) {
-                ids.push(pointId(filePath, i));
-            }
+            const totalChunks = getChunkCount(existingPayload);
+            const ids = chunkIds(filePath, totalChunks);
             await this.vectorStore.setPayload(ids, merged);
             this.logger.info({ filePath, chunks: totalChunks }, 'Metadata updated');
             return merged;
@@ -944,27 +1345,11 @@ class DocumentProcessor {
                 this.logger.debug({ filePath }, 'File not indexed, skipping');
                 return null;
             }
-            const ext = extname(filePath);
-            const stats = await stat(filePath);
-            // Extract frontmatter/json for attribute building (lightweight)
-            const extracted = await extractText(filePath, ext);
-            // Build attributes + apply current rules
-            const attributes = buildAttributes(filePath, stats, extracted.frontmatter, extracted.json);
-            const inferred = applyRules(this.compiledRules, attributes);
-            // Read enrichment metadata (merge, enrichment wins)
-            const enrichment = await readMetadata(filePath, this.metadataDir);
-            const metadata = {
-                ...inferred,
-                ...(enrichment ?? {}),
-            };
+            // Build merged metadata (lightweight — no embedding)
+            const { metadata } = await buildMergedMetadata(filePath, this.compiledRules, this.config.metadataDir, this.config.maps, this.logger);
             // Update all chunk payloads
-            const totalChunks = typeof existingPayload['total_chunks'] === 'number'
-                ? existingPayload['total_chunks']
-                : 1;
-            const ids = [];
-            for (let i = 0; i < totalChunks; i++) {
-                ids.push(pointId(filePath, i));
-            }
+            const totalChunks = getChunkCount(existingPayload);
+            const ids = chunkIds(filePath, totalChunks);
             await this.vectorStore.setPayload(ids, metadata);
             this.logger.info({ filePath, chunks: totalChunks }, 'Rules re-applied');
             return metadata;
@@ -983,23 +1368,12 @@ class DocumentProcessor {
         this.compiledRules = compiledRules;
         this.logger.info({ rules: compiledRules.length }, 'Inference rules updated');
     }
-    /**
-     * Create the appropriate text splitter for the given file extension.
-     *
-     * @param ext - File extension.
-     * @param chunkSize - Maximum chunk size in characters.
-     * @param chunkOverlap - Overlap between chunks in characters.
-     * @returns A text splitter instance.
-     */
-    createSplitter(ext, chunkSize, chunkOverlap) {
-        const lowerExt = ext.toLowerCase();
-        if (lowerExt === '.md' || lowerExt === '.markdown') {
-            return new MarkdownTextSplitter({ chunkSize, chunkOverlap });
-        }
-        return new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });
-    }
 }
+/**
+ * @module queue
+ * Debounced, rate-limited, concurrent event queue for file watchers. Manages priority queuing and async callbacks. No direct I/O; orchestrates processing.
+ */
 /**
  * A debounced, rate-limited, concurrent event queue.
  */
@@ -1148,19 +1522,23 @@ class VectorStoreClient {
     client;
     collectionName;
     dims;
+    logger;
     /**
      * Create a new VectorStoreClient.
      *
      * @param config - Vector store configuration.
      * @param dimensions - The embedding vector dimensions.
+     * @param logger - Optional pino logger for retry warnings.
      */
-    constructor(config, dimensions) {
+    constructor(config, dimensions, logger) {
         this.client = new QdrantClient({
             url: config.url,
             apiKey: config.apiKey,
+            checkCompatibility: false,
         });
         this.collectionName = config.collectionName;
         this.dims = dimensions;
+        this.logger = logger;
     }
     /**
      * Ensure the collection exists with correct dimensions and Cosine distance.
@@ -1187,13 +1565,42 @@ class VectorStoreClient {
     async upsert(points) {
         if (points.length === 0)
             return;
-        await this.client.upsert(this.collectionName, {
-            wait: true,
-            points: points.map((p) => ({
-                id: p.id,
-                vector: p.vector,
-                payload: p.payload,
-            })),
+        await retry(async (attempt) => {
+            if (attempt > 1) {
+                const msg = {
+                    attempt,
+                    operation: 'qdrant.upsert',
+                    points: points.length,
+                };
+                if (this.logger) {
+                    this.logger.warn(msg, 'Retrying Qdrant upsert');
+                }
+                else {
+                    console.warn(msg, 'Retrying Qdrant upsert');
+                }
+            }
+            await this.client.upsert(this.collectionName, {
+                wait: true,
+                points: points.map((p) => ({
+                    id: p.id,
+                    vector: p.vector,
+                    payload: p.payload,
+                })),
+            });
+        }, {
+            attempts: 5,
+            baseDelayMs: 500,
+            maxDelayMs: 10_000,
+            jitter: 0.2,
+            onRetry: ({ attempt, delayMs, error }) => {
+                const msg = { attempt, delayMs, operation: 'qdrant.upsert', error };
+                if (this.logger) {
+                    this.logger.warn(msg, 'Qdrant upsert failed; will retry');
+                }
+                else {
+                    console.warn(msg, 'Qdrant upsert failed; will retry');
+                }
+            },
         });
     }
     /**
@@ -1204,9 +1611,38 @@ class VectorStoreClient {
     async delete(ids) {
         if (ids.length === 0)
             return;
-        await this.client.delete(this.collectionName, {
-            wait: true,
-            points: ids,
+        await retry(async (attempt) => {
+            if (attempt > 1) {
+                const msg = {
+                    attempt,
+                    operation: 'qdrant.delete',
+                    ids: ids.length,
+                };
+                if (this.logger) {
+                    this.logger.warn(msg, 'Retrying Qdrant delete');
+                }
+                else {
+                    console.warn(msg, 'Retrying Qdrant delete');
+                }
+            }
+            await this.client.delete(this.collectionName, {
+                wait: true,
+                points: ids,
+            });
+        }, {
+            attempts: 5,
+            baseDelayMs: 500,
+            maxDelayMs: 10_000,
+            jitter: 0.2,
+            onRetry: ({ attempt, delayMs, error }) => {
+                const msg = { attempt, delayMs, operation: 'qdrant.delete', error };
+                if (this.logger) {
+                    this.logger.warn(msg, 'Qdrant delete failed; will retry');
+                }
+                else {
+                    console.warn(msg, 'Qdrant delete failed; will retry');
+                }
+            },
         });
     }
     /**
@@ -1306,6 +1742,10 @@ class VectorStoreClient {
     }
 }
+/**
+ * @module watcher
+ * Filesystem watcher wrapping chokidar. I/O: watches files/directories for add/change/unlink events, enqueues to processing queue.
+ */
 /**
  * Filesystem watcher that maps chokidar events to the processing queue.
  */
@@ -1403,16 +1843,22 @@ class JeevesWatcher {
         this.logger = logger;
         let embeddingProvider;
         try {
-            embeddingProvider = createEmbeddingProvider(this.config.embedding);
+            embeddingProvider = createEmbeddingProvider(this.config.embedding, logger);
         }
         catch (error) {
             logger.fatal({ error }, 'Failed to create embedding provider');
             throw error;
         }
-        const vectorStore = new VectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions);
+        const vectorStore = new VectorStoreClient(this.config.vectorStore, embeddingProvider.dimensions, logger);
         await vectorStore.ensureCollection();
         const compiledRules = compileRules(this.config.inferenceRules ?? []);
-        const processor = new DocumentProcessor(this.config, embeddingProvider, vectorStore, compiledRules, logger);
+        const processorConfig = {
+            metadataDir: this.config.metadataDir ?? '.jeeves-metadata',
+            chunkSize: this.config.embedding.chunkSize,
+            chunkOverlap: this.config.embedding.chunkOverlap,
+            maps: this.config.maps,
+        };
+        const processor = new DocumentProcessor(processorConfig, embeddingProvider, vectorStore, compiledRules, logger);
         this.processor = processor;
         const queue = new EventQueue({
             debounceMs: this.config.watch.debounceMs ?? 2000,
@@ -1433,7 +1879,7 @@ class JeevesWatcher {
         this.server = server;
         await server.listen({
             host: this.config.api?.host ?? '127.0.0.1',
-            port: this.config.api?.port ?? 3458,
+            port: this.config.api?.port ?? 3456,
         });
         watcher.start();
         this.startConfigWatch();
@@ -1449,12 +1895,17 @@ class JeevesWatcher {
         }
         if (this.queue) {
             const timeout = this.config.shutdownTimeoutMs ?? 10000;
-            await Promise.race([
-                this.queue.drain(),
+            const drained = await Promise.race([
+                this.queue.drain().then(() => true),
                 new Promise((resolve) => {
-                    setTimeout(resolve, timeout);
+                    setTimeout(() => {
+                        resolve(false);
+                    }, timeout);
                 }),
             ]);
+            if (!drained) {
+                this.logger?.warn({ timeoutMs: timeout }, 'Queue drain timeout hit, forcing shutdown');
+            }
         }
         if (this.server) {
             await this.server.close();
@@ -1503,6 +1954,7 @@ class JeevesWatcher {
         const processor = this.processor;
         if (!logger || !processor || !this.configPath)
             return;
+        logger.info({ configPath: this.configPath }, 'Config change detected, reloading...');
         try {
             const newConfig = await loadConfig(this.configPath);
             this.config = newConfig;
@@ -1534,4 +1986,4 @@ async function startFromConfig(configPath) {
     return app;
 }
-export { DocumentProcessor, EventQueue, FileSystemWatcher, JeevesWatcher, VectorStoreClient, applyRules, buildAttributes, compileRules, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, extractText, loadConfig, metadataPath, pointId, readMetadata, startFromConfig, writeMetadata };
+export { DocumentProcessor, EventQueue, FileSystemWatcher, JeevesWatcher, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };