@karmaniverous/jeeves-watcher 0.4.4 → 0.5.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -6,6 +6,11 @@ import { z } from 'zod';
6
6
  import Handlebars from 'handlebars';
7
7
  import { Stats } from 'node:fs';
8
8
 
9
+ /**
10
+ * @module config/schemas/base
11
+ * Base configuration schemas: watch, logging, API.
12
+ */
13
+
9
14
  /**
10
15
  * Watch configuration for file system monitoring.
11
16
  */
@@ -26,34 +31,13 @@ type WatchConfig = z.infer<typeof watchConfigSchema>;
26
31
  declare const configWatchConfigSchema: z.ZodObject<{
27
32
  enabled: z.ZodOptional<z.ZodBoolean>;
28
33
  debounceMs: z.ZodOptional<z.ZodNumber>;
34
+ reindex: z.ZodOptional<z.ZodEnum<{
35
+ issues: "issues";
36
+ full: "full";
37
+ }>>;
29
38
  }, z.core.$strip>;
30
39
  /** Configuration file watch settings controlling auto-reload behavior on config changes. */
31
40
  type ConfigWatchConfig = z.infer<typeof configWatchConfigSchema>;
32
- /**
33
- * Embedding model configuration.
34
- */
35
- declare const embeddingConfigSchema: z.ZodObject<{
36
- provider: z.ZodDefault<z.ZodString>;
37
- model: z.ZodDefault<z.ZodString>;
38
- chunkSize: z.ZodOptional<z.ZodNumber>;
39
- chunkOverlap: z.ZodOptional<z.ZodNumber>;
40
- dimensions: z.ZodOptional<z.ZodNumber>;
41
- apiKey: z.ZodOptional<z.ZodString>;
42
- rateLimitPerMinute: z.ZodOptional<z.ZodNumber>;
43
- concurrency: z.ZodOptional<z.ZodNumber>;
44
- }, z.core.$strip>;
45
- /** Embedding model configuration: provider, model, chunking, dimensions, rate limits, and API key. */
46
- type EmbeddingConfig = z.infer<typeof embeddingConfigSchema>;
47
- /**
48
- * Vector store configuration for Qdrant.
49
- */
50
- declare const vectorStoreConfigSchema: z.ZodObject<{
51
- url: z.ZodString;
52
- collectionName: z.ZodString;
53
- apiKey: z.ZodOptional<z.ZodString>;
54
- }, z.core.$strip>;
55
- /** Qdrant vector store connection configuration: server URL, collection name, and optional API key. */
56
- type VectorStoreConfig = z.infer<typeof vectorStoreConfigSchema>;
57
41
  /**
58
42
  * API server configuration.
59
43
  */
@@ -72,21 +56,42 @@ declare const loggingConfigSchema: z.ZodObject<{
72
56
  }, z.core.$strip>;
73
57
  /** Logging configuration: level and optional file output path. */
74
58
  type LoggingConfig = z.infer<typeof loggingConfigSchema>;
59
+
60
+ /**
61
+ * Global schema entry: inline object or file path.
62
+ */
63
+ declare const schemaEntrySchema: z.ZodUnion<readonly [z.ZodObject<{
64
+ type: z.ZodOptional<z.ZodLiteral<"object">>;
65
+ properties: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
66
+ }, z.core.$strip>, z.ZodString]>;
67
+ /** Global schema entry. */
68
+ type SchemaEntry = z.infer<typeof schemaEntrySchema>;
75
69
  /**
76
70
  * An inference rule that enriches document metadata.
77
71
  */
78
72
  declare const inferenceRuleSchema: z.ZodObject<{
73
+ name: z.ZodString;
74
+ description: z.ZodString;
79
75
  match: z.ZodRecord<z.ZodString, z.ZodUnknown>;
80
- set: z.ZodRecord<z.ZodString, z.ZodUnknown>;
76
+ schema: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
77
+ type: z.ZodOptional<z.ZodLiteral<"object">>;
78
+ properties: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
79
+ }, z.core.$strip>]>>>;
81
80
  map: z.ZodOptional<z.ZodUnion<readonly [z.ZodType<_karmaniverous_jsonmap.JsonMapMap, unknown, z.core.$ZodTypeInternals<_karmaniverous_jsonmap.JsonMapMap, unknown>>, z.ZodString]>>;
82
81
  template: z.ZodOptional<z.ZodString>;
83
82
  }, z.core.$strip>;
84
- /** An inference rule: JSON Schema match condition, set fields, and optional JsonMap transformation. */
83
+ /** An inference rule: JSON Schema match condition, schema array, and optional JsonMap transformation. */
85
84
  type InferenceRule = z.infer<typeof inferenceRuleSchema>;
85
+
86
86
  /**
87
87
  * Top-level configuration for jeeves-watcher.
88
88
  */
89
89
  declare const jeevesWatcherConfigSchema: z.ZodObject<{
90
+ description: z.ZodOptional<z.ZodString>;
91
+ schemas: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
92
+ type: z.ZodOptional<z.ZodLiteral<"object">>;
93
+ properties: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
94
+ }, z.core.$strip>, z.ZodString]>>>;
90
95
  watch: z.ZodObject<{
91
96
  paths: z.ZodArray<z.ZodString>;
92
97
  ignored: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -99,6 +104,10 @@ declare const jeevesWatcherConfigSchema: z.ZodObject<{
99
104
  configWatch: z.ZodOptional<z.ZodObject<{
100
105
  enabled: z.ZodOptional<z.ZodBoolean>;
101
106
  debounceMs: z.ZodOptional<z.ZodNumber>;
107
+ reindex: z.ZodOptional<z.ZodEnum<{
108
+ issues: "issues";
109
+ full: "full";
110
+ }>>;
102
111
  }, z.core.$strip>>;
103
112
  embedding: z.ZodObject<{
104
113
  provider: z.ZodDefault<z.ZodString>;
@@ -121,19 +130,44 @@ declare const jeevesWatcherConfigSchema: z.ZodObject<{
121
130
  port: z.ZodOptional<z.ZodNumber>;
122
131
  }, z.core.$strip>>;
123
132
  extractors: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
133
+ stateDir: z.ZodOptional<z.ZodString>;
124
134
  inferenceRules: z.ZodOptional<z.ZodArray<z.ZodObject<{
135
+ name: z.ZodString;
136
+ description: z.ZodString;
125
137
  match: z.ZodRecord<z.ZodString, z.ZodUnknown>;
126
- set: z.ZodRecord<z.ZodString, z.ZodUnknown>;
138
+ schema: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
139
+ type: z.ZodOptional<z.ZodLiteral<"object">>;
140
+ properties: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
141
+ }, z.core.$strip>]>>>;
127
142
  map: z.ZodOptional<z.ZodUnion<readonly [z.ZodType<_karmaniverous_jsonmap.JsonMapMap, unknown, z.core.$ZodTypeInternals<_karmaniverous_jsonmap.JsonMapMap, unknown>>, z.ZodString]>>;
128
143
  template: z.ZodOptional<z.ZodString>;
129
144
  }, z.core.$strip>>>;
130
- maps: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<[z.ZodType<_karmaniverous_jsonmap.JsonMapMap, unknown, z.core.$ZodTypeInternals<_karmaniverous_jsonmap.JsonMapMap, unknown>>, z.ZodString]>>>;
131
- templates: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
132
- templateHelpers: z.ZodOptional<z.ZodObject<{
133
- paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
145
+ maps: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodType<_karmaniverous_jsonmap.JsonMapMap, unknown, z.core.$ZodTypeInternals<_karmaniverous_jsonmap.JsonMapMap, unknown>>, z.ZodString, z.ZodObject<{
146
+ map: z.ZodUnion<[z.ZodType<_karmaniverous_jsonmap.JsonMapMap, unknown, z.core.$ZodTypeInternals<_karmaniverous_jsonmap.JsonMapMap, unknown>>, z.ZodString]>;
147
+ description: z.ZodOptional<z.ZodString>;
148
+ }, z.core.$strip>]>>>;
149
+ templates: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
150
+ template: z.ZodString;
151
+ description: z.ZodOptional<z.ZodString>;
152
+ }, z.core.$strip>]>>>;
153
+ templateHelpers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
154
+ path: z.ZodString;
155
+ description: z.ZodOptional<z.ZodString>;
156
+ }, z.core.$strip>>>;
157
+ mapHelpers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
158
+ path: z.ZodString;
159
+ description: z.ZodOptional<z.ZodString>;
160
+ }, z.core.$strip>>>;
161
+ reindex: z.ZodOptional<z.ZodObject<{
162
+ callbackUrl: z.ZodOptional<z.ZodURL>;
134
163
  }, z.core.$strip>>;
135
- mapHelpers: z.ZodOptional<z.ZodObject<{
136
- paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
164
+ slots: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
165
+ search: z.ZodOptional<z.ZodObject<{
166
+ scoreThresholds: z.ZodOptional<z.ZodObject<{
167
+ strong: z.ZodNumber;
168
+ relevant: z.ZodNumber;
169
+ noise: z.ZodNumber;
170
+ }, z.core.$strip>>;
137
171
  }, z.core.$strip>>;
138
172
  logging: z.ZodOptional<z.ZodObject<{
139
173
  level: z.ZodOptional<z.ZodString>;
@@ -147,9 +181,39 @@ declare const jeevesWatcherConfigSchema: z.ZodObject<{
147
181
  type JeevesWatcherConfig = z.infer<typeof jeevesWatcherConfigSchema>;
148
182
 
149
183
  /**
150
- * @module embedding
151
- *
152
- * Embedding provider abstractions and registry-backed factory.
184
+ * @module config/schemas/services
185
+ * Service configuration schemas: embedding and vector store.
186
+ */
187
+
188
+ /**
189
+ * Embedding model configuration.
190
+ */
191
+ declare const embeddingConfigSchema: z.ZodObject<{
192
+ provider: z.ZodDefault<z.ZodString>;
193
+ model: z.ZodDefault<z.ZodString>;
194
+ chunkSize: z.ZodOptional<z.ZodNumber>;
195
+ chunkOverlap: z.ZodOptional<z.ZodNumber>;
196
+ dimensions: z.ZodOptional<z.ZodNumber>;
197
+ apiKey: z.ZodOptional<z.ZodString>;
198
+ rateLimitPerMinute: z.ZodOptional<z.ZodNumber>;
199
+ concurrency: z.ZodOptional<z.ZodNumber>;
200
+ }, z.core.$strip>;
201
+ /** Embedding model configuration: provider, model, chunking, dimensions, rate limits, and API key. */
202
+ type EmbeddingConfig = z.infer<typeof embeddingConfigSchema>;
203
+ /**
204
+ * Vector store configuration for Qdrant.
205
+ */
206
+ declare const vectorStoreConfigSchema: z.ZodObject<{
207
+ url: z.ZodString;
208
+ collectionName: z.ZodString;
209
+ apiKey: z.ZodOptional<z.ZodString>;
210
+ }, z.core.$strip>;
211
+ /** Qdrant vector store connection configuration: server URL, collection name, and optional API key. */
212
+ type VectorStoreConfig = z.infer<typeof vectorStoreConfigSchema>;
213
+
214
+ /**
215
+ * @module embedding/types
216
+ * Embedding provider type definitions.
153
217
  */
154
218
 
155
219
  /**
@@ -161,6 +225,17 @@ interface EmbeddingProvider {
161
225
  /** The dimensionality of the embedding vectors. */
162
226
  dimensions: number;
163
227
  }
228
+ /**
229
+ * Factory function for creating embedding providers.
230
+ */
231
+ type ProviderFactory = (config: EmbeddingConfig, logger?: pino.Logger) => EmbeddingProvider;
232
+
233
+ /**
234
+ * @module embedding
235
+ *
236
+ * Embedding provider abstractions and registry-backed factory.
237
+ */
238
+
164
239
  /**
165
240
  * Create an embedding provider based on the given configuration.
166
241
  *
@@ -168,10 +243,107 @@ interface EmbeddingProvider {
168
243
  *
169
244
  * @param config - The embedding configuration.
170
245
  * @param logger - Optional pino logger for retry warnings.
246
+ * @param additionalProviders - Optional map of additional provider factories to register.
171
247
  * @returns An {@link EmbeddingProvider} instance.
172
248
  * @throws If the configured provider is not supported.
173
249
  */
174
- declare function createEmbeddingProvider(config: EmbeddingConfig, logger?: pino.Logger): EmbeddingProvider;
250
+ declare function createEmbeddingProvider(config: EmbeddingConfig, logger?: pino.Logger, additionalProviders?: Map<string, ProviderFactory>): EmbeddingProvider;
251
+
252
+ /**
253
+ * @module helpers/introspect
254
+ * JSDoc introspection for helper modules. Extracts function exports and their descriptions.
255
+ */
256
+ /** Result of introspecting a single helper module. */
257
+ interface HelperModuleIntrospection {
258
+ /** Map of namespace_exportName to JSDoc description (empty string if none). */
259
+ exports: Record<string, string>;
260
+ }
261
+ /** Result of introspecting all helper modules. */
262
+ interface AllHelpersIntrospection {
263
+ mapHelpers: Record<string, HelperModuleIntrospection>;
264
+ templateHelpers: Record<string, HelperModuleIntrospection>;
265
+ }
266
+
267
+ /**
268
+ * @module util/JsonFileStore
269
+ * Small base class for JSON-backed read/modify/write stores with in-memory caching.
270
+ */
271
+
272
+ /** Options for {@link JsonFileStore}. */
273
+ interface JsonFileStoreOptions {
274
+ /** Path to the JSON file on disk. */
275
+ filePath: string;
276
+ /** Logger for warnings. */
277
+ logger: pino.Logger;
278
+ }
279
+ /**
280
+ * Base class for JSON file stores.
281
+ *
282
+ * @typeParam T - The JSON-serializable data structure stored.
283
+ */
284
+ declare abstract class JsonFileStore<T> {
285
+ /** Path to the JSON file on disk. */
286
+ protected readonly filePath: string;
287
+ /** In-memory cache of the parsed file contents, or `null` if not yet loaded. */
288
+ protected cache: T | null;
289
+ /** Logger instance for warnings and diagnostics. */
290
+ protected readonly logger: pino.Logger;
291
+ protected constructor(options: JsonFileStoreOptions);
292
+ /** Create an empty default value when file is missing or unreadable. */
293
+ protected abstract createEmpty(): T;
294
+ /** Load from disk into cache if not already loaded. */
295
+ protected load(): T;
296
+ /** Flush cache to disk. */
297
+ protected save(): void;
298
+ }
299
+
300
+ /**
301
+ * @module issues/types
302
+ * Zod schemas and TypeScript types for the issues tracking system. Defines IssueRecord and IssuesFile structures.
303
+ */
304
+
305
+ /** Schema for a single issue record tracking a processing failure. */
306
+ declare const issueRecordSchema: z.ZodObject<{
307
+ type: z.ZodEnum<{
308
+ type_collision: "type_collision";
309
+ interpolation_error: "interpolation_error";
310
+ }>;
311
+ property: z.ZodOptional<z.ZodString>;
312
+ rules: z.ZodOptional<z.ZodArray<z.ZodString>>;
313
+ rule: z.ZodOptional<z.ZodString>;
314
+ types: z.ZodOptional<z.ZodArray<z.ZodString>>;
315
+ message: z.ZodString;
316
+ timestamp: z.ZodUnion<readonly [z.ZodNumber, z.ZodString]>;
317
+ }, z.core.$strip>;
318
+ /** A single issue record tracking a processing failure for a file. */
319
+ type IssueRecord = z.infer<typeof issueRecordSchema>;
320
+ /** Issues file: array of issue records per file path. */
321
+ type IssuesFile = Partial<Record<string, IssueRecord[]>>;
322
+
323
+ /**
324
+ * @module issues/IssuesManager
325
+ * Manages persistent issue tracking for file processing failures. Read-modify-write with in-memory cache.
326
+ */
327
+
328
+ /**
329
+ * Manages a persistent issues.json file tracking processing failures per file.
330
+ */
331
+ declare class IssuesManager extends JsonFileStore<IssuesFile> {
332
+ constructor(stateDir: string, logger: pino.Logger);
333
+ protected createEmpty(): IssuesFile;
334
+ /** Record or update an issue for a file path. */
335
+ record(filePath: string, type: IssueRecord['type'], message: string, options?: {
336
+ property?: string;
337
+ rules?: string[];
338
+ types?: string[];
339
+ }): void;
340
+ /** Clear an issue for a file path (called on successful processing). */
341
+ clear(filePath: string): void;
342
+ /** Wipe all issues (called on full reindex start). */
343
+ clearAll(): void;
344
+ /** Get all current issues. */
345
+ getAll(): IssuesFile;
346
+ }
175
347
 
176
348
  /**
177
349
  * @module templates/engine
@@ -202,15 +374,24 @@ declare function resolveTemplateSource(value: string, namedTemplates: Record<str
202
374
  */
203
375
  declare function createHandlebarsInstance(): typeof Handlebars;
204
376
  /**
205
- * Load custom helpers from file paths.
377
+ * Load custom helpers from named helper config.
378
+ *
379
+ * Each file should export a default function that receives the Handlebars instance
380
+ * and a namespace prefix string. The function should register helpers with the
381
+ * namespace prefix applied.
206
382
  *
207
- * Each file should export a default function that receives the Handlebars instance.
383
+ * If the module does not accept a namespace argument, helpers are registered
384
+ * with namespace prefixing applied automatically to any helpers registered
385
+ * during the call.
208
386
  *
209
387
  * @param hbs - The Handlebars instance.
210
- * @param paths - File paths to custom helper modules.
388
+ * @param helpers - Named helper config: Record of namespace to path/description.
211
389
  * @param configDir - Directory to resolve relative paths against.
212
390
  */
213
- declare function loadCustomHelpers(hbs: typeof Handlebars, paths: string[], configDir: string): Promise<void>;
391
+ declare function loadCustomHelpers(hbs: typeof Handlebars, helpers: Record<string, {
392
+ path: string;
393
+ description?: string;
394
+ }>, configDir: string): Promise<void>;
214
395
  /**
215
396
  * The template engine: holds compiled templates and renders them against context.
216
397
  */
@@ -253,11 +434,17 @@ declare class TemplateEngine {
253
434
  *
254
435
  * @param rules - The inference rules (may contain template fields).
255
436
  * @param namedTemplates - Named template definitions from config.
256
- * @param templateHelperPaths - Paths to custom helper modules.
437
+ * @param templateHelpers - Custom helper registrations with paths and descriptions.
257
438
  * @param configDir - Directory to resolve relative paths against.
258
439
  * @returns The configured TemplateEngine, or undefined if no templates are used.
259
440
  */
260
- declare function buildTemplateEngine(rules: InferenceRule[], namedTemplates?: Record<string, string>, templateHelperPaths?: string[], configDir?: string): Promise<TemplateEngine | undefined>;
441
+ declare function buildTemplateEngine(rules: InferenceRule[], namedTemplates?: Record<string, string | {
442
+ template: string;
443
+ description?: string;
444
+ }>, templateHelpers?: Record<string, {
445
+ path: string;
446
+ description?: string;
447
+ }>, configDir?: string): Promise<TemplateEngine | undefined>;
261
448
 
262
449
  /**
263
450
  * @module templates/helpers
@@ -327,6 +514,7 @@ interface CompiledRule {
327
514
  }
328
515
  /**
329
516
  * Compile an array of inference rules into executable validators.
517
+ * Validates rule name uniqueness before compilation.
330
518
  *
331
519
  * @param rules - The inference rule definitions.
332
520
  * @returns An array of compiled rules.
@@ -353,6 +541,8 @@ interface ApplyRulesResult {
353
541
  metadata: Record<string, unknown>;
354
542
  /** Rendered template content from the last matching rule with a template, or null. */
355
543
  renderedContent: string | null;
544
+ /** Names of rules that matched. */
545
+ matchedRules: string[];
356
546
  }
357
547
  /**
358
548
  * Apply compiled inference rules to file attributes, returning merged metadata and optional rendered content.
@@ -367,10 +557,40 @@ interface ApplyRulesResult {
367
557
  * @param logger - Optional logger for warnings (falls back to console.warn).
368
558
  * @param templateEngine - Optional template engine for rendering content templates.
369
559
  * @param configDir - Optional config directory for resolving .json map file paths.
560
+ * @param globalSchemas - Optional global schemas collection for resolving schema references.
370
561
  * @returns The merged metadata and optional rendered content.
371
562
  */
372
- declare function applyRules(compiledRules: CompiledRule[], attributes: FileAttributes, namedMaps?: Record<string, JsonMapMap>, logger?: RuleLogger, templateEngine?: TemplateEngine, configDir?: string, customMapLib?: Record<string, (...args: unknown[]) => unknown>): Promise<ApplyRulesResult>;
563
+ declare function applyRules(compiledRules: CompiledRule[], attributes: FileAttributes, namedMaps?: Record<string, JsonMapMap>, logger?: RuleLogger, templateEngine?: TemplateEngine, configDir?: string, customMapLib?: Record<string, (...args: unknown[]) => unknown>, globalSchemas?: Record<string, SchemaEntry>): Promise<ApplyRulesResult>;
564
+
565
+ /**
566
+ * @module values/ValuesManager
567
+ * Manages per-rule distinct metadata value tracking. Persists to disk with in-memory caching and sorted deduplication.
568
+ */
569
+
570
+ /** Per-rule distinct values: rule name → field name → sorted unique values. */
571
+ type ValuesIndex = Record<string, Record<string, unknown[]>>;
572
+ /**
573
+ * Manages a persistent values.json file tracking distinct metadata values per rule.
574
+ */
575
+ declare class ValuesManager extends JsonFileStore<ValuesIndex> {
576
+ constructor(stateDir: string, logger: pino.Logger);
577
+ protected createEmpty(): ValuesIndex;
578
+ /** Check if a value is a trackable primitive (string, number, boolean). */
579
+ private isTrackable;
580
+ /** Update distinct values for a rule from metadata. */
581
+ update(ruleName: string, metadata: Record<string, unknown>): void;
582
+ /** Wipe all values (called on full reindex start). */
583
+ clearAll(): void;
584
+ /** Get all current values. */
585
+ getAll(): ValuesIndex;
586
+ /** Get values for a specific rule. */
587
+ getForRule(ruleName: string): Record<string, unknown[]>;
588
+ }
373
589
 
590
+ /**
591
+ * @module vectorStore/types
592
+ * Vector store interface and type definitions.
593
+ */
374
594
  /**
375
595
  * A point to upsert into the vector store.
376
596
  */
@@ -418,10 +638,72 @@ interface CollectionInfo {
418
638
  /** Payload field schema keyed by field name. */
419
639
  payloadFields: Record<string, PayloadFieldSchema>;
420
640
  }
641
+ /**
642
+ * Abstract interface for vector store operations.
643
+ *
644
+ * Enables dependency inversion and easier testing.
645
+ */
646
+ interface VectorStore {
647
+ /**
648
+ * Ensure the collection exists with correct configuration.
649
+ */
650
+ ensureCollection(): Promise<void>;
651
+ /**
652
+ * Upsert points into the collection.
653
+ *
654
+ * @param points - The points to upsert.
655
+ */
656
+ upsert(points: VectorPoint[]): Promise<void>;
657
+ /**
658
+ * Delete points by their IDs.
659
+ *
660
+ * @param ids - The point IDs to delete.
661
+ */
662
+ delete(ids: string[]): Promise<void>;
663
+ /**
664
+ * Set payload fields for the specified point IDs.
665
+ *
666
+ * @param ids - Point IDs to update.
667
+ * @param payload - Payload fields to set.
668
+ */
669
+ setPayload(ids: string[], payload: Record<string, unknown>): Promise<void>;
670
+ /**
671
+ * Get the payload of a point by ID.
672
+ *
673
+ * @param id - The point ID.
674
+ * @returns The payload, or `null` if the point doesn't exist.
675
+ */
676
+ getPayload(id: string): Promise<Record<string, unknown> | null>;
677
+ /**
678
+ * Get collection info including point count, dimensions, and payload field schema.
679
+ */
680
+ getCollectionInfo(): Promise<CollectionInfo>;
681
+ /**
682
+ * Search for similar vectors.
683
+ *
684
+ * @param vector - The query vector.
685
+ * @param limit - Maximum results to return.
686
+ * @param filter - Optional Qdrant filter.
687
+ * @param offset - Optional result offset.
688
+ * @returns An array of search results.
689
+ */
690
+ search(vector: number[], limit: number, filter?: Record<string, unknown>, offset?: number): Promise<SearchResult[]>;
691
+ /**
692
+ * Scroll through all points matching a filter.
693
+ *
694
+ * @param filter - Optional Qdrant filter.
695
+ * @param limit - Page size for scrolling.
696
+ * @yields Scrolled points.
697
+ */
698
+ scroll(filter?: Record<string, unknown>, limit?: number): AsyncGenerator<ScrolledPoint>;
699
+ }
700
+
421
701
  /**
422
702
  * Client wrapper for Qdrant vector store operations.
703
+ *
704
+ * Implements the {@link VectorStore} interface for dependency inversion.
423
705
  */
424
- declare class VectorStoreClient {
706
+ declare class VectorStoreClient implements VectorStore {
425
707
  private readonly client;
426
708
  private readonly collectionName;
427
709
  private readonly dims;
@@ -438,6 +720,13 @@ declare class VectorStoreClient {
438
720
  * Ensure the collection exists with correct dimensions and Cosine distance.
439
721
  */
440
722
  ensureCollection(): Promise<void>;
723
+ /**
724
+ * Retry a Qdrant operation with standardized config and logging.
725
+ *
726
+ * @param operation - Operation name for logging (e.g., 'upsert', 'delete').
727
+ * @param fn - Async function to retry.
728
+ */
729
+ private retryOperation;
441
730
  /**
442
731
  * Upsert points into the collection.
443
732
  *
@@ -488,7 +777,7 @@ declare class VectorStoreClient {
488
777
  * @param filter - Optional Qdrant filter.
489
778
  * @returns An array of search results.
490
779
  */
491
- search(vector: number[], limit: number, filter?: Record<string, unknown>): Promise<SearchResult[]>;
780
+ search(vector: number[], limit: number, filter?: Record<string, unknown>, offset?: number): Promise<SearchResult[]>;
492
781
  /**
493
782
  * Scroll through all points matching a filter.
494
783
  *
@@ -500,9 +789,8 @@ declare class VectorStoreClient {
500
789
  }
501
790
 
502
791
  /**
503
- * @module processor
504
- *
505
- * Core document processing pipeline. Handles extracting text, computing embeddings, syncing with vector store.
792
+ * @module processor/ProcessorConfig
793
+ * Configuration interface for DocumentProcessor, extracted for single-responsibility.
506
794
  */
507
795
 
508
796
  /**
@@ -521,30 +809,82 @@ interface ProcessorConfig {
521
809
  configDir?: string;
522
810
  /** Custom JsonMap lib functions loaded from mapHelpers config. */
523
811
  customMapLib?: Record<string, (...args: unknown[]) => unknown>;
812
+ /** Global schemas collection for inference rule schema references. */
813
+ globalSchemas?: Record<string, SchemaEntry>;
814
+ }
815
+
816
+ /**
817
+ * @module processor/types
818
+ * Document processor interface definitions.
819
+ */
820
+
821
+ /**
822
+ * Abstraction for document processing operations.
823
+ *
824
+ * Enables factories and consumers to depend on an interface (dependency inversion principle).
825
+ */
826
+ interface DocumentProcessorInterface {
827
+ /** Process a file through the full pipeline (extract/embed/upsert). */
828
+ processFile(filePath: string): Promise<void>;
829
+ /** Delete a file's points/metadata from the system. */
830
+ deleteFile(filePath: string): Promise<void>;
831
+ /** Process a metadata sidecar update for a file (payload update only). */
832
+ processMetadataUpdate(filePath: string, metadata: Record<string, unknown>): Promise<Record<string, unknown> | null>;
833
+ /** Process a rules update for a file (rebuild merged metadata, payload update only). */
834
+ processRulesUpdate(filePath: string): Promise<Record<string, unknown> | null>;
835
+ /** Update compiled inference rules and associated engines. */
836
+ updateRules(compiledRules: CompiledRule[], templateEngine?: TemplateEngine, customMapLib?: Record<string, (...args: unknown[]) => unknown>): void;
837
+ }
838
+
839
+ /**
840
+ * @module processor
841
+ *
842
+ * Core document processing pipeline. Handles extracting text, computing embeddings, syncing with vector store.
843
+ */
844
+
845
+ /**
846
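+ * Dependencies required to construct a {@link DocumentProcessor}.
847
+ *
848
+ * Bundles the processor configuration, embedding provider, vector store, compiled rules, and logger, plus an optional template engine, issues manager, and values manager.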
849
+ */
850
+ interface DocumentProcessorDeps {
851
+ /** Processor configuration (chunk sizes, directories, maps). */
852
+ config: ProcessorConfig;
853
+ /** Provider for generating text embeddings. */
854
+ embeddingProvider: EmbeddingProvider;
855
+ /** Vector store for persistence. */
856
+ vectorStore: VectorStore;
857
+ /** Pre-compiled inference rules for metadata extraction. */
858
+ compiledRules: CompiledRule[];
859
+ /** Pino logger instance. */
860
+ logger: pino.Logger;
861
+ /** Optional Handlebars template engine for content templates. */
862
+ templateEngine?: TemplateEngine;
863
+ /** Optional issues manager for tracking processing errors. */
864
+ issuesManager?: IssuesManager;
865
+ /** Optional values manager for tracking rule-extracted values. */
866
+ valuesManager?: ValuesManager;
524
867
  }
525
868
  /**
526
869
  * Core document processing pipeline.
527
870
  *
528
871
  * Handles extracting text, computing embeddings, and syncing with the vector store.
529
872
  */
530
- declare class DocumentProcessor {
873
+ declare class DocumentProcessor implements DocumentProcessorInterface {
531
874
  private config;
532
875
  private readonly embeddingProvider;
533
876
  private readonly vectorStore;
534
877
  private compiledRules;
535
878
  private readonly logger;
536
879
  private templateEngine?;
880
+ private readonly issuesManager?;
881
+ private readonly valuesManager?;
537
882
  /**
538
883
  * Create a new DocumentProcessor.
539
884
  *
540
- * @param config - The processor configuration.
541
- * @param embeddingProvider - The embedding provider.
542
- * @param vectorStore - The vector store client.
543
- * @param compiledRules - The compiled inference rules.
544
- * @param logger - The logger instance.
545
- * @param templateEngine - Optional template engine for content templates.
885
+ * @param deps - The processor dependencies.
546
886
  */
547
- constructor(config: ProcessorConfig, embeddingProvider: EmbeddingProvider, vectorStore: VectorStoreClient, compiledRules: CompiledRule[], logger: pino.Logger, templateEngine?: TemplateEngine);
887
+ constructor({ config, embeddingProvider, vectorStore, compiledRules, logger, templateEngine, issuesManager, valuesManager, }: DocumentProcessorDeps);
548
888
  /**
549
889
  * Process a file through the full pipeline: extract, hash, chunk, embed, upsert.
550
890
  *
@@ -662,12 +1002,45 @@ declare class EventQueue {
662
1002
  private maybeResolveDrain;
663
1003
  }
664
1004
 
1005
+ /**
1006
+ * @module api/ReindexTracker
1007
+ * Tracks reindex operation state for status reporting. Single instance shared across handlers.
1008
+ */
1009
+ /** Reindex status snapshot for API consumers. */
1010
+ interface ReindexStatus {
1011
+ /** Whether a reindex operation is currently in progress. */
1012
+ active: boolean;
1013
+ /** The active reindex scope (when {@link active} is true). */
1014
+ scope?: string;
1015
+ /** ISO 8601 timestamp when the current reindex started (when {@link active} is true). */
1016
+ startedAt?: string;
1017
+ }
1018
+ /**
1019
+ * Tracks the state of reindex operations.
1020
+ */
1021
+ declare class ReindexTracker {
1022
+ private _active;
1023
+ private _scope?;
1024
+ private _startedAt?;
1025
+ /** Mark a reindex as started. */
1026
+ start(scope: 'issues' | 'full'): void;
1027
+ /** Mark the current reindex as complete. */
1028
+ complete(): void;
1029
+ /** Get current reindex status. */
1030
+ getStatus(): ReindexStatus;
1031
+ }
1032
+
1033
+ /**
1034
+ * @module api
1035
+ * Fastify API server factory. Registers all route handlers and returns an unstarted server instance.
1036
+ */
1037
+
665
1038
  /**
666
1039
  * Options for {@link createApiServer}.
667
1040
  */
668
1041
  interface ApiServerOptions {
669
1042
  /** The document processor. */
670
- processor: DocumentProcessor;
1043
+ processor: DocumentProcessorInterface;
671
1044
  /** The vector store client. */
672
1045
  vectorStore: VectorStoreClient;
673
1046
  /** The embedding provider. */
@@ -678,6 +1051,16 @@ interface ApiServerOptions {
678
1051
  config: JeevesWatcherConfig;
679
1052
  /** The logger instance. */
680
1053
  logger: pino.Logger;
1054
+ /** The issues manager. */
1055
+ issuesManager: IssuesManager;
1056
+ /** The values manager. */
1057
+ valuesManager: ValuesManager;
1058
+ /** The reindex tracker (optional, created if not provided). */
1059
+ reindexTracker?: ReindexTracker;
1060
+ /** Path to the config file on disk. */
1061
+ configPath: string;
1062
+ /** Helper introspection for merged document. */
1063
+ helperIntrospection?: AllHelpersIntrospection;
681
1064
  }
682
1065
  /**
683
1066
  * Create the Fastify API server with all routes registered.
@@ -831,7 +1214,7 @@ declare class FileSystemWatcher {
831
1214
  * @param logger - The logger instance.
832
1215
  * @param options - Optional health/fatal error options.
833
1216
  */
834
- constructor(config: WatchConfig, queue: EventQueue, processor: DocumentProcessor, logger: pino.Logger, options?: FileSystemWatcherOptions);
1217
+ constructor(config: WatchConfig, queue: EventQueue, processor: DocumentProcessorInterface, logger: pino.Logger, options?: FileSystemWatcherOptions);
835
1218
  /**
836
1219
  * Start watching the filesystem and processing events.
837
1220
  */
@@ -880,11 +1263,11 @@ interface JeevesWatcherFactories {
880
1263
  /** Compile inference rules from config. */
881
1264
  compileRules: typeof compileRules;
882
1265
  /** Create a document processor for file ingestion. */
883
- createDocumentProcessor: (config: ConstructorParameters<typeof DocumentProcessor>[0], embeddingProvider: EmbeddingProvider, vectorStore: VectorStoreClient, compiledRules: ConstructorParameters<typeof DocumentProcessor>[3], logger: pino.Logger, templateEngine?: ConstructorParameters<typeof DocumentProcessor>[5]) => DocumentProcessor;
1266
+ createDocumentProcessor: (deps: ConstructorParameters<typeof DocumentProcessor>[0]) => DocumentProcessorInterface;
884
1267
  /** Create an event queue for batching file-system events. */
885
1268
  createEventQueue: (options: ConstructorParameters<typeof EventQueue>[0]) => EventQueue;
886
1269
  /** Create a file-system watcher for the configured watch paths. */
887
- createFileSystemWatcher: (config: JeevesWatcherConfig['watch'], queue: EventQueue, processor: DocumentProcessor, logger: pino.Logger, options?: FileSystemWatcherOptions) => FileSystemWatcher;
1270
+ createFileSystemWatcher: (config: JeevesWatcherConfig['watch'], queue: EventQueue, processor: DocumentProcessorInterface, logger: pino.Logger, options?: FileSystemWatcherOptions) => FileSystemWatcher;
888
1271
  /** Create the HTTP API server. */
889
1272
  createApiServer: typeof createApiServer;
890
1273
  }
@@ -928,6 +1311,9 @@ declare class JeevesWatcher {
928
1311
  private server;
929
1312
  private processor;
930
1313
  private configWatcher;
1314
+ private issuesManager;
1315
+ private valuesManager;
1316
+ private helperIntrospection;
931
1317
  /**
932
1318
  * Create a new JeevesWatcher instance.
933
1319
  *
@@ -945,8 +1331,6 @@ declare class JeevesWatcher {
945
1331
  * Gracefully stop all components.
946
1332
  */
947
1333
  stop(): Promise<void>;
948
- private initEmbeddingAndStore;
949
- private createWatcher;
950
1334
  private startApiServer;
951
1335
  private startConfigWatch;
952
1336
  private stopConfigWatch;
@@ -978,14 +1362,16 @@ interface ExtractedText {
978
1362
  /** Parsed JSON object (JSON files). */
979
1363
  json?: Record<string, unknown>;
980
1364
  }
1365
+ type Extractor = (filePath: string) => Promise<ExtractedText>;
981
1366
  /**
982
1367
  * Extract text from a file based on extension.
983
1368
  *
984
1369
  * @param filePath - Path to the file.
985
1370
  * @param extension - File extension (including leading dot).
1371
+ * @param additionalExtractors - Optional map of additional extractors by extension.
986
1372
  * @returns Extracted text and optional structured data.
987
1373
  */
988
- declare function extractText(filePath: string, extension: string): Promise<ExtractedText>;
1374
+ declare function extractText(filePath: string, extension: string, additionalExtractors?: Map<string, Extractor>): Promise<ExtractedText>;
989
1375
 
990
1376
  /**
991
1377
  * Compute a SHA-256 hex digest of the given text.
@@ -1036,5 +1422,5 @@ declare function deleteMetadata(filePath: string, metadataDir: string): Promise<
1036
1422
  */
1037
1423
  declare function pointId(filePath: string, chunkIndex?: number): string;
1038
1424
 
1039
- export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, TemplateEngine, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
1040
- export type { ApiConfig, ApiServerOptions, ApplyRulesResult, CollectionInfo, CompiledRule, CompiledTemplate, ConfigWatchConfig, EmbeddingConfig, EmbeddingProvider, EventQueueOptions, ExtractedText, FileAttributes, FileSystemWatcherOptions, InferenceRule, JeevesWatcherConfig, JeevesWatcherFactories, JeevesWatcherRuntimeOptions, LoggingConfig, PayloadFieldSchema, ProcessFn, ProcessorConfig, RuleLogger, ScrolledPoint, SearchResult, SystemHealthOptions, VectorPoint, VectorStoreConfig, WatchConfig, WatchEvent };
1425
+ export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, IssuesManager, JeevesWatcher, ReindexTracker, SystemHealth, TemplateEngine, ValuesManager, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, buildTemplateEngine, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createHandlebarsInstance, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, issueRecordSchema, jeevesWatcherConfigSchema, loadConfig, loadCustomHelpers, loggingConfigSchema, metadataPath, pointId, readMetadata, registerBuiltinHelpers, resolveTemplateSource, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
1426
+ export type { ApiConfig, ApiServerOptions, ApplyRulesResult, CollectionInfo, CompiledRule, CompiledTemplate, ConfigWatchConfig, DocumentProcessorDeps, EmbeddingConfig, EmbeddingProvider, EventQueueOptions, ExtractedText, FileAttributes, FileSystemWatcherOptions, InferenceRule, IssueRecord, IssuesFile, JeevesWatcherConfig, JeevesWatcherFactories, JeevesWatcherRuntimeOptions, LoggingConfig, PayloadFieldSchema, ProcessFn, ProcessorConfig, ReindexStatus, RuleLogger, ScrolledPoint, SearchResult, SystemHealthOptions, ValuesIndex, VectorPoint, VectorStoreConfig, WatchConfig, WatchEvent };