@karmaniverous/jeeves-watcher 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,6 +13,12 @@ Filesystem watcher that keeps a Qdrant vector store in sync with document change
13
13
  - **Syncs** to Qdrant for fast semantic search
14
14
  - **Enriches** metadata via rules and API endpoints
15
15
 
16
+ ### Architecture
17
+
18
+ ![System Architecture](assets/system-architecture.png)
19
+
20
+ For detailed architecture documentation, see [guides/architecture.md](guides/architecture.md).
21
+
16
22
  ## Quick Start
17
23
 
18
24
  ### Installation
@@ -50,7 +56,7 @@ Example minimal configuration:
50
56
  },
51
57
  "embedding": {
52
58
  "provider": "google",
53
- "model": "text-embedding-004",
59
+ "model": "gemini-embedding-001",
54
60
  "apiKey": "${GOOGLE_API_KEY}"
55
61
  },
56
62
  "vectorStore": {
@@ -81,7 +87,7 @@ The watcher will:
81
87
  | `jeeves-watcher reindex` | Reindex all watched files |
82
88
  | `jeeves-watcher rebuild-metadata` | Rebuild metadata files from Qdrant payloads |
83
89
  | `jeeves-watcher search <query>` | Search the vector store |
84
- | `jeeves-watcher enrich <path>` | Enrich document metadata |
90
+ | `jeeves-watcher enrich <path>` | Enrich document metadata with key-value pairs |
85
91
  | `jeeves-watcher validate` | Validate the configuration |
86
92
  | `jeeves-watcher service` | Manage the watcher as a system service |
87
93
  | `jeeves-watcher config-reindex` | Reindex after configuration changes (rules only or full) |
@@ -110,24 +116,12 @@ The watcher will:
110
116
  {
111
117
  "embedding": {
112
118
  "provider": "google",
113
- "model": "text-embedding-004",
119
+ "model": "gemini-embedding-001",
114
120
  "apiKey": "${GOOGLE_API_KEY}"
115
121
  }
116
122
  }
117
123
  ```
118
124
 
119
- #### OpenAI
120
-
121
- ```json
122
- {
123
- "embedding": {
124
- "provider": "openai",
125
- "model": "text-embedding-3-small",
126
- "apiKey": "${OPENAI_API_KEY}"
127
- }
128
- }
129
- ```
130
-
131
125
  ### Vector Store
132
126
 
133
127
  ```json
@@ -168,9 +162,11 @@ Automatically enrich metadata based on file patterns:
168
162
 
169
163
  ### Chunking
170
164
 
165
+ Chunking settings are configured under `embedding`:
166
+
171
167
  ```json
172
168
  {
173
- "chunking": {
169
+ "embedding": {
174
170
  "chunkSize": 1000,
175
171
  "chunkOverlap": 200
176
172
  }
@@ -0,0 +1,577 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "watch": {
6
+ "type": "object",
7
+ "properties": {
8
+ "paths": {
9
+ "$ref": "#/definitions/__schema0"
10
+ },
11
+ "ignored": {
12
+ "$ref": "#/definitions/__schema2"
13
+ },
14
+ "pollIntervalMs": {
15
+ "$ref": "#/definitions/__schema4"
16
+ },
17
+ "usePolling": {
18
+ "$ref": "#/definitions/__schema6"
19
+ },
20
+ "debounceMs": {
21
+ "$ref": "#/definitions/__schema8"
22
+ },
23
+ "stabilityThresholdMs": {
24
+ "$ref": "#/definitions/__schema10"
25
+ }
26
+ },
27
+ "required": [
28
+ "paths"
29
+ ],
30
+ "description": "File system watch configuration."
31
+ },
32
+ "configWatch": {
33
+ "description": "Configuration file watch settings.",
34
+ "allOf": [
35
+ {
36
+ "$ref": "#/definitions/__schema12"
37
+ }
38
+ ]
39
+ },
40
+ "embedding": {
41
+ "type": "object",
42
+ "properties": {
43
+ "provider": {
44
+ "$ref": "#/definitions/__schema15"
45
+ },
46
+ "model": {
47
+ "$ref": "#/definitions/__schema17"
48
+ },
49
+ "chunkSize": {
50
+ "$ref": "#/definitions/__schema19"
51
+ },
52
+ "chunkOverlap": {
53
+ "$ref": "#/definitions/__schema21"
54
+ },
55
+ "dimensions": {
56
+ "$ref": "#/definitions/__schema23"
57
+ },
58
+ "apiKey": {
59
+ "$ref": "#/definitions/__schema25"
60
+ },
61
+ "rateLimitPerMinute": {
62
+ "$ref": "#/definitions/__schema27"
63
+ },
64
+ "concurrency": {
65
+ "$ref": "#/definitions/__schema29"
66
+ }
67
+ },
68
+ "description": "Embedding model configuration."
69
+ },
70
+ "vectorStore": {
71
+ "type": "object",
72
+ "properties": {
73
+ "url": {
74
+ "$ref": "#/definitions/__schema31"
75
+ },
76
+ "collectionName": {
77
+ "$ref": "#/definitions/__schema32"
78
+ },
79
+ "apiKey": {
80
+ "$ref": "#/definitions/__schema33"
81
+ }
82
+ },
83
+ "required": [
84
+ "url",
85
+ "collectionName"
86
+ ],
87
+ "description": "Qdrant vector store configuration."
88
+ },
89
+ "metadataDir": {
90
+ "description": "Directory for persisted metadata sidecar files.",
91
+ "allOf": [
92
+ {
93
+ "$ref": "#/definitions/__schema35"
94
+ }
95
+ ]
96
+ },
97
+ "api": {
98
+ "description": "API server configuration.",
99
+ "allOf": [
100
+ {
101
+ "$ref": "#/definitions/__schema36"
102
+ }
103
+ ]
104
+ },
105
+ "extractors": {
106
+ "description": "Extractor configurations keyed by name.",
107
+ "allOf": [
108
+ {
109
+ "$ref": "#/definitions/__schema39"
110
+ }
111
+ ]
112
+ },
113
+ "inferenceRules": {
114
+ "description": "Rules for inferring metadata from file attributes.",
115
+ "allOf": [
116
+ {
117
+ "$ref": "#/definitions/__schema40"
118
+ }
119
+ ]
120
+ },
121
+ "maps": {
122
+ "description": "Reusable named JsonMap transformations.",
123
+ "allOf": [
124
+ {
125
+ "$ref": "#/definitions/__schema48"
126
+ }
127
+ ]
128
+ },
129
+ "logging": {
130
+ "description": "Logging configuration.",
131
+ "allOf": [
132
+ {
133
+ "$ref": "#/definitions/__schema49"
134
+ }
135
+ ]
136
+ },
137
+ "shutdownTimeoutMs": {
138
+ "description": "Timeout in milliseconds for graceful shutdown.",
139
+ "allOf": [
140
+ {
141
+ "$ref": "#/definitions/__schema52"
142
+ }
143
+ ]
144
+ }
145
+ },
146
+ "required": [
147
+ "watch",
148
+ "embedding",
149
+ "vectorStore"
150
+ ],
151
+ "definitions": {
152
+ "__schema0": {
153
+ "minItems": 1,
154
+ "type": "array",
155
+ "items": {
156
+ "$ref": "#/definitions/__schema1"
157
+ },
158
+ "description": "Glob patterns for files to watch (e.g., \"**/*.md\"). At least one required."
159
+ },
160
+ "__schema1": {
161
+ "type": "string"
162
+ },
163
+ "__schema2": {
164
+ "description": "Glob patterns to exclude from watching (e.g., \"**/node_modules/**\").",
165
+ "allOf": [
166
+ {
167
+ "$ref": "#/definitions/__schema3"
168
+ }
169
+ ]
170
+ },
171
+ "__schema3": {
172
+ "type": "array",
173
+ "items": {
174
+ "type": "string"
175
+ }
176
+ },
177
+ "__schema4": {
178
+ "description": "Polling interval in milliseconds when usePolling is enabled.",
179
+ "allOf": [
180
+ {
181
+ "$ref": "#/definitions/__schema5"
182
+ }
183
+ ]
184
+ },
185
+ "__schema5": {
186
+ "type": "number"
187
+ },
188
+ "__schema6": {
189
+ "description": "Use polling instead of native file system events (for network drives).",
190
+ "allOf": [
191
+ {
192
+ "$ref": "#/definitions/__schema7"
193
+ }
194
+ ]
195
+ },
196
+ "__schema7": {
197
+ "type": "boolean"
198
+ },
199
+ "__schema8": {
200
+ "description": "Debounce delay in milliseconds for file change events.",
201
+ "allOf": [
202
+ {
203
+ "$ref": "#/definitions/__schema9"
204
+ }
205
+ ]
206
+ },
207
+ "__schema9": {
208
+ "type": "number"
209
+ },
210
+ "__schema10": {
211
+ "description": "Time in milliseconds a file must remain unchanged before processing.",
212
+ "allOf": [
213
+ {
214
+ "$ref": "#/definitions/__schema11"
215
+ }
216
+ ]
217
+ },
218
+ "__schema11": {
219
+ "type": "number"
220
+ },
221
+ "__schema12": {
222
+ "type": "object",
223
+ "properties": {
224
+ "enabled": {
225
+ "description": "Enable automatic reloading when config file changes.",
226
+ "allOf": [
227
+ {
228
+ "$ref": "#/definitions/__schema13"
229
+ }
230
+ ]
231
+ },
232
+ "debounceMs": {
233
+ "description": "Debounce delay in milliseconds for config file change detection.",
234
+ "allOf": [
235
+ {
236
+ "$ref": "#/definitions/__schema14"
237
+ }
238
+ ]
239
+ }
240
+ }
241
+ },
242
+ "__schema13": {
243
+ "type": "boolean"
244
+ },
245
+ "__schema14": {
246
+ "type": "number"
247
+ },
248
+ "__schema15": {
249
+ "default": "gemini",
250
+ "description": "Embedding provider name (e.g., \"gemini\", \"openai\").",
251
+ "allOf": [
252
+ {
253
+ "$ref": "#/definitions/__schema16"
254
+ }
255
+ ]
256
+ },
257
+ "__schema16": {
258
+ "type": "string"
259
+ },
260
+ "__schema17": {
261
+ "default": "gemini-embedding-001",
262
+ "description": "Embedding model identifier (e.g., \"gemini-embedding-001\", \"text-embedding-3-small\").",
263
+ "allOf": [
264
+ {
265
+ "$ref": "#/definitions/__schema18"
266
+ }
267
+ ]
268
+ },
269
+ "__schema18": {
270
+ "type": "string"
271
+ },
272
+ "__schema19": {
273
+ "description": "Maximum chunk size in characters for text splitting.",
274
+ "allOf": [
275
+ {
276
+ "$ref": "#/definitions/__schema20"
277
+ }
278
+ ]
279
+ },
280
+ "__schema20": {
281
+ "type": "number"
282
+ },
283
+ "__schema21": {
284
+ "description": "Character overlap between consecutive chunks.",
285
+ "allOf": [
286
+ {
287
+ "$ref": "#/definitions/__schema22"
288
+ }
289
+ ]
290
+ },
291
+ "__schema22": {
292
+ "type": "number"
293
+ },
294
+ "__schema23": {
295
+ "description": "Embedding vector dimensions (must match model output).",
296
+ "allOf": [
297
+ {
298
+ "$ref": "#/definitions/__schema24"
299
+ }
300
+ ]
301
+ },
302
+ "__schema24": {
303
+ "type": "number"
304
+ },
305
+ "__schema25": {
306
+ "description": "API key for embedding provider (supports ${ENV_VAR} substitution).",
307
+ "allOf": [
308
+ {
309
+ "$ref": "#/definitions/__schema26"
310
+ }
311
+ ]
312
+ },
313
+ "__schema26": {
314
+ "type": "string"
315
+ },
316
+ "__schema27": {
317
+ "description": "Maximum embedding API requests per minute (rate limiting).",
318
+ "allOf": [
319
+ {
320
+ "$ref": "#/definitions/__schema28"
321
+ }
322
+ ]
323
+ },
324
+ "__schema28": {
325
+ "type": "number"
326
+ },
327
+ "__schema29": {
328
+ "description": "Maximum concurrent embedding requests.",
329
+ "allOf": [
330
+ {
331
+ "$ref": "#/definitions/__schema30"
332
+ }
333
+ ]
334
+ },
335
+ "__schema30": {
336
+ "type": "number"
337
+ },
338
+ "__schema31": {
339
+ "type": "string",
340
+ "description": "Qdrant server URL (e.g., \"http://localhost:6333\")."
341
+ },
342
+ "__schema32": {
343
+ "type": "string",
344
+ "description": "Qdrant collection name for vector storage."
345
+ },
346
+ "__schema33": {
347
+ "description": "Qdrant API key for authentication (supports ${ENV_VAR} substitution).",
348
+ "allOf": [
349
+ {
350
+ "$ref": "#/definitions/__schema34"
351
+ }
352
+ ]
353
+ },
354
+ "__schema34": {
355
+ "type": "string"
356
+ },
357
+ "__schema35": {
358
+ "type": "string"
359
+ },
360
+ "__schema36": {
361
+ "type": "object",
362
+ "properties": {
363
+ "host": {
364
+ "description": "Host address for API server (e.g., \"127.0.0.1\", \"0.0.0.0\").",
365
+ "allOf": [
366
+ {
367
+ "$ref": "#/definitions/__schema37"
368
+ }
369
+ ]
370
+ },
371
+ "port": {
372
+ "description": "Port for API server (e.g., 3456).",
373
+ "allOf": [
374
+ {
375
+ "$ref": "#/definitions/__schema38"
376
+ }
377
+ ]
378
+ }
379
+ }
380
+ },
381
+ "__schema37": {
382
+ "type": "string"
383
+ },
384
+ "__schema38": {
385
+ "type": "number"
386
+ },
387
+ "__schema39": {
388
+ "type": "object",
389
+ "propertyNames": {
390
+ "type": "string"
391
+ },
392
+ "additionalProperties": {}
393
+ },
394
+ "__schema40": {
395
+ "type": "array",
396
+ "items": {
397
+ "type": "object",
398
+ "properties": {
399
+ "match": {
400
+ "type": "object",
401
+ "propertyNames": {
402
+ "$ref": "#/definitions/__schema41"
403
+ },
404
+ "additionalProperties": {
405
+ "$ref": "#/definitions/__schema42"
406
+ },
407
+ "description": "JSON Schema object to match against file attributes."
408
+ },
409
+ "set": {
410
+ "type": "object",
411
+ "propertyNames": {
412
+ "$ref": "#/definitions/__schema43"
413
+ },
414
+ "additionalProperties": {
415
+ "$ref": "#/definitions/__schema44"
416
+ },
417
+ "description": "Metadata fields to set when match succeeds."
418
+ },
419
+ "map": {
420
+ "description": "JsonMap transformation (inline definition or named map reference).",
421
+ "allOf": [
422
+ {
423
+ "$ref": "#/definitions/__schema45"
424
+ }
425
+ ]
426
+ }
427
+ },
428
+ "required": [
429
+ "match",
430
+ "set"
431
+ ]
432
+ }
433
+ },
434
+ "__schema41": {
435
+ "type": "string"
436
+ },
437
+ "__schema42": {},
438
+ "__schema43": {
439
+ "type": "string"
440
+ },
441
+ "__schema44": {},
442
+ "__schema45": {
443
+ "anyOf": [
444
+ {
445
+ "$ref": "#/definitions/__schema46"
446
+ },
447
+ {
448
+ "type": "string"
449
+ }
450
+ ]
451
+ },
452
+ "__schema46": {
453
+ "anyOf": [
454
+ {
455
+ "anyOf": [
456
+ {
457
+ "type": "string"
458
+ },
459
+ {
460
+ "type": "number"
461
+ },
462
+ {
463
+ "type": "boolean"
464
+ },
465
+ {
466
+ "type": "null"
467
+ }
468
+ ]
469
+ },
470
+ {
471
+ "type": "object",
472
+ "propertyNames": {
473
+ "type": "string"
474
+ },
475
+ "additionalProperties": {
476
+ "anyOf": [
477
+ {
478
+ "$ref": "#/definitions/__schema46"
479
+ },
480
+ {
481
+ "type": "object",
482
+ "properties": {
483
+ "$": {
484
+ "anyOf": [
485
+ {
486
+ "$ref": "#/definitions/__schema47"
487
+ },
488
+ {
489
+ "type": "array",
490
+ "items": {
491
+ "$ref": "#/definitions/__schema47"
492
+ }
493
+ }
494
+ ]
495
+ }
496
+ },
497
+ "required": [
498
+ "$"
499
+ ]
500
+ }
501
+ ]
502
+ }
503
+ },
504
+ {
505
+ "type": "array",
506
+ "items": {
507
+ "$ref": "#/definitions/__schema46"
508
+ }
509
+ }
510
+ ]
511
+ },
512
+ "__schema47": {
513
+ "type": "object",
514
+ "properties": {
515
+ "method": {
516
+ "type": "string"
517
+ },
518
+ "params": {
519
+ "anyOf": [
520
+ {
521
+ "type": "string"
522
+ },
523
+ {
524
+ "type": "array",
525
+ "items": {
526
+ "type": "string"
527
+ }
528
+ }
529
+ ]
530
+ }
531
+ },
532
+ "required": [
533
+ "method",
534
+ "params"
535
+ ]
536
+ },
537
+ "__schema48": {
538
+ "type": "object",
539
+ "propertyNames": {
540
+ "type": "string"
541
+ },
542
+ "additionalProperties": {
543
+ "$ref": "#/definitions/__schema46"
544
+ }
545
+ },
546
+ "__schema49": {
547
+ "type": "object",
548
+ "properties": {
549
+ "level": {
550
+ "description": "Logging level (trace, debug, info, warn, error, fatal).",
551
+ "allOf": [
552
+ {
553
+ "$ref": "#/definitions/__schema50"
554
+ }
555
+ ]
556
+ },
557
+ "file": {
558
+ "description": "Path to log file (logs to stdout if omitted).",
559
+ "allOf": [
560
+ {
561
+ "$ref": "#/definitions/__schema51"
562
+ }
563
+ ]
564
+ }
565
+ }
566
+ },
567
+ "__schema50": {
568
+ "type": "string"
569
+ },
570
+ "__schema51": {
571
+ "type": "string"
572
+ },
573
+ "__schema52": {
574
+ "type": "number"
575
+ }
576
+ }
577
+ }