@karmaniverous/jeeves-watcher 0.2.6 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -9
- package/dist/cjs/index.js +88 -9
- package/dist/cli/jeeves-watcher/index.js +89 -9
- package/dist/index.d.ts +31 -1
- package/dist/index.iife.js +88 -9
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +88 -9
- package/dist/plugin/index.js +118 -0
- package/dist/plugin/openclaw.plugin.json +24 -0
- package/dist/plugin/skill/SKILL.md +149 -0
- package/package.json +7 -2
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Filesystem watcher that keeps a Qdrant vector store in sync with document change
|
|
|
9
9
|
- **Watches** directories for file additions, modifications, and deletions
|
|
10
10
|
- **Extracts** text from various formats (Markdown, PDF, DOCX, HTML, JSON, plain text)
|
|
11
11
|
- **Chunks** large documents for optimal embedding
|
|
12
|
-
- **Embeds** content using configurable providers (Google Gemini,
|
|
12
|
+
- **Embeds** content using configurable providers (Google Gemini, mock for testing)
|
|
13
13
|
- **Syncs** to Qdrant for fast semantic search
|
|
14
14
|
- **Enriches** metadata via rules and API endpoints
|
|
15
15
|
|
|
@@ -35,14 +35,14 @@ Create a new configuration file in your project:
|
|
|
35
35
|
jeeves-watcher init
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
-
This generates a
|
|
38
|
+
This generates a `jeeves-watcher.config.json` file with sensible defaults.
|
|
39
39
|
|
|
40
40
|
### Configure
|
|
41
41
|
|
|
42
|
-
Edit
|
|
42
|
+
Edit `jeeves-watcher.config.json` to specify:
|
|
43
43
|
|
|
44
44
|
- **Watch paths**: Directories to monitor
|
|
45
|
-
- **Embedding provider**: Google Gemini
|
|
45
|
+
- **Embedding provider**: Google Gemini or mock (for testing)
|
|
46
46
|
- **Qdrant connection**: URL and collection name
|
|
47
47
|
- **Inference rules**: Automatic metadata enrichment based on file patterns
|
|
48
48
|
|
|
@@ -55,7 +55,7 @@ Example minimal configuration:
|
|
|
55
55
|
"ignored": ["**/node_modules/**", "**/.git/**"]
|
|
56
56
|
},
|
|
57
57
|
"embedding": {
|
|
58
|
-
"provider": "
|
|
58
|
+
"provider": "gemini",
|
|
59
59
|
"model": "gemini-embedding-001",
|
|
60
60
|
"apiKey": "${GOOGLE_API_KEY}"
|
|
61
61
|
},
|
|
@@ -130,7 +130,7 @@ If `GOOGLE_API_KEY` is set in the environment, the value is substituted at confi
|
|
|
130
130
|
```json
|
|
131
131
|
{
|
|
132
132
|
"embedding": {
|
|
133
|
-
"provider": "
|
|
133
|
+
"provider": "gemini",
|
|
134
134
|
"model": "gemini-embedding-001",
|
|
135
135
|
"apiKey": "${GOOGLE_API_KEY}"
|
|
136
136
|
}
|
|
@@ -192,7 +192,7 @@ Chunking settings are configured under `embedding`:
|
|
|
192
192
|
|
|
193
193
|
```json
|
|
194
194
|
{
|
|
195
|
-
"metadataDir": ".jeeves-
|
|
195
|
+
"metadataDir": ".jeeves-watcher"
|
|
196
196
|
}
|
|
197
197
|
```
|
|
198
198
|
|
|
@@ -204,8 +204,8 @@ The watcher provides a REST API (default port: 3456):
|
|
|
204
204
|
|
|
205
205
|
| Endpoint | Method | Description |
|
|
206
206
|
|----------|--------|-------------|
|
|
207
|
-
| `/status` | GET | Health check and
|
|
208
|
-
| `/search` | POST | Semantic search (`{ query: string, limit?: number }`) |
|
|
207
|
+
| `/status` | GET | Health check, uptime, and collection stats |
|
|
208
|
+
| `/search` | POST | Semantic search (`{ query: string, limit?: number, filter?: object }`) |
|
|
209
209
|
| `/metadata` | POST | Update document metadata (`{ path: string, metadata: object }`) |
|
|
210
210
|
| `/reindex` | POST | Reindex all watched files |
|
|
211
211
|
| `/rebuild-metadata` | POST | Rebuild metadata files from Qdrant |
|
|
@@ -219,6 +219,20 @@ curl -X POST http://localhost:3456/search \
|
|
|
219
219
|
-d '{"query": "machine learning algorithms", "limit": 5}'
|
|
220
220
|
```
|
|
221
221
|
|
|
222
|
+
### Example: Search With Filter
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
curl -X POST http://localhost:3456/search \
|
|
226
|
+
-H "Content-Type: application/json" \
|
|
227
|
+
-d '{
|
|
228
|
+
"query": "error handling",
|
|
229
|
+
"limit": 10,
|
|
230
|
+
"filter": {
|
|
231
|
+
"must": [{ "key": "domain", "match": { "value": "backend" } }]
|
|
232
|
+
}
|
|
233
|
+
}'
|
|
234
|
+
```
|
|
235
|
+
|
|
222
236
|
### Example: Update Metadata
|
|
223
237
|
|
|
224
238
|
```bash
|
|
@@ -233,6 +247,22 @@ curl -X POST http://localhost:3456/metadata \
|
|
|
233
247
|
}'
|
|
234
248
|
```
|
|
235
249
|
|
|
250
|
+
## OpenClaw Plugin
|
|
251
|
+
|
|
252
|
+
This repo ships an OpenClaw plugin that exposes the jeeves-watcher API as native agent tools:
|
|
253
|
+
|
|
254
|
+
- `watcher_status` (GET `/status`)
|
|
255
|
+
- `watcher_search` (POST `/search`)
|
|
256
|
+
- `watcher_enrich` (POST `/metadata`)
|
|
257
|
+
|
|
258
|
+
Build output:
|
|
259
|
+
|
|
260
|
+
- Plugin entry: `dist/plugin/index.js`
|
|
261
|
+
- Plugin manifest: `dist/plugin/openclaw.plugin.json`
|
|
262
|
+
- Skill: `dist/plugin/skill/SKILL.md`
|
|
263
|
+
|
|
264
|
+
Plugin configuration supports `apiUrl` (defaults to `http://127.0.0.1:3458`).
|
|
265
|
+
|
|
236
266
|
## Supported File Formats
|
|
237
267
|
|
|
238
268
|
- **Markdown** (`.md`, `.markdown`) — with YAML frontmatter support
|
package/dist/cjs/index.js
CHANGED
|
@@ -381,9 +381,9 @@ function createReindexHandler(deps) {
|
|
|
381
381
|
function createSearchHandler(deps) {
|
|
382
382
|
return async (request, reply) => {
|
|
383
383
|
try {
|
|
384
|
-
const { query, limit = 10 } = request.body;
|
|
384
|
+
const { query, limit = 10, filter } = request.body;
|
|
385
385
|
const vectors = await deps.embeddingProvider.embed([query]);
|
|
386
|
-
const results = await deps.vectorStore.search(vectors[0], limit);
|
|
386
|
+
const results = await deps.vectorStore.search(vectors[0], limit, filter);
|
|
387
387
|
return results;
|
|
388
388
|
}
|
|
389
389
|
catch (error) {
|
|
@@ -395,16 +395,27 @@ function createSearchHandler(deps) {
|
|
|
395
395
|
|
|
396
396
|
/**
|
|
397
397
|
* @module api/handlers/status
|
|
398
|
-
* Fastify route handler for GET /status.
|
|
398
|
+
* Fastify route handler for GET /status. Returns process health, uptime, and collection stats.
|
|
399
399
|
*/
|
|
400
400
|
/**
|
|
401
401
|
* Create handler for GET /status.
|
|
402
|
+
*
|
|
403
|
+
* @param deps - Route dependencies.
|
|
402
404
|
*/
|
|
403
|
-
function createStatusHandler() {
|
|
404
|
-
return () =>
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
405
|
+
function createStatusHandler(deps) {
|
|
406
|
+
return async () => {
|
|
407
|
+
const collectionInfo = await deps.vectorStore.getCollectionInfo();
|
|
408
|
+
return {
|
|
409
|
+
status: 'ok',
|
|
410
|
+
uptime: process.uptime(),
|
|
411
|
+
collection: {
|
|
412
|
+
name: deps.config.vectorStore.collectionName,
|
|
413
|
+
pointCount: collectionInfo.pointCount,
|
|
414
|
+
dimensions: collectionInfo.dimensions,
|
|
415
|
+
},
|
|
416
|
+
payloadFields: collectionInfo.payloadFields,
|
|
417
|
+
};
|
|
418
|
+
};
|
|
408
419
|
}
|
|
409
420
|
|
|
410
421
|
/**
|
|
@@ -418,7 +429,7 @@ function createStatusHandler() {
|
|
|
418
429
|
function createApiServer(options) {
|
|
419
430
|
const { processor, vectorStore, embeddingProvider, logger, config } = options;
|
|
420
431
|
const app = Fastify({ logger: false });
|
|
421
|
-
app.get('/status', createStatusHandler());
|
|
432
|
+
app.get('/status', createStatusHandler({ vectorStore, config }));
|
|
422
433
|
app.post('/metadata', createMetadataHandler({ processor, logger }));
|
|
423
434
|
app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
|
|
424
435
|
app.post('/reindex', createReindexHandler({ config, processor, logger }));
|
|
@@ -1944,6 +1955,22 @@ class EventQueue {
|
|
|
1944
1955
|
}
|
|
1945
1956
|
}
|
|
1946
1957
|
|
|
1958
|
+
/** Infer a Qdrant-style type name from a JS value. */
|
|
1959
|
+
function inferPayloadType(value) {
|
|
1960
|
+
if (value === null || value === undefined)
|
|
1961
|
+
return 'keyword';
|
|
1962
|
+
if (typeof value === 'number') {
|
|
1963
|
+
return Number.isInteger(value) ? 'integer' : 'float';
|
|
1964
|
+
}
|
|
1965
|
+
if (typeof value === 'boolean')
|
|
1966
|
+
return 'bool';
|
|
1967
|
+
if (Array.isArray(value))
|
|
1968
|
+
return 'keyword[]';
|
|
1969
|
+
if (typeof value === 'string') {
|
|
1970
|
+
return value.length > 256 ? 'text' : 'keyword';
|
|
1971
|
+
}
|
|
1972
|
+
return 'keyword';
|
|
1973
|
+
}
|
|
1947
1974
|
/**
|
|
1948
1975
|
* Client wrapper for Qdrant vector store operations.
|
|
1949
1976
|
*/
|
|
@@ -2090,6 +2117,58 @@ class VectorStoreClient {
|
|
|
2090
2117
|
return null;
|
|
2091
2118
|
}
|
|
2092
2119
|
}
|
|
2120
|
+
/**
|
|
2121
|
+
* Get collection info including point count, dimensions, and payload field schema.
|
|
2122
|
+
*
|
|
2123
|
+
* When Qdrant has payload indexes, uses `payload_schema` directly. Otherwise
|
|
2124
|
+
* samples points to discover fields and infer types.
|
|
2125
|
+
*/
|
|
2126
|
+
async getCollectionInfo() {
|
|
2127
|
+
const info = await this.client.getCollection(this.collectionName);
|
|
2128
|
+
const pointCount = info.points_count ?? 0;
|
|
2129
|
+
const vectorsConfig = info.config.params.vectors;
|
|
2130
|
+
const dimensions = vectorsConfig !== undefined && 'size' in vectorsConfig
|
|
2131
|
+
? vectorsConfig.size
|
|
2132
|
+
: 0;
|
|
2133
|
+
// Try indexed payload_schema first.
|
|
2134
|
+
const payloadFields = {};
|
|
2135
|
+
const schemaEntries = Object.entries(info.payload_schema);
|
|
2136
|
+
if (schemaEntries.length > 0) {
|
|
2137
|
+
for (const [key, schema] of schemaEntries) {
|
|
2138
|
+
payloadFields[key] = {
|
|
2139
|
+
type: schema.data_type ?? 'unknown',
|
|
2140
|
+
};
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
else if (pointCount > 0) {
|
|
2144
|
+
// No indexed schema — sample points to discover fields.
|
|
2145
|
+
await this.discoverPayloadFields(payloadFields);
|
|
2146
|
+
}
|
|
2147
|
+
return { pointCount, dimensions, payloadFields };
|
|
2148
|
+
}
|
|
2149
|
+
/**
|
|
2150
|
+
* Sample points and discover payload field names and inferred types.
|
|
2151
|
+
*
|
|
2152
|
+
* @param target - Object to populate with discovered fields.
|
|
2153
|
+
* @param sampleSize - Number of points to sample.
|
|
2154
|
+
*/
|
|
2155
|
+
async discoverPayloadFields(target, sampleSize = 100) {
|
|
2156
|
+
const result = await this.client.scroll(this.collectionName, {
|
|
2157
|
+
limit: sampleSize,
|
|
2158
|
+
with_payload: true,
|
|
2159
|
+
with_vector: false,
|
|
2160
|
+
});
|
|
2161
|
+
for (const point of result.points) {
|
|
2162
|
+
const payload = point.payload;
|
|
2163
|
+
if (!payload)
|
|
2164
|
+
continue;
|
|
2165
|
+
for (const [key, value] of Object.entries(payload)) {
|
|
2166
|
+
if (key in target)
|
|
2167
|
+
continue;
|
|
2168
|
+
target[key] = { type: inferPayloadType(value) };
|
|
2169
|
+
}
|
|
2170
|
+
}
|
|
2171
|
+
}
|
|
2093
2172
|
/**
|
|
2094
2173
|
* Search for similar vectors.
|
|
2095
2174
|
*
|
|
package/dist/cli/jeeves-watcher/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
import { Command } from '@commander-js/extra-typings';
|
|
2
3
|
import Fastify from 'fastify';
|
|
3
4
|
import { readdir, stat, rm, readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
@@ -361,9 +362,9 @@ function createReindexHandler(deps) {
|
|
|
361
362
|
function createSearchHandler(deps) {
|
|
362
363
|
return async (request, reply) => {
|
|
363
364
|
try {
|
|
364
|
-
const { query, limit = 10 } = request.body;
|
|
365
|
+
const { query, limit = 10, filter } = request.body;
|
|
365
366
|
const vectors = await deps.embeddingProvider.embed([query]);
|
|
366
|
-
const results = await deps.vectorStore.search(vectors[0], limit);
|
|
367
|
+
const results = await deps.vectorStore.search(vectors[0], limit, filter);
|
|
367
368
|
return results;
|
|
368
369
|
}
|
|
369
370
|
catch (error) {
|
|
@@ -375,16 +376,27 @@ function createSearchHandler(deps) {
|
|
|
375
376
|
|
|
376
377
|
/**
|
|
377
378
|
* @module api/handlers/status
|
|
378
|
-
* Fastify route handler for GET /status.
|
|
379
|
+
* Fastify route handler for GET /status. Returns process health, uptime, and collection stats.
|
|
379
380
|
*/
|
|
380
381
|
/**
|
|
381
382
|
* Create handler for GET /status.
|
|
383
|
+
*
|
|
384
|
+
* @param deps - Route dependencies.
|
|
382
385
|
*/
|
|
383
|
-
function createStatusHandler() {
|
|
384
|
-
return () =>
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
386
|
+
function createStatusHandler(deps) {
|
|
387
|
+
return async () => {
|
|
388
|
+
const collectionInfo = await deps.vectorStore.getCollectionInfo();
|
|
389
|
+
return {
|
|
390
|
+
status: 'ok',
|
|
391
|
+
uptime: process.uptime(),
|
|
392
|
+
collection: {
|
|
393
|
+
name: deps.config.vectorStore.collectionName,
|
|
394
|
+
pointCount: collectionInfo.pointCount,
|
|
395
|
+
dimensions: collectionInfo.dimensions,
|
|
396
|
+
},
|
|
397
|
+
payloadFields: collectionInfo.payloadFields,
|
|
398
|
+
};
|
|
399
|
+
};
|
|
388
400
|
}
|
|
389
401
|
|
|
390
402
|
/**
|
|
@@ -398,7 +410,7 @@ function createStatusHandler() {
|
|
|
398
410
|
function createApiServer(options) {
|
|
399
411
|
const { processor, vectorStore, embeddingProvider, logger, config } = options;
|
|
400
412
|
const app = Fastify({ logger: false });
|
|
401
|
-
app.get('/status', createStatusHandler());
|
|
413
|
+
app.get('/status', createStatusHandler({ vectorStore, config }));
|
|
402
414
|
app.post('/metadata', createMetadataHandler({ processor, logger }));
|
|
403
415
|
app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
|
|
404
416
|
app.post('/reindex', createReindexHandler({ config, processor, logger }));
|
|
@@ -1945,6 +1957,22 @@ class EventQueue {
|
|
|
1945
1957
|
}
|
|
1946
1958
|
}
|
|
1947
1959
|
|
|
1960
|
+
/** Infer a Qdrant-style type name from a JS value. */
|
|
1961
|
+
function inferPayloadType(value) {
|
|
1962
|
+
if (value === null || value === undefined)
|
|
1963
|
+
return 'keyword';
|
|
1964
|
+
if (typeof value === 'number') {
|
|
1965
|
+
return Number.isInteger(value) ? 'integer' : 'float';
|
|
1966
|
+
}
|
|
1967
|
+
if (typeof value === 'boolean')
|
|
1968
|
+
return 'bool';
|
|
1969
|
+
if (Array.isArray(value))
|
|
1970
|
+
return 'keyword[]';
|
|
1971
|
+
if (typeof value === 'string') {
|
|
1972
|
+
return value.length > 256 ? 'text' : 'keyword';
|
|
1973
|
+
}
|
|
1974
|
+
return 'keyword';
|
|
1975
|
+
}
|
|
1948
1976
|
/**
|
|
1949
1977
|
* Client wrapper for Qdrant vector store operations.
|
|
1950
1978
|
*/
|
|
@@ -2091,6 +2119,58 @@ class VectorStoreClient {
|
|
|
2091
2119
|
return null;
|
|
2092
2120
|
}
|
|
2093
2121
|
}
|
|
2122
|
+
/**
|
|
2123
|
+
* Get collection info including point count, dimensions, and payload field schema.
|
|
2124
|
+
*
|
|
2125
|
+
* When Qdrant has payload indexes, uses `payload_schema` directly. Otherwise
|
|
2126
|
+
* samples points to discover fields and infer types.
|
|
2127
|
+
*/
|
|
2128
|
+
async getCollectionInfo() {
|
|
2129
|
+
const info = await this.client.getCollection(this.collectionName);
|
|
2130
|
+
const pointCount = info.points_count ?? 0;
|
|
2131
|
+
const vectorsConfig = info.config.params.vectors;
|
|
2132
|
+
const dimensions = vectorsConfig !== undefined && 'size' in vectorsConfig
|
|
2133
|
+
? vectorsConfig.size
|
|
2134
|
+
: 0;
|
|
2135
|
+
// Try indexed payload_schema first.
|
|
2136
|
+
const payloadFields = {};
|
|
2137
|
+
const schemaEntries = Object.entries(info.payload_schema);
|
|
2138
|
+
if (schemaEntries.length > 0) {
|
|
2139
|
+
for (const [key, schema] of schemaEntries) {
|
|
2140
|
+
payloadFields[key] = {
|
|
2141
|
+
type: schema.data_type ?? 'unknown',
|
|
2142
|
+
};
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2145
|
+
else if (pointCount > 0) {
|
|
2146
|
+
// No indexed schema — sample points to discover fields.
|
|
2147
|
+
await this.discoverPayloadFields(payloadFields);
|
|
2148
|
+
}
|
|
2149
|
+
return { pointCount, dimensions, payloadFields };
|
|
2150
|
+
}
|
|
2151
|
+
/**
|
|
2152
|
+
* Sample points and discover payload field names and inferred types.
|
|
2153
|
+
*
|
|
2154
|
+
* @param target - Object to populate with discovered fields.
|
|
2155
|
+
* @param sampleSize - Number of points to sample.
|
|
2156
|
+
*/
|
|
2157
|
+
async discoverPayloadFields(target, sampleSize = 100) {
|
|
2158
|
+
const result = await this.client.scroll(this.collectionName, {
|
|
2159
|
+
limit: sampleSize,
|
|
2160
|
+
with_payload: true,
|
|
2161
|
+
with_vector: false,
|
|
2162
|
+
});
|
|
2163
|
+
for (const point of result.points) {
|
|
2164
|
+
const payload = point.payload;
|
|
2165
|
+
if (!payload)
|
|
2166
|
+
continue;
|
|
2167
|
+
for (const [key, value] of Object.entries(payload)) {
|
|
2168
|
+
if (key in target)
|
|
2169
|
+
continue;
|
|
2170
|
+
target[key] = { type: inferPayloadType(value) };
|
|
2171
|
+
}
|
|
2172
|
+
}
|
|
2173
|
+
}
|
|
2094
2174
|
/**
|
|
2095
2175
|
* Search for similar vectors.
|
|
2096
2176
|
*
|
package/dist/index.d.ts
CHANGED
|
@@ -283,6 +283,22 @@ interface ScrolledPoint {
|
|
|
283
283
|
/** The payload metadata. */
|
|
284
284
|
payload: Record<string, unknown>;
|
|
285
285
|
}
|
|
286
|
+
/** Payload field schema information as reported by Qdrant. */
|
|
287
|
+
interface PayloadFieldSchema {
|
|
288
|
+
/** Qdrant data type for the field (e.g. `keyword`, `text`, `integer`). */
|
|
289
|
+
type: string;
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Collection stats and payload schema information.
|
|
293
|
+
*/
|
|
294
|
+
interface CollectionInfo {
|
|
295
|
+
/** Total number of points in the collection. */
|
|
296
|
+
pointCount: number;
|
|
297
|
+
/** Vector dimensions for the collection's configured vector params. */
|
|
298
|
+
dimensions: number;
|
|
299
|
+
/** Payload field schema keyed by field name. */
|
|
300
|
+
payloadFields: Record<string, PayloadFieldSchema>;
|
|
301
|
+
}
|
|
286
302
|
/**
|
|
287
303
|
* Client wrapper for Qdrant vector store operations.
|
|
288
304
|
*/
|
|
@@ -331,6 +347,20 @@ declare class VectorStoreClient {
|
|
|
331
347
|
* @returns The payload, or `null` if the point doesn't exist.
|
|
332
348
|
*/
|
|
333
349
|
getPayload(id: string): Promise<Record<string, unknown> | null>;
|
|
350
|
+
/**
|
|
351
|
+
* Get collection info including point count, dimensions, and payload field schema.
|
|
352
|
+
*
|
|
353
|
+
* When Qdrant has payload indexes, uses `payload_schema` directly. Otherwise
|
|
354
|
+
* samples points to discover fields and infer types.
|
|
355
|
+
*/
|
|
356
|
+
getCollectionInfo(): Promise<CollectionInfo>;
|
|
357
|
+
/**
|
|
358
|
+
* Sample points and discover payload field names and inferred types.
|
|
359
|
+
*
|
|
360
|
+
* @param target - Object to populate with discovered fields.
|
|
361
|
+
* @param sampleSize - Number of points to sample.
|
|
362
|
+
*/
|
|
363
|
+
private discoverPayloadFields;
|
|
334
364
|
/**
|
|
335
365
|
* Search for similar vectors.
|
|
336
366
|
*
|
|
@@ -862,4 +892,4 @@ declare function deleteMetadata(filePath: string, metadataDir: string): Promise<
|
|
|
862
892
|
declare function pointId(filePath: string, chunkIndex?: number): string;
|
|
863
893
|
|
|
864
894
|
export { DocumentProcessor, EventQueue, FileSystemWatcher, GitignoreFilter, JeevesWatcher, SystemHealth, VectorStoreClient, apiConfigSchema, applyRules, buildAttributes, compileRules, configWatchConfigSchema, contentHash, createApiServer, createEmbeddingProvider, createLogger, deleteMetadata, embeddingConfigSchema, extractText, inferenceRuleSchema, jeevesWatcherConfigSchema, loadConfig, loggingConfigSchema, metadataPath, pointId, readMetadata, startFromConfig, vectorStoreConfigSchema, watchConfigSchema, writeMetadata };
|
|
865
|
-
export type { ApiConfig, ApiServerOptions, CompiledRule, ConfigWatchConfig, EmbeddingConfig, EmbeddingProvider, EventQueueOptions, ExtractedText, FileAttributes, FileSystemWatcherOptions, InferenceRule, JeevesWatcherConfig, JeevesWatcherFactories, JeevesWatcherRuntimeOptions, LoggingConfig, ProcessFn, ProcessorConfig, RuleLogger, ScrolledPoint, SearchResult, SystemHealthOptions, VectorPoint, VectorStoreConfig, WatchConfig, WatchEvent };
|
|
895
|
+
export type { ApiConfig, ApiServerOptions, CollectionInfo, CompiledRule, ConfigWatchConfig, EmbeddingConfig, EmbeddingProvider, EventQueueOptions, ExtractedText, FileAttributes, FileSystemWatcherOptions, InferenceRule, JeevesWatcherConfig, JeevesWatcherFactories, JeevesWatcherRuntimeOptions, LoggingConfig, PayloadFieldSchema, ProcessFn, ProcessorConfig, RuleLogger, ScrolledPoint, SearchResult, SystemHealthOptions, VectorPoint, VectorStoreConfig, WatchConfig, WatchEvent };
|
package/dist/index.iife.js
CHANGED
|
@@ -359,9 +359,9 @@
|
|
|
359
359
|
function createSearchHandler(deps) {
|
|
360
360
|
return async (request, reply) => {
|
|
361
361
|
try {
|
|
362
|
-
const { query, limit = 10 } = request.body;
|
|
362
|
+
const { query, limit = 10, filter } = request.body;
|
|
363
363
|
const vectors = await deps.embeddingProvider.embed([query]);
|
|
364
|
-
const results = await deps.vectorStore.search(vectors[0], limit);
|
|
364
|
+
const results = await deps.vectorStore.search(vectors[0], limit, filter);
|
|
365
365
|
return results;
|
|
366
366
|
}
|
|
367
367
|
catch (error) {
|
|
@@ -373,16 +373,27 @@
|
|
|
373
373
|
|
|
374
374
|
/**
|
|
375
375
|
* @module api/handlers/status
|
|
376
|
-
* Fastify route handler for GET /status.
|
|
376
|
+
* Fastify route handler for GET /status. Returns process health, uptime, and collection stats.
|
|
377
377
|
*/
|
|
378
378
|
/**
|
|
379
379
|
* Create handler for GET /status.
|
|
380
|
+
*
|
|
381
|
+
* @param deps - Route dependencies.
|
|
380
382
|
*/
|
|
381
|
-
function createStatusHandler() {
|
|
382
|
-
return () =>
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
383
|
+
function createStatusHandler(deps) {
|
|
384
|
+
return async () => {
|
|
385
|
+
const collectionInfo = await deps.vectorStore.getCollectionInfo();
|
|
386
|
+
return {
|
|
387
|
+
status: 'ok',
|
|
388
|
+
uptime: process.uptime(),
|
|
389
|
+
collection: {
|
|
390
|
+
name: deps.config.vectorStore.collectionName,
|
|
391
|
+
pointCount: collectionInfo.pointCount,
|
|
392
|
+
dimensions: collectionInfo.dimensions,
|
|
393
|
+
},
|
|
394
|
+
payloadFields: collectionInfo.payloadFields,
|
|
395
|
+
};
|
|
396
|
+
};
|
|
386
397
|
}
|
|
387
398
|
|
|
388
399
|
/**
|
|
@@ -396,7 +407,7 @@
|
|
|
396
407
|
function createApiServer(options) {
|
|
397
408
|
const { processor, vectorStore, embeddingProvider, logger, config } = options;
|
|
398
409
|
const app = Fastify({ logger: false });
|
|
399
|
-
app.get('/status', createStatusHandler());
|
|
410
|
+
app.get('/status', createStatusHandler({ vectorStore, config }));
|
|
400
411
|
app.post('/metadata', createMetadataHandler({ processor, logger }));
|
|
401
412
|
app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
|
|
402
413
|
app.post('/reindex', createReindexHandler({ config, processor, logger }));
|
|
@@ -1922,6 +1933,22 @@
|
|
|
1922
1933
|
}
|
|
1923
1934
|
}
|
|
1924
1935
|
|
|
1936
|
+
/** Infer a Qdrant-style type name from a JS value. */
|
|
1937
|
+
function inferPayloadType(value) {
|
|
1938
|
+
if (value === null || value === undefined)
|
|
1939
|
+
return 'keyword';
|
|
1940
|
+
if (typeof value === 'number') {
|
|
1941
|
+
return Number.isInteger(value) ? 'integer' : 'float';
|
|
1942
|
+
}
|
|
1943
|
+
if (typeof value === 'boolean')
|
|
1944
|
+
return 'bool';
|
|
1945
|
+
if (Array.isArray(value))
|
|
1946
|
+
return 'keyword[]';
|
|
1947
|
+
if (typeof value === 'string') {
|
|
1948
|
+
return value.length > 256 ? 'text' : 'keyword';
|
|
1949
|
+
}
|
|
1950
|
+
return 'keyword';
|
|
1951
|
+
}
|
|
1925
1952
|
/**
|
|
1926
1953
|
* Client wrapper for Qdrant vector store operations.
|
|
1927
1954
|
*/
|
|
@@ -2068,6 +2095,58 @@
|
|
|
2068
2095
|
return null;
|
|
2069
2096
|
}
|
|
2070
2097
|
}
|
|
2098
|
+
/**
|
|
2099
|
+
* Get collection info including point count, dimensions, and payload field schema.
|
|
2100
|
+
*
|
|
2101
|
+
* When Qdrant has payload indexes, uses `payload_schema` directly. Otherwise
|
|
2102
|
+
* samples points to discover fields and infer types.
|
|
2103
|
+
*/
|
|
2104
|
+
async getCollectionInfo() {
|
|
2105
|
+
const info = await this.client.getCollection(this.collectionName);
|
|
2106
|
+
const pointCount = info.points_count ?? 0;
|
|
2107
|
+
const vectorsConfig = info.config.params.vectors;
|
|
2108
|
+
const dimensions = vectorsConfig !== undefined && 'size' in vectorsConfig
|
|
2109
|
+
? vectorsConfig.size
|
|
2110
|
+
: 0;
|
|
2111
|
+
// Try indexed payload_schema first.
|
|
2112
|
+
const payloadFields = {};
|
|
2113
|
+
const schemaEntries = Object.entries(info.payload_schema);
|
|
2114
|
+
if (schemaEntries.length > 0) {
|
|
2115
|
+
for (const [key, schema] of schemaEntries) {
|
|
2116
|
+
payloadFields[key] = {
|
|
2117
|
+
type: schema.data_type ?? 'unknown',
|
|
2118
|
+
};
|
|
2119
|
+
}
|
|
2120
|
+
}
|
|
2121
|
+
else if (pointCount > 0) {
|
|
2122
|
+
// No indexed schema — sample points to discover fields.
|
|
2123
|
+
await this.discoverPayloadFields(payloadFields);
|
|
2124
|
+
}
|
|
2125
|
+
return { pointCount, dimensions, payloadFields };
|
|
2126
|
+
}
|
|
2127
|
+
/**
|
|
2128
|
+
* Sample points and discover payload field names and inferred types.
|
|
2129
|
+
*
|
|
2130
|
+
* @param target - Object to populate with discovered fields.
|
|
2131
|
+
* @param sampleSize - Number of points to sample.
|
|
2132
|
+
*/
|
|
2133
|
+
async discoverPayloadFields(target, sampleSize = 100) {
|
|
2134
|
+
const result = await this.client.scroll(this.collectionName, {
|
|
2135
|
+
limit: sampleSize,
|
|
2136
|
+
with_payload: true,
|
|
2137
|
+
with_vector: false,
|
|
2138
|
+
});
|
|
2139
|
+
for (const point of result.points) {
|
|
2140
|
+
const payload = point.payload;
|
|
2141
|
+
if (!payload)
|
|
2142
|
+
continue;
|
|
2143
|
+
for (const [key, value] of Object.entries(payload)) {
|
|
2144
|
+
if (key in target)
|
|
2145
|
+
continue;
|
|
2146
|
+
target[key] = { type: inferPayloadType(value) };
|
|
2147
|
+
}
|
|
2148
|
+
}
|
|
2149
|
+
}
|
|
2071
2150
|
/**
|
|
2072
2151
|
* Search for similar vectors.
|
|
2073
2152
|
*
|
package/dist/index.iife.min.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
!function(e,t,i,r,o,n,s,a,c,l,h,d,u,g,f,p,m,y,w,b,v,M,x){"use strict";function S(e){var t=Object.create(null);return e&&Object.keys(e).forEach((function(i){if("default"!==i){var r=Object.getOwnPropertyDescriptor(e,i);Object.defineProperty(t,i,r.get?r:{enumerable:!0,get:function(){return e[i]}})}})),t.default=e,Object.freeze(t)}var F=S(p);function P(e){if(e instanceof Error)return e;if("string"==typeof e)return new Error(e);const t=String("object"==typeof e&&null!==e&&"message"in e?e.message:e),i=new Error(t);return i.cause=e,i}function k(e){const t=e.replace(/\\/g,"/"),i=t.search(/[*?\[]/);if(-1===i)return r.resolve(e);const o=t.slice(0,i),n=o.endsWith("/")?o.slice(0,-1):r.dirname(o);return r.resolve(n)}async function*z(e){let t;try{t=(await i.readdir(e,{withFileTypes:!0})).map((e=>({name:e.name,isDirectory:e.isDirectory()})))}catch{return}for(const o of t){const t=r.resolve(e,o.name);if(o.isDirectory)yield*z(t);else try{(await i.stat(t)).isFile()&&(yield t)}catch{}}}async function j(e,t,i,r){const n=await async function(e,t=[]){const i=e.map((e=>e.replace(/\\/g,"/"))),r=t.map((e=>e.replace(/\\/g,"/"))),n=o(i,{dot:!0}),s=r.length?o(r,{dot:!0}):()=>!1,a=Array.from(new Set(e.map(k))),c=new Set;for(const e of a)for await(const t of z(e)){const e=t.replace(/\\/g,"/");s(e)||n(e)&&c.add(t)}return Array.from(c)}(e,t);for(const e of n)await i[r](e);return n.length}function R(e,t=!1){let i=e.replace(/\\/g,"/").toLowerCase();return t&&(i=i.replace(/^([a-z]):/,((e,t)=>t))),i}function E(e,t){const i=R(e,!0),o=s.createHash("sha256").update(i,"utf8").digest("hex");return r.join(t,`${o}.meta.json`)}async function C(e,t){try{const r=await i.readFile(E(e,t),"utf8");return JSON.parse(r)}catch{return null}}async function D(e,t,o){const n=E(e,t);await i.mkdir(r.dirname(n),{recursive:!0}),await i.writeFile(n,JSON.stringify(o,null,2),"utf8")}async function T(e,t){try{await i.rm(E(e,t))}catch{}}const I=["file_path","chunk_index","total_chunks","content_hash","chunk_text"];function 
W(e){const{processor:i,vectorStore:r,embeddingProvider:o,logger:s,config:a}=e,c=t({logger:!1});var l;return c.get("/status",(()=>({status:"ok",uptime:process.uptime()}))),c.post("/metadata",(l={processor:i,logger:s},async(e,t)=>{try{const{path:t,metadata:i}=e.body;return await l.processor.processMetadataUpdate(t,i),{ok:!0}}catch(e){return l.logger.error({err:P(e)},"Metadata update failed"),t.status(500).send({error:"Internal server error"})}})),c.post("/search",function(e){return async(t,i)=>{try{const{query:i,limit:r=10}=t.body,o=await e.embeddingProvider.embed([i]);return await e.vectorStore.search(o[0],r)}catch(t){return e.logger.error({err:P(t)},"Search failed"),i.status(500).send({error:"Internal server error"})}}}({embeddingProvider:o,vectorStore:r,logger:s})),c.post("/reindex",function(e){return async(t,i)=>{try{const t=await j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");return await i.status(200).send({ok:!0,filesIndexed:t})}catch(t){return e.logger.error({err:P(t)},"Reindex failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c.post("/rebuild-metadata",function(e){return async(t,i)=>{try{const t=e.config.metadataDir??".jeeves-metadata",r=[...I];for await(const i of e.vectorStore.scroll()){const e=i.payload,o=e.file_path;if("string"!=typeof o||0===o.length)continue;const s=n.omit(e,r);await D(o,t,s)}return await i.status(200).send({ok:!0})}catch(t){return e.logger.error({err:P(t)},"Rebuild metadata failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,vectorStore:r,logger:s})),c.post("/config-reindex",function(e){return async(t,i)=>{try{const r=t.body.scope??"rules";return(async()=>{try{if("rules"===r){const t=await j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processRulesUpdate");e.logger.info({scope:r,filesProcessed:t},"Config reindex (rules) completed")}else{const t=await 
j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");e.logger.info({scope:r,filesProcessed:t},"Config reindex (full) completed")}}catch(t){e.logger.error({err:P(t),scope:r},"Config reindex failed")}})(),await i.status(200).send({status:"started",scope:r})}catch(t){return e.logger.error({err:P(t)},"Config reindex request failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c}const N={metadataDir:".jeeves-watcher",shutdownTimeoutMs:1e4},O={enabled:!0,debounceMs:1e3},_={host:"127.0.0.1",port:3456},A={level:"info"},q={debounceMs:300,stabilityThresholdMs:500,usePolling:!1,pollIntervalMs:1e3,respectGitignore:!0},L={chunkSize:1e3,chunkOverlap:200,dimensions:3072,rateLimitPerMinute:300,concurrency:5},Q=c.z.object({paths:c.z.array(c.z.string()).min(1).describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),ignored:c.z.array(c.z.string()).optional().describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),pollIntervalMs:c.z.number().optional().describe("Polling interval in milliseconds when usePolling is enabled."),usePolling:c.z.boolean().optional().describe("Use polling instead of native file system events (for network drives)."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for file change events."),stabilityThresholdMs:c.z.number().optional().describe("Time in milliseconds a file must remain unchanged before processing."),respectGitignore:c.z.boolean().optional().describe("Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory. 
Default: true.")}),$=c.z.object({enabled:c.z.boolean().optional().describe("Enable automatic reloading when config file changes."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for config file change detection.")}),G=c.z.object({provider:c.z.string().default("gemini").describe('Embedding provider name (e.g., "gemini", "openai").'),model:c.z.string().default("gemini-embedding-001").describe('Embedding model identifier (e.g., "gemini-embedding-001", "text-embedding-3-small").'),chunkSize:c.z.number().optional().describe("Maximum chunk size in characters for text splitting."),chunkOverlap:c.z.number().optional().describe("Character overlap between consecutive chunks."),dimensions:c.z.number().optional().describe("Embedding vector dimensions (must match model output)."),apiKey:c.z.string().optional().describe("API key for embedding provider (supports ${ENV_VAR} substitution)."),rateLimitPerMinute:c.z.number().optional().describe("Maximum embedding API requests per minute (rate limiting)."),concurrency:c.z.number().optional().describe("Maximum concurrent embedding requests.")}),B=c.z.object({url:c.z.string().describe('Qdrant server URL (e.g., "http://localhost:6333").'),collectionName:c.z.string().describe("Qdrant collection name for vector storage."),apiKey:c.z.string().optional().describe("Qdrant API key for authentication (supports ${ENV_VAR} substitution).")}),K=c.z.object({host:c.z.string().optional().describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),port:c.z.number().optional().describe("Port for API server (e.g., 3456).")}),J=c.z.object({level:c.z.string().optional().describe("Logging level (trace, debug, info, warn, error, fatal)."),file:c.z.string().optional().describe("Path to log file (logs to stdout if omitted).")}),V=c.z.object({match:c.z.record(c.z.string(),c.z.unknown()).describe("JSON Schema object to match against file attributes."),set:c.z.record(c.z.string(),c.z.unknown()).describe("Metadata fields 
to set when match succeeds."),map:c.z.union([l.jsonMapMapSchema,c.z.string()]).optional().describe("JsonMap transformation (inline definition or named map reference).")}),U=c.z.object({watch:Q.describe("File system watch configuration."),configWatch:$.optional().describe("Configuration file watch settings."),embedding:G.describe("Embedding model configuration."),vectorStore:B.describe("Qdrant vector store configuration."),metadataDir:c.z.string().optional().describe("Directory for persisted metadata sidecar files."),api:K.optional().describe("API server configuration."),extractors:c.z.record(c.z.string(),c.z.unknown()).optional().describe("Extractor configurations keyed by name."),inferenceRules:c.z.array(V).optional().describe("Rules for inferring metadata from file attributes."),maps:c.z.record(c.z.string(),l.jsonMapMapSchema).optional().describe("Reusable named JsonMap transformations."),logging:J.optional().describe("Logging configuration."),shutdownTimeoutMs:c.z.number().optional().describe("Timeout in milliseconds for graceful shutdown."),maxRetries:c.z.number().optional().describe("Maximum consecutive system-level failures before triggering fatal error. Default: Infinity."),maxBackoffMs:c.z.number().optional().describe("Maximum backoff delay in milliseconds for system errors. Default: 60000.")}),H=/\$\{([^}]+)\}/g;function Y(e){if("string"==typeof e)return function(e){return e.replace(H,((e,t)=>{const i=process.env[t];return void 0===i?e:i}))}(e);if(Array.isArray(e))return e.map((e=>Y(e)));if(null!==e&&"object"==typeof e){const t={};for(const[i,r]of Object.entries(e))t[i]=Y(r);return t}return e}const Z="jeeves-watcher";async function X(e){const t=a.cosmiconfig(Z),i=e?await t.load(e):await t.search();if(!i||i.isEmpty)throw new Error("No jeeves-watcher configuration found. 
Create a .jeeves-watcherrc or jeeves-watcher.config.{js,ts,json,yaml} file.");try{const e=U.parse(i.config);return Y((r=e,{...N,...r,watch:{...q,...r.watch},configWatch:{...O,...r.configWatch},embedding:{...L,...r.embedding},api:{..._,...r.api},logging:{...A,...r.logging}}))}catch(e){if(e instanceof c.ZodError){const t=e.issues.map((e=>`${e.path.join(".")}: ${e.message}`)).join("; ");throw new Error(`Invalid jeeves-watcher configuration: ${t}`)}throw e}var r}function ee(e){return e||{warn(e,t){t?console.warn(e,t):console.warn(e)}}}function te(e,t){return e<=0?Promise.resolve():new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),e),n=()=>{s(),r(new Error("Retry sleep aborted"))},s=()=>{clearTimeout(o),t&&t.removeEventListener("abort",n)};if(t){if(t.aborted)return void n();t.addEventListener("abort",n,{once:!0})}}))}function ie(e,t,i,r=0){const o=Math.max(0,e-1),n=Math.min(i,t*2**o),s=r>0?1+Math.random()*r:1;return Math.round(n*s)}async function re(e,t){const i=Math.max(1,t.attempts);let r;for(let o=1;o<=i;o++)try{return await e(o)}catch(e){r=e;if(o>=i)break;const n=ie(o,t.baseDelayMs,t.maxDelayMs,t.jitter);t.onRetry?.({attempt:o,attempts:i,delayMs:n,error:e}),await te(n,t.signal)}throw r}const oe=new Map([["mock",function(e){return function(e){return{dimensions:e,embed:t=>Promise.resolve(t.map((t=>{const i=s.createHash("sha256").update(t,"utf8").digest(),r=[];for(let t=0;t<e;t++){const e=i[t%i.length];r.push(e/127.5-1)}return r})))}}(e.dimensions??768)}],["gemini",function(e,t){if(!e.apiKey)throw new Error("Gemini embedding provider requires config.embedding.apiKey");const i=e.dimensions??3072,r=ee(t),o=new h.GoogleGenerativeAIEmbeddings({apiKey:e.apiKey,model:e.model});return{dimensions:i,async embed(t){const n=await re((async i=>(i>1&&r.warn({attempt:i,provider:"gemini",model:e.model},"Retrying embedding 
request"),o.embedDocuments(t))),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:t,delayMs:i,error:o})=>{r.warn({attempt:t,delayMs:i,provider:"gemini",model:e.model,err:P(o)},"Embedding call failed; will retry")}});for(const e of n)if(e.length!==i)throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(i)}, got ${String(e.length)}`);return n}}}]]);function ne(e,t){const i=oe.get(e.provider);if(!i)throw new Error(`Unsupported embedding provider: ${e.provider}`);return i(e,t)}function se(e){let t=r.resolve(e);const i=r.resolve("/");for(;t!==i;){if(d.existsSync(r.join(t,".git"))&&d.statSync(r.join(t,".git")).isDirectory())return t;const e=r.dirname(t);if(e===t)break;t=e}}function ae(e){const t=r.resolve(e);try{return d.statSync(t).isDirectory()?t:r.dirname(t)}catch{}const i=/[*?[{]/.exec(e);if(!i)return;const o=e.slice(0,i.index).trim(),n=0===o.length?".":o.endsWith("/")||o.endsWith("\\")?o:r.dirname(o),s=r.resolve(n);return d.existsSync(s)?s:void 0}function ce(e){const t=[],i=r.join(e,".gitignore");let o;d.existsSync(i)&&t.push(i);try{o=d.readdirSync(e)}catch{return t}for(const i of o){if(".git"===i||"node_modules"===i)continue;const o=r.join(e,i);try{d.statSync(o).isDirectory()&&t.push(...ce(o))}catch{}}return t}function le(e){const t=d.readFileSync(e,"utf8");return u().add(t)}class he{repos=new Map;constructor(e){this.scan(e)}scan(e){this.repos.clear();const t=new Set;for(const i of e){const e=ae(i);if(!e)continue;if(t.has(e))continue;t.add(e);const o=se(e);if(!o)continue;if(this.repos.has(o))continue;const n=ce(o).map((e=>({dir:r.dirname(e),ig:le(e)})));n.sort(((e,t)=>t.dir.length-e.dir.length)),this.repos.set(o,{root:o,entries:n})}}isIgnored(e){const t=r.resolve(e);for(const[,e]of this.repos){const i=r.relative(e.root,t);if(!i.startsWith("..")&&!i.startsWith(r.resolve("/")))for(const i of e.entries){const e=r.relative(i.dir,t);if(e.startsWith(".."))continue;const 
o=e.replace(/\\/g,"/");if(i.ig.ignores(o))return!0}}return!1}invalidate(e){const t=r.resolve(e),i=r.dirname(t);for(const[,e]of this.repos){if(!r.relative(e.root,i).startsWith(".."))return e.entries=e.entries.filter((e=>e.dir!==i)),void(d.existsSync(t)&&(e.entries.push({dir:i,ig:le(t)}),e.entries.sort(((e,t)=>t.dir.length-e.dir.length))))}const o=se(i);if(o&&d.existsSync(t)){const e=[{dir:i,ig:le(t)}];if(this.repos.has(o)){const t=this.repos.get(o);t.entries.push(e[0]),t.entries.sort(((e,t)=>t.dir.length-e.dir.length))}else this.repos.set(o,{root:o,entries:e})}}}function de(e){const t=e?.level??"info";if(e?.file){const i=g.transport({target:"pino/file",options:{destination:e.file,mkdir:!0}});return g({level:t},i)}return g({level:t})}function ue(e){return s.createHash("sha256").update(e,"utf8").digest("hex")}const ge="6a6f686e-6761-4c74-ad6a-656576657321";function fe(e,t){const i=void 0!==t?`${R(e)}#${String(t)}`:R(e);return f.v5(i,ge)}const pe=["content","body","text","snippet","subject","description","summary","transcript"];function me(e){if(!e||"object"!=typeof e)return JSON.stringify(e);const t=e;for(const e of pe){const i=t[e];if("string"==typeof i&&i.trim())return i}return JSON.stringify(e)}async function ye(e){const t=await i.readFile(e,"utf8"),{frontmatter:r,body:o}=function(e){const t=e.replace(/^\uFEFF/,"");if(!/^\s*---/.test(t))return{body:e};const i=/^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(t);if(!i)return{body:e};const[,r,o]=i,n=m.load(r);return{frontmatter:n&&"object"==typeof n&&!Array.isArray(n)?n:void 0,body:o}}(t);return{text:o,frontmatter:r}}async function we(e){return{text:(await i.readFile(e,"utf8")).replace(/^\uFEFF/,"")}}async function be(e){const t=await i.readFile(e,"utf8"),r=F.load(t.replace(/^\uFEFF/,""));r("script, style").remove();return{text:r("body").text().trim()||r.text().trim()}}const ve=new Map([[".md",ye],[".markdown",ye],[".txt",we],[".text",we],[".json",async function(e){const t=await 
i.readFile(e,"utf8"),r=JSON.parse(t.replace(/^\uFEFF/,"")),o=r&&"object"==typeof r&&!Array.isArray(r)?r:void 0;return{text:me(r),json:o}}],[".pdf",async function(e){const t=await i.readFile(e),r=new Uint8Array(t),{extractText:o}=await import("unpdf"),{text:n}=await o(r);return{text:Array.isArray(n)?n.join("\n\n"):n}}],[".docx",async function(e){const t=await i.readFile(e);return{text:(await y.extractRawText({buffer:t})).value}}],[".html",be],[".htm",be]]);async function Me(e,t){const i=ve.get(t.toLowerCase());return i?i(e):we(e)}function xe(e,t){return"string"!=typeof e?e:e.replace(/\$\{([^}]+)\}/g,((e,i)=>{const r=n.get(t,i);return null==r?"":"string"==typeof r?r:JSON.stringify(r)}))}function Se(e,t){const i={};for(const[r,o]of Object.entries(e))i[r]=xe(o,t);return i}async function Fe(e,t,i,r){const o={split:(e,t)=>e.split(t),slice:(e,t,i)=>e.slice(t,i),join:(e,t)=>e.join(t),toLowerCase:e=>e.toLowerCase(),replace:(e,t,i)=>e.replace(t,i),get:(e,t)=>n.get(e,t)};let s={};const a=r??console;for(const{rule:r,validate:n}of e)if(n(t)){const e=Se(r.set,t);if(s={...s,...e},r.map){let e;if("string"==typeof r.map){if(e=i?.[r.map],!e){a.warn(`Map reference "${r.map}" not found in named maps. 
Skipping map transformation.`);continue}}else e=r.map;try{const i=new l.JsonMap(e,o),r=await i.transform(t);r&&"object"==typeof r&&!Array.isArray(r)?s={...s,...r}:a.warn("JsonMap transformation did not return an object; skipping merge.")}catch(e){a.warn(`JsonMap transformation failed: ${e instanceof Error?e.message:String(e)}`)}}}return s}function Pe(e,t,i,o){const n=e.replace(/\\/g,"/"),s={file:{path:n,directory:r.dirname(n).replace(/\\/g,"/"),filename:r.basename(n),extension:r.extname(n),sizeBytes:t.size,modified:t.mtime.toISOString()}};return i&&(s.frontmatter=i),o&&(s.json=o),s}function ke(e){const t=function(){const e=new w({allErrors:!0});return b(e),e.addKeyword({keyword:"glob",type:"string",schemaType:"string",validate:(e,t)=>o.isMatch(t,e)}),e}();return e.map(((e,i)=>({rule:e,validate:t.compile({$id:`rule-${String(i)}`,...e.match})})))}async function ze(e,t,o,n,s){const a=r.extname(e),c=await i.stat(e),l=await Me(e,a),h=Pe(e,c,l.frontmatter,l.json),d=await Fe(t,h,n,s),u=await C(e,o);return{inferred:d,enrichment:u,metadata:{...d,...u??{}},attributes:h,extracted:l}}function je(e,t){const i=[];for(let r=0;r<t;r++)i.push(fe(e,r));return i}function Re(e,t=1){if(!e)return t;const i=e.total_chunks;return"number"==typeof i?i:t}class Ee{config;embeddingProvider;vectorStore;compiledRules;logger;constructor(e,t,i,r,o){this.config=e,this.embeddingProvider=t,this.vectorStore=i,this.compiledRules=r,this.logger=o}async processFile(e){try{const t=r.extname(e),{metadata:i,extracted:o}=await ze(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger);if(!o.text.trim())return void this.logger.debug({filePath:e},"Skipping empty file");const n=ue(o.text),s=fe(e,0),a=await this.vectorStore.getPayload(s);if(a&&a.content_hash===n)return void this.logger.debug({filePath:e},"Content unchanged, skipping");const c=Re(a),l=this.config.chunkSize??1e3,h=function(e,t,i){const r=e.toLowerCase();return".md"===r||".markdown"===r?new 
v.MarkdownTextSplitter({chunkSize:t,chunkOverlap:i}):new v.RecursiveCharacterTextSplitter({chunkSize:t,chunkOverlap:i})}(t,l,this.config.chunkOverlap??200),d=await h.splitText(o.text),u=await this.embeddingProvider.embed(d),g=d.map(((t,r)=>({id:fe(e,r),vector:u[r],payload:{...i,file_path:e.replace(/\\/g,"/"),chunk_index:r,total_chunks:d.length,content_hash:n,chunk_text:t}})));if(await this.vectorStore.upsert(g),c>d.length){const t=je(e,c).slice(d.length);await this.vectorStore.delete(t)}this.logger.info({filePath:e,chunks:d.length},"File processed successfully")}catch(t){this.logger.error({filePath:e,err:P(t)},"Failed to process file")}}async deleteFile(e){try{const t=fe(e,0),i=await this.vectorStore.getPayload(t),r=je(e,Re(i));await this.vectorStore.delete(r),await T(e,this.config.metadataDir),this.logger.info({filePath:e},"File deleted from index")}catch(t){this.logger.error({filePath:e,err:P(t)},"Failed to delete file")}}async processMetadataUpdate(e,t){try{const i={...await C(e,this.config.metadataDir)??{},...t};await D(e,this.config.metadataDir,i);const r=fe(e,0),o=await this.vectorStore.getPayload(r);if(!o)return null;const n=Re(o),s=je(e,n);return await this.vectorStore.setPayload(s,i),this.logger.info({filePath:e,chunks:n},"Metadata updated"),i}catch(t){return this.logger.error({filePath:e,err:P(t)},"Failed to update metadata"),null}}async processRulesUpdate(e){try{const t=fe(e,0),i=await this.vectorStore.getPayload(t);if(!i)return this.logger.debug({filePath:e},"File not indexed, skipping"),null;const{metadata:r}=await ze(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger),o=Re(i),n=je(e,o);return await this.vectorStore.setPayload(n,r),this.logger.info({filePath:e,chunks:o},"Rules re-applied"),r}catch(t){return this.logger.error({filePath:e,err:P(t)},"Failed to re-apply rules"),null}}updateRules(e){this.compiledRules=e,this.logger.info({rules:e.length},"Inference rules updated")}}class 
Ce{debounceMs;concurrency;rateLimitPerMinute;started=!1;active=0;debounceTimers=new Map;latestByKey=new Map;normalQueue=[];lowQueue=[];tokens;lastRefillMs=Date.now();drainWaiters=[];constructor(e){this.debounceMs=e.debounceMs,this.concurrency=e.concurrency,this.rateLimitPerMinute=e.rateLimitPerMinute,this.tokens=this.rateLimitPerMinute??Number.POSITIVE_INFINITY}enqueue(e,t){const i=`${e.priority}:${e.path}`;this.latestByKey.set(i,{event:e,fn:t});const r=this.debounceTimers.get(i);r&&clearTimeout(r);const o=setTimeout((()=>{this.debounceTimers.delete(i);const e=this.latestByKey.get(i);e&&(this.latestByKey.delete(i),this.push(e),this.pump())}),this.debounceMs);this.debounceTimers.set(i,o)}process(){this.started=!0,this.pump()}async drain(){this.isIdle()||await new Promise((e=>{this.drainWaiters.push(e)}))}push(e){"low"===e.event.priority?this.lowQueue.push(e):this.normalQueue.push(e)}refillTokens(e){if(void 0===this.rateLimitPerMinute)return;const t=Math.max(0,e-this.lastRefillMs)*(this.rateLimitPerMinute/6e4);this.tokens=Math.min(this.rateLimitPerMinute,this.tokens+t),this.lastRefillMs=e}takeToken(){const e=Date.now();return this.refillTokens(e),!(this.tokens<1)&&(this.tokens-=1,!0)}nextItem(){return this.normalQueue.shift()??this.lowQueue.shift()}pump(){if(this.started){for(;this.active<this.concurrency;){const e=this.nextItem();if(!e)break;if(!this.takeToken()){"low"===e.event.priority?this.lowQueue.unshift(e):this.normalQueue.unshift(e),setTimeout((()=>{this.pump()}),250);break}this.active+=1,Promise.resolve().then((()=>e.fn(e.event))).finally((()=>{this.active-=1,this.pump(),this.maybeResolveDrain()}))}this.maybeResolveDrain()}}isIdle(){return 0===this.active&&0===this.normalQueue.length&&0===this.lowQueue.length&&0===this.debounceTimers.size&&0===this.latestByKey.size}maybeResolveDrain(){if(!this.isIdle())return;const e=this.drainWaiters;this.drainWaiters=[];for(const t of e)t()}}class De{client;collectionName;dims;log;constructor(e,t,i){this.client=new 
M.QdrantClient({url:e.url,apiKey:e.apiKey,checkCompatibility:!1}),this.collectionName=e.collectionName,this.dims=t,this.log=ee(i)}async ensureCollection(){try{const e=await this.client.getCollections();e.collections.some((e=>e.name===this.collectionName))||await this.client.createCollection(this.collectionName,{vectors:{size:this.dims,distance:"Cosine"}})}catch(e){throw new Error(`Failed to ensure collection "${this.collectionName}": ${String(e)}`)}}async upsert(e){0!==e.length&&await re((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.upsert",points:e.length},"Retrying Qdrant upsert"),await this.client.upsert(this.collectionName,{wait:!0,points:e.map((e=>({id:e.id,vector:e.vector,payload:e.payload})))})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.upsert",err:P(i)},"Qdrant upsert failed; will retry")}})}async delete(e){0!==e.length&&await re((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.delete",ids:e.length},"Retrying Qdrant delete"),await this.client.delete(this.collectionName,{wait:!0,points:e})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.delete",err:P(i)},"Qdrant delete failed; will retry")}})}async setPayload(e,t){0!==e.length&&await this.client.setPayload(this.collectionName,{wait:!0,points:e,payload:t})}async getPayload(e){try{const t=await this.client.retrieve(this.collectionName,{ids:[e],with_payload:!0,with_vector:!1});return 0===t.length?null:t[0].payload}catch{return null}}async search(e,t,i){return(await this.client.search(this.collectionName,{vector:e,limit:t,with_payload:!0,...i?{filter:i}:{}})).map((e=>({id:String(e.id),score:e.score,payload:e.payload})))}async*scroll(e,t=100){let i;for(;;){const r=await this.client.scroll(this.collectionName,{limit:t,with_payload:!0,with_vector:!1,...e?{filter:e}:{},...void 
0!==i?{offset:i}:{}});for(const e of r.points)yield{id:String(e.id),payload:e.payload};const o=r.next_page_offset;if(null==o)break;if("string"!=typeof o&&"number"!=typeof o)break;i=o}}}class Te{consecutiveFailures=0;maxRetries;maxBackoffMs;baseDelayMs;onFatalError;logger;constructor(e){this.maxRetries=e.maxRetries??Number.POSITIVE_INFINITY,this.maxBackoffMs=e.maxBackoffMs??6e4,this.baseDelayMs=e.baseDelayMs??1e3,this.onFatalError=e.onFatalError,this.logger=e.logger}recordSuccess(){this.consecutiveFailures>0&&this.logger.info({previousFailures:this.consecutiveFailures},"System health recovered"),this.consecutiveFailures=0}recordFailure(e){if(this.consecutiveFailures+=1,this.logger.error({consecutiveFailures:this.consecutiveFailures,maxRetries:this.maxRetries,err:P(e)},"System-level failure recorded"),this.consecutiveFailures>=this.maxRetries){if(this.logger.fatal({consecutiveFailures:this.consecutiveFailures},"Maximum retries exceeded, triggering fatal error"),this.onFatalError)return this.onFatalError(e),!1;throw e instanceof Error?e:new Error(`Fatal system error: ${String(e)}`)}return!0}get currentBackoffMs(){if(0===this.consecutiveFailures)return 0;const e=Math.max(0,this.consecutiveFailures-1);return Math.min(this.maxBackoffMs,this.baseDelayMs*2**e)}async backoff(e){const t=this.currentBackoffMs;t<=0||(this.logger.warn({delayMs:t,consecutiveFailures:this.consecutiveFailures},"Backing off before next attempt"),await new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),t),n=()=>{s(),r(new Error("Backoff aborted"))},s=()=>{clearTimeout(o),e&&e.removeEventListener("abort",n)};if(e){if(e.aborted)return void n();e.addEventListener("abort",n,{once:!0})}})))}get failures(){return this.consecutiveFailures}}class Ie{config;queue;processor;logger;health;gitignoreFilter;watcher;constructor(e,t,i,r,o={}){this.config=e,this.queue=t,this.processor=i,this.logger=r,this.gitignoreFilter=o.gitignoreFilter;const 
n={maxRetries:o.maxRetries,maxBackoffMs:o.maxBackoffMs,onFatalError:o.onFatalError,logger:r};this.health=new Te(n)}start(){this.watcher=x.watch(this.config.paths,{ignored:this.config.ignored,usePolling:this.config.usePolling,interval:this.config.pollIntervalMs,awaitWriteFinish:!!this.config.stabilityThresholdMs&&{stabilityThreshold:this.config.stabilityThresholdMs},ignoreInitial:!1}),this.watcher.on("add",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File added"),this.queue.enqueue({type:"create",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.processFile(e))))))})),this.watcher.on("change",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File changed"),this.queue.enqueue({type:"modify",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.processFile(e))))))})),this.watcher.on("unlink",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File removed"),this.queue.enqueue({type:"delete",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.deleteFile(e))))))})),this.watcher.on("error",(e=>{this.logger.error({err:P(e)},"Watcher error"),this.health.recordFailure(e)})),this.queue.process(),this.logger.info({paths:this.config.paths},"Filesystem watcher started")}async stop(){this.watcher&&(await this.watcher.close(),this.watcher=void 0,this.logger.info("Filesystem watcher stopped"))}get systemHealth(){return this.health}isGitignored(e){if(!this.gitignoreFilter)return!1;const t=this.gitignoreFilter.isIgnored(e);return t&&this.logger.debug({path:e},"Skipping gitignored file"),t}handleGitignoreChange(e){this.gitignoreFilter&&e.endsWith(".gitignore")&&(this.logger.info({path:e},"Gitignore file changed, refreshing filter"),this.gitignoreFilter.invalidate(e))}async wrapProcessing(e){try{await this.health.backoff(),await e(),this.health.recordSuccess()}catch(e){this.health.recordFailure(e)||await 
this.stop()}}}class We{options;watcher;debounce;constructor(e){this.options=e}start(){this.options.enabled&&(this.watcher=x.watch(this.options.configPath,{ignoreInitial:!0}),this.watcher.on("change",(()=>{this.debounce&&clearTimeout(this.debounce),this.debounce=setTimeout((()=>{this.options.onChange()}),this.options.debounceMs)})),this.watcher.on("error",(e=>{this.options.logger.error({err:P(e)},"Config watcher error")})),this.options.logger.info({configPath:this.options.configPath,debounceMs:this.options.debounceMs},"Config watcher started"))}async stop(){this.debounce&&(clearTimeout(this.debounce),this.debounce=void 0),this.watcher&&(await this.watcher.close(),this.watcher=void 0)}}const Ne={loadConfig:X,createLogger:de,createEmbeddingProvider:ne,createVectorStoreClient:(e,t,i)=>new De(e,t,i),compileRules:ke,createDocumentProcessor:(e,t,i,r,o)=>new Ee(e,t,i,r,o),createEventQueue:e=>new Ce(e),createFileSystemWatcher:(e,t,i,r,o)=>new Ie(e,t,i,r,o),createApiServer:W};class Oe{config;configPath;factories;runtimeOptions;logger;watcher;queue;server;processor;configWatcher;constructor(e,t,i={},r={}){this.config=e,this.configPath=t,this.factories={...Ne,...i},this.runtimeOptions=r}async start(){const e=this.factories.createLogger(this.config.logging);let t;this.logger=e;try{t=this.factories.createEmbeddingProvider(this.config.embedding,e)}catch(t){throw e.fatal({err:P(t)},"Failed to create embedding provider"),t}const i=this.factories.createVectorStoreClient(this.config.vectorStore,t.dimensions,e);await i.ensureCollection();const r=this.factories.compileRules(this.config.inferenceRules??[]),o={metadataDir:this.config.metadataDir??".jeeves-metadata",chunkSize:this.config.embedding.chunkSize,chunkOverlap:this.config.embedding.chunkOverlap,maps:this.config.maps},n=this.factories.createDocumentProcessor(o,t,i,r,e);this.processor=n;const 
s=this.factories.createEventQueue({debounceMs:this.config.watch.debounceMs??2e3,concurrency:this.config.embedding.concurrency??5,rateLimitPerMinute:this.config.embedding.rateLimitPerMinute});this.queue=s;const a=this.config.watch.respectGitignore??!0?new he(this.config.watch.paths):void 0,c=this.factories.createFileSystemWatcher(this.config.watch,s,n,e,{maxRetries:this.config.maxRetries,maxBackoffMs:this.config.maxBackoffMs,onFatalError:this.runtimeOptions.onFatalError,gitignoreFilter:a});this.watcher=c;const l=this.factories.createApiServer({processor:n,vectorStore:i,embeddingProvider:t,queue:s,config:this.config,logger:e});this.server=l,await l.listen({host:this.config.api?.host??"127.0.0.1",port:this.config.api?.port??3456}),c.start(),this.startConfigWatch(),e.info("jeeves-watcher started")}async stop(){if(await this.stopConfigWatch(),this.watcher&&await this.watcher.stop(),this.queue){const e=this.config.shutdownTimeoutMs??1e4;await Promise.race([this.queue.drain().then((()=>!0)),new Promise((t=>{setTimeout((()=>{t(!1)}),e)}))])||this.logger?.warn({timeoutMs:e},"Queue drain timeout hit, forcing shutdown")}this.server&&await this.server.close(),this.logger?.info("jeeves-watcher stopped")}startConfigWatch(){const e=this.logger;if(!e)return;const t=this.config.configWatch?.enabled??!0;if(!t)return;if(!this.configPath)return void e.debug("Config watch enabled, but no config path was provided");const i=this.config.configWatch?.debounceMs??1e4;this.configWatcher=new We({configPath:this.configPath,enabled:t,debounceMs:i,logger:e,onChange:async()=>this.reloadConfig()}),this.configWatcher.start()}async stopConfigWatch(){this.configWatcher&&(await this.configWatcher.stop(),this.configWatcher=void 0)}async reloadConfig(){const e=this.logger,t=this.processor;if(e&&t&&this.configPath){e.info({configPath:this.configPath},"Config change detected, reloading...");try{const i=await this.factories.loadConfig(this.configPath);this.config=i;const 
r=this.factories.compileRules(i.inferenceRules??[]);t.updateRules(r),e.info({configPath:this.configPath,rules:r.length},"Config reloaded")}catch(t){e.error({err:P(t)},"Failed to reload config")}}}}e.DocumentProcessor=Ee,e.EventQueue=Ce,e.FileSystemWatcher=Ie,e.GitignoreFilter=he,e.JeevesWatcher=Oe,e.SystemHealth=Te,e.VectorStoreClient=De,e.apiConfigSchema=K,e.applyRules=Fe,e.buildAttributes=Pe,e.compileRules=ke,e.configWatchConfigSchema=$,e.contentHash=ue,e.createApiServer=W,e.createEmbeddingProvider=ne,e.createLogger=de,e.deleteMetadata=T,e.embeddingConfigSchema=G,e.extractText=Me,e.inferenceRuleSchema=V,e.jeevesWatcherConfigSchema=U,e.loadConfig=X,e.loggingConfigSchema=J,e.metadataPath=E,e.pointId=fe,e.readMetadata=C,e.startFromConfig=async function(e){const t=await X(e),i=new Oe(t,e);return function(e){const t=async()=>{await e(),process.exit(0)};process.on("SIGTERM",(()=>{t()})),process.on("SIGINT",(()=>{t()}))}((()=>i.stop())),await i.start(),i},e.vectorStoreConfigSchema=B,e.watchConfigSchema=Q,e.writeMetadata=D}(this["jeeves-watcher"]=this["jeeves-watcher"]||{},Fastify,promises,node_path,picomatch,radash,node_crypto,cosmiconfig,zod,jsonmap,googleGenai,node_fs,ignore,pino,uuid,cheerio,yaml,mammoth,Ajv,addFormats,textsplitters,jsClientRest,chokidar);
|
|
1
|
+
/**
 * Minified IIFE bundle of jeeves-watcher.
 *
 * The wrapper is invoked at the very end with the exports object
 * `this["jeeves-watcher"]` (created if missing) followed by 22 globals that
 * must already be defined by the host page/script. Parameter mapping:
 *   e = exports object      t = Fastify        i = promises (readdir, stat,
 *   readFile, writeFile, mkdir, rm are used)   r = node_path
 *   o = picomatch           n = radash         s = node_crypto
 *   a = cosmiconfig         c = zod            l = jsonmap
 *   h = googleGenai         d = node_fs        u = ignore
 *   g = pino                f = uuid           p = cheerio (wrapped as F)
 *   m = yaml                y = mammoth        w = Ajv
 *   b = addFormats          v = textsplitters  M = jsClientRest (QdrantClient)
 *   x = chokidar
 *
 * Minified identifier -> exported name (see the assignments at the end):
 *   Re = DocumentProcessor      Ee = EventQueue        We = FileSystemWatcher
 *   he = GitignoreFilter        Oe = JeevesWatcher     Te = SystemHealth
 *   De = VectorStoreClient      W  = createApiServer   ne = createEmbeddingProvider
 *   de = createLogger           X  = loadConfig        Pe = compileRules
 *   Fe = applyRules             ke = buildAttributes   Me = extractText
 *   ue = contentHash            fe = pointId           R  = metadataPath
 *   E  = readMetadata           I  = writeMetadata     D  = deleteMetadata
 *   Q, $, G, B, K, J, V, U = the watch, configWatch, embedding, vectorStore,
 *   api, logging, inferenceRule and top-level zod config schemas.
 *   startFromConfig is defined inline: it loads the config with X, builds an
 *   Oe instance, registers SIGTERM/SIGINT handlers that await stop() and then
 *   call process.exit(0), awaits start() and returns the instance.
 *
 * Internal helpers that are not exported:
 *   k  - normalises any thrown value to an Error (keeps the original as cause)
 *   P  - resolves the directory prefix in front of the first glob character
 *   z  - async generator that walks a directory tree and yields regular files
 *   j  - collects files matching the watch globs (minus ignored globs) and
 *        awaits processor[method](file) one file at a time; returns the count
 *   C  - path normaliser: backslashes to "/", lower-case, optional removal of
 *        the colon after a drive letter
 *   T  - payload keys omitted when sidecar metadata is rebuilt from the store
 *   N, _, O, A, q, L - default values merged under the parsed config by X
 *   Y  - recursive ${ENV_VAR} substitution from process.env (unknown names
 *        are left untouched)
 *   te / ie / re - abortable sleep, exponential backoff delay with optional
 *        jitter, and the retry loop built on both
 *   oe - embedding provider registry with the keys "mock" and "gemini"
 *   se / ae / ce / le - find the enclosing directory that contains a .git
 *        directory, resolve a watch path to an existing directory, collect
 *        .gitignore files recursively (skipping .git and node_modules), and
 *        parse one .gitignore file
 *   me, ye, we, be, ve - text extraction helpers and the extension -> extractor
 *        map (.md/.markdown, .txt/.text, .json, .pdf, .docx, .html/.htm);
 *        unknown extensions fall back to the plain-text reader
 *   xe / Se - ${path} template substitution for rule "set" values
 *   ze - builds attributes, inferred metadata and stored enrichment for a file
 *   je / Ce - point ids for N chunks / chunk count read from a payload
 *   Ie - infers a payload field type label from a sample value
 *   Ne - debounced chokidar watcher for the config file
 *   _e - default factory table used by Oe (each entry can be overridden
 *        through the third constructor argument)
 *
 * HTTP routes registered by W: GET /status, POST /metadata, POST /search,
 * POST /reindex, POST /rebuild-metadata, POST /config-reindex (the last one
 * starts the work without awaiting it and replies immediately).
 *
 * NOTE(review): the defaults merged by X differ from the inline fallbacks
 * used when a config object reaches Oe without going through X:
 *   metadataDir            ".jeeves-watcher" (N)  vs  ".jeeves-metadata"
 *   watch.debounceMs       300 (q)                vs  2000
 *   configWatch.debounceMs 1000 (_)               vs  10000
 * Confirm which values are intended.
 */
!function(e,t,i,r,o,n,s,a,c,l,h,d,u,g,f,p,m,y,w,b,v,M,x){"use strict";function S(e){var t=Object.create(null);return e&&Object.keys(e).forEach((function(i){if("default"!==i){var r=Object.getOwnPropertyDescriptor(e,i);Object.defineProperty(t,i,r.get?r:{enumerable:!0,get:function(){return e[i]}})}})),t.default=e,Object.freeze(t)}var F=S(p);function k(e){if(e instanceof Error)return e;if("string"==typeof e)return new Error(e);const t=String("object"==typeof e&&null!==e&&"message"in e?e.message:e),i=new Error(t);return i.cause=e,i}function P(e){const t=e.replace(/\\/g,"/"),i=t.search(/[*?\[]/);if(-1===i)return r.resolve(e);const o=t.slice(0,i),n=o.endsWith("/")?o.slice(0,-1):r.dirname(o);return r.resolve(n)}async function*z(e){let t;try{t=(await i.readdir(e,{withFileTypes:!0})).map((e=>({name:e.name,isDirectory:e.isDirectory()})))}catch{return}for(const o of t){const t=r.resolve(e,o.name);if(o.isDirectory)yield*z(t);else try{(await i.stat(t)).isFile()&&(yield t)}catch{}}}async function j(e,t,i,r){const n=await async function(e,t=[]){const i=e.map((e=>e.replace(/\\/g,"/"))),r=t.map((e=>e.replace(/\\/g,"/"))),n=o(i,{dot:!0}),s=r.length?o(r,{dot:!0}):()=>!1,a=Array.from(new Set(e.map(P))),c=new Set;for(const e of a)for await(const t of z(e)){const e=t.replace(/\\/g,"/");s(e)||n(e)&&c.add(t)}return Array.from(c)}(e,t);for(const e of n)await i[r](e);return n.length}function C(e,t=!1){let i=e.replace(/\\/g,"/").toLowerCase();return t&&(i=i.replace(/^([a-z]):/,((e,t)=>t))),i}function R(e,t){const i=C(e,!0),o=s.createHash("sha256").update(i,"utf8").digest("hex");return r.join(t,`${o}.meta.json`)}async function E(e,t){try{const r=await i.readFile(R(e,t),"utf8");return JSON.parse(r)}catch{return null}}async function I(e,t,o){const n=R(e,t);await i.mkdir(r.dirname(n),{recursive:!0}),await i.writeFile(n,JSON.stringify(o,null,2),"utf8")}async function D(e,t){try{await i.rm(R(e,t))}catch{}}const T=["file_path","chunk_index","total_chunks","content_hash","chunk_text"];function
W(e){const{processor:i,vectorStore:r,embeddingProvider:o,logger:s,config:a}=e,c=t({logger:!1});var l;return c.get("/status",(l={vectorStore:r,config:a},async()=>{const e=await l.vectorStore.getCollectionInfo();return{status:"ok",uptime:process.uptime(),collection:{name:l.config.vectorStore.collectionName,pointCount:e.pointCount,dimensions:e.dimensions},payloadFields:e.payloadFields}})),c.post("/metadata",function(e){return async(t,i)=>{try{const{path:i,metadata:r}=t.body;return await e.processor.processMetadataUpdate(i,r),{ok:!0}}catch(t){return e.logger.error({err:k(t)},"Metadata update failed"),i.status(500).send({error:"Internal server error"})}}}({processor:i,logger:s})),c.post("/search",function(e){return async(t,i)=>{try{const{query:i,limit:r=10,filter:o}=t.body,n=await e.embeddingProvider.embed([i]);return await e.vectorStore.search(n[0],r,o)}catch(t){return e.logger.error({err:k(t)},"Search failed"),i.status(500).send({error:"Internal server error"})}}}({embeddingProvider:o,vectorStore:r,logger:s})),c.post("/reindex",function(e){return async(t,i)=>{try{const t=await j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");return await i.status(200).send({ok:!0,filesIndexed:t})}catch(t){return e.logger.error({err:k(t)},"Reindex failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c.post("/rebuild-metadata",function(e){return async(t,i)=>{try{const t=e.config.metadataDir??".jeeves-metadata",r=[...T];for await(const i of e.vectorStore.scroll()){const e=i.payload,o=e.file_path;if("string"!=typeof o||0===o.length)continue;const s=n.omit(e,r);await I(o,t,s)}return await i.status(200).send({ok:!0})}catch(t){return e.logger.error({err:k(t)},"Rebuild metadata failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,vectorStore:r,logger:s})),c.post("/config-reindex",function(e){return async(t,i)=>{try{const r=t.body.scope??"rules";return(async()=>{try{if("rules"===r){const
t=await j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processRulesUpdate");e.logger.info({scope:r,filesProcessed:t},"Config reindex (rules) completed")}else{const t=await j(e.config.watch.paths,e.config.watch.ignored,e.processor,"processFile");e.logger.info({scope:r,filesProcessed:t},"Config reindex (full) completed")}}catch(t){e.logger.error({err:k(t),scope:r},"Config reindex failed")}})(),await i.status(200).send({status:"started",scope:r})}catch(t){return e.logger.error({err:k(t)},"Config reindex request failed"),await i.status(500).send({error:"Internal server error"})}}}({config:a,processor:i,logger:s})),c}const N={metadataDir:".jeeves-watcher",shutdownTimeoutMs:1e4},_={enabled:!0,debounceMs:1e3},O={host:"127.0.0.1",port:3456},A={level:"info"},q={debounceMs:300,stabilityThresholdMs:500,usePolling:!1,pollIntervalMs:1e3,respectGitignore:!0},L={chunkSize:1e3,chunkOverlap:200,dimensions:3072,rateLimitPerMinute:300,concurrency:5},Q=c.z.object({paths:c.z.array(c.z.string()).min(1).describe('Glob patterns for files to watch (e.g., "**/*.md"). At least one required.'),ignored:c.z.array(c.z.string()).optional().describe('Glob patterns to exclude from watching (e.g., "**/node_modules/**").'),pollIntervalMs:c.z.number().optional().describe("Polling interval in milliseconds when usePolling is enabled."),usePolling:c.z.boolean().optional().describe("Use polling instead of native file system events (for network drives)."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for file change events."),stabilityThresholdMs:c.z.number().optional().describe("Time in milliseconds a file must remain unchanged before processing."),respectGitignore:c.z.boolean().optional().describe("Skip files ignored by .gitignore in git repositories. Only applies to repos with a .git directory.
Default: true.")}),$=c.z.object({enabled:c.z.boolean().optional().describe("Enable automatic reloading when config file changes."),debounceMs:c.z.number().optional().describe("Debounce delay in milliseconds for config file change detection.")}),G=c.z.object({provider:c.z.string().default("gemini").describe('Embedding provider name (e.g., "gemini", "openai").'),model:c.z.string().default("gemini-embedding-001").describe('Embedding model identifier (e.g., "gemini-embedding-001", "text-embedding-3-small").'),chunkSize:c.z.number().optional().describe("Maximum chunk size in characters for text splitting."),chunkOverlap:c.z.number().optional().describe("Character overlap between consecutive chunks."),dimensions:c.z.number().optional().describe("Embedding vector dimensions (must match model output)."),apiKey:c.z.string().optional().describe("API key for embedding provider (supports ${ENV_VAR} substitution)."),rateLimitPerMinute:c.z.number().optional().describe("Maximum embedding API requests per minute (rate limiting)."),concurrency:c.z.number().optional().describe("Maximum concurrent embedding requests.")}),B=c.z.object({url:c.z.string().describe('Qdrant server URL (e.g., "http://localhost:6333").'),collectionName:c.z.string().describe("Qdrant collection name for vector storage."),apiKey:c.z.string().optional().describe("Qdrant API key for authentication (supports ${ENV_VAR} substitution).")}),K=c.z.object({host:c.z.string().optional().describe('Host address for API server (e.g., "127.0.0.1", "0.0.0.0").'),port:c.z.number().optional().describe("Port for API server (e.g., 3456).")}),J=c.z.object({level:c.z.string().optional().describe("Logging level (trace, debug, info, warn, error, fatal)."),file:c.z.string().optional().describe("Path to log file (logs to stdout if omitted).")}),V=c.z.object({match:c.z.record(c.z.string(),c.z.unknown()).describe("JSON Schema object to match against file attributes."),set:c.z.record(c.z.string(),c.z.unknown()).describe("Metadata fields
to set when match succeeds."),map:c.z.union([l.jsonMapMapSchema,c.z.string()]).optional().describe("JsonMap transformation (inline definition or named map reference).")}),U=c.z.object({watch:Q.describe("File system watch configuration."),configWatch:$.optional().describe("Configuration file watch settings."),embedding:G.describe("Embedding model configuration."),vectorStore:B.describe("Qdrant vector store configuration."),metadataDir:c.z.string().optional().describe("Directory for persisted metadata sidecar files."),api:K.optional().describe("API server configuration."),extractors:c.z.record(c.z.string(),c.z.unknown()).optional().describe("Extractor configurations keyed by name."),inferenceRules:c.z.array(V).optional().describe("Rules for inferring metadata from file attributes."),maps:c.z.record(c.z.string(),l.jsonMapMapSchema).optional().describe("Reusable named JsonMap transformations."),logging:J.optional().describe("Logging configuration."),shutdownTimeoutMs:c.z.number().optional().describe("Timeout in milliseconds for graceful shutdown."),maxRetries:c.z.number().optional().describe("Maximum consecutive system-level failures before triggering fatal error. Default: Infinity."),maxBackoffMs:c.z.number().optional().describe("Maximum backoff delay in milliseconds for system errors. Default: 60000.")}),H=/\$\{([^}]+)\}/g;function Y(e){if("string"==typeof e)return function(e){return e.replace(H,((e,t)=>{const i=process.env[t];return void 0===i?e:i}))}(e);if(Array.isArray(e))return e.map((e=>Y(e)));if(null!==e&&"object"==typeof e){const t={};for(const[i,r]of Object.entries(e))t[i]=Y(r);return t}return e}const Z="jeeves-watcher";async function X(e){const t=a.cosmiconfig(Z),i=e?await t.load(e):await t.search();if(!i||i.isEmpty)throw new Error("No jeeves-watcher configuration found.
Create a .jeeves-watcherrc or jeeves-watcher.config.{js,ts,json,yaml} file.");try{const e=U.parse(i.config);return Y((r=e,{...N,...r,watch:{...q,...r.watch},configWatch:{..._,...r.configWatch},embedding:{...L,...r.embedding},api:{...O,...r.api},logging:{...A,...r.logging}}))}catch(e){if(e instanceof c.ZodError){const t=e.issues.map((e=>`${e.path.join(".")}: ${e.message}`)).join("; ");throw new Error(`Invalid jeeves-watcher configuration: ${t}`)}throw e}var r}function ee(e){return e||{warn(e,t){t?console.warn(e,t):console.warn(e)}}}function te(e,t){return e<=0?Promise.resolve():new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),e),n=()=>{s(),r(new Error("Retry sleep aborted"))},s=()=>{clearTimeout(o),t&&t.removeEventListener("abort",n)};if(t){if(t.aborted)return void n();t.addEventListener("abort",n,{once:!0})}}))}function ie(e,t,i,r=0){const o=Math.max(0,e-1),n=Math.min(i,t*2**o),s=r>0?1+Math.random()*r:1;return Math.round(n*s)}async function re(e,t){const i=Math.max(1,t.attempts);let r;for(let o=1;o<=i;o++)try{return await e(o)}catch(e){r=e;if(o>=i)break;const n=ie(o,t.baseDelayMs,t.maxDelayMs,t.jitter);t.onRetry?.({attempt:o,attempts:i,delayMs:n,error:e}),await te(n,t.signal)}throw r}const oe=new Map([["mock",function(e){return function(e){return{dimensions:e,embed:t=>Promise.resolve(t.map((t=>{const i=s.createHash("sha256").update(t,"utf8").digest(),r=[];for(let t=0;t<e;t++){const e=i[t%i.length];r.push(e/127.5-1)}return r})))}}(e.dimensions??768)}],["gemini",function(e,t){if(!e.apiKey)throw new Error("Gemini embedding provider requires config.embedding.apiKey");const i=e.dimensions??3072,r=ee(t),o=new h.GoogleGenerativeAIEmbeddings({apiKey:e.apiKey,model:e.model});return{dimensions:i,async embed(t){const n=await re((async i=>(i>1&&r.warn({attempt:i,provider:"gemini",model:e.model},"Retrying embedding
request"),o.embedDocuments(t))),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:t,delayMs:i,error:o})=>{r.warn({attempt:t,delayMs:i,provider:"gemini",model:e.model,err:k(o)},"Embedding call failed; will retry")}});for(const e of n)if(e.length!==i)throw new Error(`Gemini embedding returned invalid dimensions: expected ${String(i)}, got ${String(e.length)}`);return n}}}]]);function ne(e,t){const i=oe.get(e.provider);if(!i)throw new Error(`Unsupported embedding provider: ${e.provider}`);return i(e,t)}function se(e){let t=r.resolve(e);const i=r.resolve("/");for(;t!==i;){if(d.existsSync(r.join(t,".git"))&&d.statSync(r.join(t,".git")).isDirectory())return t;const e=r.dirname(t);if(e===t)break;t=e}}function ae(e){const t=r.resolve(e);try{return d.statSync(t).isDirectory()?t:r.dirname(t)}catch{}const i=/[*?[{]/.exec(e);if(!i)return;const o=e.slice(0,i.index).trim(),n=0===o.length?".":o.endsWith("/")||o.endsWith("\\")?o:r.dirname(o),s=r.resolve(n);return d.existsSync(s)?s:void 0}function ce(e){const t=[],i=r.join(e,".gitignore");let o;d.existsSync(i)&&t.push(i);try{o=d.readdirSync(e)}catch{return t}for(const i of o){if(".git"===i||"node_modules"===i)continue;const o=r.join(e,i);try{d.statSync(o).isDirectory()&&t.push(...ce(o))}catch{}}return t}function le(e){const t=d.readFileSync(e,"utf8");return u().add(t)}class he{repos=new Map;constructor(e){this.scan(e)}scan(e){this.repos.clear();const t=new Set;for(const i of e){const e=ae(i);if(!e)continue;if(t.has(e))continue;t.add(e);const o=se(e);if(!o)continue;if(this.repos.has(o))continue;const n=ce(o).map((e=>({dir:r.dirname(e),ig:le(e)})));n.sort(((e,t)=>t.dir.length-e.dir.length)),this.repos.set(o,{root:o,entries:n})}}isIgnored(e){const t=r.resolve(e);for(const[,e]of this.repos){const i=r.relative(e.root,t);if(!i.startsWith("..")&&!i.startsWith(r.resolve("/")))for(const i of e.entries){const e=r.relative(i.dir,t);if(e.startsWith(".."))continue;const
o=e.replace(/\\/g,"/");if(i.ig.ignores(o))return!0}}return!1}invalidate(e){const t=r.resolve(e),i=r.dirname(t);for(const[,e]of this.repos){if(!r.relative(e.root,i).startsWith(".."))return e.entries=e.entries.filter((e=>e.dir!==i)),void(d.existsSync(t)&&(e.entries.push({dir:i,ig:le(t)}),e.entries.sort(((e,t)=>t.dir.length-e.dir.length))))}const o=se(i);if(o&&d.existsSync(t)){const e=[{dir:i,ig:le(t)}];if(this.repos.has(o)){const t=this.repos.get(o);t.entries.push(e[0]),t.entries.sort(((e,t)=>t.dir.length-e.dir.length))}else this.repos.set(o,{root:o,entries:e})}}}function de(e){const t=e?.level??"info";if(e?.file){const i=g.transport({target:"pino/file",options:{destination:e.file,mkdir:!0}});return g({level:t},i)}return g({level:t})}function ue(e){return s.createHash("sha256").update(e,"utf8").digest("hex")}const ge="6a6f686e-6761-4c74-ad6a-656576657321";function fe(e,t){const i=void 0!==t?`${C(e)}#${String(t)}`:C(e);return f.v5(i,ge)}const pe=["content","body","text","snippet","subject","description","summary","transcript"];function me(e){if(!e||"object"!=typeof e)return JSON.stringify(e);const t=e;for(const e of pe){const i=t[e];if("string"==typeof i&&i.trim())return i}return JSON.stringify(e)}async function ye(e){const t=await i.readFile(e,"utf8"),{frontmatter:r,body:o}=function(e){const t=e.replace(/^\uFEFF/,"");if(!/^\s*---/.test(t))return{body:e};const i=/^---\s*\n([\s\S]*?)\n---\s*\n?([\s\S]*)$/m.exec(t);if(!i)return{body:e};const[,r,o]=i,n=m.load(r);return{frontmatter:n&&"object"==typeof n&&!Array.isArray(n)?n:void 0,body:o}}(t);return{text:o,frontmatter:r}}async function we(e){return{text:(await i.readFile(e,"utf8")).replace(/^\uFEFF/,"")}}async function be(e){const t=await i.readFile(e,"utf8"),r=F.load(t.replace(/^\uFEFF/,""));r("script, style").remove();return{text:r("body").text().trim()||r.text().trim()}}const ve=new Map([[".md",ye],[".markdown",ye],[".txt",we],[".text",we],[".json",async function(e){const t=await
i.readFile(e,"utf8"),r=JSON.parse(t.replace(/^\uFEFF/,"")),o=r&&"object"==typeof r&&!Array.isArray(r)?r:void 0;return{text:me(r),json:o}}],[".pdf",async function(e){const t=await i.readFile(e),r=new Uint8Array(t),{extractText:o}=await import("unpdf"),{text:n}=await o(r);return{text:Array.isArray(n)?n.join("\n\n"):n}}],[".docx",async function(e){const t=await i.readFile(e);return{text:(await y.extractRawText({buffer:t})).value}}],[".html",be],[".htm",be]]);async function Me(e,t){const i=ve.get(t.toLowerCase());return i?i(e):we(e)}function xe(e,t){return"string"!=typeof e?e:e.replace(/\$\{([^}]+)\}/g,((e,i)=>{const r=n.get(t,i);return null==r?"":"string"==typeof r?r:JSON.stringify(r)}))}function Se(e,t){const i={};for(const[r,o]of Object.entries(e))i[r]=xe(o,t);return i}async function Fe(e,t,i,r){const o={split:(e,t)=>e.split(t),slice:(e,t,i)=>e.slice(t,i),join:(e,t)=>e.join(t),toLowerCase:e=>e.toLowerCase(),replace:(e,t,i)=>e.replace(t,i),get:(e,t)=>n.get(e,t)};let s={};const a=r??console;for(const{rule:r,validate:n}of e)if(n(t)){const e=Se(r.set,t);if(s={...s,...e},r.map){let e;if("string"==typeof r.map){if(e=i?.[r.map],!e){a.warn(`Map reference "${r.map}" not found in named maps.
Skipping map transformation.`);continue}}else e=r.map;try{const i=new l.JsonMap(e,o),r=await i.transform(t);r&&"object"==typeof r&&!Array.isArray(r)?s={...s,...r}:a.warn("JsonMap transformation did not return an object; skipping merge.")}catch(e){a.warn(`JsonMap transformation failed: ${e instanceof Error?e.message:String(e)}`)}}}return s}function ke(e,t,i,o){const n=e.replace(/\\/g,"/"),s={file:{path:n,directory:r.dirname(n).replace(/\\/g,"/"),filename:r.basename(n),extension:r.extname(n),sizeBytes:t.size,modified:t.mtime.toISOString()}};return i&&(s.frontmatter=i),o&&(s.json=o),s}function Pe(e){const t=function(){const e=new w({allErrors:!0});return b(e),e.addKeyword({keyword:"glob",type:"string",schemaType:"string",validate:(e,t)=>o.isMatch(t,e)}),e}();return e.map(((e,i)=>({rule:e,validate:t.compile({$id:`rule-${String(i)}`,...e.match})})))}async function ze(e,t,o,n,s){const a=r.extname(e),c=await i.stat(e),l=await Me(e,a),h=ke(e,c,l.frontmatter,l.json),d=await Fe(t,h,n,s),u=await E(e,o);return{inferred:d,enrichment:u,metadata:{...d,...u??{}},attributes:h,extracted:l}}function je(e,t){const i=[];for(let r=0;r<t;r++)i.push(fe(e,r));return i}function Ce(e,t=1){if(!e)return t;const i=e.total_chunks;return"number"==typeof i?i:t}class Re{config;embeddingProvider;vectorStore;compiledRules;logger;constructor(e,t,i,r,o){this.config=e,this.embeddingProvider=t,this.vectorStore=i,this.compiledRules=r,this.logger=o}async processFile(e){try{const t=r.extname(e),{metadata:i,extracted:o}=await ze(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger);if(!o.text.trim())return void this.logger.debug({filePath:e},"Skipping empty file");const n=ue(o.text),s=fe(e,0),a=await this.vectorStore.getPayload(s);if(a&&a.content_hash===n)return void this.logger.debug({filePath:e},"Content unchanged, skipping");const c=Ce(a),l=this.config.chunkSize??1e3,h=function(e,t,i){const r=e.toLowerCase();return".md"===r||".markdown"===r?new
v.MarkdownTextSplitter({chunkSize:t,chunkOverlap:i}):new v.RecursiveCharacterTextSplitter({chunkSize:t,chunkOverlap:i})}(t,l,this.config.chunkOverlap??200),d=await h.splitText(o.text),u=await this.embeddingProvider.embed(d),g=d.map(((t,r)=>({id:fe(e,r),vector:u[r],payload:{...i,file_path:e.replace(/\\/g,"/"),chunk_index:r,total_chunks:d.length,content_hash:n,chunk_text:t}})));if(await this.vectorStore.upsert(g),c>d.length){const t=je(e,c).slice(d.length);await this.vectorStore.delete(t)}this.logger.info({filePath:e,chunks:d.length},"File processed successfully")}catch(t){this.logger.error({filePath:e,err:k(t)},"Failed to process file")}}async deleteFile(e){try{const t=fe(e,0),i=await this.vectorStore.getPayload(t),r=je(e,Ce(i));await this.vectorStore.delete(r),await D(e,this.config.metadataDir),this.logger.info({filePath:e},"File deleted from index")}catch(t){this.logger.error({filePath:e,err:k(t)},"Failed to delete file")}}async processMetadataUpdate(e,t){try{const i={...await E(e,this.config.metadataDir)??{},...t};await I(e,this.config.metadataDir,i);const r=fe(e,0),o=await this.vectorStore.getPayload(r);if(!o)return null;const n=Ce(o),s=je(e,n);return await this.vectorStore.setPayload(s,i),this.logger.info({filePath:e,chunks:n},"Metadata updated"),i}catch(t){return this.logger.error({filePath:e,err:k(t)},"Failed to update metadata"),null}}async processRulesUpdate(e){try{const t=fe(e,0),i=await this.vectorStore.getPayload(t);if(!i)return this.logger.debug({filePath:e},"File not indexed, skipping"),null;const{metadata:r}=await ze(e,this.compiledRules,this.config.metadataDir,this.config.maps,this.logger),o=Ce(i),n=je(e,o);return await this.vectorStore.setPayload(n,r),this.logger.info({filePath:e,chunks:o},"Rules re-applied"),r}catch(t){return this.logger.error({filePath:e,err:k(t)},"Failed to re-apply rules"),null}}updateRules(e){this.compiledRules=e,this.logger.info({rules:e.length},"Inference rules updated")}}class
Ee{debounceMs;concurrency;rateLimitPerMinute;started=!1;active=0;debounceTimers=new Map;latestByKey=new Map;normalQueue=[];lowQueue=[];tokens;lastRefillMs=Date.now();drainWaiters=[];constructor(e){this.debounceMs=e.debounceMs,this.concurrency=e.concurrency,this.rateLimitPerMinute=e.rateLimitPerMinute,this.tokens=this.rateLimitPerMinute??Number.POSITIVE_INFINITY}enqueue(e,t){const i=`${e.priority}:${e.path}`;this.latestByKey.set(i,{event:e,fn:t});const r=this.debounceTimers.get(i);r&&clearTimeout(r);const o=setTimeout((()=>{this.debounceTimers.delete(i);const e=this.latestByKey.get(i);e&&(this.latestByKey.delete(i),this.push(e),this.pump())}),this.debounceMs);this.debounceTimers.set(i,o)}process(){this.started=!0,this.pump()}async drain(){this.isIdle()||await new Promise((e=>{this.drainWaiters.push(e)}))}push(e){"low"===e.event.priority?this.lowQueue.push(e):this.normalQueue.push(e)}refillTokens(e){if(void 0===this.rateLimitPerMinute)return;const t=Math.max(0,e-this.lastRefillMs)*(this.rateLimitPerMinute/6e4);this.tokens=Math.min(this.rateLimitPerMinute,this.tokens+t),this.lastRefillMs=e}takeToken(){const e=Date.now();return this.refillTokens(e),!(this.tokens<1)&&(this.tokens-=1,!0)}nextItem(){return this.normalQueue.shift()??this.lowQueue.shift()}pump(){if(this.started){for(;this.active<this.concurrency;){const e=this.nextItem();if(!e)break;if(!this.takeToken()){"low"===e.event.priority?this.lowQueue.unshift(e):this.normalQueue.unshift(e),setTimeout((()=>{this.pump()}),250);break}this.active+=1,Promise.resolve().then((()=>e.fn(e.event))).finally((()=>{this.active-=1,this.pump(),this.maybeResolveDrain()}))}this.maybeResolveDrain()}}isIdle(){return 0===this.active&&0===this.normalQueue.length&&0===this.lowQueue.length&&0===this.debounceTimers.size&&0===this.latestByKey.size}maybeResolveDrain(){if(!this.isIdle())return;const e=this.drainWaiters;this.drainWaiters=[];for(const t of e)t()}}function Ie(e){return null==e?"keyword":"number"==typeof
e?Number.isInteger(e)?"integer":"float":"boolean"==typeof e?"bool":Array.isArray(e)?"keyword[]":"string"==typeof e&&e.length>256?"text":"keyword"}class De{client;collectionName;dims;log;constructor(e,t,i){this.client=new M.QdrantClient({url:e.url,apiKey:e.apiKey,checkCompatibility:!1}),this.collectionName=e.collectionName,this.dims=t,this.log=ee(i)}async ensureCollection(){try{const e=await this.client.getCollections();e.collections.some((e=>e.name===this.collectionName))||await this.client.createCollection(this.collectionName,{vectors:{size:this.dims,distance:"Cosine"}})}catch(e){throw new Error(`Failed to ensure collection "${this.collectionName}": ${String(e)}`)}}async upsert(e){0!==e.length&&await re((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.upsert",points:e.length},"Retrying Qdrant upsert"),await this.client.upsert(this.collectionName,{wait:!0,points:e.map((e=>({id:e.id,vector:e.vector,payload:e.payload})))})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.upsert",err:k(i)},"Qdrant upsert failed; will retry")}})}async delete(e){0!==e.length&&await re((async t=>{t>1&&this.log.warn({attempt:t,operation:"qdrant.delete",ids:e.length},"Retrying Qdrant delete"),await this.client.delete(this.collectionName,{wait:!0,points:e})}),{attempts:5,baseDelayMs:500,maxDelayMs:1e4,jitter:.2,onRetry:({attempt:e,delayMs:t,error:i})=>{this.log.warn({attempt:e,delayMs:t,operation:"qdrant.delete",err:k(i)},"Qdrant delete failed; will retry")}})}async setPayload(e,t){0!==e.length&&await this.client.setPayload(this.collectionName,{wait:!0,points:e,payload:t})}async getPayload(e){try{const t=await this.client.retrieve(this.collectionName,{ids:[e],with_payload:!0,with_vector:!1});return 0===t.length?null:t[0].payload}catch{return null}}async getCollectionInfo(){const e=await
this.client.getCollection(this.collectionName),t=e.points_count??0,i=e.config.params.vectors,r=void 0!==i&&"size"in i?i.size:0,o={},n=Object.entries(e.payload_schema);if(n.length>0)for(const[e,t]of n)o[e]={type:t.data_type??"unknown"};else t>0&&await this.discoverPayloadFields(o);return{pointCount:t,dimensions:r,payloadFields:o}}async discoverPayloadFields(e,t=100){const i=await this.client.scroll(this.collectionName,{limit:t,with_payload:!0,with_vector:!1});for(const t of i.points){const i=t.payload;if(i)for(const[t,r]of Object.entries(i))t in e||(e[t]={type:Ie(r)})}}async search(e,t,i){return(await this.client.search(this.collectionName,{vector:e,limit:t,with_payload:!0,...i?{filter:i}:{}})).map((e=>({id:String(e.id),score:e.score,payload:e.payload})))}async*scroll(e,t=100){let i;for(;;){const r=await this.client.scroll(this.collectionName,{limit:t,with_payload:!0,with_vector:!1,...e?{filter:e}:{},...void 0!==i?{offset:i}:{}});for(const e of r.points)yield{id:String(e.id),payload:e.payload};const o=r.next_page_offset;if(null==o)break;if("string"!=typeof o&&"number"!=typeof o)break;i=o}}}class Te{consecutiveFailures=0;maxRetries;maxBackoffMs;baseDelayMs;onFatalError;logger;constructor(e){this.maxRetries=e.maxRetries??Number.POSITIVE_INFINITY,this.maxBackoffMs=e.maxBackoffMs??6e4,this.baseDelayMs=e.baseDelayMs??1e3,this.onFatalError=e.onFatalError,this.logger=e.logger}recordSuccess(){this.consecutiveFailures>0&&this.logger.info({previousFailures:this.consecutiveFailures},"System health recovered"),this.consecutiveFailures=0}recordFailure(e){if(this.consecutiveFailures+=1,this.logger.error({consecutiveFailures:this.consecutiveFailures,maxRetries:this.maxRetries,err:k(e)},"System-level failure recorded"),this.consecutiveFailures>=this.maxRetries){if(this.logger.fatal({consecutiveFailures:this.consecutiveFailures},"Maximum retries exceeded, triggering fatal error"),this.onFatalError)return this.onFatalError(e),!1;throw e instanceof Error?e:new Error(`Fatal system
error: ${String(e)}`)}return!0}get currentBackoffMs(){if(0===this.consecutiveFailures)return 0;const e=Math.max(0,this.consecutiveFailures-1);return Math.min(this.maxBackoffMs,this.baseDelayMs*2**e)}async backoff(e){const t=this.currentBackoffMs;t<=0||(this.logger.warn({delayMs:t,consecutiveFailures:this.consecutiveFailures},"Backing off before next attempt"),await new Promise(((i,r)=>{const o=setTimeout((()=>{s(),i()}),t),n=()=>{s(),r(new Error("Backoff aborted"))},s=()=>{clearTimeout(o),e&&e.removeEventListener("abort",n)};if(e){if(e.aborted)return void n();e.addEventListener("abort",n,{once:!0})}})))}get failures(){return this.consecutiveFailures}}class We{config;queue;processor;logger;health;gitignoreFilter;watcher;constructor(e,t,i,r,o={}){this.config=e,this.queue=t,this.processor=i,this.logger=r,this.gitignoreFilter=o.gitignoreFilter;const n={maxRetries:o.maxRetries,maxBackoffMs:o.maxBackoffMs,onFatalError:o.onFatalError,logger:r};this.health=new Te(n)}start(){this.watcher=x.watch(this.config.paths,{ignored:this.config.ignored,usePolling:this.config.usePolling,interval:this.config.pollIntervalMs,awaitWriteFinish:!!this.config.stabilityThresholdMs&&{stabilityThreshold:this.config.stabilityThresholdMs},ignoreInitial:!1}),this.watcher.on("add",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File added"),this.queue.enqueue({type:"create",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.processFile(e))))))})),this.watcher.on("change",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File changed"),this.queue.enqueue({type:"modify",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.processFile(e))))))})),this.watcher.on("unlink",(e=>{this.handleGitignoreChange(e),this.isGitignored(e)||(this.logger.debug({path:e},"File
removed"),this.queue.enqueue({type:"delete",path:e,priority:"normal"},(()=>this.wrapProcessing((()=>this.processor.deleteFile(e))))))})),this.watcher.on("error",(e=>{this.logger.error({err:k(e)},"Watcher error"),this.health.recordFailure(e)})),this.queue.process(),this.logger.info({paths:this.config.paths},"Filesystem watcher started")}async stop(){this.watcher&&(await this.watcher.close(),this.watcher=void 0,this.logger.info("Filesystem watcher stopped"))}get systemHealth(){return this.health}isGitignored(e){if(!this.gitignoreFilter)return!1;const t=this.gitignoreFilter.isIgnored(e);return t&&this.logger.debug({path:e},"Skipping gitignored file"),t}handleGitignoreChange(e){this.gitignoreFilter&&e.endsWith(".gitignore")&&(this.logger.info({path:e},"Gitignore file changed, refreshing filter"),this.gitignoreFilter.invalidate(e))}async wrapProcessing(e){try{await this.health.backoff(),await e(),this.health.recordSuccess()}catch(e){this.health.recordFailure(e)||await this.stop()}}}class Ne{options;watcher;debounce;constructor(e){this.options=e}start(){this.options.enabled&&(this.watcher=x.watch(this.options.configPath,{ignoreInitial:!0}),this.watcher.on("change",(()=>{this.debounce&&clearTimeout(this.debounce),this.debounce=setTimeout((()=>{this.options.onChange()}),this.options.debounceMs)})),this.watcher.on("error",(e=>{this.options.logger.error({err:k(e)},"Config watcher error")})),this.options.logger.info({configPath:this.options.configPath,debounceMs:this.options.debounceMs},"Config watcher started"))}async stop(){this.debounce&&(clearTimeout(this.debounce),this.debounce=void 0),this.watcher&&(await this.watcher.close(),this.watcher=void 0)}}const _e={loadConfig:X,createLogger:de,createEmbeddingProvider:ne,createVectorStoreClient:(e,t,i)=>new De(e,t,i),compileRules:Pe,createDocumentProcessor:(e,t,i,r,o)=>new Re(e,t,i,r,o),createEventQueue:e=>new Ee(e),createFileSystemWatcher:(e,t,i,r,o)=>new We(e,t,i,r,o),createApiServer:W};class
Oe{config;configPath;factories;runtimeOptions;logger;watcher;queue;server;processor;configWatcher;constructor(e,t,i={},r={}){this.config=e,this.configPath=t,this.factories={..._e,...i},this.runtimeOptions=r}async start(){const e=this.factories.createLogger(this.config.logging);let t;this.logger=e;try{t=this.factories.createEmbeddingProvider(this.config.embedding,e)}catch(t){throw e.fatal({err:k(t)},"Failed to create embedding provider"),t}const i=this.factories.createVectorStoreClient(this.config.vectorStore,t.dimensions,e);await i.ensureCollection();const r=this.factories.compileRules(this.config.inferenceRules??[]),o={metadataDir:this.config.metadataDir??".jeeves-metadata",chunkSize:this.config.embedding.chunkSize,chunkOverlap:this.config.embedding.chunkOverlap,maps:this.config.maps},n=this.factories.createDocumentProcessor(o,t,i,r,e);this.processor=n;const s=this.factories.createEventQueue({debounceMs:this.config.watch.debounceMs??2e3,concurrency:this.config.embedding.concurrency??5,rateLimitPerMinute:this.config.embedding.rateLimitPerMinute});this.queue=s;const a=this.config.watch.respectGitignore??!0?new he(this.config.watch.paths):void 0,c=this.factories.createFileSystemWatcher(this.config.watch,s,n,e,{maxRetries:this.config.maxRetries,maxBackoffMs:this.config.maxBackoffMs,onFatalError:this.runtimeOptions.onFatalError,gitignoreFilter:a});this.watcher=c;const l=this.factories.createApiServer({processor:n,vectorStore:i,embeddingProvider:t,queue:s,config:this.config,logger:e});this.server=l,await l.listen({host:this.config.api?.host??"127.0.0.1",port:this.config.api?.port??3456}),c.start(),this.startConfigWatch(),e.info("jeeves-watcher started")}async stop(){if(await this.stopConfigWatch(),this.watcher&&await this.watcher.stop(),this.queue){const e=this.config.shutdownTimeoutMs??1e4;await Promise.race([this.queue.drain().then((()=>!0)),new Promise((t=>{setTimeout((()=>{t(!1)}),e)}))])||this.logger?.warn({timeoutMs:e},"Queue drain timeout hit, forcing
shutdown")}this.server&&await this.server.close(),this.logger?.info("jeeves-watcher stopped")}startConfigWatch(){const e=this.logger;if(!e)return;const t=this.config.configWatch?.enabled??!0;if(!t)return;if(!this.configPath)return void e.debug("Config watch enabled, but no config path was provided");const i=this.config.configWatch?.debounceMs??1e4;this.configWatcher=new Ne({configPath:this.configPath,enabled:t,debounceMs:i,logger:e,onChange:async()=>this.reloadConfig()}),this.configWatcher.start()}async stopConfigWatch(){this.configWatcher&&(await this.configWatcher.stop(),this.configWatcher=void 0)}async reloadConfig(){const e=this.logger,t=this.processor;if(e&&t&&this.configPath){e.info({configPath:this.configPath},"Config change detected, reloading...");try{const i=await this.factories.loadConfig(this.configPath);this.config=i;const r=this.factories.compileRules(i.inferenceRules??[]);t.updateRules(r),e.info({configPath:this.configPath,rules:r.length},"Config reloaded")}catch(t){e.error({err:k(t)},"Failed to reload config")}}}}e.DocumentProcessor=Re,e.EventQueue=Ee,e.FileSystemWatcher=We,e.GitignoreFilter=he,e.JeevesWatcher=Oe,e.SystemHealth=Te,e.VectorStoreClient=De,e.apiConfigSchema=K,e.applyRules=Fe,e.buildAttributes=ke,e.compileRules=Pe,e.configWatchConfigSchema=$,e.contentHash=ue,e.createApiServer=W,e.createEmbeddingProvider=ne,e.createLogger=de,e.deleteMetadata=D,e.embeddingConfigSchema=G,e.extractText=Me,e.inferenceRuleSchema=V,e.jeevesWatcherConfigSchema=U,e.loadConfig=X,e.loggingConfigSchema=J,e.metadataPath=R,e.pointId=fe,e.readMetadata=E,e.startFromConfig=async function(e){const t=await X(e),i=new Oe(t,e);return function(e){const t=async()=>{await e(),process.exit(0)};process.on("SIGTERM",(()=>{t()})),process.on("SIGINT",(()=>{t()}))}((()=>i.stop())),await
i.start(),i},e.vectorStoreConfigSchema=B,e.watchConfigSchema=Q,e.writeMetadata=I}(this["jeeves-watcher"]=this["jeeves-watcher"]||{},Fastify,promises,node_path,picomatch,radash,node_crypto,cosmiconfig,zod,jsonmap,googleGenai,node_fs,ignore,pino,uuid,cheerio,yaml,mammoth,Ajv,addFormats,textsplitters,jsClientRest,chokidar);
|
package/dist/mjs/index.js
CHANGED
|
@@ -360,9 +360,9 @@ function createReindexHandler(deps) {
|
|
|
360
360
|
function createSearchHandler(deps) {
|
|
361
361
|
return async (request, reply) => {
|
|
362
362
|
try {
|
|
363
|
-
const { query, limit = 10 } = request.body;
|
|
363
|
+
const { query, limit = 10, filter } = request.body;
|
|
364
364
|
const vectors = await deps.embeddingProvider.embed([query]);
|
|
365
|
-
const results = await deps.vectorStore.search(vectors[0], limit);
|
|
365
|
+
const results = await deps.vectorStore.search(vectors[0], limit, filter);
|
|
366
366
|
return results;
|
|
367
367
|
}
|
|
368
368
|
catch (error) {
|
|
@@ -374,16 +374,27 @@ function createSearchHandler(deps) {
|
|
|
374
374
|
|
|
375
375
|
/**
|
|
376
376
|
* @module api/handlers/status
|
|
377
|
-
* Fastify route handler for GET /status.
|
|
377
|
+
* Fastify route handler for GET /status. Returns process health, uptime, and collection stats.
|
|
378
378
|
*/
|
|
379
379
|
/**
|
|
380
380
|
* Create handler for GET /status.
|
|
381
|
+
*
|
|
382
|
+
* @param deps - Route dependencies.
|
|
381
383
|
*/
|
|
382
|
-
function createStatusHandler() {
|
|
383
|
-
return () =>
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
384
|
+
function createStatusHandler(deps) {
    // The handler takes no request arguments; it queries the vector store on
    // every call and combines the result with the process uptime.
    const handleStatus = async () => {
        const { pointCount, dimensions, payloadFields } = await deps.vectorStore.getCollectionInfo();
        const uptime = process.uptime();
        // Collection name comes from config; counts and dimensions come from the store.
        const collection = {
            name: deps.config.vectorStore.collectionName,
            pointCount,
            dimensions,
        };
        return { status: 'ok', uptime, collection, payloadFields };
    };
    return handleStatus;
}
|
|
388
399
|
|
|
389
400
|
/**
|
|
@@ -397,7 +408,7 @@ function createStatusHandler() {
|
|
|
397
408
|
function createApiServer(options) {
|
|
398
409
|
const { processor, vectorStore, embeddingProvider, logger, config } = options;
|
|
399
410
|
const app = Fastify({ logger: false });
|
|
400
|
-
app.get('/status', createStatusHandler());
|
|
411
|
+
app.get('/status', createStatusHandler({ vectorStore, config }));
|
|
401
412
|
app.post('/metadata', createMetadataHandler({ processor, logger }));
|
|
402
413
|
app.post('/search', createSearchHandler({ embeddingProvider, vectorStore, logger }));
|
|
403
414
|
app.post('/reindex', createReindexHandler({ config, processor, logger }));
|
|
@@ -1923,6 +1934,22 @@ class EventQueue {
|
|
|
1923
1934
|
}
|
|
1924
1935
|
}
|
|
1925
1936
|
|
|
1937
|
+
/**
 * Map a JS payload value to a Qdrant-style type label.
 *
 * - numbers: 'integer' when integral, otherwise 'float'
 * - booleans: 'bool'
 * - arrays: 'keyword[]' (element types are not inspected)
 * - strings: 'text' when longer than 256 characters, otherwise 'keyword'
 * - everything else (null, undefined, plain objects, ...): 'keyword'
 *
 * @param value - Payload value to classify.
 * @returns The inferred type label.
 */
function inferPayloadType(value) {
    switch (typeof value) {
        case 'number':
            return Number.isInteger(value) ? 'integer' : 'float';
        case 'boolean':
            return 'bool';
        case 'string':
            return value.length > 256 ? 'text' : 'keyword';
        case 'object':
            // typeof null is 'object' too; Array.isArray(null) is false, so
            // null lands on the 'keyword' fallback like any non-array object.
            return Array.isArray(value) ? 'keyword[]' : 'keyword';
        default:
            return 'keyword';
    }
}
|
|
1926
1953
|
/**
|
|
1927
1954
|
* Client wrapper for Qdrant vector store operations.
|
|
1928
1955
|
*/
|
|
@@ -2069,6 +2096,58 @@ class VectorStoreClient {
|
|
|
2069
2096
|
return null;
|
|
2070
2097
|
}
|
|
2071
2098
|
}
|
|
2099
|
+
/**
|
|
2100
|
+
* Get collection info including point count, dimensions, and payload field schema.
|
|
2101
|
+
*
|
|
2102
|
+
* When Qdrant has payload indexes, uses `payload_schema` directly. Otherwise
|
|
2103
|
+
* samples points to discover fields and infer types.
|
|
2104
|
+
*/
|
|
2105
|
+
async getCollectionInfo() {
|
|
2106
|
+
const info = await this.client.getCollection(this.collectionName);
|
|
2107
|
+
const pointCount = info.points_count ?? 0;
|
|
2108
|
+
const vectorsConfig = info.config.params.vectors;
|
|
2109
|
+
const dimensions = vectorsConfig !== undefined && 'size' in vectorsConfig
|
|
2110
|
+
? vectorsConfig.size
|
|
2111
|
+
: 0;
|
|
2112
|
+
// Try indexed payload_schema first.
|
|
2113
|
+
const payloadFields = {};
|
|
2114
|
+
const schemaEntries = Object.entries(info.payload_schema);
|
|
2115
|
+
if (schemaEntries.length > 0) {
|
|
2116
|
+
for (const [key, schema] of schemaEntries) {
|
|
2117
|
+
payloadFields[key] = {
|
|
2118
|
+
type: schema.data_type ?? 'unknown',
|
|
2119
|
+
};
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2122
|
+
else if (pointCount > 0) {
|
|
2123
|
+
// No indexed schema — sample points to discover fields.
|
|
2124
|
+
await this.discoverPayloadFields(payloadFields);
|
|
2125
|
+
}
|
|
2126
|
+
return { pointCount, dimensions, payloadFields };
|
|
2127
|
+
}
|
|
2128
|
+
/**
|
|
2129
|
+
* Sample points and discover payload field names and inferred types.
|
|
2130
|
+
*
|
|
2131
|
+
* @param target - Object to populate with discovered fields.
|
|
2132
|
+
* @param sampleSize - Number of points to sample.
|
|
2133
|
+
*/
|
|
2134
|
+
async discoverPayloadFields(target, sampleSize = 100) {
|
|
2135
|
+
const result = await this.client.scroll(this.collectionName, {
|
|
2136
|
+
limit: sampleSize,
|
|
2137
|
+
with_payload: true,
|
|
2138
|
+
with_vector: false,
|
|
2139
|
+
});
|
|
2140
|
+
for (const point of result.points) {
|
|
2141
|
+
const payload = point.payload;
|
|
2142
|
+
if (!payload)
|
|
2143
|
+
continue;
|
|
2144
|
+
for (const [key, value] of Object.entries(payload)) {
|
|
2145
|
+
if (key in target)
|
|
2146
|
+
continue;
|
|
2147
|
+
target[key] = { type: inferPayloadType(value) };
|
|
2148
|
+
}
|
|
2149
|
+
}
|
|
2150
|
+
}
|
|
2072
2151
|
/**
|
|
2073
2152
|
* Search for similar vectors.
|
|
2074
2153
|
*
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module plugin
|
|
3
|
+
* OpenClaw plugin entry point. Registers watcher_search, watcher_enrich, and watcher_status tools.
|
|
4
|
+
*/
|
|
5
|
+
/** Base URL used when the plugin config does not supply a usable `apiUrl`. */
const DEFAULT_API_URL = 'http://127.0.0.1:3458';
/**
 * Resolve the jeeves-watcher API base URL from the plugin API's config.
 *
 * Reads `config.plugins.entries['jeeves-watcher'].config.apiUrl`. The value is
 * trimmed and stripped of trailing slashes, because callers append paths such
 * as `/status` and a trailing slash would otherwise produce `//status`.
 *
 * @param api - Plugin API object; only its optional `config` tree is read.
 * @returns The normalized base URL, or the default when the configured value
 *   is not a string or is empty after normalization.
 */
function getApiUrl(api) {
    const url = api.config?.plugins?.entries?.['jeeves-watcher']?.config?.apiUrl;
    if (typeof url !== 'string') {
        return DEFAULT_API_URL;
    }
    const normalized = url.trim().replace(/\/+$/, '');
    return normalized === '' ? DEFAULT_API_URL : normalized;
}
|
|
10
|
+
/**
 * Wrap a successful result as a tool response: a single text content item
 * holding `data` pretty-printed as JSON with a 2-space indent.
 */
function ok(data) {
    const text = JSON.stringify(data, null, 2);
    const textItem = { type: 'text', text };
    return { content: [textItem] };
}
|
|
15
|
+
/**
 * Wrap a failure as a tool response flagged with `isError: true`.
 * Error instances contribute their `message`; any other thrown value is
 * converted with `String()`.
 */
function fail(error) {
    let message;
    if (error instanceof Error) {
        message = error.message;
    }
    else {
        message = String(error);
    }
    const textItem = { type: 'text', text: `Error: ${message}` };
    return { content: [textItem], isError: true };
}
|
|
22
|
+
/**
 * Fetch `url` and return the parsed JSON body.
 *
 * @param url - Request URL.
 * @param init - Optional fetch init, passed through unchanged.
 * @returns The parsed JSON response body.
 * @throws Error with message `HTTP <status>: <response text>` when the
 *   response is not ok.
 */
async function fetchJson(url, init) {
    const response = await fetch(url, init);
    if (response.ok) {
        return response.json();
    }
    const statusText = String(response.status);
    const bodyText = await response.text();
    throw new Error(`HTTP ${statusText}: ${bodyText}`);
}
|
|
29
|
+
/**
 * OpenClaw plugin entry point: registers the `watcher_status`,
 * `watcher_search`, and `watcher_enrich` tools on `api`.
 *
 * The API base URL is resolved once, at registration time. Each tool's
 * `execute` resolves to `ok(...)` with the parsed JSON response, or to
 * `fail(...)` when anything inside its try block throws. Every tool is
 * registered with `{ optional: true }`.
 */
function register(api) {
    const baseUrl = getApiUrl(api);
    // Builds the fetch init for a JSON POST. Always invoked inside a tool's
    // try block so serialization errors are reported via fail() as well.
    const jsonPost = (payload) => ({
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
    });

    const statusTool = {
        name: 'watcher_status',
        description: 'Get jeeves-watcher status including collection stats and available payload fields.',
        parameters: { type: 'object', properties: {} },
        async execute() {
            try {
                return ok(await fetchJson(`${baseUrl}/status`));
            }
            catch (error) {
                return fail(error);
            }
        },
    };
    api.registerTool(statusTool, { optional: true });

    const searchTool = {
        name: 'watcher_search',
        description: 'Semantic search over indexed documents. Supports Qdrant filters.',
        parameters: {
            type: 'object',
            required: ['query'],
            properties: {
                query: { type: 'string', description: 'Search query text.' },
                limit: {
                    type: 'number',
                    description: 'Max results (default 10).',
                },
                filter: {
                    type: 'object',
                    description: 'Qdrant filter object.',
                },
            },
        },
        async execute(_id, params) {
            try {
                // Optional fields are only sent when the caller provided them.
                const payload = { query: params.query };
                if (params.limit !== undefined) {
                    payload.limit = params.limit;
                }
                if (params.filter !== undefined) {
                    payload.filter = params.filter;
                }
                return ok(await fetchJson(`${baseUrl}/search`, jsonPost(payload)));
            }
            catch (error) {
                return fail(error);
            }
        },
    };
    api.registerTool(searchTool, { optional: true });

    const enrichTool = {
        name: 'watcher_enrich',
        description: 'Set or update metadata on a document by file path.',
        parameters: {
            type: 'object',
            required: ['path', 'metadata'],
            properties: {
                path: {
                    type: 'string',
                    description: 'Relative file path of the document.',
                },
                metadata: {
                    type: 'object',
                    description: 'Key-value metadata to set on the document.',
                },
            },
        },
        async execute(_id, params) {
            try {
                const payload = { path: params.path, metadata: params.metadata };
                return ok(await fetchJson(`${baseUrl}/metadata`, jsonPost(payload)));
            }
            catch (error) {
                return fail(error);
            }
        },
    };
    api.registerTool(enrichTool, { optional: true });
}
|
|
117
|
+
|
|
118
|
+
export { register as default };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "jeeves-watcher",
|
|
3
|
+
"name": "Jeeves Watcher",
|
|
4
|
+
"description": "Semantic search and metadata enrichment via a jeeves-watcher instance.",
|
|
5
|
+
"version": "0.3.0",
|
|
6
|
+
"skills": ["skill"],
|
|
7
|
+
"configSchema": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"additionalProperties": false,
|
|
10
|
+
"properties": {
|
|
11
|
+
"apiUrl": {
|
|
12
|
+
"type": "string",
|
|
13
|
+
"description": "jeeves-watcher API base URL",
|
|
14
|
+
"default": "http://127.0.0.1:3458"
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"uiHints": {
|
|
19
|
+
"apiUrl": {
|
|
20
|
+
"label": "Watcher API URL",
|
|
21
|
+
"placeholder": "http://127.0.0.1:3458"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: jeeves-watcher
|
|
3
|
+
description: >
|
|
4
|
+
Semantic search and metadata enrichment via a jeeves-watcher instance.
|
|
5
|
+
Use as your primary method for finding relevant files and context before
|
|
6
|
+
falling back to filesystem browsing.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Jeeves Watcher — Context Discovery
|
|
10
|
+
|
|
11
|
+
## Behavioral Directive
|
|
12
|
+
|
|
13
|
+
**Use `watcher_search` as your first step when you need to find information.** Before browsing the filesystem, before grepping, before guessing at file paths — search the index. It covers far more ground than manual exploration and finds things you wouldn't know to look for.
|
|
14
|
+
|
|
15
|
+
**When search doesn't find what you need, fall back to your usual methods.** The index covers configured watch paths but not everything on the system. Active repos, system tools, and files outside the watch scope won't appear. Absence of results means the content may not be indexed, not that it doesn't exist.
|
|
16
|
+
|
|
17
|
+
## Workflow
|
|
18
|
+
|
|
19
|
+
### 1. Discover (once per session)
|
|
20
|
+
|
|
21
|
+
Call `watcher_status` early in your session to learn what's available:
|
|
22
|
+
|
|
23
|
+
```json
|
|
24
|
+
{}
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
This returns collection stats and — critically — the set of payload fields with their types. Remember this result and use it to construct targeted filters; the fields rarely change during a session, though `watcher_enrich` can introduce new metadata keys.
|
|
28
|
+
|
|
29
|
+
### 2. Search (primary context discovery)
|
|
30
|
+
|
|
31
|
+
Use `watcher_search` to find relevant files:
|
|
32
|
+
|
|
33
|
+
```json
|
|
34
|
+
{ "query": "authentication flow", "limit": 5 }
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Results include `chunk_text` in the payload. For quick context, the chunks may be sufficient without reading the full file. Only load the file when you need complete content or plan to edit it.
|
|
38
|
+
|
|
39
|
+
### 3. Read (when needed)
|
|
40
|
+
|
|
41
|
+
Use the `file_path` from search results to read the actual file. Group results by `file_path` when multiple chunks come from the same document.
|
|
42
|
+
|
|
43
|
+
### 4. Fall back (when search misses)
|
|
44
|
+
|
|
45
|
+
If search returns nothing useful or low-scoring results (below ~0.3), the content likely isn't indexed. Fall back to filesystem browsing, directory listing, or grep. This is expected — not everything is in the index.
|
|
46
|
+
|
|
47
|
+
## Tools
|
|
48
|
+
|
|
49
|
+
### `watcher_status`
|
|
50
|
+
|
|
51
|
+
Get service health, collection stats, and discover available payload fields.
|
|
52
|
+
|
|
53
|
+
| Parameter | Type | Required | Description |
|
|
54
|
+
| --------- | ---- | -------- | ----------- |
|
|
55
|
+
| _(none)_ | | | |
|
|
56
|
+
|
|
57
|
+
**Returns:** `status`, `uptime`, `collection` (name, pointCount, dimensions), `payloadFields` (field names with types).
|
|
58
|
+
|
|
59
|
+
### `watcher_search`
|
|
60
|
+
|
|
61
|
+
Semantic similarity search with optional Qdrant filters.
|
|
62
|
+
|
|
63
|
+
| Parameter | Type | Required | Description |
|
|
64
|
+
| --------- | ------ | -------- | ------------------------------------ |
|
|
65
|
+
| `query` | string | yes | Natural-language search query |
|
|
66
|
+
| `limit` | number | no | Max results to return (default: 10) |
|
|
67
|
+
| `filter` | object | no | Qdrant filter object (see below) |
|
|
68
|
+
|
|
69
|
+
**Plain search:**
|
|
70
|
+
|
|
71
|
+
```json
|
|
72
|
+
{ "query": "error handling", "limit": 5 }
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Filtered search:**
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"query": "error handling",
|
|
80
|
+
"limit": 10,
|
|
81
|
+
"filter": {
|
|
82
|
+
"must": [{ "key": "domain", "match": { "value": "backend" } }]
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### `watcher_enrich`
|
|
88
|
+
|
|
89
|
+
Set or update metadata on a document by file path.
|
|
90
|
+
|
|
91
|
+
| Parameter | Type | Required | Description |
|
|
92
|
+
| ---------- | ------ | -------- | ----------------------------------- |
|
|
93
|
+
| `path` | string | yes | File path of the document |
|
|
94
|
+
| `metadata` | object | yes | Key-value metadata to set |
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"path": "docs/auth.md",
|
|
99
|
+
"metadata": { "domain": "auth", "reviewed": true }
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Qdrant Filter Patterns
|
|
104
|
+
|
|
105
|
+
Build filters using fields discovered via `watcher_status`.
|
|
106
|
+
|
|
107
|
+
**Exact match:**
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
{ "must": [{ "key": "domain", "match": { "value": "email" } }] }
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Multiple conditions:**
|
|
114
|
+
|
|
115
|
+
```json
|
|
116
|
+
{
|
|
117
|
+
"must": [
|
|
118
|
+
{ "key": "domain", "match": { "value": "codebase" } },
|
|
119
|
+
{ "key": "file_path", "match": { "text": "auth" } }
|
|
120
|
+
]
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Exclude results:**
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"must_not": [{ "key": "domain", "match": { "value": "codebase" } }]
|
|
129
|
+
}
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
**Full-text match** (tokenized, for longer text fields):
|
|
133
|
+
|
|
134
|
+
```json
|
|
135
|
+
{ "must": [{ "key": "chunk_text", "match": { "text": "authentication" } }] }
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Score Interpretation
|
|
139
|
+
|
|
140
|
+
- **0.7+** — Strong semantic match. Trust these results.
|
|
141
|
+
- **0.3–0.7** — Relevant but may need verification. Worth reading.
|
|
142
|
+
- **Below 0.3** — Likely noise. The content you need may not be indexed.
|
|
143
|
+
|
|
144
|
+
## Tips
|
|
145
|
+
|
|
146
|
+
- **Start broad, then narrow.** A plain query without filters shows you what's available. Add filters once you know which payload field values are relevant.
|
|
147
|
+
- **Group by file.** Multiple chunks from the same file appear as separate results. Look at `file_path` to see when you're getting multiple views of one document.
|
|
148
|
+
- **Chunk text is a preview.** It's useful for quick triage but may be truncated or split mid-sentence. Read the actual file for complete context.
|
|
149
|
+
- **Enrich after analysis.** When you review a document and learn something about it, use `watcher_enrich` to tag it. Future searches can filter on those tags.
|
package/package.json
CHANGED
|
@@ -117,6 +117,11 @@
|
|
|
117
117
|
"license": "BSD-3-Clause",
|
|
118
118
|
"main": "dist/cjs/index.js",
|
|
119
119
|
"module": "dist/mjs/index.js",
|
|
120
|
+
"openclaw": {
|
|
121
|
+
"extensions": [
|
|
122
|
+
"./dist/plugin/index.js"
|
|
123
|
+
]
|
|
124
|
+
},
|
|
120
125
|
"name": "@karmaniverous/jeeves-watcher",
|
|
121
126
|
"publishConfig": {
|
|
122
127
|
"access": "public"
|
|
@@ -158,7 +163,7 @@
|
|
|
158
163
|
},
|
|
159
164
|
"scripts": {
|
|
160
165
|
"generate:schema": "tsx src/config/generate-schema.ts",
|
|
161
|
-
"build": "npm run generate:schema && rimraf dist && cross-env NO_COLOR=1 rollup --config rollup.config.ts --configPlugin @rollup/plugin-typescript",
|
|
166
|
+
"build": "npm run generate:schema && rimraf dist && cross-env NO_COLOR=1 rollup --config rollup.config.ts --configPlugin @rollup/plugin-typescript && node -e \"const fs=require('fs-extra');fs.copySync('plugin/openclaw.plugin.json','dist/plugin/openclaw.plugin.json');fs.copySync('plugin/skill','dist/plugin/skill');\"",
|
|
162
167
|
"changelog": "auto-changelog",
|
|
163
168
|
"diagrams": "cd diagrams && plantuml -tpng -o ../assets -r .",
|
|
164
169
|
"docs": "typedoc",
|
|
@@ -172,5 +177,5 @@
|
|
|
172
177
|
},
|
|
173
178
|
"type": "module",
|
|
174
179
|
"types": "dist/index.d.ts",
|
|
175
|
-
"version": "0.2.6"
|
|
180
|
+
"version": "0.3.1"
|
|
176
181
|
}
|