@karmaniverous/jeeves-watcher 0.4.4 → 0.5.0-1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -7
- package/config.schema.json +394 -157
- package/dist/cjs/index.js +3162 -1455
- package/dist/cli/jeeves-watcher/index.js +4145 -2302
- package/dist/index.d.ts +447 -67
- package/dist/index.iife.js +3691 -1985
- package/dist/index.iife.min.js +1 -1
- package/dist/mjs/index.js +3166 -1463
- package/dist/plugin/index.js +165 -16
- package/dist/plugin/openclaw.plugin.json +2 -2
- package/dist/skills/jeeves-watcher/SKILL.md +413 -0
- package/dist/skills/jeeves-watcher-admin/SKILL.md +200 -0
- package/package.json +4 -2
- package/dist/plugin/skill/SKILL.md +0 -149
package/dist/plugin/index.js
CHANGED
|
@@ -1,17 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @module plugin
|
|
3
|
-
*
|
|
2
|
+
* @module plugin/helpers
|
|
3
|
+
* Shared types and utility functions for the OpenClaw plugin tool registrations.
|
|
4
4
|
*/
|
|
5
5
|
const DEFAULT_API_URL = 'http://127.0.0.1:3458';
|
|
6
|
+
/** Resolve the watcher API base URL from plugin config. */
|
|
6
7
|
function getApiUrl(api) {
|
|
7
8
|
const url = api.config?.plugins?.entries?.['jeeves-watcher']?.config?.apiUrl;
|
|
8
9
|
return typeof url === 'string' ? url : DEFAULT_API_URL;
|
|
9
10
|
}
|
|
11
|
+
/** Format a successful tool result. */
|
|
10
12
|
function ok(data) {
|
|
11
13
|
return {
|
|
12
14
|
content: [{ type: 'text', text: JSON.stringify(data, null, 2) }],
|
|
13
15
|
};
|
|
14
16
|
}
|
|
17
|
+
/** Format an error tool result. */
|
|
15
18
|
function fail(error) {
|
|
16
19
|
const message = error instanceof Error ? error.message : String(error);
|
|
17
20
|
return {
|
|
@@ -19,6 +22,7 @@ function fail(error) {
|
|
|
19
22
|
isError: true,
|
|
20
23
|
};
|
|
21
24
|
}
|
|
25
|
+
/** Fetch JSON from a URL, throwing on non-OK responses. */
|
|
22
26
|
async function fetchJson(url, init) {
|
|
23
27
|
const res = await fetch(url, init);
|
|
24
28
|
if (!res.ok) {
|
|
@@ -26,17 +30,21 @@ async function fetchJson(url, init) {
|
|
|
26
30
|
}
|
|
27
31
|
return res.json();
|
|
28
32
|
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* @module plugin
|
|
36
|
+
* OpenClaw plugin entry point. Registers all jeeves-watcher tools.
|
|
37
|
+
*/
|
|
29
38
|
/** Register all jeeves-watcher tools with the OpenClaw plugin API. */
|
|
30
39
|
function register(api) {
|
|
31
40
|
const baseUrl = getApiUrl(api);
|
|
32
41
|
api.registerTool({
|
|
33
42
|
name: 'watcher_status',
|
|
34
|
-
description: 'Get jeeves-watcher
|
|
43
|
+
description: 'Get jeeves-watcher service health, uptime, and collection statistics.',
|
|
35
44
|
parameters: { type: 'object', properties: {} },
|
|
36
45
|
execute: async () => {
|
|
37
46
|
try {
|
|
38
|
-
|
|
39
|
-
return ok(data);
|
|
47
|
+
return ok(await fetchJson(`${baseUrl}/status`));
|
|
40
48
|
}
|
|
41
49
|
catch (error) {
|
|
42
50
|
return fail(error);
|
|
@@ -55,6 +63,10 @@ function register(api) {
|
|
|
55
63
|
type: 'number',
|
|
56
64
|
description: 'Max results (default 10).',
|
|
57
65
|
},
|
|
66
|
+
offset: {
|
|
67
|
+
type: 'number',
|
|
68
|
+
description: 'Number of results to skip for pagination.',
|
|
69
|
+
},
|
|
58
70
|
filter: {
|
|
59
71
|
type: 'object',
|
|
60
72
|
description: 'Qdrant filter object.',
|
|
@@ -63,16 +75,18 @@ function register(api) {
|
|
|
63
75
|
},
|
|
64
76
|
execute: async (_id, params) => {
|
|
65
77
|
try {
|
|
66
|
-
const
|
|
78
|
+
const body = { query: params.query };
|
|
79
|
+
if (params.limit !== undefined)
|
|
80
|
+
body.limit = params.limit;
|
|
81
|
+
if (params.offset !== undefined)
|
|
82
|
+
body.offset = params.offset;
|
|
83
|
+
if (params.filter !== undefined)
|
|
84
|
+
body.filter = params.filter;
|
|
85
|
+
return ok(await fetchJson(`${baseUrl}/search`, {
|
|
67
86
|
method: 'POST',
|
|
68
87
|
headers: { 'Content-Type': 'application/json' },
|
|
69
|
-
body: JSON.stringify(
|
|
70
|
-
|
|
71
|
-
...(params.limit !== undefined ? { limit: params.limit } : {}),
|
|
72
|
-
...(params.filter !== undefined ? { filter: params.filter } : {}),
|
|
73
|
-
}),
|
|
74
|
-
});
|
|
75
|
-
return ok(data);
|
|
88
|
+
body: JSON.stringify(body),
|
|
89
|
+
}));
|
|
76
90
|
}
|
|
77
91
|
catch (error) {
|
|
78
92
|
return fail(error);
|
|
@@ -98,15 +112,150 @@ function register(api) {
|
|
|
98
112
|
},
|
|
99
113
|
execute: async (_id, params) => {
|
|
100
114
|
try {
|
|
101
|
-
|
|
115
|
+
return ok(await fetchJson(`${baseUrl}/metadata`, {
|
|
102
116
|
method: 'POST',
|
|
103
117
|
headers: { 'Content-Type': 'application/json' },
|
|
104
118
|
body: JSON.stringify({
|
|
105
119
|
path: params.path,
|
|
106
120
|
metadata: params.metadata,
|
|
107
121
|
}),
|
|
108
|
-
});
|
|
109
|
-
|
|
122
|
+
}));
|
|
123
|
+
}
|
|
124
|
+
catch (error) {
|
|
125
|
+
return fail(error);
|
|
126
|
+
}
|
|
127
|
+
},
|
|
128
|
+
}, { optional: true });
|
|
129
|
+
api.registerTool({
|
|
130
|
+
name: 'watcher_query',
|
|
131
|
+
description: 'Query the merged virtual document via JSONPath.',
|
|
132
|
+
parameters: {
|
|
133
|
+
type: 'object',
|
|
134
|
+
required: ['path'],
|
|
135
|
+
properties: {
|
|
136
|
+
path: {
|
|
137
|
+
type: 'string',
|
|
138
|
+
description: 'JSONPath expression.',
|
|
139
|
+
},
|
|
140
|
+
resolve: {
|
|
141
|
+
type: 'array',
|
|
142
|
+
items: { type: 'string', enum: ['files', 'globals'] },
|
|
143
|
+
description: 'Resolution scopes to include (e.g., ["files"], ["globals"], or both).',
|
|
144
|
+
},
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
execute: async (_id, params) => {
|
|
148
|
+
try {
|
|
149
|
+
const body = { path: params.path };
|
|
150
|
+
if (params.resolve !== undefined)
|
|
151
|
+
body.resolve = params.resolve;
|
|
152
|
+
return ok(await fetchJson(`${baseUrl}/config/query`, {
|
|
153
|
+
method: 'POST',
|
|
154
|
+
headers: { 'Content-Type': 'application/json' },
|
|
155
|
+
body: JSON.stringify(body),
|
|
156
|
+
}));
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
return fail(error);
|
|
160
|
+
}
|
|
161
|
+
},
|
|
162
|
+
}, { optional: true });
|
|
163
|
+
api.registerTool({
|
|
164
|
+
name: 'watcher_validate',
|
|
165
|
+
description: 'Validate a candidate config (or current config if omitted). Optionally test file paths against the config to preview rule matching and metadata output.',
|
|
166
|
+
parameters: {
|
|
167
|
+
type: 'object',
|
|
168
|
+
properties: {
|
|
169
|
+
config: {
|
|
170
|
+
type: 'object',
|
|
171
|
+
description: 'Candidate config (partial or full). Omit to validate current config.',
|
|
172
|
+
},
|
|
173
|
+
testPaths: {
|
|
174
|
+
type: 'array',
|
|
175
|
+
items: { type: 'string' },
|
|
176
|
+
description: 'File paths to test against the config for dry-run preview.',
|
|
177
|
+
},
|
|
178
|
+
},
|
|
179
|
+
},
|
|
180
|
+
execute: async (_id, params) => {
|
|
181
|
+
try {
|
|
182
|
+
const body = {};
|
|
183
|
+
if (params.config !== undefined)
|
|
184
|
+
body.config = params.config;
|
|
185
|
+
if (params.testPaths !== undefined)
|
|
186
|
+
body.testPaths = params.testPaths;
|
|
187
|
+
return ok(await fetchJson(`${baseUrl}/config/validate`, {
|
|
188
|
+
method: 'POST',
|
|
189
|
+
headers: { 'Content-Type': 'application/json' },
|
|
190
|
+
body: JSON.stringify(body),
|
|
191
|
+
}));
|
|
192
|
+
}
|
|
193
|
+
catch (error) {
|
|
194
|
+
return fail(error);
|
|
195
|
+
}
|
|
196
|
+
},
|
|
197
|
+
}, { optional: true });
|
|
198
|
+
api.registerTool({
|
|
199
|
+
name: 'watcher_config_apply',
|
|
200
|
+
description: 'Apply a full or partial config. Validates, writes to disk, and triggers configured reindex behavior.',
|
|
201
|
+
parameters: {
|
|
202
|
+
type: 'object',
|
|
203
|
+
required: ['config'],
|
|
204
|
+
properties: {
|
|
205
|
+
config: {
|
|
206
|
+
type: 'object',
|
|
207
|
+
description: 'Full or partial config to apply.',
|
|
208
|
+
},
|
|
209
|
+
},
|
|
210
|
+
},
|
|
211
|
+
execute: async (_id, params) => {
|
|
212
|
+
try {
|
|
213
|
+
return ok(await fetchJson(`${baseUrl}/config/apply`, {
|
|
214
|
+
method: 'POST',
|
|
215
|
+
headers: { 'Content-Type': 'application/json' },
|
|
216
|
+
body: JSON.stringify({ config: params.config }),
|
|
217
|
+
}));
|
|
218
|
+
}
|
|
219
|
+
catch (error) {
|
|
220
|
+
return fail(error);
|
|
221
|
+
}
|
|
222
|
+
},
|
|
223
|
+
}, { optional: true });
|
|
224
|
+
api.registerTool({
|
|
225
|
+
name: 'watcher_reindex',
|
|
226
|
+
description: 'Trigger a reindex of the watched files.',
|
|
227
|
+
parameters: {
|
|
228
|
+
type: 'object',
|
|
229
|
+
properties: {
|
|
230
|
+
scope: {
|
|
231
|
+
type: 'string',
|
|
232
|
+
enum: ['rules', 'full'],
|
|
233
|
+
description: 'Reindex scope: "rules" (default) re-applies inference rules; "full" re-embeds everything.',
|
|
234
|
+
},
|
|
235
|
+
},
|
|
236
|
+
},
|
|
237
|
+
execute: async (_id, params) => {
|
|
238
|
+
try {
|
|
239
|
+
return ok(await fetchJson(`${baseUrl}/config-reindex`, {
|
|
240
|
+
method: 'POST',
|
|
241
|
+
headers: { 'Content-Type': 'application/json' },
|
|
242
|
+
body: JSON.stringify({
|
|
243
|
+
scope: params.scope ?? 'rules',
|
|
244
|
+
}),
|
|
245
|
+
}));
|
|
246
|
+
}
|
|
247
|
+
catch (error) {
|
|
248
|
+
return fail(error);
|
|
249
|
+
}
|
|
250
|
+
},
|
|
251
|
+
}, { optional: true });
|
|
252
|
+
api.registerTool({
|
|
253
|
+
name: 'watcher_issues',
|
|
254
|
+
description: 'Get runtime embedding failures. Shows files that failed processing and why.',
|
|
255
|
+
parameters: { type: 'object', properties: {} },
|
|
256
|
+
execute: async () => {
|
|
257
|
+
try {
|
|
258
|
+
return ok(await fetchJson(`${baseUrl}/issues`));
|
|
110
259
|
}
|
|
111
260
|
catch (error) {
|
|
112
261
|
return fail(error);
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
"id": "jeeves-watcher",
|
|
3
3
|
"name": "Jeeves Watcher",
|
|
4
4
|
"description": "Semantic search and metadata enrichment via a jeeves-watcher instance.",
|
|
5
|
-
"version": "0.
|
|
6
|
-
"skills": ["
|
|
5
|
+
"version": "0.5.0",
|
|
6
|
+
"skills": ["dist/skills/jeeves-watcher", "dist/skills/jeeves-watcher-admin"],
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
|
9
9
|
"additionalProperties": false,
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: jeeves-watcher
|
|
3
|
+
description: >
|
|
4
|
+
Semantic search and metadata enrichment via a jeeves-watcher instance.
|
|
5
|
+
Use when you need to search indexed documents, discover available metadata
|
|
6
|
+
fields, filter by payload values, or enrich document metadata.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# jeeves-watcher — Search & Discovery
|
|
10
|
+
|
|
11
|
+
**Key principle:** The SKILL teaches procedure. The config provides specifics. The assistant discovers everything about a deployment at runtime; nothing about domains, field names, or organizational structure is hardcoded in the SKILL.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
1. **Orient yourself** (once per session) — understand the deployment's organizational strategy and available record types
|
|
16
|
+
2. **Search** — use semantic search with optional metadata filters to find relevant documents
|
|
17
|
+
3. **Read source** — retrieve full file content for complete context
|
|
18
|
+
|
|
19
|
+
## Tools
|
|
20
|
+
|
|
21
|
+
### `watcher_search`
|
|
22
|
+
Semantic search over indexed documents.
|
|
23
|
+
- `query` (string, required) — natural language search query
|
|
24
|
+
- `limit` (number, optional) — max results, default 10
|
|
25
|
+
- `offset` (number, optional) — skip N results for pagination
|
|
26
|
+
- `filter` (object, optional) — Qdrant filter for metadata filtering
|
|
27
|
+
|
|
28
|
+
### `watcher_enrich`
|
|
29
|
+
Set or update metadata on a document.
|
|
30
|
+
- `path` (string, required) — file path of the document
|
|
31
|
+
- `metadata` (object, required) — key-value metadata to merge
|
|
32
|
+
|
|
33
|
+
### `watcher_status`
|
|
34
|
+
Service health check. Returns uptime, collection stats, reindex status.
|
|
35
|
+
|
|
36
|
+
### `watcher_query`
|
|
37
|
+
Query the merged virtual document via JSONPath.
|
|
38
|
+
- `path` (string, required) — JSONPath expression
|
|
39
|
+
- `resolve` (string[], optional) — `["files"]`, `["globals"]`, or `["files","globals"]`
|
|
40
|
+
|
|
41
|
+
## Qdrant Filter Syntax
|
|
42
|
+
|
|
43
|
+
Filters use Qdrant's native JSON filter format, passed as the `filter` parameter to `watcher_search`.
|
|
44
|
+
|
|
45
|
+
### Basic Patterns
|
|
46
|
+
|
|
47
|
+
**Match exact value:**
|
|
48
|
+
```json
|
|
49
|
+
{ "must": [{ "key": "domain", "match": { "value": "email" } }] }
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**Match text (full-text search within field):**
|
|
53
|
+
```json
|
|
54
|
+
{ "must": [{ "key": "chunk_text", "match": { "text": "authentication" } }] }
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Combine conditions (AND):**
|
|
58
|
+
```json
|
|
59
|
+
{
|
|
60
|
+
"must": [
|
|
61
|
+
{ "key": "domain", "match": { "value": "jira" } },
|
|
62
|
+
{ "key": "status", "match": { "value": "In Progress" } }
|
|
63
|
+
]
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Exclude (NOT):**
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"must_not": [{ "key": "domain", "match": { "value": "repos" } }]
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
**Any of (OR):**
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"should": [
|
|
78
|
+
{ "key": "domain", "match": { "value": "email" } },
|
|
79
|
+
{ "key": "domain", "match": { "value": "slack" } }
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Nested (combine AND + NOT):**
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"must": [{ "key": "domain", "match": { "value": "jira" } }],
|
|
88
|
+
"must_not": [{ "key": "status", "match": { "value": "Done" } }]
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Key Differences
|
|
93
|
+
- `match.value` — exact match (case-sensitive, for keyword fields like `domain`, `status`)
|
|
94
|
+
- `match.text` — full-text match (for text fields like `chunk_text`)
|
|
95
|
+
|
|
96
|
+
## Search Result Shape
|
|
97
|
+
|
|
98
|
+
Each result from `watcher_search` contains:
|
|
99
|
+
|
|
100
|
+
| Field | Type | Description |
|
|
101
|
+
|-------|------|-------------|
|
|
102
|
+
| `id` | string | Qdrant point ID |
|
|
103
|
+
| `score` | number | Similarity score (typically 0–1, higher = more relevant; interpret using `search.scoreThresholds`) |
|
|
104
|
+
| `payload.file_path` | string | Source file path |
|
|
105
|
+
| `payload.chunk_text` | string | The matched text chunk |
|
|
106
|
+
| `payload.chunk_index` | number | Chunk position within the file |
|
|
107
|
+
| `payload.total_chunks` | number | Total chunks for this file |
|
|
108
|
+
| `payload.content_hash` | string | Hash of the full document content |
|
|
109
|
+
| `payload.matched_rules` | string[] | Names of inference rules that matched |
|
|
110
|
+
|
|
111
|
+
Additional metadata fields depend on the deployment's inference rules (e.g., `domain`, `status`, `author`). Use `watcher_query` to discover available fields.
|
|
112
|
+
|
|
113
|
+
## JSONPath Patterns for Schema Discovery
|
|
114
|
+
|
|
115
|
+
Use `watcher_query` to explore the merged virtual document. Common patterns:
|
|
116
|
+
|
|
117
|
+
### Orientation
|
|
118
|
+
```
|
|
119
|
+
$.inferenceRules[*].['name','description'] — List all rules with descriptions
|
|
120
|
+
$.search.scoreThresholds — Score interpretation thresholds
|
|
121
|
+
$.slots — Named filter patterns (e.g., memory)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Schema Discovery
|
|
125
|
+
```
|
|
126
|
+
$.inferenceRules[?(@.name=='jira-issue')] — Full rule details
|
|
127
|
+
$.inferenceRules[?(@.name=='jira-issue')].values — Distinct values for a rule
|
|
128
|
+
$.inferenceRules[?(@.name=='jira-issue')].values.status — Values for a specific field
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Helper Enumeration
|
|
132
|
+
```
|
|
133
|
+
$.mapHelpers — All JsonMap helper namespaces
|
|
134
|
+
$.mapHelpers.slack.exports — Exports from the 'slack' helper
|
|
135
|
+
$.templateHelpers — All Handlebars helper namespaces
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Issues
|
|
139
|
+
```
|
|
140
|
+
$.issues — All runtime embedding failures
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Full Config Introspection
|
|
144
|
+
```
|
|
145
|
+
$.schemas — Global named schemas
|
|
146
|
+
$.maps — Named JsonMap transforms
|
|
147
|
+
$.templates — Named Handlebars templates
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Orientation Pattern (Once Per Session)
|
|
153
|
+
|
|
154
|
+
Query the deployment's organizational context and available record types. This information is stable within a session; query once and rely on results for the remainder.
|
|
155
|
+
|
|
156
|
+
**Efficient pattern (two calls):**
|
|
157
|
+
|
|
158
|
+
1. **Top-level context:**
|
|
159
|
+
```
|
|
160
|
+
watcher_query: path="$.['description','search']"
|
|
161
|
+
```
|
|
162
|
+
Returns:
|
|
163
|
+
- `description` — organizational strategy (e.g., how domains are structured, what partitioning means)
|
|
164
|
+
- `search.scoreThresholds` — score interpretation boundaries (strong, relevant, noise)
|
|
165
|
+
|
|
166
|
+
2. **Available record types:**
|
|
167
|
+
```
|
|
168
|
+
watcher_query: path="$.inferenceRules[*].['name','description']"
|
|
169
|
+
```
|
|
170
|
+
Returns list of inference rules with their names and descriptions.
|
|
171
|
+
|
|
172
|
+
**Example result:**
|
|
173
|
+
```json
|
|
174
|
+
[
|
|
175
|
+
{ "name": "email-archive", "description": "Email archive messages" },
|
|
176
|
+
{ "name": "slack-message", "description": "Slack channel messages with channel and author metadata" },
|
|
177
|
+
{ "name": "jira-issue", "description": "Jira issue metadata extracted from issue JSON exports" }
|
|
178
|
+
]
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
The top-level `description` explains this deployment's organizational strategy. Each rule's `description` explains what that specific record type represents. Both levels are useful: one orients, the other enumerates.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## `resolve` Usage Guidance
|
|
186
|
+
|
|
187
|
+
The `resolve` parameter controls which reference layers are expanded in `watcher_query`:
|
|
188
|
+
|
|
189
|
+
- **No `resolve` (default):** Raw config structure with references intact (lightweight)
|
|
190
|
+
- **`resolve: ["files"]`:** Resolve file path references to their contents (e.g., `"schemas/base.json"` → the JSON Schema object)
|
|
191
|
+
- **`resolve: ["globals"]`:** Resolve named schema references (e.g., `"base"` in a rule's schema array → the global schema object)
|
|
192
|
+
- **`resolve: ["files","globals"]`:** Fully inlined, everything expanded
|
|
193
|
+
|
|
194
|
+
**When to use:**
|
|
195
|
+
- **Orientation:** No resolve (just names and descriptions, lightweight)
|
|
196
|
+
- **Query planning:** `resolve: ["files","globals"]` (need complete merged schemas for filter construction)
|
|
197
|
+
- **Browsing global schemas:** `resolve: ["files"]` (see schema contents but keep named references visible for DRY structure understanding)
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Query Planning (Per Search Task)
|
|
202
|
+
|
|
203
|
+
Identify relevant rule(s) from the orientation model, then retrieve their schemas:
|
|
204
|
+
|
|
205
|
+
**Retrieve complete schema for a rule:**
|
|
206
|
+
```
|
|
207
|
+
watcher_query: path="$.inferenceRules[?(@.name=='jira-issue')].schema"
|
|
208
|
+
resolve=["files","globals"]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Returns the fully merged schema with properties, types, `set` provenance, `uiHint`, `enum`, etc.
|
|
212
|
+
|
|
213
|
+
**For select/multiselect fields without `enum` in schema:**
|
|
214
|
+
```
|
|
215
|
+
watcher_query: path="$.inferenceRules[?(@.name=='jira-issue')].values.status"
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
Retrieves valid filter values from the runtime values index (distinct values accumulated during embedding).
|
|
219
|
+
|
|
220
|
+
**When search results span multiple rules** (indicated by `matched_rules` on results): query each unique rule's schema separately and merge mentally. Most result sets share the same rule combination, so this is typically one or two queries, not one per result.
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## uiHint → Qdrant Filter Mapping
|
|
225
|
+
|
|
226
|
+
Use `uiHint` to determine filter construction strategy. **This table is explicit, not intuited:**
|
|
227
|
+
|
|
228
|
+
| `uiHint` | Qdrant filter | Notes |
|
|
229
|
+
|----------|--------------|-------|
|
|
230
|
+
| `text` | `{ "key": "<field>", "match": { "text": "<value>" } }` | Substring/keyword match |
|
|
231
|
+
| `select` | `{ "key": "<field>", "match": { "value": "<enum_value>" } }` | Exact match; use `enum` values from schema or runtime values index |
|
|
232
|
+
| `multiselect` | `{ "key": "<field>", "match": { "value": "<enum_value>" } }` | Any-element match on array field; use `enum` or runtime values index |
|
|
233
|
+
| `date` | `{ "key": "<field>", "range": { "gte": <unix_ts>, "lt": <unix_ts> } }` | Either bound optional for open-ended ranges (e.g., "after January" → `gte` only) |
|
|
234
|
+
| `number` | `{ "key": "<field>", "range": { "gte": <n>, "lte": <n> } }` | Either bound optional for open-ended ranges |
|
|
235
|
+
| `check` | `{ "key": "<field>", "match": { "value": true } }` | Boolean match |
|
|
236
|
+
| *(absent)* | Do not use in filters | Internal bookkeeping field, not intended for search |
|
|
237
|
+
|
|
238
|
+
**Fallback:** If a `select`/`multiselect` field has neither `enum` in schema nor values in the index, treat it as `text` (substring match instead of exact match).
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Qdrant Filter Combinators
|
|
243
|
+
|
|
244
|
+
Compose individual field conditions into complex queries using three combinators:
|
|
245
|
+
|
|
246
|
+
| Combinator | Semantics | Use case |
|
|
247
|
+
|-----------|-----------|----------|
|
|
248
|
+
| `must` | AND — all conditions required | Intersecting constraints (domain + date range + assignee) |
|
|
249
|
+
| `should` | OR — at least one must match | Alternative values, fuzzy criteria ("assigned to X or Y") |
|
|
250
|
+
| `must_not` | Exclusion — any match triggers exclude | Filtering out noise (exclude Done, exclude codebase domain) |
|
|
251
|
+
|
|
252
|
+
**Combinators nest arbitrarily for complex boolean logic:**
|
|
253
|
+
```json
|
|
254
|
+
{
|
|
255
|
+
"must": [
|
|
256
|
+
{ "key": "domain", "match": { "value": "jira" } },
|
|
257
|
+
{ "key": "created", "range": { "gte": 1735689600 } }
|
|
258
|
+
],
|
|
259
|
+
"should": [
|
|
260
|
+
{ "key": "assignee", "match": { "value": "Jason Williscroft" } },
|
|
261
|
+
{ "is_null": { "key": "assignee" } }
|
|
262
|
+
],
|
|
263
|
+
"must_not": [
|
|
264
|
+
{ "key": "status", "match": { "value": "Done" } }
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
A consuming UI will necessarily compose simple single-field filters. The assistant can compose deeply complex queries combining multiple fields, nested boolean logic, and open-ended ranges to precisely target what it needs.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Search Execution
|
|
274
|
+
|
|
275
|
+
**Plain semantic search is valid and often sufficient.** Not every query needs metadata filters. When the user's question is broad or exploratory, a natural language query with no filter object is the right starting point. Add filters to narrow, not as a default.
|
|
276
|
+
|
|
277
|
+
**Result limit guidance:**
|
|
278
|
+
- Default: 10 results
|
|
279
|
+
- Broad discovery / exploratory: 20–30, apply score threshold cutoff from config
|
|
280
|
+
- Targeted retrieval with tight filters: 5
|
|
281
|
+
- Cross-domain sweep: 15–20, no domain filter, use score to separate signal from noise
|
|
282
|
+
|
|
283
|
+
---
|
|
284
|
+
|
|
285
|
+
## Search Result Shape
|
|
286
|
+
|
|
287
|
+
**Qdrant output (stable across all configs):**
|
|
288
|
+
```json
|
|
289
|
+
{
|
|
290
|
+
"id": "<point_id>",
|
|
291
|
+
"score": 0.82,
|
|
292
|
+
"payload": {
|
|
293
|
+
"file_path": "j:/domains/jira/VCN/issue/WEB-123.json",
|
|
294
|
+
"chunk_index": 0,
|
|
295
|
+
"total_chunks": 1,
|
|
296
|
+
"chunk_text": "...",
|
|
297
|
+
"content_hash": "...",
|
|
298
|
+
"matched_rules": ["jira-issue", "json-subject"],
|
|
299
|
+
...config-defined metadata fields...
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
**System fields present on every result** (watcher-managed, not config-defined):
|
|
305
|
+
- `file_path` — source file path
|
|
306
|
+
- `chunk_index` / `total_chunks` — chunk position within document
|
|
307
|
+
- `chunk_text` — the embedded text content
|
|
308
|
+
- `content_hash` — content fingerprint for deduplication
|
|
309
|
+
- `matched_rules` — inference rules that produced this point's metadata
|
|
310
|
+
|
|
311
|
+
**All other payload fields are config-defined** (via inference rule schemas).
|
|
312
|
+
|
|
313
|
+
Refer to Qdrant documentation for the complete search response envelope.
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## Post-Processing Guidance
|
|
318
|
+
|
|
319
|
+
### Score Interpretation
|
|
320
|
+
Use `scoreThresholds` from config (queried during orientation). Values are deployment-specific, constrained to [-1, 1]:
|
|
321
|
+
- `strong` — minimum score for a strong match
|
|
322
|
+
- `relevant` — minimum score for relevance
|
|
323
|
+
- `noise` — threshold below which results should be treated as noise
|
|
324
|
+
|
|
325
|
+
### Chunk Grouping
|
|
326
|
+
Multiple results with the same `file_path` are chunks of one document. Read the full file for complete context.
|
|
327
|
+
|
|
328
|
+
### Schema Lookup
|
|
329
|
+
Use `matched_rules` on results to look up applicable schemas for metadata interpretation:
|
|
330
|
+
```
|
|
331
|
+
watcher_query: path="$.inferenceRules[?(@.name=='jira-issue')].schema"
|
|
332
|
+
resolve=["files","globals"]
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Full Context
|
|
336
|
+
Search gives you chunks; use `read` with `file_path` for the complete document.
|
|
337
|
+
|
|
338
|
+
---
|
|
339
|
+
|
|
340
|
+
## Path Testing
|
|
341
|
+
|
|
342
|
+
When uncertain whether a file is indexed, inspect the relevant rule's `match` definition or use the path test endpoint. To inspect a rule's `match` definition:
|
|
343
|
+
```
|
|
344
|
+
watcher_query: path="$.inferenceRules[?(@.name=='<rule>')].match"
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
Or check whether a specific path would match using the path test endpoint. Its response:
|
|
348
|
+
- Returns matching rule names and watch scope status
|
|
349
|
+
- Empty `rules` array means no inference rules match
|
|
350
|
+
- `watched: false` means the path falls outside watch paths or is excluded by ignore patterns
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
## Diagnostics
|
|
355
|
+
|
|
356
|
+
Check the issues endpoint for failed embeddings:
|
|
357
|
+
```
|
|
358
|
+
watcher_query: path="$.issues"
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
**Issues are self-healing:** resolved on successful re-process. The issues file always represents the current set of unresolved problems: a live todo list.
|
|
362
|
+
|
|
363
|
+
**Issue types:**
|
|
364
|
+
- `type_collision` — multiple rules declare the same property with incompatible types (includes `property`, `rules[]`, `types[]`)
|
|
365
|
+
- `interpolation_error` — `set` template path doesn't resolve (includes `property`, `rule`)
|
|
366
|
+
|
|
367
|
+
---
|
|
368
|
+
|
|
369
|
+
## Enrichment
|
|
370
|
+
|
|
371
|
+
Use `watcher_enrich` to tag documents after analysis (e.g., `reviewed: true`, project labels).
|
|
372
|
+
|
|
373
|
+
**Metadata is validated against the file's matched rule schemas.** Validation errors return structured messages:
|
|
374
|
+
```json
|
|
375
|
+
{
|
|
376
|
+
"error": "Validation failed",
|
|
377
|
+
"details": [
|
|
378
|
+
{
|
|
379
|
+
"property": "priority",
|
|
380
|
+
"expected": "string",
|
|
381
|
+
"received": "number",
|
|
382
|
+
"rule": "jira-issue",
|
|
383
|
+
"message": "Property 'priority' is declared as string in jira-issue schema, received number"
|
|
384
|
+
}
|
|
385
|
+
]
|
|
386
|
+
}
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
## Memory Recall
|
|
392
|
+
|
|
393
|
+
If `$.slots.memory` is present during orientation, this instance indexes memory files. Before answering questions about prior work, decisions, dates, people, preferences, or todos:
|
|
394
|
+
|
|
395
|
+
1. Search with `watcher_search` using the memory slot filter
|
|
396
|
+
2. Use `read` with offset/limit for full context from matched files
|
|
397
|
+
3. Include `Source: <file_path>` citations in your response
|
|
398
|
+
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
## Error Handling
|
|
402
|
+
|
|
403
|
+
If the watcher is unreachable:
|
|
404
|
+
- Inform the user that semantic search is temporarily unavailable
|
|
405
|
+
- Fall back to direct `read` for known file paths
|
|
406
|
+
- Do not retry silently in a loop
|
|
407
|
+
|
|
408
|
+
---
|
|
409
|
+
|
|
410
|
+
## References
|
|
411
|
+
|
|
412
|
+
- [JSONPath Plus documentation](https://www.npmjs.com/package/jsonpath-plus) for JSONPath syntax
|
|
413
|
+
- [Qdrant filtering documentation](https://qdrant.tech/documentation/concepts/filtering/) for advanced query patterns and search response format
|