@probelabs/visor 0.1.181 → 0.1.182
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/defaults/code-talk.yaml +80 -14
- package/defaults/engineer.yaml +33 -15
- package/defaults/skills/code-explorer.yaml +5 -0
- package/dist/agent-protocol/a2a-frontend.d.ts +10 -0
- package/dist/agent-protocol/a2a-frontend.d.ts.map +1 -1
- package/dist/agent-protocol/task-evaluator.d.ts +52 -0
- package/dist/agent-protocol/task-evaluator.d.ts.map +1 -0
- package/dist/agent-protocol/task-store.d.ts +5 -3
- package/dist/agent-protocol/task-store.d.ts.map +1 -1
- package/dist/agent-protocol/tasks-cli-handler.d.ts.map +1 -1
- package/dist/agent-protocol/tasks-tui.d.ts +34 -0
- package/dist/agent-protocol/tasks-tui.d.ts.map +1 -0
- package/dist/agent-protocol/trace-serializer.d.ts +90 -0
- package/dist/agent-protocol/trace-serializer.d.ts.map +1 -0
- package/dist/agent-protocol/track-execution.d.ts +2 -0
- package/dist/agent-protocol/track-execution.d.ts.map +1 -1
- package/dist/cli-main.d.ts.map +1 -1
- package/dist/defaults/code-talk.yaml +80 -14
- package/dist/defaults/engineer.yaml +33 -15
- package/dist/defaults/skills/code-explorer.yaml +5 -0
- package/dist/docs/commands.md +57 -14
- package/dist/docs/configuration.md +2 -0
- package/dist/docs/guides/graceful-restart.md +178 -0
- package/dist/docs/observability.md +69 -0
- package/dist/docs/production-deployment.md +17 -0
- package/dist/email/polling-runner.d.ts +4 -0
- package/dist/email/polling-runner.d.ts.map +1 -1
- package/dist/generated/config-schema.d.ts +70 -6
- package/dist/generated/config-schema.d.ts.map +1 -1
- package/dist/generated/config-schema.json +73 -6
- package/dist/index.js +5006 -886
- package/dist/output/traces/{run-2026-03-17T13-58-29-402Z.ndjson → run-2026-03-18T19-02-50-465Z.ndjson} +84 -84
- package/dist/{traces/run-2026-03-17T13-59-10-403Z.ndjson → output/traces/run-2026-03-18T19-03-30-428Z.ndjson} +2037 -2037
- package/dist/providers/mcp-custom-sse-server.d.ts +4 -0
- package/dist/providers/mcp-custom-sse-server.d.ts.map +1 -1
- package/dist/runners/graceful-restart.d.ts +46 -0
- package/dist/runners/graceful-restart.d.ts.map +1 -0
- package/dist/runners/mcp-server-runner.d.ts +12 -0
- package/dist/runners/mcp-server-runner.d.ts.map +1 -1
- package/dist/runners/runner-factory.d.ts.map +1 -1
- package/dist/runners/runner-host.d.ts +12 -0
- package/dist/runners/runner-host.d.ts.map +1 -1
- package/dist/runners/runner.d.ts +12 -0
- package/dist/runners/runner.d.ts.map +1 -1
- package/dist/sdk/{a2a-frontend-IWOUJOIZ.mjs → a2a-frontend-4LP3MLTS.mjs} +47 -5
- package/dist/sdk/a2a-frontend-4LP3MLTS.mjs.map +1 -0
- package/dist/sdk/a2a-frontend-5J3UNFY4.mjs +1718 -0
- package/dist/sdk/a2a-frontend-5J3UNFY4.mjs.map +1 -0
- package/dist/sdk/{a2a-frontend-BDACLGMA.mjs → a2a-frontend-MU5EO2HZ.mjs} +35 -1
- package/dist/sdk/a2a-frontend-MU5EO2HZ.mjs.map +1 -0
- package/dist/sdk/{check-provider-registry-4YKTEDKF.mjs → check-provider-registry-MHXQGUNN.mjs} +7 -7
- package/dist/sdk/{check-provider-registry-4YFVBGYU.mjs → check-provider-registry-RRWCXSTG.mjs} +3 -3
- package/dist/sdk/{check-provider-registry-67ZLGDDQ.mjs → check-provider-registry-Y33CRFVD.mjs} +7 -7
- package/dist/sdk/{chunk-DGIH6EX3.mjs → chunk-4AXAVXG5.mjs} +151 -281
- package/dist/sdk/chunk-4AXAVXG5.mjs.map +1 -0
- package/dist/sdk/{chunk-VMVIM4JB.mjs → chunk-4I3TJ7UJ.mjs} +37 -7
- package/dist/sdk/chunk-4I3TJ7UJ.mjs.map +1 -0
- package/dist/sdk/{chunk-VXC2XNQJ.mjs → chunk-5J3DNRF7.mjs} +3 -3
- package/dist/sdk/{chunk-7YZSSO4X.mjs → chunk-6DPPP7LD.mjs} +10 -10
- package/dist/sdk/chunk-7ERVRLDV.mjs +296 -0
- package/dist/sdk/chunk-7ERVRLDV.mjs.map +1 -0
- package/dist/sdk/{chunk-4DVP6KVC.mjs → chunk-7Z2WHX2J.mjs} +71 -30
- package/dist/sdk/chunk-7Z2WHX2J.mjs.map +1 -0
- package/dist/sdk/chunk-ANUT54HW.mjs +1502 -0
- package/dist/sdk/chunk-ANUT54HW.mjs.map +1 -0
- package/dist/sdk/{chunk-J73GEFPT.mjs → chunk-DHETLQIX.mjs} +2 -2
- package/dist/sdk/{chunk-QGBASDYP.mjs → chunk-JCOSKBMP.mjs} +71 -30
- package/dist/sdk/chunk-JCOSKBMP.mjs.map +1 -0
- package/dist/sdk/chunk-MK7ONH47.mjs +739 -0
- package/dist/sdk/chunk-MK7ONH47.mjs.map +1 -0
- package/dist/sdk/chunk-QXT47ZHR.mjs +390 -0
- package/dist/sdk/chunk-QXT47ZHR.mjs.map +1 -0
- package/dist/sdk/chunk-V75NEIXL.mjs +296 -0
- package/dist/sdk/chunk-V75NEIXL.mjs.map +1 -0
- package/dist/sdk/chunk-ZOF5QT6U.mjs +5943 -0
- package/dist/sdk/chunk-ZOF5QT6U.mjs.map +1 -0
- package/dist/sdk/{config-TSA5FUOM.mjs → config-2STD74CJ.mjs} +2 -2
- package/dist/sdk/config-JE4HKTWW.mjs +16 -0
- package/dist/sdk/{failure-condition-evaluator-HTPB5FYW.mjs → failure-condition-evaluator-5DZYMCGW.mjs} +4 -4
- package/dist/sdk/failure-condition-evaluator-R6DCDJAV.mjs +18 -0
- package/dist/sdk/{github-frontend-3SDFCCKI.mjs → github-frontend-3PSCKPAJ.mjs} +4 -4
- package/dist/sdk/github-frontend-L3F5JXPJ.mjs +1394 -0
- package/dist/sdk/github-frontend-L3F5JXPJ.mjs.map +1 -0
- package/dist/sdk/{host-QE4L7UXE.mjs → host-54CHV2LW.mjs} +3 -3
- package/dist/sdk/{host-VBBSLUWG.mjs → host-WAU6CT42.mjs} +3 -3
- package/dist/sdk/{host-CVH2CSHM.mjs → host-X5ZZCEWN.mjs} +2 -2
- package/dist/sdk/{routing-YVMTKFDZ.mjs → routing-CVQT4KHX.mjs} +5 -5
- package/dist/sdk/routing-EBAE5SSO.mjs +26 -0
- package/dist/sdk/{schedule-tool-Z5VG67JK.mjs → schedule-tool-POY3CDZL.mjs} +7 -7
- package/dist/sdk/{schedule-tool-ADUXTCY7.mjs → schedule-tool-R2OAATUS.mjs} +7 -7
- package/dist/sdk/{schedule-tool-ZMX3Y7LF.mjs → schedule-tool-Z6QYL2B3.mjs} +3 -3
- package/dist/sdk/{schedule-tool-handler-N7UNABOA.mjs → schedule-tool-handler-J4NUETJ6.mjs} +3 -3
- package/dist/sdk/{schedule-tool-handler-PCERK6ZZ.mjs → schedule-tool-handler-JMAKHPI7.mjs} +7 -7
- package/dist/sdk/{schedule-tool-handler-QOJVFRB4.mjs → schedule-tool-handler-MWFUIQKR.mjs} +7 -7
- package/dist/sdk/sdk.d.mts +33 -0
- package/dist/sdk/sdk.d.ts +33 -0
- package/dist/sdk/sdk.js +2058 -342
- package/dist/sdk/sdk.js.map +1 -1
- package/dist/sdk/sdk.mjs +6 -6
- package/dist/sdk/task-evaluator-HLNXKKVV.mjs +1278 -0
- package/dist/sdk/task-evaluator-HLNXKKVV.mjs.map +1 -0
- package/dist/sdk/{trace-helpers-KXDOJWBL.mjs → trace-helpers-HL5FBX65.mjs} +3 -3
- package/dist/sdk/trace-helpers-WJXYVV4S.mjs +29 -0
- package/dist/sdk/trace-helpers-WJXYVV4S.mjs.map +1 -0
- package/dist/sdk/trace-reader-ZY77OFNM.mjs +266 -0
- package/dist/sdk/trace-reader-ZY77OFNM.mjs.map +1 -0
- package/dist/sdk/track-execution-MKIQXP2C.mjs +136 -0
- package/dist/sdk/track-execution-MKIQXP2C.mjs.map +1 -0
- package/dist/sdk/track-execution-YUXQ6WQH.mjs +136 -0
- package/dist/sdk/track-execution-YUXQ6WQH.mjs.map +1 -0
- package/dist/sdk/{workflow-check-provider-NTHC5ZBF.mjs → workflow-check-provider-SE5I7EMA.mjs} +7 -7
- package/dist/sdk/workflow-check-provider-SE5I7EMA.mjs.map +1 -0
- package/dist/sdk/{workflow-check-provider-SRIMWKLQ.mjs → workflow-check-provider-VKYGI5GK.mjs} +3 -3
- package/dist/sdk/workflow-check-provider-VKYGI5GK.mjs.map +1 -0
- package/dist/sdk/{workflow-check-provider-CJXW2Z4F.mjs → workflow-check-provider-YDGZRI3Z.mjs} +7 -7
- package/dist/sdk/workflow-check-provider-YDGZRI3Z.mjs.map +1 -0
- package/dist/slack/socket-runner.d.ts +12 -0
- package/dist/slack/socket-runner.d.ts.map +1 -1
- package/dist/teams/webhook-runner.d.ts +4 -0
- package/dist/teams/webhook-runner.d.ts.map +1 -1
- package/dist/telegram/polling-runner.d.ts +2 -0
- package/dist/telegram/polling-runner.d.ts.map +1 -1
- package/dist/traces/{run-2026-03-17T13-58-29-402Z.ndjson → run-2026-03-18T19-02-50-465Z.ndjson} +84 -84
- package/dist/{output/traces/run-2026-03-17T13-59-10-403Z.ndjson → traces/run-2026-03-18T19-03-30-428Z.ndjson} +2037 -2037
- package/dist/types/config.d.ts +33 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/whatsapp/webhook-runner.d.ts +4 -0
- package/dist/whatsapp/webhook-runner.d.ts.map +1 -1
- package/package.json +2 -2
- package/dist/sdk/a2a-frontend-BDACLGMA.mjs.map +0 -1
- package/dist/sdk/a2a-frontend-IWOUJOIZ.mjs.map +0 -1
- package/dist/sdk/chunk-4DVP6KVC.mjs.map +0 -1
- package/dist/sdk/chunk-DGIH6EX3.mjs.map +0 -1
- package/dist/sdk/chunk-QGBASDYP.mjs.map +0 -1
- package/dist/sdk/chunk-VMVIM4JB.mjs.map +0 -1
- /package/dist/sdk/{check-provider-registry-4YFVBGYU.mjs.map → check-provider-registry-MHXQGUNN.mjs.map} +0 -0
- /package/dist/sdk/{check-provider-registry-4YKTEDKF.mjs.map → check-provider-registry-RRWCXSTG.mjs.map} +0 -0
- /package/dist/sdk/{check-provider-registry-67ZLGDDQ.mjs.map → check-provider-registry-Y33CRFVD.mjs.map} +0 -0
- /package/dist/sdk/{chunk-VXC2XNQJ.mjs.map → chunk-5J3DNRF7.mjs.map} +0 -0
- /package/dist/sdk/{chunk-7YZSSO4X.mjs.map → chunk-6DPPP7LD.mjs.map} +0 -0
- /package/dist/sdk/{chunk-J73GEFPT.mjs.map → chunk-DHETLQIX.mjs.map} +0 -0
- /package/dist/sdk/{config-TSA5FUOM.mjs.map → config-2STD74CJ.mjs.map} +0 -0
- /package/dist/sdk/{failure-condition-evaluator-HTPB5FYW.mjs.map → config-JE4HKTWW.mjs.map} +0 -0
- /package/dist/sdk/{routing-YVMTKFDZ.mjs.map → failure-condition-evaluator-5DZYMCGW.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-ADUXTCY7.mjs.map → failure-condition-evaluator-R6DCDJAV.mjs.map} +0 -0
- /package/dist/sdk/{github-frontend-3SDFCCKI.mjs.map → github-frontend-3PSCKPAJ.mjs.map} +0 -0
- /package/dist/sdk/{host-CVH2CSHM.mjs.map → host-54CHV2LW.mjs.map} +0 -0
- /package/dist/sdk/{host-QE4L7UXE.mjs.map → host-WAU6CT42.mjs.map} +0 -0
- /package/dist/sdk/{host-VBBSLUWG.mjs.map → host-X5ZZCEWN.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-Z5VG67JK.mjs.map → routing-CVQT4KHX.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-ZMX3Y7LF.mjs.map → routing-EBAE5SSO.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-handler-N7UNABOA.mjs.map → schedule-tool-POY3CDZL.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-handler-PCERK6ZZ.mjs.map → schedule-tool-R2OAATUS.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-handler-QOJVFRB4.mjs.map → schedule-tool-Z6QYL2B3.mjs.map} +0 -0
- /package/dist/sdk/{trace-helpers-KXDOJWBL.mjs.map → schedule-tool-handler-J4NUETJ6.mjs.map} +0 -0
- /package/dist/sdk/{workflow-check-provider-CJXW2Z4F.mjs.map → schedule-tool-handler-JMAKHPI7.mjs.map} +0 -0
- /package/dist/sdk/{workflow-check-provider-NTHC5ZBF.mjs.map → schedule-tool-handler-MWFUIQKR.mjs.map} +0 -0
- /package/dist/sdk/{workflow-check-provider-SRIMWKLQ.mjs.map → trace-helpers-HL5FBX65.mjs.map} +0 -0
|
@@ -136,18 +136,59 @@ outputs:
|
|
|
136
136
|
value_js: |
|
|
137
137
|
const result = outputs?.['explore-code'];
|
|
138
138
|
if (result?.answer) return result.answer;
|
|
139
|
-
const
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
if (typeof routeNotes === 'string' && routeNotes.trim().length > 0) {
|
|
143
|
-
return { text: routeNotes };
|
|
139
|
+
const resultText = typeof result?.text === 'string' ? result.text.trim() : '';
|
|
140
|
+
if (resultText.length > 0) {
|
|
141
|
+
return { text: resultText };
|
|
144
142
|
}
|
|
145
|
-
|
|
146
|
-
const
|
|
147
|
-
if (
|
|
148
|
-
|
|
143
|
+
const routeNotes = outputs?.['setup-projects']?.routing_decision?.notes;
|
|
144
|
+
const checkoutProjects = outputs?.['setup-projects']?.checkout_projects;
|
|
145
|
+
if (
|
|
146
|
+
(!Array.isArray(checkoutProjects) || checkoutProjects.length === 0) &&
|
|
147
|
+
typeof routeNotes === 'string' &&
|
|
148
|
+
routeNotes.trim().length > 0
|
|
149
|
+
) {
|
|
150
|
+
return { text: routeNotes.trim() };
|
|
151
|
+
}
|
|
152
|
+
return { text: 'Code exploration did not produce an answer.' };
|
|
153
|
+
|
|
154
|
+
- name: exploration_status
|
|
155
|
+
description: Outcome of the exploration step
|
|
156
|
+
value_js: |
|
|
157
|
+
const result = outputs?.['explore-code'];
|
|
158
|
+
if (result?.answer?.text) return 'success';
|
|
159
|
+
const resultText = typeof result?.text === 'string' ? result.text.trim() : '';
|
|
160
|
+
if (resultText.length > 0) {
|
|
161
|
+
if (/timed out/i.test(resultText)) return 'timeout';
|
|
162
|
+
return 'failed';
|
|
149
163
|
}
|
|
150
|
-
|
|
164
|
+
const routeNotes = outputs?.['setup-projects']?.routing_decision?.notes;
|
|
165
|
+
const checkoutProjects = outputs?.['setup-projects']?.checkout_projects;
|
|
166
|
+
if (
|
|
167
|
+
(!Array.isArray(checkoutProjects) || checkoutProjects.length === 0) &&
|
|
168
|
+
typeof routeNotes === 'string' &&
|
|
169
|
+
routeNotes.trim().length > 0
|
|
170
|
+
) {
|
|
171
|
+
return 'no_projects';
|
|
172
|
+
}
|
|
173
|
+
return 'failed';
|
|
174
|
+
|
|
175
|
+
- name: exploration_error
|
|
176
|
+
description: Timeout or failure detail when exploration did not return a real answer
|
|
177
|
+
value_js: |
|
|
178
|
+
const result = outputs?.['explore-code'];
|
|
179
|
+
if (result?.answer?.text) return '';
|
|
180
|
+
const resultText = typeof result?.text === 'string' ? result.text.trim() : '';
|
|
181
|
+
if (resultText.length > 0) return resultText;
|
|
182
|
+
const routeNotes = outputs?.['setup-projects']?.routing_decision?.notes;
|
|
183
|
+
const checkoutProjects = outputs?.['setup-projects']?.checkout_projects;
|
|
184
|
+
if (
|
|
185
|
+
(!Array.isArray(checkoutProjects) || checkoutProjects.length === 0) &&
|
|
186
|
+
typeof routeNotes === 'string' &&
|
|
187
|
+
routeNotes.trim().length > 0
|
|
188
|
+
) {
|
|
189
|
+
return routeNotes.trim();
|
|
190
|
+
}
|
|
191
|
+
return 'Code exploration did not produce an answer.';
|
|
151
192
|
|
|
152
193
|
- name: references
|
|
153
194
|
description: Code/doc references from exploration
|
|
@@ -174,11 +215,20 @@ outputs:
|
|
|
174
215
|
const result = outputs?.['explore-code'];
|
|
175
216
|
const confidence = result?.confidence;
|
|
176
217
|
const reason = result?.confidence_reason;
|
|
177
|
-
if (typeof reason === 'string') return reason;
|
|
218
|
+
if (typeof reason === 'string' && reason.trim().length > 0) return reason;
|
|
178
219
|
if (confidence === 'high') return '';
|
|
220
|
+
const resultText = typeof result?.text === 'string' ? result.text.trim() : '';
|
|
221
|
+
if (resultText.length > 0) return resultText;
|
|
179
222
|
const routeNotes = outputs?.['setup-projects']?.routing_decision?.notes;
|
|
180
|
-
|
|
181
|
-
|
|
223
|
+
const checkoutProjects = outputs?.['setup-projects']?.checkout_projects;
|
|
224
|
+
if (
|
|
225
|
+
(!Array.isArray(checkoutProjects) || checkoutProjects.length === 0) &&
|
|
226
|
+
typeof routeNotes === 'string' &&
|
|
227
|
+
routeNotes.trim().length > 0
|
|
228
|
+
) {
|
|
229
|
+
return routeNotes.trim();
|
|
230
|
+
}
|
|
231
|
+
return 'Code exploration did not produce an answer.';
|
|
182
232
|
|
|
183
233
|
- name: projects_explored
|
|
184
234
|
description: Which project IDs were checked out
|
|
@@ -261,7 +311,7 @@ steps:
|
|
|
261
311
|
skip_code_context: true
|
|
262
312
|
enableDelegate: true
|
|
263
313
|
enableExecutePlan: false
|
|
264
|
-
max_iterations:
|
|
314
|
+
max_iterations: 100
|
|
265
315
|
prompt_type: code-explorer
|
|
266
316
|
allowBash: true
|
|
267
317
|
bashConfig:
|
|
@@ -415,8 +465,17 @@ steps:
|
|
|
415
465
|
- Each delegate should answer ONE specific question (not "look at the code")
|
|
416
466
|
- Run multiple delegates in PARALLEL for different hypotheses or components
|
|
417
467
|
- Ask delegates to return specific file paths and line numbers
|
|
468
|
+
- Do NOT delegate or re-search the same question twice in one investigation
|
|
469
|
+
- If a delegate returns enough evidence for the current claim, stop and use it
|
|
418
470
|
|
|
419
471
|
Relay complete data from tools — do not summarize or compress tool output.
|
|
472
|
+
|
|
473
|
+
Investigation scope:
|
|
474
|
+
- Stop once you have enough evidence to answer the question accurately
|
|
475
|
+
- If this is an implementation handoff for engineer, optimize for the minimum
|
|
476
|
+
sufficient handoff: repo, branch/ref, target files, relevant tests, and the
|
|
477
|
+
key evidence explaining why those files matter
|
|
478
|
+
- Prefer one search followed by targeted extract over repeated broad searches
|
|
420
479
|
</instructions>
|
|
421
480
|
|
|
422
481
|
{% if inputs.exploration_prompt %}
|
|
@@ -468,6 +527,13 @@ steps:
|
|
|
468
527
|
implementation, then consult docs to confirm semantics. When multiple projects
|
|
469
528
|
are involved, trace data and config flow across them.
|
|
470
529
|
|
|
530
|
+
Efficiency rules for this investigation:
|
|
531
|
+
- Reuse evidence already found in earlier tool results
|
|
532
|
+
- If the question is narrow and the relevant files are already identified,
|
|
533
|
+
stop exploring and answer
|
|
534
|
+
- If the next consumer is engineer, avoid broad code archaeology once the
|
|
535
|
+
implementation target and validation path are clear
|
|
536
|
+
|
|
471
537
|
Synthesize a single answer:
|
|
472
538
|
- Ground everything in code/docs evidence
|
|
473
539
|
- End with a "## References" section with clickable GitHub links:
|
|
@@ -338,19 +338,25 @@ steps:
|
|
|
338
338
|
<delegation>
|
|
339
339
|
Use the delegate tool for parallel work, plan validation, and build discovery.
|
|
340
340
|
|
|
341
|
-
|
|
341
|
+
FIRST decide whether delegation is needed.
|
|
342
|
+
|
|
343
|
+
Delegate "Discover build system" ONLY when the exact commands are not already
|
|
344
|
+
available in the provided context, code-explorer output, project metadata, or
|
|
345
|
+
recent tool results.
|
|
342
346
|
- Check: Makefile, package.json (scripts), Cargo.toml, go.mod, pyproject.toml
|
|
343
347
|
- Check: CI config (.github/workflows/, .gitlab-ci.yml, Jenkinsfile)
|
|
344
348
|
- Check: README for build/test/lint instructions
|
|
345
349
|
- Return the EXACT commands for: build, test, lint/format, and any pre-commit hooks
|
|
346
350
|
- Example output: "build: make, test: make test, lint: gofmt -l . && golangci-lint run"
|
|
347
|
-
|
|
351
|
+
- Reuse these commands throughout the session. Do NOT rediscover them once known.
|
|
348
352
|
|
|
349
|
-
|
|
353
|
+
Delegate "Plan validation" ONLY when the task is broad, high-risk, multi-repo,
|
|
354
|
+
or the implementation path is still unclear after reviewing existing context.
|
|
350
355
|
- Describe: files to change, approach, patterns to follow
|
|
351
356
|
- Ask the delegate to verify: do these files exist? Are there existing tests?
|
|
352
357
|
Are there related utilities or patterns to reuse? Any API contracts to respect?
|
|
353
|
-
-
|
|
358
|
+
- Skip this delegate for narrow single-repo changes when code-explorer or direct
|
|
359
|
+
inspection already identified the target files, branch, and validation path.
|
|
354
360
|
|
|
355
361
|
Also delegate for:
|
|
356
362
|
- Multi-repo changes (one delegate per repo, in parallel)
|
|
@@ -361,9 +367,12 @@ steps:
|
|
|
361
367
|
- Sequential dependent work (step B needs step A's output)
|
|
362
368
|
- Simple single-file edits (fewer than 5 iterations)
|
|
363
369
|
- Git operations (commit, push, PR) — always do these yourself
|
|
370
|
+
- Questions you already delegated once in this session
|
|
364
371
|
|
|
365
372
|
Delegates have fewer iterations and no access to your conversation.
|
|
366
373
|
Provide all necessary context in the delegate prompt.
|
|
374
|
+
If a delegate returns empty output, times out, or repeats information already
|
|
375
|
+
known, do NOT call the same delegate again. Fall back to direct tools.
|
|
367
376
|
</delegation>
|
|
368
377
|
|
|
369
378
|
<git-workflow>
|
|
@@ -372,12 +381,17 @@ steps:
|
|
|
372
381
|
|
|
373
382
|
Before your final response, verify:
|
|
374
383
|
□ Build passes (using exact commands from "Discover build system" delegate)
|
|
375
|
-
□ Tests pass (
|
|
384
|
+
□ Tests pass (start with the narrowest relevant tests; run broader suites only
|
|
385
|
+
when required by repo policy, when the change is cross-cutting, or when focused
|
|
386
|
+
tests indicate wider impact)
|
|
376
387
|
□ Lint/format passes (if the project has a linter)
|
|
377
388
|
□ git add <files>
|
|
378
389
|
□ git commit -m "descriptive message"
|
|
379
390
|
□ git push -u origin <branch-name>
|
|
380
391
|
□ gh pr create (for new PRs) or update existing PR
|
|
392
|
+
- For a new branch: ALWAYS push first, then use `gh pr create --head <branch-name>`
|
|
393
|
+
- If PR creation fails, inspect stderr, fix the missing prerequisite, and retry once
|
|
394
|
+
- Do NOT repeat the same `gh pr create` command after the same error
|
|
381
395
|
No PR URL = failed task. Report errors honestly, never claim false success.
|
|
382
396
|
|
|
383
397
|
If build/test/lint fails, fix the issue before committing. If you cannot fix it,
|
|
@@ -397,19 +411,23 @@ steps:
|
|
|
397
411
|
<efficiency>
|
|
398
412
|
- Use context data directly — don't re-read files or re-run searches for
|
|
399
413
|
information already provided by code-explorer.
|
|
414
|
+
- If code-explorer already identified the repo, branch, files, tests, or exact
|
|
415
|
+
commands, treat that as the default source of truth unless a tool result proves
|
|
416
|
+
it wrong.
|
|
400
417
|
- If a project has <setup> commands listed, run them FIRST (in the project's
|
|
401
418
|
directory) before any other work. These are prerequisites (e.g., `npm install`,
|
|
402
419
|
`make deps`, database migrations).
|
|
403
|
-
- Use tasks to track
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
- If a bash command fails,
|
|
412
|
-
|
|
420
|
+
- Use tasks to track real phases of work, not every obvious micro-step.
|
|
421
|
+
- For narrow single-repo changes, keep the task list minimal:
|
|
422
|
+
1. "Implement changes"
|
|
423
|
+
2. "Verify build"
|
|
424
|
+
3. "Create pull request"
|
|
425
|
+
- Add "Run setup commands", "Discover build system", or "Plan validation"
|
|
426
|
+
ONLY when you actually need to perform those steps.
|
|
427
|
+
- Mark in_progress/completed as you go. Do NOT skip "Verify build".
|
|
428
|
+
- If a bash command fails, diagnose the cause before retrying.
|
|
429
|
+
- Do NOT repeat the same logical action after the same error unless you changed
|
|
430
|
+
a prerequisite (for example: push before re-running `gh pr create`).
|
|
413
431
|
</efficiency>
|
|
414
432
|
{% assign has_trace = inputs.trace_id | size %}
|
|
415
433
|
{% assign has_slack_user = inputs.slack_user_id | size %}
|
|
@@ -29,6 +29,11 @@ knowledge: |
|
|
|
29
29
|
- If confidence "high", trust the answer — do NOT re-call with rephrased question
|
|
30
30
|
- Only call again for a genuinely DIFFERENT aspect of the codebase
|
|
31
31
|
- If confidence "medium" or "low", check confidence_reason for what to refine
|
|
32
|
+
- If `exploration_status` is `timeout`, `failed`, or `no_projects`, do NOT re-call
|
|
33
|
+
with a paraphrase of the same question. Report the failure honestly and only retry
|
|
34
|
+
if you can narrow the question or change the scope.
|
|
35
|
+
- If `references` is empty and confidence is low, treat that as "not answered yet",
|
|
36
|
+
not as a usable code answer.
|
|
32
37
|
|
|
33
38
|
## Usage Instructions
|
|
34
39
|
1. Call the `code-explorer` tool with the user's question — do NOT try to answer code questions yourself
|
package/dist/docs/commands.md
CHANGED
|
@@ -148,35 +148,78 @@ visor mcp-server --transport http --config defaults/code-review.yaml \
|
|
|
148
148
|
|
|
149
149
|
#### `visor tasks`
|
|
150
150
|
|
|
151
|
-
Monitor and
|
|
151
|
+
Monitor, inspect, and evaluate agent tasks. Requires `task_tracking: true` (or `--task-tracking` CLI flag).
|
|
152
152
|
|
|
153
153
|
```bash
|
|
154
154
|
visor tasks [command] [options]
|
|
155
155
|
```
|
|
156
156
|
|
|
157
157
|
**Subcommands:**
|
|
158
|
-
- `list` (default) — List tasks
|
|
158
|
+
- `list` (default) — List tasks (interactive TUI in TTY, table otherwise)
|
|
159
|
+
- `show <task-id>` — Show full task details including response and evaluation
|
|
160
|
+
- `trace <task-id>` — Show execution trace tree (YAML-formatted span hierarchy)
|
|
161
|
+
- `evaluate <task-id>` — Evaluate task quality with LLM judge
|
|
159
162
|
- `stats` — Queue summary statistics
|
|
160
163
|
- `cancel <task-id>` — Cancel a running task
|
|
161
|
-
- `
|
|
164
|
+
- `purge` — Delete old completed/failed tasks
|
|
162
165
|
|
|
163
|
-
|
|
164
|
-
|
|
166
|
+
Task IDs support prefix matching — use the first 8 characters.
|
|
167
|
+
|
|
168
|
+
**List options:**
|
|
169
|
+
- `--all` — Show all tasks including completed/failed history
|
|
170
|
+
- `--state <state>` — Filter: `submitted`, `working`, `completed`, `failed`, `canceled`
|
|
171
|
+
- `--search <text>` — Full-text search on task input
|
|
165
172
|
- `--agent <workflow-id>` — Filter by workflow
|
|
166
|
-
- `--
|
|
167
|
-
- `--
|
|
168
|
-
- `--
|
|
173
|
+
- `--instance <id>` — Filter by visor instance
|
|
174
|
+
- `--limit <n>` — Tasks per page (default: 20)
|
|
175
|
+
- `--page <n>` — Page number
|
|
176
|
+
- `--output <format>` — Output: `table`, `json`, `markdown` (disables TUI)
|
|
177
|
+
- `--tui` — Force interactive TUI mode
|
|
178
|
+
- `--watch` — Auto-refresh every 2 seconds
|
|
179
|
+
|
|
180
|
+
**Trace options:**
|
|
181
|
+
- `--full` — Show full output without truncation
|
|
182
|
+
- `--output <format>` — Output: `tree` (default), `json`
|
|
183
|
+
|
|
184
|
+
**Evaluate options:**
|
|
185
|
+
- `--model <model>` — LLM model for evaluation (default: from config or env)
|
|
186
|
+
- `--provider <provider>` — AI provider: `google`, `openai`, `anthropic`
|
|
187
|
+
- `--last <n>` — Batch evaluate last N tasks
|
|
188
|
+
- `--state <state>` — Filter for batch mode (default: `completed`)
|
|
189
|
+
- `--prompt <text>` — Custom evaluation system prompt
|
|
190
|
+
- `--output <format>` — Output: `table`, `json`
|
|
191
|
+
|
|
192
|
+
**Purge options:**
|
|
193
|
+
- `--age <duration>` — Maximum age, e.g. `24h`, `7d`, `30d` (default: `7d`)
|
|
169
194
|
|
|
170
195
|
**Examples:**
|
|
171
196
|
```bash
|
|
172
|
-
|
|
173
|
-
visor tasks
|
|
174
|
-
visor tasks
|
|
175
|
-
visor tasks
|
|
176
|
-
visor tasks
|
|
197
|
+
# Browsing tasks
|
|
198
|
+
visor tasks # Interactive TUI browser
|
|
199
|
+
visor tasks --output table # Plain table output
|
|
200
|
+
visor tasks --all # Include completed/failed history
|
|
201
|
+
visor tasks --state failed # Show only failed tasks
|
|
202
|
+
visor tasks --search "auth middleware" # Search by input text
|
|
203
|
+
|
|
204
|
+
# Inspecting individual tasks
|
|
205
|
+
visor tasks show abc123 # Task details with response
|
|
206
|
+
visor tasks show abc123 --output json # Full JSON with evaluation data
|
|
207
|
+
|
|
208
|
+
# Execution traces
|
|
209
|
+
visor tasks trace abc123 # Compact trace tree
|
|
210
|
+
visor tasks trace abc123 --full # Full trace with untruncated outputs
|
|
211
|
+
|
|
212
|
+
# Quality evaluation
|
|
213
|
+
visor tasks evaluate abc123 # Evaluate a single task
|
|
214
|
+
visor tasks evaluate abc123 --output json # Evaluation as JSON
|
|
215
|
+
visor tasks evaluate --last 10 # Batch evaluate last 10 tasks
|
|
216
|
+
visor tasks evaluate --last 5 --model gpt-4o # Use specific model
|
|
217
|
+
|
|
218
|
+
# Administration
|
|
177
219
|
visor tasks stats # Queue summary
|
|
178
220
|
visor tasks stats --output json # Stats as JSON
|
|
179
|
-
visor tasks cancel abc123 # Cancel a task
|
|
221
|
+
visor tasks cancel abc123 # Cancel a running task
|
|
222
|
+
visor tasks purge --age 30d # Delete tasks older than 30 days
|
|
180
223
|
```
|
|
181
224
|
|
|
182
225
|
### Common CLI Options
|
|
@@ -430,6 +430,8 @@ The following global configuration options are available and documented in detai
|
|
|
430
430
|
| `sandbox` | Default sandbox name for all steps | [Sandbox Engines](./sandbox-engines.md) |
|
|
431
431
|
| `sandboxes` | Named sandbox definitions (Docker, Bubblewrap, Seatbelt) | [Sandbox Engines](./sandbox-engines.md) |
|
|
432
432
|
| `workspace` | Workspace isolation configuration | [Workspace Isolation RFC](./rfc/workspace-isolation.md) |
|
|
433
|
+
| `task_tracking` | Enable cross-frontend task tracking (`true`/`false`) | [Observability](./observability.md) |
|
|
434
|
+
| `task_evaluate` | Auto-evaluate completed tasks with LLM judge (`true` or object) | [Observability](./observability.md) |
|
|
433
435
|
|
|
434
436
|
Example combining several options:
|
|
435
437
|
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Graceful Restart
|
|
2
|
+
|
|
3
|
+
Visor supports zero-disruption restarts via `SIGUSR1`. When triggered, the old process stops accepting new work, a new process spawns and begins accepting requests, and the old process waits for all in-flight work to complete before exiting. Both processes run in parallel during the transition.
|
|
4
|
+
|
|
5
|
+
## How It Works
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
SIGUSR1 received by old process
|
|
9
|
+
→ Stop listening on all ports (free ports instantly)
|
|
10
|
+
→ Spawn new process with same args/env
|
|
11
|
+
→ New process starts, binds ports, sends IPC "ready" signal
|
|
12
|
+
→ Old process drains: waits for ALL in-flight work to complete
|
|
13
|
+
→ Old process runs cleanup callbacks
|
|
14
|
+
→ Old process exits
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**Key behavior:** By default, the old process waits **indefinitely** for all in-flight work to complete. There is no default timeout — active conversations, tool calls, and webhook handlers are never interrupted. You can optionally set a hard timeout via `drain_timeout_ms` (see Configuration below).
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
### Trigger a Restart
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Find the Visor PID (-f matches any command line containing "visor", e.g. "supervisor" — verify the result)
|
|
25
|
+
pgrep -f visor
|
|
26
|
+
|
|
27
|
+
# Send SIGUSR1
|
|
28
|
+
kill -USR1 <pid>
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Kubernetes / Docker
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Kubernetes
|
|
35
|
+
kubectl exec -n visor deploy/visor -- kill -USR1 1
|
|
36
|
+
|
|
37
|
+
# Docker
|
|
38
|
+
docker kill --signal=USR1 visor
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### systemd
|
|
42
|
+
|
|
43
|
+
```ini
|
|
44
|
+
[Service]
|
|
45
|
+
ExecReload=/bin/kill -USR1 $MAINPID
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Then reload with:
|
|
49
|
+
```bash
|
|
50
|
+
systemctl reload visor
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Configuration
|
|
54
|
+
|
|
55
|
+
Add `graceful_restart` to your `.visor.yaml`:
|
|
56
|
+
|
|
57
|
+
```yaml
|
|
58
|
+
graceful_restart:
|
|
59
|
+
# Maximum time to wait for in-flight work to complete (milliseconds).
|
|
60
|
+
# 0 = unlimited (default). Old process waits as long as needed.
|
|
61
|
+
drain_timeout_ms: 0
|
|
62
|
+
|
|
63
|
+
# Maximum time to wait for the new process to start and signal readiness.
|
|
64
|
+
# Default: 15000 (15 seconds).
|
|
65
|
+
child_ready_timeout_ms: 15000
|
|
66
|
+
|
|
67
|
+
# Send "bot is restarting" messages to active conversations.
|
|
68
|
+
# Default: true.
|
|
69
|
+
notify_users: true
|
|
70
|
+
|
|
71
|
+
# Override the auto-detected spawn command.
|
|
72
|
+
# Leave empty to auto-detect (recommended).
|
|
73
|
+
restart_command: ""
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Auto-Detection of Spawn Method
|
|
77
|
+
|
|
78
|
+
Visor automatically detects how it was invoked and spawns the new process accordingly:
|
|
79
|
+
|
|
80
|
+
| Invocation | Spawn behavior |
|
|
81
|
+
|---|---|
|
|
82
|
+
| `npx -y @probelabs/visor@latest --slack` | Re-runs `npx -y @probelabs/visor@latest` + original args (fetches latest version) |
|
|
83
|
+
| `node dist/index.js --slack` | Re-runs `node dist/index.js` + same args (picks up updated binary on disk) |
|
|
84
|
+
| `./dist/index.js --slack` | Re-runs with `process.execPath` + same argv |
|
|
85
|
+
| Custom (`restart_command` set) | Runs the configured command + original Visor args |
|
|
86
|
+
|
|
87
|
+
The `VISOR_RESTART_GENERATION` environment variable is incremented on each restart, letting you track restart generations in logs.
|
|
88
|
+
|
|
89
|
+
## Graceful Restart vs Config Reload
|
|
90
|
+
|
|
91
|
+
Visor supports two complementary mechanisms for applying changes without disruption:
|
|
92
|
+
|
|
93
|
+
| Mechanism | Signal | Use case | Process lifecycle |
|
|
94
|
+
|---|---|---|---|
|
|
95
|
+
| **Graceful restart** (`SIGUSR1`) | `kill -USR1` | New code, binary updates, dependency changes | Old process drains, new process spawns |
|
|
96
|
+
| **Hot config reload** (`SIGUSR2` / `--watch`) | `kill -USR2` | Config-only changes (thresholds, checks, routing) | Same process, config reloaded in-place |
|
|
97
|
+
|
|
98
|
+
**When to use `--watch`:** If you only need to update `.visor.yaml` (e.g., add a check, change a threshold, adjust routing), use `--watch` to auto-reload on file changes — no restart needed:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
visor --slack --config .visor.yaml --watch
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The `--watch` flag monitors the config file for changes and applies them without restarting. This is faster and lighter than a full graceful restart. Use graceful restart (`SIGUSR1`) when you need to pick up new code or binary changes.
|
|
105
|
+
|
|
106
|
+
## Signal Reference
|
|
107
|
+
|
|
108
|
+
| Signal | Behavior |
|
|
109
|
+
|---|---|
|
|
110
|
+
| `SIGUSR1` | Graceful restart — spawns new process, drains old |
|
|
111
|
+
| `SIGUSR2` | Hot config reload — reloads `.visor.yaml` in-place (also triggered by `--watch`) |
|
|
112
|
+
| `SIGTERM` | Graceful shutdown (stop + exit) |
|
|
113
|
+
| `SIGINT` | Graceful shutdown (stop + exit) |
|
|
114
|
+
|
|
115
|
+
## What Gets Drained
|
|
116
|
+
|
|
117
|
+
Each runner type handles draining differently:
|
|
118
|
+
|
|
119
|
+
| Runner | stopListening | drain |
|
|
120
|
+
|---|---|---|
|
|
121
|
+
| **Slack** | Closes WebSocket, stops scheduler | Waits for all active threads to finish |
|
|
122
|
+
| **MCP Server** | Closes HTTP server, frees port | Waits for all active tool calls to complete |
|
|
123
|
+
| **Telegram** | Stops long-polling | Waits for active chat handlers |
|
|
124
|
+
| **Email** | Stops polling interval | Waits for active email processing |
|
|
125
|
+
| **WhatsApp** | Closes webhook HTTP server | Waits for active request handlers |
|
|
126
|
+
| **Teams** | Closes webhook HTTP server | Waits for active request handlers |
|
|
127
|
+
| **A2A** | Closes HTTP server | Waits for active tasks in queue |
|
|
128
|
+
|
|
129
|
+
## Error Handling
|
|
130
|
+
|
|
131
|
+
| Scenario | Behavior |
|
|
132
|
+
|---|---|
|
|
133
|
+
| New process fails to start | Restart aborted, old process continues serving |
|
|
134
|
+
| New process doesn't become ready in time | Restart aborted, child killed, old process continues |
|
|
135
|
+
| Drain timeout exceeded (if configured) | Old process force-exits; new process is already running |
|
|
136
|
+
| Double SIGUSR1 | Second signal ignored while restart is in progress |
|
|
137
|
+
| SIGTERM during restart | Standard shutdown handler takes over |
|
|
138
|
+
|
|
139
|
+
## Deployment Patterns
|
|
140
|
+
|
|
141
|
+
### Blue-Green with SIGUSR1
|
|
142
|
+
|
|
143
|
+
1. Deploy new code to disk (e.g., `npm install -g @probelabs/visor@latest`)
|
|
144
|
+
2. Send `SIGUSR1` to the running process
|
|
145
|
+
3. New process picks up updated binary automatically
|
|
146
|
+
4. Old process drains and exits
|
|
147
|
+
|
|
148
|
+
### Rolling Restart in Kubernetes
|
|
149
|
+
|
|
150
|
+
For Kubernetes deployments with multiple replicas, you can use the built-in rolling update strategy instead of SIGUSR1. However, SIGUSR1 is useful for single-replica deployments or when you want to avoid pod recreation:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
# Restart single instance without pod recreation
|
|
154
|
+
kubectl exec -n visor deploy/visor -- kill -USR1 1
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### CI/CD Integration
|
|
158
|
+
|
|
159
|
+
```yaml
|
|
160
|
+
# GitHub Actions example
|
|
161
|
+
- name: Deploy and restart
|
|
162
|
+
run: |
|
|
163
|
+
ssh deploy@server "cd /opt/visor && git pull && npm ci && npm run build"
|
|
164
|
+
ssh deploy@server 'kill -USR1 $(cat /var/run/visor.pid)'
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Monitoring
|
|
168
|
+
|
|
169
|
+
Track restarts via:
|
|
170
|
+
- **Logs:** Look for `[GracefulRestart]` log entries
|
|
171
|
+
- **Environment:** `VISOR_RESTART_GENERATION` shows current generation
|
|
172
|
+
- **OTel:** Restart events appear as spans in telemetry traces
|
|
173
|
+
|
|
174
|
+
## Limitations
|
|
175
|
+
|
|
176
|
+
- **Windows:** `SIGUSR1` is not available on Windows. Use process restart via your service manager instead.
|
|
177
|
+
- **Slack WebSocket:** The WebSocket connection cannot be transferred between processes. The new process opens a fresh Socket Mode connection. Slack automatically routes new events to the new connection.
|
|
178
|
+
- **npx mode:** When running via `npx` with the `@latest` tag, each restart fetches the latest published version. Pin a version in `restart_command` if you need deterministic restarts.
|
|
@@ -223,6 +223,75 @@ When using `--output json`, full `executionStatistics` object is included with:
|
|
|
223
223
|
| `totalDuration` | Total execution time in milliseconds |
|
|
224
224
|
| Issue counts | By severity: critical, error, warning, info |
|
|
225
225
|
|
|
226
|
+
## Task Tracking & Evaluation
|
|
227
|
+
|
|
228
|
+
Task tracking records every workflow execution (CLI, Slack, TUI, Scheduler) in a shared SQLite store, making each one visible via `visor tasks`.
|
|
229
|
+
|
|
230
|
+
### Enabling Task Tracking
|
|
231
|
+
|
|
232
|
+
```yaml
|
|
233
|
+
# .visor.yaml
|
|
234
|
+
task_tracking: true
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Or via CLI flag: `visor --task-tracking --slack --config .visor.yaml`
|
|
238
|
+
|
|
239
|
+
### Automatic Task Evaluation
|
|
240
|
+
|
|
241
|
+
When enabled, every completed task is automatically evaluated by an LLM judge that scores response quality and execution efficiency. Evaluations run asynchronously (non-blocking) after task completion and are stored as task artifacts.
|
|
242
|
+
|
|
243
|
+
```yaml
|
|
244
|
+
# Simple — enable with defaults
|
|
245
|
+
task_evaluate: true
|
|
246
|
+
|
|
247
|
+
# With configuration
|
|
248
|
+
task_evaluate:
|
|
249
|
+
enabled: true
|
|
250
|
+
model: gemini-2.5-flash # LLM model (default: auto-detect from API keys)
|
|
251
|
+
provider: google # google, openai, anthropic
|
|
252
|
+
prompt: "Custom evaluation..." # Override default evaluation prompt
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
Environment variables (override config):
|
|
256
|
+
- `VISOR_TASK_EVALUATE=true` — enable auto-evaluation
|
|
257
|
+
- `VISOR_EVAL_MODEL` — evaluation model
|
|
258
|
+
- `VISOR_EVAL_PROVIDER` — evaluation provider
|
|
259
|
+
- `VISOR_EVAL_PROMPT` — custom system prompt
|
|
260
|
+
|
|
261
|
+
### Execution Traces
|
|
262
|
+
|
|
263
|
+
Each task captures an OpenTelemetry trace that records the full execution pipeline: check ordering, AI model calls with token counts, tool calls with result sizes, and delegation chains. View traces with:
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
visor tasks trace <task-id> # Compact YAML tree
|
|
267
|
+
visor tasks trace <task-id> --full # Full untruncated output
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
The trace tree shows:
|
|
271
|
+
- **visor.run** — root span with metadata (trace_id, version, source, duration)
|
|
272
|
+
- **Checks** — named steps with type (ai/script/workflow), duration, input context, and output
|
|
273
|
+
- **AI blocks** — LLM calls with model, token counts, and intent
|
|
274
|
+
- **Tool calls** — search, extract, listFiles with input queries and result sizes (or "no results")
|
|
275
|
+
- **Delegations** — sub-agent searches with nested AI/tool chains
|
|
276
|
+
|
|
277
|
+
Traces are also included in the LLM evaluation prompt, allowing the judge to assess execution efficiency alongside response quality.
|
|
278
|
+
|
|
279
|
+
### Evaluation Results
|
|
280
|
+
|
|
281
|
+
Evaluations rate tasks on two axes:
|
|
282
|
+
|
|
283
|
+
| Axis | Rating | Categories |
|
|
284
|
+
|------|--------|------------|
|
|
285
|
+
| **Response quality** | 1-5 | excellent, good, adequate, poor, off-topic, error |
|
|
286
|
+
| **Execution quality** | 1-5 | efficient, adequate, wasteful, error |
|
|
287
|
+
|
|
288
|
+
View stored evaluations:
|
|
289
|
+
```bash
|
|
290
|
+
visor tasks show <task-id> # Includes evaluation inline
|
|
291
|
+
visor tasks show <task-id> --output json # Full evaluation object
|
|
292
|
+
visor tasks evaluate --last 10 # Batch evaluate recent tasks
|
|
293
|
+
```
|
|
294
|
+
|
|
226
295
|
## Related Documentation
|
|
227
296
|
|
|
228
297
|
- [Output Formats](./output-formats.md) - Detailed format specifications
|
|
@@ -527,6 +527,23 @@ visor config restore 1 --output restored.yaml
|
|
|
527
527
|
|
|
528
528
|
## Upgrading
|
|
529
529
|
|
|
530
|
+
### Graceful Restart (Zero-Disruption)
|
|
531
|
+
|
|
532
|
+
Visor supports zero-disruption restarts via `SIGUSR1`. The old process stops accepting new work, a new process spawns, and the old process waits for all in-flight work to complete before exiting. Both processes run in parallel during the transition.
|
|
533
|
+
|
|
534
|
+
```bash
|
|
535
|
+
# Deploy new code, then trigger graceful restart
|
|
536
|
+
kill -USR1 $(pgrep -f visor)
|
|
537
|
+
|
|
538
|
+
# Kubernetes
|
|
539
|
+
kubectl exec -n visor deploy/visor -- kill -USR1 1
|
|
540
|
+
|
|
541
|
+
# Docker
|
|
542
|
+
docker kill --signal=USR1 visor
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
By default, the old process waits **indefinitely** for active conversations and requests to complete. See [Graceful Restart Guide](./guides/graceful-restart.md) for full configuration options.
|
|
546
|
+
|
|
530
547
|
### Rolling Update (Kubernetes)
|
|
531
548
|
|
|
532
549
|
```bash
|
|
@@ -50,6 +50,7 @@ export declare class EmailPollingRunner implements Runner {
|
|
|
50
50
|
private sendConfig?;
|
|
51
51
|
private resendLastSeenId?;
|
|
52
52
|
private hasWebhookSecret;
|
|
53
|
+
private activeProcessing;
|
|
53
54
|
constructor(engine: StateMachineExecutionEngine, cfg: VisorConfig, opts: EmailPollingConfig);
|
|
54
55
|
/** Get the EmailClient instance (for shared access) */
|
|
55
56
|
getClient(): EmailClient;
|
|
@@ -58,6 +59,8 @@ export declare class EmailPollingRunner implements Runner {
|
|
|
58
59
|
/** Hot-swap config for future requests */
|
|
59
60
|
updateConfig(cfg: VisorConfig): void;
|
|
60
61
|
start(): Promise<void>;
|
|
62
|
+
stopListening(): Promise<void>;
|
|
63
|
+
drain(timeoutMs?: number): Promise<void>;
|
|
61
64
|
stop(): Promise<void>;
|
|
62
65
|
private startImapPolling;
|
|
63
66
|
private pollOnce;
|
|
@@ -72,6 +75,7 @@ export declare class EmailPollingRunner implements Runner {
|
|
|
72
75
|
error?: string;
|
|
73
76
|
}>;
|
|
74
77
|
private handleMessage;
|
|
78
|
+
private handleMessageInner;
|
|
75
79
|
/** Ensure email frontend is in the config for this run */
|
|
76
80
|
private prepareConfigForRun;
|
|
77
81
|
/** Deduplication: track processed messages by Message-ID */
|