@probelabs/visor 0.1.182-ee → 0.1.183-ee

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/defaults/assistant.yaml +2 -1
  2. package/defaults/code-talk.yaml +6 -0
  3. package/defaults/skills/task-progress.yaml +39 -0
  4. package/dist/agent-protocol/task-evaluator.d.ts +2 -1
  5. package/dist/agent-protocol/task-evaluator.d.ts.map +1 -1
  6. package/dist/agent-protocol/task-progress-tool.d.ts +29 -0
  7. package/dist/agent-protocol/task-progress-tool.d.ts.map +1 -0
  8. package/dist/agent-protocol/task-store.d.ts +8 -0
  9. package/dist/agent-protocol/task-store.d.ts.map +1 -1
  10. package/dist/agent-protocol/tasks-cli-handler.d.ts.map +1 -1
  11. package/dist/agent-protocol/trace-serializer.d.ts +5 -2
  12. package/dist/agent-protocol/trace-serializer.d.ts.map +1 -1
  13. package/dist/agent-protocol/track-execution.d.ts +1 -1
  14. package/dist/agent-protocol/track-execution.d.ts.map +1 -1
  15. package/dist/ai-review-service.d.ts.map +1 -1
  16. package/dist/cli-main.d.ts.map +1 -1
  17. package/dist/debug-visualizer/trace-reader.d.ts.map +1 -1
  18. package/dist/defaults/assistant.yaml +2 -1
  19. package/dist/defaults/code-talk.yaml +6 -0
  20. package/dist/defaults/skills/task-progress.yaml +39 -0
  21. package/dist/docs/telemetry-live-spans-plan.md +510 -0
  22. package/dist/generated/config-schema.json +43 -6
  23. package/dist/index.js +3545 -701
  24. package/dist/providers/ai-check-provider.d.ts.map +1 -1
  25. package/dist/providers/git-checkout-provider.d.ts.map +1 -1
  26. package/dist/providers/mcp-custom-sse-server.d.ts.map +1 -1
  27. package/dist/reviewer.d.ts +2 -0
  28. package/dist/reviewer.d.ts.map +1 -1
  29. package/dist/runners/process-cli-handler.d.ts +2 -0
  30. package/dist/runners/process-cli-handler.d.ts.map +1 -0
  31. package/dist/runners/process-discovery.d.ts +29 -0
  32. package/dist/runners/process-discovery.d.ts.map +1 -0
  33. package/dist/sandbox/check-runner.d.ts.map +1 -1
  34. package/dist/sandbox/sandbox-telemetry.d.ts +7 -0
  35. package/dist/sandbox/sandbox-telemetry.d.ts.map +1 -1
  36. package/dist/sandbox/trace-ingester.d.ts +28 -15
  37. package/dist/sandbox/trace-ingester.d.ts.map +1 -1
  38. package/dist/scheduler/schedule-tool.d.ts +5 -0
  39. package/dist/scheduler/schedule-tool.d.ts.map +1 -1
  40. package/dist/sdk/{a2a-frontend-MU5EO2HZ.mjs → a2a-frontend-5YDHFQXD.mjs} +47 -8
  41. package/dist/sdk/{a2a-frontend-MU5EO2HZ.mjs.map → a2a-frontend-5YDHFQXD.mjs.map} +1 -1
  42. package/dist/sdk/{a2a-frontend-4LP3MLTS.mjs → a2a-frontend-6LWBIPMS.mjs} +19 -3
  43. package/dist/sdk/a2a-frontend-6LWBIPMS.mjs.map +1 -0
  44. package/dist/sdk/check-provider-registry-WSEVHJEV.mjs +31 -0
  45. package/dist/sdk/{check-provider-registry-I4BCWKRU.mjs → check-provider-registry-YRADEEQY.mjs} +6 -6
  46. package/dist/sdk/chunk-4BN2XI4X.mjs +459 -0
  47. package/dist/sdk/chunk-4BN2XI4X.mjs.map +1 -0
  48. package/dist/sdk/chunk-54KOAC4W.mjs +665 -0
  49. package/dist/sdk/chunk-54KOAC4W.mjs.map +1 -0
  50. package/dist/sdk/chunk-6C3R6E42.mjs +1700 -0
  51. package/dist/sdk/chunk-6C3R6E42.mjs.map +1 -0
  52. package/dist/sdk/{chunk-4I3TJ7UJ.mjs → chunk-7W5QCO4Y.mjs} +47 -10
  53. package/dist/sdk/chunk-7W5QCO4Y.mjs.map +1 -0
  54. package/dist/sdk/chunk-B2OUZAWY.mjs +237 -0
  55. package/dist/sdk/chunk-B2OUZAWY.mjs.map +1 -0
  56. package/dist/sdk/chunk-FWWLD555.mjs +244 -0
  57. package/dist/sdk/chunk-FWWLD555.mjs.map +1 -0
  58. package/dist/sdk/{chunk-QXT47ZHR.mjs → chunk-G7GSN3SK.mjs} +2 -2
  59. package/dist/sdk/{chunk-QXT47ZHR.mjs.map → chunk-G7GSN3SK.mjs.map} +1 -1
  60. package/dist/sdk/{chunk-DHETLQIX.mjs → chunk-GA2TYKSR.mjs} +5 -5
  61. package/dist/sdk/{chunk-6DPPP7LD.mjs → chunk-IDL3AA3G.mjs} +203 -42
  62. package/dist/sdk/chunk-IDL3AA3G.mjs.map +1 -0
  63. package/dist/sdk/chunk-MEB2TTIE.mjs +157 -0
  64. package/dist/sdk/chunk-MEB2TTIE.mjs.map +1 -0
  65. package/dist/sdk/{chunk-3JFK6KCD.mjs → chunk-MFXPJUUE.mjs} +150 -280
  66. package/dist/sdk/chunk-MFXPJUUE.mjs.map +1 -0
  67. package/dist/sdk/{chunk-KBGQJKIZ.mjs → chunk-NPSLGKXB.mjs} +3 -3
  68. package/dist/sdk/chunk-P2K4VOMU.mjs +825 -0
  69. package/dist/sdk/chunk-P2K4VOMU.mjs.map +1 -0
  70. package/dist/sdk/chunk-RI4ONH5X.mjs +482 -0
  71. package/dist/sdk/chunk-RI4ONH5X.mjs.map +1 -0
  72. package/dist/sdk/chunk-S5FSRHMY.mjs +139 -0
  73. package/dist/sdk/chunk-S5FSRHMY.mjs.map +1 -0
  74. package/dist/sdk/{chunk-7ERVRLDV.mjs → chunk-TFUQ2D5L.mjs} +13 -2
  75. package/dist/sdk/chunk-TFUQ2D5L.mjs.map +1 -0
  76. package/dist/sdk/{chunk-TQQNSHQV.mjs → chunk-UXB4XWEE.mjs} +1044 -179
  77. package/dist/sdk/chunk-UXB4XWEE.mjs.map +1 -0
  78. package/dist/sdk/{chunk-U6K5SK7X.mjs → chunk-V45TITKX.mjs} +2 -2
  79. package/dist/sdk/{chunk-ANUT54HW.mjs → chunk-WKLJ57WF.mjs} +6 -6
  80. package/dist/sdk/chunk-XOAEKFKB.mjs +1150 -0
  81. package/dist/sdk/chunk-XOAEKFKB.mjs.map +1 -0
  82. package/dist/sdk/chunk-ZPYODGYA.mjs +251 -0
  83. package/dist/sdk/chunk-ZPYODGYA.mjs.map +1 -0
  84. package/dist/sdk/command-executor-YNJOS77A.mjs +14 -0
  85. package/dist/sdk/{config-2STD74CJ.mjs → config-PCP6O6Y6.mjs} +4 -4
  86. package/dist/sdk/{failure-condition-evaluator-FFWJRAEQ.mjs → failure-condition-evaluator-H3PBFBYT.mjs} +4 -4
  87. package/dist/sdk/failure-condition-evaluator-IRFKTYZD.mjs +18 -0
  88. package/dist/sdk/github-auth-BJQBLK2V.mjs +196 -0
  89. package/dist/sdk/github-auth-BJQBLK2V.mjs.map +1 -0
  90. package/dist/sdk/{github-frontend-L3F5JXPJ.mjs → github-frontend-DECYOBRN.mjs} +8 -8
  91. package/dist/sdk/{github-frontend-KGV2R5Z6.mjs → github-frontend-TZRBOQCN.mjs} +4 -4
  92. package/dist/sdk/{host-QBJ7TOWG.mjs → host-CFM2ASDI.mjs} +4 -4
  93. package/dist/sdk/{host-X5ZZCEWN.mjs → host-T4LNVU2H.mjs} +3 -3
  94. package/dist/sdk/{knex-store-QCEW4I4R.mjs → knex-store-OEWSZEBY.mjs} +3 -3
  95. package/dist/sdk/lazy-otel-5RDTVS5L.mjs +24 -0
  96. package/dist/sdk/liquid-extensions-E3AKRX7P.mjs +25 -0
  97. package/dist/sdk/{loader-ZNKKJEZ3.mjs → loader-WRGI244P.mjs} +5 -5
  98. package/dist/sdk/memory-store-OHUIXCWJ.mjs +12 -0
  99. package/dist/sdk/metrics-MYUPQBBV.mjs +41 -0
  100. package/dist/sdk/{opa-policy-engine-QCSSIMUF.mjs → opa-policy-engine-IVMCGVNA.mjs} +3 -3
  101. package/dist/sdk/prompt-state-LN57DQF3.mjs +16 -0
  102. package/dist/sdk/renderer-schema-BT2IXMLW.mjs +51 -0
  103. package/dist/sdk/renderer-schema-BT2IXMLW.mjs.map +1 -0
  104. package/dist/sdk/routing-H2PQ57OA.mjs +26 -0
  105. package/dist/sdk/{routing-CVQT4KHX.mjs → routing-JMZ7HDCC.mjs} +5 -5
  106. package/dist/sdk/schedule-tool-2DPNSU63.mjs +37 -0
  107. package/dist/sdk/{schedule-tool-AECLFHSY.mjs → schedule-tool-4M45RK3E.mjs} +6 -6
  108. package/dist/sdk/{schedule-tool-handler-6QLZRTQA.mjs → schedule-tool-handler-KLHE2SOW.mjs} +6 -6
  109. package/dist/sdk/schedule-tool-handler-KLHE2SOW.mjs.map +1 -0
  110. package/dist/sdk/{schedule-tool-handler-J4NUETJ6.mjs → schedule-tool-handler-NBEO46RV.mjs} +16 -16
  111. package/dist/sdk/schedule-tool-handler-NBEO46RV.mjs.map +1 -0
  112. package/dist/sdk/sdk.d.mts +2 -0
  113. package/dist/sdk/sdk.d.ts +2 -0
  114. package/dist/sdk/sdk.js +3125 -666
  115. package/dist/sdk/sdk.js.map +1 -1
  116. package/dist/sdk/sdk.mjs +15 -15
  117. package/dist/sdk/slack-frontend-DF5VL4OF.mjs +929 -0
  118. package/dist/sdk/slack-frontend-DF5VL4OF.mjs.map +1 -0
  119. package/dist/sdk/{task-evaluator-HLNXKKVV.mjs → task-evaluator-GQYDOSGT.mjs} +138 -24
  120. package/dist/sdk/task-evaluator-GQYDOSGT.mjs.map +1 -0
  121. package/dist/sdk/task-evaluator-OVMG7S56.mjs +263 -0
  122. package/dist/sdk/task-evaluator-OVMG7S56.mjs.map +1 -0
  123. package/dist/sdk/{trace-helpers-WJXYVV4S.mjs → trace-helpers-26ZCAE2V.mjs} +7 -5
  124. package/dist/sdk/trace-helpers-26ZCAE2V.mjs.map +1 -0
  125. package/dist/sdk/{trace-helpers-3FFAI7X3.mjs → trace-helpers-XV5GAX5L.mjs} +3 -3
  126. package/dist/sdk/trace-helpers-XV5GAX5L.mjs.map +1 -0
  127. package/dist/sdk/{trace-reader-ZY77OFNM.mjs → trace-reader-OVE4DL2D.mjs} +6 -2
  128. package/dist/sdk/trace-reader-OVE4DL2D.mjs.map +1 -0
  129. package/dist/sdk/trace-serializer-KKBJHM7J.mjs +24 -0
  130. package/dist/sdk/trace-serializer-KKBJHM7J.mjs.map +1 -0
  131. package/dist/sdk/{track-execution-AMQQNXKE.mjs → track-execution-3EC24C2X.mjs} +68 -7
  132. package/dist/sdk/track-execution-3EC24C2X.mjs.map +1 -0
  133. package/dist/sdk/{track-execution-MKIQXP2C.mjs → track-execution-66RLL6QT.mjs} +10 -3
  134. package/dist/sdk/track-execution-66RLL6QT.mjs.map +1 -0
  135. package/dist/sdk/utcp-check-provider-WI3QZ3W6.mjs +16 -0
  136. package/dist/sdk/utcp-check-provider-WI3QZ3W6.mjs.map +1 -0
  137. package/dist/sdk/workflow-check-provider-X2UREEH7.mjs +31 -0
  138. package/dist/sdk/workflow-check-provider-X2UREEH7.mjs.map +1 -0
  139. package/dist/sdk/{workflow-check-provider-EXMC6JIS.mjs → workflow-check-provider-YXALZNAQ.mjs} +6 -6
  140. package/dist/sdk/workflow-check-provider-YXALZNAQ.mjs.map +1 -0
  141. package/dist/sdk/workflow-registry-YCZ3FCJC.mjs +12 -0
  142. package/dist/sdk/workflow-registry-YCZ3FCJC.mjs.map +1 -0
  143. package/dist/slack/socket-runner.d.ts.map +1 -1
  144. package/dist/state-machine/dispatch/sandbox-routing.d.ts.map +1 -1
  145. package/dist/state-machine/states/level-dispatch.d.ts.map +1 -1
  146. package/dist/telemetry/fallback-ndjson.d.ts +21 -0
  147. package/dist/telemetry/fallback-ndjson.d.ts.map +1 -1
  148. package/dist/telemetry/lazy-otel.d.ts +2 -0
  149. package/dist/telemetry/lazy-otel.d.ts.map +1 -1
  150. package/dist/telemetry/opentelemetry.d.ts +5 -0
  151. package/dist/telemetry/opentelemetry.d.ts.map +1 -1
  152. package/dist/telemetry/trace-helpers.d.ts +10 -0
  153. package/dist/telemetry/trace-helpers.d.ts.map +1 -1
  154. package/dist/test-runner/conversation-sugar.d.ts +7 -0
  155. package/dist/test-runner/conversation-sugar.d.ts.map +1 -1
  156. package/dist/test-runner/core/flow-stage.d.ts.map +1 -1
  157. package/dist/test-runner/index.d.ts.map +1 -1
  158. package/dist/test-runner/validator.d.ts.map +1 -1
  159. package/dist/types/git-checkout.d.ts +2 -0
  160. package/dist/types/git-checkout.d.ts.map +1 -1
  161. package/dist/utils/script-tool-environment.d.ts.map +1 -1
  162. package/package.json +2 -2
  163. package/dist/sdk/a2a-frontend-4LP3MLTS.mjs.map +0 -1
  164. package/dist/sdk/check-provider-registry-RRWCXSTG.mjs +0 -31
  165. package/dist/sdk/chunk-3JFK6KCD.mjs.map +0 -1
  166. package/dist/sdk/chunk-4I3TJ7UJ.mjs.map +0 -1
  167. package/dist/sdk/chunk-6DPPP7LD.mjs.map +0 -1
  168. package/dist/sdk/chunk-6VVXKXTI.mjs +0 -164
  169. package/dist/sdk/chunk-6VVXKXTI.mjs.map +0 -1
  170. package/dist/sdk/chunk-7ERVRLDV.mjs.map +0 -1
  171. package/dist/sdk/chunk-TQQNSHQV.mjs.map +0 -1
  172. package/dist/sdk/failure-condition-evaluator-5DZYMCGW.mjs +0 -18
  173. package/dist/sdk/routing-XALEDC2G.mjs +0 -26
  174. package/dist/sdk/schedule-tool-Z6QYL2B3.mjs +0 -37
  175. package/dist/sdk/task-evaluator-HLNXKKVV.mjs.map +0 -1
  176. package/dist/sdk/trace-reader-ZY77OFNM.mjs.map +0 -1
  177. package/dist/sdk/track-execution-AMQQNXKE.mjs.map +0 -1
  178. package/dist/sdk/track-execution-MKIQXP2C.mjs.map +0 -1
  179. package/dist/sdk/workflow-check-provider-VKYGI5GK.mjs +0 -31
  180. /package/dist/sdk/{check-provider-registry-I4BCWKRU.mjs.map → check-provider-registry-WSEVHJEV.mjs.map} +0 -0
  181. /package/dist/sdk/{check-provider-registry-RRWCXSTG.mjs.map → check-provider-registry-YRADEEQY.mjs.map} +0 -0
  182. /package/dist/sdk/{chunk-DHETLQIX.mjs.map → chunk-GA2TYKSR.mjs.map} +0 -0
  183. /package/dist/sdk/{chunk-ANUT54HW.mjs.map → chunk-NPSLGKXB.mjs.map} +0 -0
  184. /package/dist/sdk/{chunk-U6K5SK7X.mjs.map → chunk-V45TITKX.mjs.map} +0 -0
  185. /package/dist/sdk/{chunk-KBGQJKIZ.mjs.map → chunk-WKLJ57WF.mjs.map} +0 -0
  186. /package/dist/sdk/{config-2STD74CJ.mjs.map → command-executor-YNJOS77A.mjs.map} +0 -0
  187. /package/dist/sdk/{failure-condition-evaluator-5DZYMCGW.mjs.map → config-PCP6O6Y6.mjs.map} +0 -0
  188. /package/dist/sdk/{failure-condition-evaluator-FFWJRAEQ.mjs.map → failure-condition-evaluator-H3PBFBYT.mjs.map} +0 -0
  189. /package/dist/sdk/{routing-CVQT4KHX.mjs.map → failure-condition-evaluator-IRFKTYZD.mjs.map} +0 -0
  190. /package/dist/sdk/{github-frontend-KGV2R5Z6.mjs.map → github-frontend-DECYOBRN.mjs.map} +0 -0
  191. /package/dist/sdk/{github-frontend-L3F5JXPJ.mjs.map → github-frontend-TZRBOQCN.mjs.map} +0 -0
  192. /package/dist/sdk/{host-QBJ7TOWG.mjs.map → host-CFM2ASDI.mjs.map} +0 -0
  193. /package/dist/sdk/{host-X5ZZCEWN.mjs.map → host-T4LNVU2H.mjs.map} +0 -0
  194. /package/dist/sdk/{knex-store-QCEW4I4R.mjs.map → knex-store-OEWSZEBY.mjs.map} +0 -0
  195. /package/dist/sdk/{routing-XALEDC2G.mjs.map → lazy-otel-5RDTVS5L.mjs.map} +0 -0
  196. /package/dist/sdk/{schedule-tool-AECLFHSY.mjs.map → liquid-extensions-E3AKRX7P.mjs.map} +0 -0
  197. /package/dist/sdk/{loader-ZNKKJEZ3.mjs.map → loader-WRGI244P.mjs.map} +0 -0
  198. /package/dist/sdk/{schedule-tool-Z6QYL2B3.mjs.map → memory-store-OHUIXCWJ.mjs.map} +0 -0
  199. /package/dist/sdk/{schedule-tool-handler-6QLZRTQA.mjs.map → metrics-MYUPQBBV.mjs.map} +0 -0
  200. /package/dist/sdk/{opa-policy-engine-QCSSIMUF.mjs.map → opa-policy-engine-IVMCGVNA.mjs.map} +0 -0
  201. /package/dist/sdk/{schedule-tool-handler-J4NUETJ6.mjs.map → prompt-state-LN57DQF3.mjs.map} +0 -0
  202. /package/dist/sdk/{trace-helpers-3FFAI7X3.mjs.map → routing-H2PQ57OA.mjs.map} +0 -0
  203. /package/dist/sdk/{trace-helpers-WJXYVV4S.mjs.map → routing-JMZ7HDCC.mjs.map} +0 -0
  204. /package/dist/sdk/{workflow-check-provider-EXMC6JIS.mjs.map → schedule-tool-2DPNSU63.mjs.map} +0 -0
  205. /package/dist/sdk/{workflow-check-provider-VKYGI5GK.mjs.map → schedule-tool-4M45RK3E.mjs.map} +0 -0
@@ -0,0 +1,510 @@
1
+ # Live Telemetry Plan
2
+
3
+ ## Status
4
+
5
+ This plan is now implemented sufficiently to merge; only optional follow-ups remain deferred.
6
+
7
+ ### Implemented
8
+
9
+ - Parent-side lifecycle spans for core Visor checks are live.
10
+ - Sandbox routing lifecycle spans are live.
11
+ - Sandbox child lifecycle spans are live.
12
+ - Sandbox child trace files are tailed incrementally while the child is still running.
13
+ - Final sweep and dedup for sandbox child trace ingestion are implemented.
14
+ - Probe tool lifecycle markers are live.
15
+ - `createToolSpan` support exists in the Visor Probe tracer adapter.
16
+ - Replayed sandbox child spans keep their original names instead of synthetic `child: ...` names.
17
+ - `tasks trace` rendering was updated to handle lifecycle spans and child spans correctly.
18
+ - Trace lookup now follows the current Visor telemetry configuration:
19
+ - file-mode prefers local trace files first
20
+ - OTLP/Grafana setups prefer remote backends first
21
+ - missing `trace_id` values can be recovered from stored trace files
22
+
23
+ ### Deferred
24
+
25
+ - optional direct child OTLP export mode for network-enabled sandboxes
26
+
27
+ ### Merge Readiness
28
+
29
+ The core goal of this plan is met:
30
+
31
+ - traces are materially more live than before
32
+ - sandbox child work no longer waits until process exit to appear
33
+ - task trace rendering and backend selection follow the new model
34
+ - host-mode and engineer lifecycle visibility are explicit enough in realistic Oel runs
35
+
36
+ The remaining deferred item is an optimization, not a blocker.
37
+
38
+ ## Goal
39
+
40
+ Make telemetry appear live for:
41
+
42
+ - Visor workflow/check spans
43
+ - Probe AI/delegate/tool activity
44
+ - sandboxed child checks
45
+
46
+ Keep the final duration spans for accuracy, but surface start/progress markers immediately.
47
+
48
+ Engineer-specific constraint:
49
+
50
+ - If engineer runs inside a sandbox, it is acceptable for deep child details to remain deferred initially.
51
+ - The parent trace must still show immediately that engineer started, which sandbox was selected, and that Visor is waiting on child execution.
52
+
53
+ ## Non-Goal
54
+
55
+ Do not rely on open spans becoming visible in Tempo/Grafana before they end. That is not how the current exporter model behaves.
56
+
57
+ The practical solution is:
58
+
59
+ - emit short-lived lifecycle spans that end immediately
60
+ - keep long final spans for duration
61
+ - stream sandbox child telemetry back to the parent while the child is still running
62
+
63
+ ## Current Behavior
64
+
65
+ ### Host-mode Visor / Probe
66
+
67
+ - Most long-running spans are created with normal OTel span lifecycles and appear only after `span.end()`.
68
+ - Some Probe events are intentionally promoted into short-lived child spans and appear almost immediately.
69
+ - Slack and some long-running frontends call `forceFlushTelemetry()`, which helps only for spans that already ended.
70
+
71
+ ### Sandboxed checks
72
+
73
+ - Child `--run-check` execution writes telemetry to a file via `VISOR_FALLBACK_TRACE_FILE`.
74
+ - Parent ingests that file only after child process completion.
75
+ - Result: sandbox child spans appear only at the end.
76
+
77
+ This was the main reason sandbox work was not live before the host-mediated relay (see Target Architecture) was implemented.
78
+
79
+ ## Design Principles
80
+
81
+ 1. Prefer lifecycle spans over waiting for long spans to finish.
82
+ 2. Treat sandbox telemetry as a transport problem.
83
+ 3. Do not require network access inside the sandbox.
84
+ 4. Support both:
85
+ - local file-based tracing
86
+ - remote OTLP/Grafana tracing
87
+ 5. Keep one implementation path that works even for `network: false` sandboxes.
88
+
89
+ ## Target Architecture
90
+
91
+ ### 1. Parent-side lifecycle spans
92
+
93
+ For every expensive operation, emit immediate short-lived spans:
94
+
95
+ - `*.started`
96
+ - `*.progress`
97
+ - `*.completed`
98
+ - `*.failed`
99
+
100
+ These should be used for:
101
+
102
+ - `visor.check.*`
103
+ - sandbox routing decisions
104
+ - Probe AI request start
105
+ - Probe delegated search start
106
+ - Probe tool execution start/completion
107
+ - engineer parent lifecycle
108
+
109
+ Status:
110
+
111
+ - Implemented for the intended baseline
112
+ - `started` / `completed` / `failed` / `progress` coverage exists for the main Visor check path
113
+ - sandbox routing and sandbox child lifecycle markers exist
114
+ - engineer-specific parent lifecycle markers exist
115
+
116
+ These spans should end immediately and be flushed aggressively enough to appear within 1-2 seconds.
117
+
118
+ ### 2. Keep final duration spans
119
+
120
+ Do not remove:
121
+
122
+ - `visor.check.<id>`
123
+ - `visor.ai_check`
124
+ - `ai.request`
125
+ - `search.delegate`
126
+
127
+ These remain the authoritative duration spans and will still appear after completion.
128
+
129
+ Status:
130
+
131
+ - Implemented
132
+
133
+ ### 3. Sandbox child telemetry transport
134
+
135
+ Sandbox child telemetry must support two modes.
136
+
137
+ #### Mode A: Host-mediated live relay (default)
138
+
139
+ Use when:
140
+
141
+ - local file tracing
142
+ - remote OTLP/Grafana
143
+ - sandbox has no network
144
+ - sandbox network policy is unknown
145
+
146
+ Flow:
147
+
148
+ 1. Parent creates child trace file path in mounted workspace.
149
+ 2. Child writes spans incrementally to that file.
150
+ 3. Parent starts a live tailer immediately after child launch.
151
+ 4. Parent ingests appended span records continuously while child is running.
152
+ 5. Parent re-emits them into its own OTel pipeline.
153
+ 6. Parent performs one final sweep after child exit.
154
+
155
+ This mode works without sandbox network access.
156
+
157
+ Status:
158
+
159
+ - Implemented as the default path
160
+
161
+ #### Mode B: Direct child OTLP export (optional optimization)
162
+
163
+ Use only when:
164
+
165
+ - sandbox network is enabled
166
+ - OTLP endpoint is reachable from container/sandbox
167
+ - required auth/env can be passed safely
168
+
169
+ Flow:
170
+
171
+ 1. Parent propagates trace context and OTLP config into child.
172
+ 2. Child exports directly to OTLP.
173
+ 3. Parent still emits lifecycle spans locally.
174
+ 4. Optional file fallback remains available if child export fails.
175
+
176
+ This must not be the only supported sandbox strategy.
177
+
178
+ Status:
179
+
180
+ - Deferred
181
+
182
+ ## Local Setup Plan
183
+
184
+ Local setup means:
185
+
186
+ - `VISOR_TELEMETRY_SINK=file`
187
+ - NDJSON/file tracing
188
+ - local debug visualizer or local trace inspection
189
+
190
+ ### Desired behavior
191
+
192
+ - Parent lifecycle spans show immediately.
193
+ - Probe lifecycle spans show immediately.
194
+ - Sandboxed child spans begin appearing while child is still running.
195
+
196
+ Status:
197
+
198
+ - Implemented
199
+
200
+ ### Required work
201
+
202
+ 1. Keep current file exporter.
203
+ 2. Add a streaming/tailing ingester for `VISOR_FALLBACK_TRACE_FILE`.
204
+ 3. Ingest only newly appended lines, not full file replay on each poll.
205
+ 4. Re-emit each parsed child span to the active parent trace.
206
+ 5. Finalize with one last pass after child exit.
207
+ 6. Deduplicate records so final sweep does not duplicate already streamed spans.
208
+
209
+ Status:
210
+
211
+ - Implemented
212
+
213
+ ### Notes
214
+
215
+ - This is the most important path for local debugging.
216
+ - It is also the safest path for all sandbox engines.
217
+
218
+ ## Remote OTLP / Grafana Plan
219
+
220
+ Remote setup means:
221
+
222
+ - `VISOR_TELEMETRY_SINK=otlp`
223
+ - Grafana Tempo / Jaeger / OTLP collector
224
+
225
+ ### Desired behavior
226
+
227
+ - Parent lifecycle spans appear within 1-2 seconds.
228
+ - Host-mode Probe lifecycle spans appear within 1-2 seconds.
229
+ - Sandboxed child spans appear live even if the child cannot access OTLP directly.
230
+
231
+ Status:
232
+
233
+ - Implemented through the file-tail relay baseline
234
+
235
+ ### Required baseline
236
+
237
+ Use the same host-mediated live relay as local mode.
238
+
239
+ That gives:
240
+
241
+ - child writes local file
242
+ - parent tails file
243
+ - parent re-emits child spans into parent OTel SDK
244
+ - parent OTel exporter sends to Grafana/Tempo
245
+
246
+ This avoids any dependency on sandbox network reachability.
247
+
248
+ ### Optional optimization
249
+
250
+ Direct child OTLP export may be enabled later for Docker/network-enabled environments, but it must remain optional.
251
+
252
+ Status:
253
+
254
+ - Deferred
255
+
256
+ ## Detailed Work Plan
257
+
258
+ ### Phase 1: Improve live visibility for host-mode spans
259
+
260
+ #### 1.1 Add standard lifecycle span helper
261
+
262
+ Add a helper that creates an immediate child span and ends it immediately.
263
+
264
+ Use it for:
265
+
266
+ - check scheduled
267
+ - check started
268
+ - provider selected
269
+ - sandbox selected
270
+ - child spawned
271
+ - waiting on child
272
+ - completed
273
+ - failed
274
+
275
+ Status:
276
+
277
+ - Implemented for the main helper path
278
+
279
+ #### 1.2 Apply to Visor check execution
280
+
281
+ Emit lifecycle spans around:
282
+
283
+ - state-machine dispatch
284
+ - provider execution start
285
+ - sandbox routing decision
286
+ - provider completion/failure
287
+
288
+ Status:
289
+
290
+ - Implemented for the intended baseline
291
+ - main check `started` / `completed` / `failed` / `progress` coverage exists
292
+
293
+ #### 1.3 Extend Probe tracer adapter
294
+
295
+ Current adapter already creates immediate spans for some events.
296
+
297
+ Extend it so it consistently emits immediate lifecycle spans for:
298
+
299
+ - `probe.ai_request.started`
300
+ - `probe.search_delegate.started`
301
+ - `probe.tool.started`
302
+ - `probe.tool.completed`
303
+ - `probe.tool.failed`
304
+
305
+ Also implement `createToolSpan` in the adapter so Probe DSL / `execute_plan` paths are visible too.
306
+
307
+ Status:
308
+
309
+ - Implemented for the intended baseline
310
+ - `probe.tool.started/completed/failed` exist
311
+ - `createToolSpan` exists
312
+ - explicit `probe.ai_request.started` / `probe.search_delegate.started` exist
313
+
314
+ #### 1.4 Add throttled flush
315
+
316
+ For long-running frontends:
317
+
318
+ - flush after critical lifecycle markers
319
+ - optionally periodic flush every 5-10 seconds while a run is active
320
+
321
+ This should be rate-limited to avoid exporter pressure.
322
+
323
+ Status:
324
+
325
+ - Implemented with throttled non-blocking flush requests on immediate lifecycle spans
326
+
327
+ ### Phase 2: Replace end-of-process-only sandbox ingestion
328
+
329
+ #### 2.1 Build a tailing child trace ingester
330
+
331
+ Requirements:
332
+
333
+ - open child trace file after launch
334
+ - read appended NDJSON lines incrementally
335
+ - tolerate partial writes
336
+ - maintain file offset
337
+ - parse only complete lines
338
+ - ignore malformed partial fragments until completed
339
+
340
+ Status:
341
+
342
+ - Implemented
343
+
344
+ #### 2.2 Re-emit child spans continuously
345
+
346
+ As lines arrive:
347
+
348
+ - parse span record
349
+ - skip fallback markers / duplicates
350
+ - emit to parent OTel tracer immediately
351
+
352
+ Child span attributes should include:
353
+
354
+ - `visor.sandbox.child_span = true`
355
+ - child metadata such as check id / sandbox name / source file if available
356
+
357
+ Status:
358
+
359
+ - Implemented
360
+ - child spans now keep original span names and carry child-origin metadata
361
+
362
+ #### 2.3 Final sweep
363
+
364
+ After child exit:
365
+
366
+ - read remaining bytes
367
+ - parse remaining complete lines
368
+ - ingest anything missed
369
+
370
+ Status:
371
+
372
+ - Implemented
373
+
374
+ #### 2.4 Deduplication
375
+
376
+ Need a stable dedup key:
377
+
378
+ - child `traceId + spanId`
379
+ - or file offset + hash fallback if trace ids are unavailable
380
+
381
+ Without dedup, streaming plus final sweep will duplicate spans.
382
+
383
+ Status:
384
+
385
+ - Implemented
386
+
387
+ ### Phase 3: Engineer parent lifecycle
388
+
389
+ Engineer-specific requirement:
390
+
391
+ - parent should show immediate visibility even if inner engineer details are deferred
392
+
393
+ Emit immediate spans:
394
+
395
+ - `engineer-task.started`
396
+ - `engineer-task.sandbox_resolved`
397
+ - `engineer-task.child_spawned`
398
+ - `engineer-task.waiting_on_child`
399
+ - `engineer-task.completed`
400
+ - `engineer-task.failed`
401
+
402
+ If engineer is not sandboxed, host-mode Probe improvements will automatically apply.
403
+
404
+ Status:
405
+
406
+ - Implemented for the intended baseline
407
+ - generic check/sandbox lifecycle markers already make engineer runs visible
408
+ - dedicated engineer-specific markers exist for:
409
+ - started
410
+ - sandbox resolved
411
+ - child spawned
412
+ - waiting on child
413
+ - completed
414
+ - failed
415
+ - progress
416
+
417
+ ### Phase 4: Optional direct child OTLP mode
418
+
419
+ Implement this phase only after the host-mediated relay is stable.
420
+
421
+ Requirements:
422
+
423
+ - explicit enablement
424
+ - health/reachability check
425
+ - propagate trace context
426
+ - fallback to file relay automatically if unavailable
427
+
428
+ This is an optimization, not the default design.
429
+
430
+ Status:
431
+
432
+ - Deferred
433
+
434
+ ## Acceptance Criteria
435
+
436
+ ### Host-mode checks
437
+
438
+ Within 1-2 seconds after start, Grafana/debug viewer should show:
439
+
440
+ - check started
441
+ - provider selected
442
+ - Probe AI request started
443
+ - delegated search started
444
+
445
+ ### Sandboxed checks
446
+
447
+ Within 1-3 seconds after child spawn, Grafana/debug viewer should show:
448
+
449
+ - parent sandbox lifecycle markers immediately
450
+ - streamed child spans beginning before child exit
451
+
452
+ ### Engineer in sandbox
453
+
454
+ Within 1-2 seconds after start:
455
+
456
+ - engineer task started
457
+ - sandbox selection
458
+ - child spawned / waiting marker
459
+
460
+ Deep child details may remain deferred initially.
461
+
462
+ ## Failure Modes To Handle
463
+
464
+ 1. Child file does not exist yet
465
+ - tailer should retry
466
+
467
+ 2. Child writes partial JSON line
468
+ - buffer until newline
469
+
470
+ 3. Child file never grows
471
+ - still keep parent lifecycle markers visible
472
+
473
+ 4. Read-only sandbox
474
+ - file relay may be unavailable
475
+ - parent must still emit sandbox lifecycle markers
476
+ - final child detail may be absent unless direct OTLP is available
477
+
478
+ 5. Parent process crashes during streaming
479
+ - already-emitted spans remain available
480
+ - child may keep writing file for postmortem recovery
481
+
482
+ 6. OTLP endpoint unavailable
483
+ - file/local behavior still works
484
+
485
+ ## Rollout Order
486
+
487
+ 1. lifecycle span helper
488
+ 2. host-mode Visor lifecycle markers
489
+ 3. Probe tracer adapter lifecycle markers + `createToolSpan`
490
+ 4. throttled flush in long-running frontends
491
+ 5. sandbox live tailer + incremental ingester
492
+ 6. final sweep + dedup
493
+ 7. engineer parent lifecycle markers
494
+ 8. optional direct child OTLP mode
495
+
496
+ ## Recommendation
497
+
498
+ Do not start with direct sandbox OTLP export.
499
+
500
+ Start with:
501
+
502
+ - lifecycle spans everywhere
503
+ - host-mediated streaming relay for sandbox children
504
+
505
+ That solves both:
506
+
507
+ - local file tracing
508
+ - remote Grafana/OTLP tracing
509
+
510
+ with one robust design that does not depend on sandbox network reachability.
@@ -209,6 +209,17 @@
209
209
  "graceful_restart": {
210
210
  "$ref": "#/definitions/GracefulRestartConfig",
211
211
  "description": "Graceful restart configuration"
212
+ },
213
+ "task_evaluate": {
214
+ "anyOf": [
215
+ {
216
+ "type": "boolean"
217
+ },
218
+ {
219
+ "$ref": "#/definitions/TaskEvaluateConfig"
220
+ }
221
+ ],
222
+ "description": "Automatically evaluate completed tasks using an LLM judge. Requires task_tracking to be enabled. Runs asynchronously after task completion. Set to `true` for defaults, or provide an object to configure."
212
223
  }
213
224
  },
214
225
  "required": [
@@ -1145,7 +1156,7 @@
1145
1156
  "description": "Arguments/inputs for the workflow"
1146
1157
  },
1147
1158
  "overrides": {
1148
- "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281%3E%3E",
1159
+ "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047%3E%3E",
1149
1160
  "description": "Override specific step configurations in the workflow"
1150
1161
  },
1151
1162
  "output_mapping": {
@@ -1161,7 +1172,7 @@
1161
1172
  "description": "Config file path - alternative to workflow ID (loads a Visor config file as workflow)"
1162
1173
  },
1163
1174
  "workflow_overrides": {
1164
- "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281%3E%3E",
1175
+ "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047%3E%3E",
1165
1176
  "description": "Alias for overrides - workflow step overrides (backward compatibility)"
1166
1177
  },
1167
1178
  "ref": {
@@ -1929,7 +1940,7 @@
1929
1940
  "description": "Custom output name (defaults to workflow name)"
1930
1941
  },
1931
1942
  "overrides": {
1932
- "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281%3E%3E",
1943
+ "$ref": "#/definitions/Record%3Cstring%2CPartial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047%3E%3E",
1933
1944
  "description": "Step overrides"
1934
1945
  },
1935
1946
  "output_mapping": {
@@ -1946,13 +1957,13 @@
1946
1957
  "^x-": {}
1947
1958
  }
1948
1959
  },
1949
- "Record<string,Partial<interface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281>>": {
1960
+ "Record<string,Partial<interface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047>>": {
1950
1961
  "type": "object",
1951
1962
  "additionalProperties": {
1952
- "$ref": "#/definitions/Partial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281%3E"
1963
+ "$ref": "#/definitions/Partial%3Cinterface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047%3E"
1953
1964
  }
1954
1965
  },
1955
- "Partial<interface-src_types_config.ts-15521-30601-src_types_config.ts-0-60281>": {
1966
+ "Partial<interface-src_types_config.ts-15521-30601-src_types_config.ts-0-61047>": {
1956
1967
  "type": "object",
1957
1968
  "additionalProperties": false
1958
1969
  },
@@ -3985,6 +3996,32 @@
3985
3996
  "patternProperties": {
3986
3997
  "^x-": {}
3987
3998
  }
3999
+ },
4000
+ "TaskEvaluateConfig": {
4001
+ "type": "object",
4002
+ "properties": {
4003
+ "enabled": {
4004
+ "type": "boolean",
4005
+ "description": "Enable auto-evaluation (default: true when config object is present)"
4006
+ },
4007
+ "model": {
4008
+ "type": "string",
4009
+ "description": "LLM model to use for evaluation (e.g. \"gemini-2.5-flash\", \"claude-sonnet-4-5\")"
4010
+ },
4011
+ "provider": {
4012
+ "type": "string",
4013
+ "description": "AI provider: google, openai, anthropic"
4014
+ },
4015
+ "prompt": {
4016
+ "type": "string",
4017
+ "description": "Custom system prompt for the evaluator (overrides the default evaluation prompt)"
4018
+ }
4019
+ },
4020
+ "additionalProperties": false,
4021
+ "description": "Configuration for automatic task evaluation via LLM judge.",
4022
+ "patternProperties": {
4023
+ "^x-": {}
4024
+ }
3988
4025
  }
3989
4026
  }
3990
4027
  }