@probelabs/visor 0.1.173 → 0.1.174

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/dist/docs/dashboards/README.md +73 -26
  2. package/dist/docs/dashboards/grafana-visor-overview.json +435 -15
  3. package/dist/docs/telemetry-reference.md +387 -0
  4. package/dist/docs/telemetry-setup.md +2 -0
  5. package/dist/generated/config-schema.d.ts +277 -7
  6. package/dist/generated/config-schema.d.ts.map +1 -1
  7. package/dist/generated/config-schema.json +3803 -0
  8. package/dist/index.js +364 -30
  9. package/dist/output/traces/{run-2026-03-09T15-21-25-122Z.ndjson → run-2026-03-09T15-44-30-340Z.ndjson} +84 -84
  10. package/dist/output/traces/{run-2026-03-09T15-22-05-255Z.ndjson → run-2026-03-09T15-45-10-778Z.ndjson} +1852 -1852
  11. package/dist/sdk/{a2a-frontend-VHOQ45CR.mjs → a2a-frontend-5L6H7ZVF.mjs} +3 -3
  12. package/dist/sdk/{a2a-frontend-7CYN3X7M.mjs → a2a-frontend-FUJRKHJB.mjs} +3 -3
  13. package/dist/sdk/{check-provider-registry-65GO3SCO.mjs → check-provider-registry-UM762L7S.mjs} +7 -7
  14. package/dist/sdk/{check-provider-registry-75O5XJMA.mjs → check-provider-registry-UPQNHHFF.mjs} +7 -7
  15. package/dist/sdk/{check-provider-registry-DBTS7OXY.mjs → check-provider-registry-YVQI4IOR.mjs} +7 -7
  16. package/dist/sdk/{chunk-Y5MEQW2W.mjs → chunk-2PL2YH3B.mjs} +19 -19
  17. package/dist/sdk/{chunk-4TV2CVVI.mjs → chunk-34QX63WK.mjs} +16 -14
  18. package/dist/sdk/chunk-34QX63WK.mjs.map +1 -0
  19. package/dist/sdk/{chunk-2HXOGRAS.mjs → chunk-65SHRIQF.mjs} +3 -3
  20. package/dist/sdk/{chunk-2HXOGRAS.mjs.map → chunk-65SHRIQF.mjs.map} +1 -1
  21. package/dist/sdk/{chunk-VVHALCWV.mjs → chunk-EFNNJIMY.mjs} +3 -3
  22. package/dist/sdk/{chunk-7CWJNSL2.mjs → chunk-EP7PQ4IX.mjs} +19 -19
  23. package/dist/sdk/{chunk-AV6KML52.mjs → chunk-I6GKXMQ5.mjs} +19 -19
  24. package/dist/sdk/{chunk-VK7FUBBU.mjs → chunk-UTBSBJFV.mjs} +3 -3
  25. package/dist/sdk/{chunk-HZEXCJGA.mjs → chunk-W4KCJM6J.mjs} +282 -8
  26. package/dist/sdk/chunk-W4KCJM6J.mjs.map +1 -0
  27. package/dist/sdk/{chunk-GVPMO6QD.mjs → chunk-WJIV7MKY.mjs} +3 -3
  28. package/dist/sdk/{chunk-WYFQQ445.mjs → chunk-YEARBXYT.mjs} +3 -3
  29. package/dist/sdk/{chunk-LTHHE6Z5.mjs → chunk-ZI3SEHWA.mjs} +4 -4
  30. package/dist/sdk/{chunk-LTHHE6Z5.mjs.map → chunk-ZI3SEHWA.mjs.map} +1 -1
  31. package/dist/sdk/{config-UXRHADSE.mjs → config-BVL3KFMB.mjs} +2 -2
  32. package/dist/sdk/{failure-condition-evaluator-Q4KNMX6F.mjs → failure-condition-evaluator-4O6BTC4Q.mjs} +4 -4
  33. package/dist/sdk/{failure-condition-evaluator-SNR5XLGN.mjs → failure-condition-evaluator-DL6H57NX.mjs} +4 -4
  34. package/dist/sdk/{github-frontend-56UQTA47.mjs → github-frontend-F2YCPK6H.mjs} +4 -4
  35. package/dist/sdk/{github-frontend-OOP26667.mjs → github-frontend-UXL73NKB.mjs} +4 -4
  36. package/dist/sdk/{host-QRGXXRDA.mjs → host-6TBS44ER.mjs} +3 -3
  37. package/dist/sdk/{host-VYPJ2UGQ.mjs → host-KJTXX76P.mjs} +3 -3
  38. package/dist/sdk/{metrics-FU2G5SZ2.mjs → metrics-JTOG2HNO.mjs} +2 -2
  39. package/dist/sdk/{routing-DBQHPP2O.mjs → routing-AWYB2YX3.mjs} +5 -5
  40. package/dist/sdk/{routing-ZAUCS3HJ.mjs → routing-GF2CF3JT.mjs} +5 -5
  41. package/dist/sdk/{schedule-tool-MHICRNCI.mjs → schedule-tool-IEY2CFLU.mjs} +7 -7
  42. package/dist/sdk/{schedule-tool-VRLX54J5.mjs → schedule-tool-SGCYDSHL.mjs} +7 -7
  43. package/dist/sdk/{schedule-tool-2FIVKPVJ.mjs → schedule-tool-UMDRCNO5.mjs} +7 -7
  44. package/dist/sdk/{schedule-tool-handler-3ES4WON7.mjs → schedule-tool-handler-5EPTHBLS.mjs} +7 -7
  45. package/dist/sdk/{schedule-tool-handler-FQGAWC5N.mjs → schedule-tool-handler-5QVUZ5EZ.mjs} +7 -7
  46. package/dist/sdk/{schedule-tool-handler-KYUHU4JR.mjs → schedule-tool-handler-HMEGLYJF.mjs} +7 -7
  47. package/dist/sdk/sdk.d.mts +137 -133
  48. package/dist/sdk/sdk.d.ts +137 -133
  49. package/dist/sdk/sdk.js +298 -22
  50. package/dist/sdk/sdk.js.map +1 -1
  51. package/dist/sdk/sdk.mjs +6 -6
  52. package/dist/sdk/{trace-helpers-UKMYHQIK.mjs → trace-helpers-6TEWG7RK.mjs} +3 -3
  53. package/dist/sdk/{trace-helpers-ZFDJ55SH.mjs → trace-helpers-FKM2MEDW.mjs} +3 -3
  54. package/dist/sdk/{workflow-check-provider-F5DTEX6E.mjs → workflow-check-provider-7VNIO6L5.mjs} +7 -7
  55. package/dist/sdk/{workflow-check-provider-VEOVTCVU.mjs → workflow-check-provider-EWMZEEES.mjs} +7 -7
  56. package/dist/sdk/{workflow-check-provider-5KQTXKWS.mjs → workflow-check-provider-VJ7VIMCQ.mjs} +7 -7
  57. package/dist/telemetry/metrics.d.ts.map +1 -1
  58. package/dist/traces/{run-2026-03-09T15-21-25-122Z.ndjson → run-2026-03-09T15-44-30-340Z.ndjson} +84 -84
  59. package/dist/traces/{run-2026-03-09T15-22-05-255Z.ndjson → run-2026-03-09T15-45-10-778Z.ndjson} +1852 -1852
  60. package/dist/types/config.d.ts +5 -4
  61. package/dist/types/config.d.ts.map +1 -1
  62. package/package.json +2 -2
  63. package/dist/sdk/chunk-4TV2CVVI.mjs.map +0 -1
  64. package/dist/sdk/chunk-HZEXCJGA.mjs.map +0 -1
  65. /package/dist/sdk/{a2a-frontend-7CYN3X7M.mjs.map → a2a-frontend-5L6H7ZVF.mjs.map} +0 -0
  66. /package/dist/sdk/{a2a-frontend-VHOQ45CR.mjs.map → a2a-frontend-FUJRKHJB.mjs.map} +0 -0
  67. /package/dist/sdk/{check-provider-registry-65GO3SCO.mjs.map → check-provider-registry-UM762L7S.mjs.map} +0 -0
  68. /package/dist/sdk/{check-provider-registry-75O5XJMA.mjs.map → check-provider-registry-UPQNHHFF.mjs.map} +0 -0
  69. /package/dist/sdk/{check-provider-registry-DBTS7OXY.mjs.map → check-provider-registry-YVQI4IOR.mjs.map} +0 -0
  70. /package/dist/sdk/{chunk-Y5MEQW2W.mjs.map → chunk-2PL2YH3B.mjs.map} +0 -0
  71. /package/dist/sdk/{chunk-VK7FUBBU.mjs.map → chunk-EFNNJIMY.mjs.map} +0 -0
  72. /package/dist/sdk/{chunk-7CWJNSL2.mjs.map → chunk-EP7PQ4IX.mjs.map} +0 -0
  73. /package/dist/sdk/{chunk-AV6KML52.mjs.map → chunk-I6GKXMQ5.mjs.map} +0 -0
  74. /package/dist/sdk/{chunk-VVHALCWV.mjs.map → chunk-UTBSBJFV.mjs.map} +0 -0
  75. /package/dist/sdk/{chunk-GVPMO6QD.mjs.map → chunk-WJIV7MKY.mjs.map} +0 -0
  76. /package/dist/sdk/{chunk-WYFQQ445.mjs.map → chunk-YEARBXYT.mjs.map} +0 -0
  77. /package/dist/sdk/{config-UXRHADSE.mjs.map → config-BVL3KFMB.mjs.map} +0 -0
  78. /package/dist/sdk/{failure-condition-evaluator-Q4KNMX6F.mjs.map → failure-condition-evaluator-4O6BTC4Q.mjs.map} +0 -0
  79. /package/dist/sdk/{failure-condition-evaluator-SNR5XLGN.mjs.map → failure-condition-evaluator-DL6H57NX.mjs.map} +0 -0
  80. /package/dist/sdk/{github-frontend-56UQTA47.mjs.map → github-frontend-F2YCPK6H.mjs.map} +0 -0
  81. /package/dist/sdk/{github-frontend-OOP26667.mjs.map → github-frontend-UXL73NKB.mjs.map} +0 -0
  82. /package/dist/sdk/{host-QRGXXRDA.mjs.map → host-6TBS44ER.mjs.map} +0 -0
  83. /package/dist/sdk/{host-VYPJ2UGQ.mjs.map → host-KJTXX76P.mjs.map} +0 -0
  84. /package/dist/sdk/{metrics-FU2G5SZ2.mjs.map → metrics-JTOG2HNO.mjs.map} +0 -0
  85. /package/dist/sdk/{routing-DBQHPP2O.mjs.map → routing-AWYB2YX3.mjs.map} +0 -0
  86. /package/dist/sdk/{routing-ZAUCS3HJ.mjs.map → routing-GF2CF3JT.mjs.map} +0 -0
  87. /package/dist/sdk/{schedule-tool-2FIVKPVJ.mjs.map → schedule-tool-IEY2CFLU.mjs.map} +0 -0
  88. /package/dist/sdk/{schedule-tool-MHICRNCI.mjs.map → schedule-tool-SGCYDSHL.mjs.map} +0 -0
  89. /package/dist/sdk/{schedule-tool-VRLX54J5.mjs.map → schedule-tool-UMDRCNO5.mjs.map} +0 -0
  90. /package/dist/sdk/{schedule-tool-handler-3ES4WON7.mjs.map → schedule-tool-handler-5EPTHBLS.mjs.map} +0 -0
  91. /package/dist/sdk/{schedule-tool-handler-FQGAWC5N.mjs.map → schedule-tool-handler-5QVUZ5EZ.mjs.map} +0 -0
  92. /package/dist/sdk/{schedule-tool-handler-KYUHU4JR.mjs.map → schedule-tool-handler-HMEGLYJF.mjs.map} +0 -0
  93. /package/dist/sdk/{trace-helpers-UKMYHQIK.mjs.map → trace-helpers-6TEWG7RK.mjs.map} +0 -0
  94. /package/dist/sdk/{trace-helpers-ZFDJ55SH.mjs.map → trace-helpers-FKM2MEDW.mjs.map} +0 -0
  95. /package/dist/sdk/{workflow-check-provider-5KQTXKWS.mjs.map → workflow-check-provider-7VNIO6L5.mjs.map} +0 -0
  96. /package/dist/sdk/{workflow-check-provider-F5DTEX6E.mjs.map → workflow-check-provider-EWMZEEES.mjs.map} +0 -0
  97. /package/dist/sdk/{workflow-check-provider-VEOVTCVU.mjs.map → workflow-check-provider-VJ7VIMCQ.mjs.map} +0 -0
@@ -1,37 +1,84 @@
1
- # Dashboards for Visor Telemetry
1
+ # Grafana Dashboards for Visor
2
2
 
3
- This folder contains example Grafana dashboards to visualize Visor traces and metrics exported via OpenTelemetry (Tempo + Prometheus).
3
+ Pre-built Grafana dashboards for visualizing Visor telemetry data exported via OpenTelemetry.
4
4
 
5
- ## What's Included
5
+ ## Dashboards
6
6
 
7
- - **grafana-visor-overview.json** - Run/Check overview dashboard with:
8
- - Check duration histogram (95th percentile)
9
- - Issues by severity rate panel
10
- - **grafana-visor-diagrams.json** - Diagram telemetry dashboard with:
11
- - Diagram blocks by origin (content vs issue)
7
+ ### Visor Overview (`grafana-visor-overview.json`)
8
+
9
+ The main dashboard with five sections:
10
+
11
+ **Runs & Users** — Top-level stats and trends:
12
+ - Total runs, unique users, avg duration, success rate
13
+ - AI call totals and avg AI calls per run
14
+ - Runs over time by source (CLI/Slack/TUI)
15
+ - Run duration percentiles (P50/P95/P99)
16
+ - Tables: runs by user, runs by workflow
17
+
18
+ **Check Performance** — Per-check metrics:
19
+ - Check duration P95 by check ID
20
+ - Issues by severity over time
21
+ - Top 10 slowest checks (bar gauge)
22
+ - Issues distribution by check (pie chart)
23
+
24
+ **AI Provider** — AI usage analytics:
25
+ - AI calls over time by model
26
+ - AI calls per run distribution (P50/P95)
27
+ - AI calls by check (table)
28
+ - AI calls by model (pie chart)
29
+
30
+ **Failure Conditions** — Health signals:
31
+ - fail_if trigger rate by check and scope
32
+ - Active concurrent checks (live gauge)
33
+ - Diagram blocks emitted
34
+
35
+ **Traces** — Recent `visor.run` traces from Tempo with drill-down
36
+
37
+ ### Visor Diagrams (`grafana-visor-diagrams.json`)
38
+
39
+ Lightweight dashboard for Mermaid diagram block telemetry.
12
40
 
13
41
  ## Setup
14
42
 
15
- 1. Deploy Grafana + Tempo + Prometheus (or Grafana Cloud).
16
- 2. Configure your OTel Collector to receive OTLP traces/metrics and forward to Tempo/Prometheus.
17
- 3. Enable telemetry in Visor CI:
18
- ```bash
19
- export VISOR_TELEMETRY_ENABLED=true
20
- export VISOR_TELEMETRY_SINK=otlp
21
- export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://collector.example.com/v1/traces
22
- export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer <token>"
23
- ```
24
- 4. Import the JSON dashboards into Grafana.
25
- 5. Update the data source UIDs in each dashboard to match your Tempo and Prometheus data sources (replace `PROM_DS` with your Prometheus data source UID).
43
+ ### With Grafana LGTM (recommended for local dev)
26
44
 
27
- ## Notes
45
+ ```bash
46
+ docker run -d --name grafana-otel \
47
+ -p 3000:3000 -p 4317:4317 -p 4318:4318 \
48
+ -v grafana-otel-data:/data \
49
+ grafana/otel-lgtm:latest
50
+ ```
28
51
 
29
- - Spans appear in Tempo's Explore/Trace view (service.name=visor).
30
- - Metrics are emitted when the OTLP metrics exporter is configured.
31
- - The dashboards use placeholder data source UIDs (`PROM_DS`) that need to be updated after import.
52
+ Grafana is at `http://localhost:3000` (admin/admin). Data sources are pre-configured.
32
53
 
33
- ## Related Documentation
54
+ ### Import the Dashboard
34
55
 
35
- - [Telemetry Setup Guide](../telemetry-setup.md) - Complete setup instructions for enabling telemetry
36
- - [Telemetry RFC](../telemetry-tracing-rfc.md) - Design rationale and architecture details
56
+ 1. Open Grafana → Dashboards → Import
57
+ 2. Upload `grafana-visor-overview.json`
58
+ 3. Select your Prometheus and Tempo data sources when prompted
59
+ 4. Click Import
60
+
61
+ ### With standalone Grafana
62
+
63
+ Update the data source UIDs in the JSON:
64
+ - Replace `${DS_PROMETHEUS}` references with your Prometheus data source UID
65
+ - Replace `${DS_TEMPO}` references with your Tempo data source UID
66
+
67
+ ### Enable Visor Telemetry
68
+
69
+ ```bash
70
+ export VISOR_TELEMETRY_ENABLED=true
71
+ export VISOR_TELEMETRY_SINK=otlp
72
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
73
+ ```
74
+
75
+ ## Template Variables
76
+
77
+ The overview dashboard includes filter variables:
78
+ - **Source** — Filter by run source (`cli`, `slack`, `tui`)
79
+ - **Workflow** — Filter by workflow/check combination
80
+
81
+ ## Related Documentation
37
82
 
83
+ - [Telemetry Reference](../telemetry-reference.md) — Complete list of all spans, metrics, and events
84
+ - [Telemetry Setup Guide](../telemetry-setup.md) — How to enable and configure telemetry
@@ -1,33 +1,453 @@
1
1
  {
2
- "__inputs": [],
3
- "title": "Visor Overview",
2
+ "__inputs": [
3
+ {
4
+ "name": "DS_PROMETHEUS",
5
+ "label": "Prometheus",
6
+ "description": "Prometheus data source for Visor metrics",
7
+ "type": "datasource",
8
+ "pluginId": "prometheus",
9
+ "pluginName": "Prometheus"
10
+ },
11
+ {
12
+ "name": "DS_TEMPO",
13
+ "label": "Tempo",
14
+ "description": "Tempo data source for Visor traces",
15
+ "type": "datasource",
16
+ "pluginId": "tempo",
17
+ "pluginName": "Tempo"
18
+ }
19
+ ],
20
+ "__requires": [
21
+ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "10.0.0" },
22
+ { "type": "datasource", "id": "prometheus", "name": "Prometheus" },
23
+ { "type": "datasource", "id": "tempo", "name": "Tempo" }
24
+ ],
25
+ "title": "Visor — Overview",
26
+ "uid": "visor-overview",
27
+ "description": "Visor workflow execution overview: runs, users, AI calls, check performance, and issues.",
28
+ "tags": ["visor", "otel", "observability"],
4
29
  "timezone": "browser",
30
+ "editable": true,
31
+ "version": 2,
32
+ "time": { "from": "now-24h", "to": "now" },
33
+ "refresh": "30s",
34
+ "templating": {
35
+ "list": [
36
+ {
37
+ "name": "source",
38
+ "label": "Source",
39
+ "type": "query",
40
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
41
+ "query": "label_values(visor_run_total, visor_run_source)",
42
+ "includeAll": true,
43
+ "current": { "text": "All", "value": "$__all" },
44
+ "refresh": 2,
45
+ "multi": true
46
+ },
47
+ {
48
+ "name": "workflow",
49
+ "label": "Workflow",
50
+ "type": "query",
51
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
52
+ "query": "label_values(visor_run_total, visor_run_workflow)",
53
+ "includeAll": true,
54
+ "current": { "text": "All", "value": "$__all" },
55
+ "refresh": 2,
56
+ "multi": true
57
+ }
58
+ ]
59
+ },
5
60
  "panels": [
61
+ {
62
+ "type": "row",
63
+ "title": "Runs & Users",
64
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
65
+ "collapsed": false
66
+ },
67
+ {
68
+ "type": "stat",
69
+ "title": "Total Runs",
70
+ "description": "Total workflow executions in the selected time range",
71
+ "targets": [
72
+ {
73
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
74
+ "expr": "sum(increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\"}[$__range]))",
75
+ "legendFormat": "Runs"
76
+ }
77
+ ],
78
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }] } } },
79
+ "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
80
+ "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }
81
+ },
82
+ {
83
+ "type": "stat",
84
+ "title": "Unique Users",
85
+ "description": "Distinct user IDs that triggered runs",
86
+ "targets": [
87
+ {
88
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
89
+ "expr": "count(count by (visor_run_user_id) (visor_run_total{visor_run_source=~\"$source\", visor_run_user_id!=\"\"}))",
90
+ "legendFormat": "Users"
91
+ }
92
+ ],
93
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "green", "value": null }] } } },
94
+ "options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
95
+ "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }
96
+ },
97
+ {
98
+ "type": "stat",
99
+ "title": "Avg Run Duration",
100
+ "description": "Average workflow execution time",
101
+ "targets": [
102
+ {
103
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
104
+ "expr": "sum(increase(visor_run_duration_ms_sum{visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_duration_ms_count{visor_run_source=~\"$source\"}[$__range]))",
105
+ "legendFormat": "Avg"
106
+ }
107
+ ],
108
+ "fieldConfig": { "defaults": { "unit": "ms", "thresholds": { "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 60000 }, { "color": "red", "value": 300000 }] } } },
109
+ "options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
110
+ "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }
111
+ },
112
+ {
113
+ "type": "stat",
114
+ "title": "Success Rate",
115
+ "description": "Percentage of runs that completed successfully",
116
+ "targets": [
117
+ {
118
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
119
+ "expr": "sum(increase(visor_run_duration_ms_count{visor_run_success=\"true\", visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_duration_ms_count{visor_run_source=~\"$source\"}[$__range])) * 100",
120
+ "legendFormat": "Success %"
121
+ }
122
+ ],
123
+ "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "steps": [{ "color": "red", "value": null }, { "color": "yellow", "value": 80 }, { "color": "green", "value": 95 }] } } },
124
+ "options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
125
+ "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }
126
+ },
127
+ {
128
+ "type": "stat",
129
+ "title": "Total AI Calls",
130
+ "description": "Total AI provider invocations",
131
+ "targets": [
132
+ {
133
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
134
+ "expr": "sum(increase(visor_ai_call_total{visor_run_source=~\"$source\"}[$__range]))",
135
+ "legendFormat": "AI Calls"
136
+ }
137
+ ],
138
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "purple", "value": null }] } } },
139
+ "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
140
+ "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }
141
+ },
142
+ {
143
+ "type": "stat",
144
+ "title": "Avg AI Calls / Run",
145
+ "description": "Average number of AI calls per workflow run",
146
+ "targets": [
147
+ {
148
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
149
+ "expr": "sum(increase(visor_run_ai_calls_sum{visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_ai_calls_count{visor_run_source=~\"$source\"}[$__range]))",
150
+ "legendFormat": "Avg AI/Run"
151
+ }
152
+ ],
153
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "purple", "value": null }] } } },
154
+ "options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
155
+ "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }
156
+ },
6
157
  {
7
158
  "type": "timeseries",
8
- "title": "Check Duration (ms)",
159
+ "title": "Runs Over Time",
160
+ "description": "Run count over time, broken down by source",
161
+ "targets": [
162
+ {
163
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
164
+ "expr": "sum by (visor_run_source) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\"}[5m]))",
165
+ "legendFormat": "{{visor_run_source}}"
166
+ }
167
+ ],
168
+ "fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
169
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 }
170
+ },
171
+ {
172
+ "type": "timeseries",
173
+ "title": "Run Duration (P50 / P95 / P99)",
174
+ "description": "Run duration percentiles over time",
175
+ "targets": [
176
+ {
177
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
178
+ "expr": "histogram_quantile(0.50, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
179
+ "legendFormat": "P50"
180
+ },
181
+ {
182
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
183
+ "expr": "histogram_quantile(0.95, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
184
+ "legendFormat": "P95"
185
+ },
186
+ {
187
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
188
+ "expr": "histogram_quantile(0.99, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
189
+ "legendFormat": "P99"
190
+ }
191
+ ],
192
+ "fieldConfig": { "defaults": { "unit": "ms", "custom": { "fillOpacity": 10 } } },
193
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 }
194
+ },
195
+ {
196
+ "type": "table",
197
+ "title": "Runs by User",
198
+ "description": "Run counts and average duration per user",
9
199
  "targets": [
10
200
  {
11
- "datasource": { "type": "prometheus", "uid": "PROM_DS" },
12
- "expr": "histogram_quantile(0.95, sum(rate(visor_check_duration_ms_bucket[5m])) by (le))"
201
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
202
+ "expr": "sum by (visor_run_user_id, visor_run_user_name) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_user_id!=\"\"}[$__range]))",
203
+ "legendFormat": "",
204
+ "format": "table",
205
+ "instant": true
13
206
  }
14
207
  ],
15
- "fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
16
- "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }
208
+ "fieldConfig": {
209
+ "overrides": [
210
+ { "matcher": { "id": "byName", "options": "visor_run_user_name" }, "properties": [{ "id": "displayName", "value": "User" }] },
211
+ { "matcher": { "id": "byName", "options": "visor_run_user_id" }, "properties": [{ "id": "displayName", "value": "User ID" }] },
212
+ { "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "Runs" }] }
213
+ ]
214
+ },
215
+ "options": { "sortBy": [{ "displayName": "Runs", "desc": true }] },
216
+ "transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
217
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 }
218
+ },
219
+ {
220
+ "type": "table",
221
+ "title": "Runs by Workflow",
222
+ "description": "Run counts per workflow/check combination",
223
+ "targets": [
224
+ {
225
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
226
+ "expr": "sum by (visor_run_workflow) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\", visor_run_workflow!=\"\"}[$__range]))",
227
+ "legendFormat": "",
228
+ "format": "table",
229
+ "instant": true
230
+ }
231
+ ],
232
+ "fieldConfig": {
233
+ "overrides": [
234
+ { "matcher": { "id": "byName", "options": "visor_run_workflow" }, "properties": [{ "id": "displayName", "value": "Workflow" }] },
235
+ { "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "Runs" }] }
236
+ ]
237
+ },
238
+ "options": { "sortBy": [{ "displayName": "Runs", "desc": true }] },
239
+ "transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
240
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 13 }
241
+ },
242
+ {
243
+ "type": "row",
244
+ "title": "Check Performance",
245
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
246
+ "collapsed": false
247
+ },
248
+ {
249
+ "type": "timeseries",
250
+ "title": "Check Duration by Check (P95)",
251
+ "description": "95th percentile check execution time per check ID",
252
+ "targets": [
253
+ {
254
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
255
+ "expr": "histogram_quantile(0.95, sum(rate(visor_check_duration_ms_bucket[5m])) by (le, visor_check_id))",
256
+ "legendFormat": "{{visor_check_id}}"
257
+ }
258
+ ],
259
+ "fieldConfig": { "defaults": { "unit": "ms", "custom": { "fillOpacity": 10 } } },
260
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 }
261
+ },
262
+ {
263
+ "type": "timeseries",
264
+ "title": "Issues by Severity",
265
+ "description": "Rate of issues produced, broken down by severity",
266
+ "targets": [
267
+ {
268
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
269
+ "expr": "sum by (severity) (increase(visor_check_issues_total[5m]))",
270
+ "legendFormat": "{{severity}}"
271
+ }
272
+ ],
273
+ "fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
274
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 }
275
+ },
276
+ {
277
+ "type": "bargauge",
278
+ "title": "Slowest Checks (Avg Duration)",
279
+ "description": "Average execution time per check, sorted by slowest",
280
+ "targets": [
281
+ {
282
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
283
+ "expr": "topk(10, sum by (visor_check_id) (increase(visor_check_duration_ms_sum[$__range])) / sum by (visor_check_id) (increase(visor_check_duration_ms_count[$__range])))",
284
+ "legendFormat": "{{visor_check_id}}"
285
+ }
286
+ ],
287
+ "fieldConfig": { "defaults": { "unit": "ms", "thresholds": { "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 30000 }, { "color": "red", "value": 120000 }] } } },
288
+ "options": { "orientation": "horizontal", "displayMode": "gradient" },
289
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 }
290
+ },
291
+ {
292
+ "type": "piechart",
293
+ "title": "Issues by Check",
294
+ "description": "Distribution of issues across checks",
295
+ "targets": [
296
+ {
297
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
298
+ "expr": "sum by (visor_check_id) (increase(visor_check_issues_total[$__range]))",
299
+ "legendFormat": "{{visor_check_id}}"
300
+ }
301
+ ],
302
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 }
303
+ },
304
+ {
305
+ "type": "row",
306
+ "title": "AI Provider",
307
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 },
308
+ "collapsed": false
309
+ },
310
+ {
311
+ "type": "timeseries",
312
+ "title": "AI Calls Over Time",
313
+ "description": "AI provider call rate, broken down by model",
314
+ "targets": [
315
+ {
316
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
317
+ "expr": "sum by (visor_ai_model) (increase(visor_ai_call_total{visor_run_source=~\"$source\"}[5m]))",
318
+ "legendFormat": "{{visor_ai_model}}"
319
+ }
320
+ ],
321
+ "fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
322
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 39 }
323
+ },
324
+ {
325
+ "type": "timeseries",
326
+ "title": "AI Calls per Run (Distribution)",
327
+ "description": "P50 and P95 of AI calls per run",
328
+ "targets": [
329
+ {
330
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
331
+ "expr": "histogram_quantile(0.50, sum(rate(visor_run_ai_calls_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
332
+ "legendFormat": "P50"
333
+ },
334
+ {
335
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
336
+ "expr": "histogram_quantile(0.95, sum(rate(visor_run_ai_calls_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
337
+ "legendFormat": "P95"
338
+ }
339
+ ],
340
+ "fieldConfig": { "defaults": { "custom": { "fillOpacity": 10 } } },
341
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 39 }
342
+ },
343
+ {
344
+ "type": "table",
345
+ "title": "AI Calls by Check",
346
+ "description": "Which checks trigger the most AI calls",
347
+ "targets": [
348
+ {
349
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
350
+ "expr": "topk(20, sum by (visor_check_id) (increase(visor_ai_call_total[$__range])))",
351
+ "legendFormat": "",
352
+ "format": "table",
353
+ "instant": true
354
+ }
355
+ ],
356
+ "fieldConfig": {
357
+ "overrides": [
358
+ { "matcher": { "id": "byName", "options": "visor_check_id" }, "properties": [{ "id": "displayName", "value": "Check" }] },
359
+ { "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "AI Calls" }] }
360
+ ]
361
+ },
362
+ "options": { "sortBy": [{ "displayName": "AI Calls", "desc": true }] },
363
+ "transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
364
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 47 }
365
+ },
366
+ {
367
+ "type": "piechart",
368
+ "title": "AI Calls by Model",
369
+ "description": "Distribution of AI calls across models",
370
+ "targets": [
371
+ {
372
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
373
+ "expr": "sum by (visor_ai_model) (increase(visor_ai_call_total[$__range]))",
374
+ "legendFormat": "{{visor_ai_model}}"
375
+ }
376
+ ],
377
+ "gridPos": { "h": 8, "w": 12, "x": 12, "y": 47 }
378
+ },
379
+ {
380
+ "type": "row",
381
+ "title": "Failure Conditions",
382
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 },
383
+ "collapsed": false
384
+ },
385
+ {
386
+ "type": "timeseries",
387
+ "title": "fail_if Triggered",
388
+ "description": "Rate of fail_if conditions being triggered",
389
+ "targets": [
390
+ {
391
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
392
+ "expr": "sum by (visor_check_id, scope) (increase(visor_fail_if_triggered_total[5m]))",
393
+ "legendFormat": "{{visor_check_id}} ({{scope}})"
394
+ }
395
+ ],
396
+ "fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50 } } },
397
+ "gridPos": { "h": 8, "w": 12, "x": 0, "y": 56 }
398
+ },
399
+ {
400
+ "type": "stat",
401
+ "title": "Active Checks (Now)",
402
+ "description": "Currently running checks",
403
+ "targets": [
404
+ {
405
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
406
+ "expr": "sum(visor_run_active_checks)",
407
+ "legendFormat": "Active"
408
+ }
409
+ ],
410
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 }] } } },
411
+ "options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
412
+ "gridPos": { "h": 8, "w": 6, "x": 12, "y": 56 }
17
413
  },
18
414
  {
19
415
  "type": "stat",
20
- "title": "Issues by Severity (rate)",
416
+ "title": "Diagram Blocks",
417
+ "description": "Total Mermaid diagram blocks emitted",
21
418
  "targets": [
22
419
  {
23
- "datasource": { "type": "prometheus", "uid": "PROM_DS" },
24
- "expr": "sum(rate(visor_check_issues_total[5m])) by (severity)"
420
+ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
421
+ "expr": "sum(increase(visor_diagram_blocks_total[$__range]))",
422
+ "legendFormat": "Blocks"
25
423
  }
26
424
  ],
27
- "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }
425
+ "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }] } } },
426
+ "options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
427
+ "gridPos": { "h": 8, "w": 6, "x": 18, "y": 56 }
428
+ },
429
+ {
430
+ "type": "row",
431
+ "title": "Traces",
432
+ "gridPos": { "h": 1, "w": 24, "x": 0, "y": 64 },
433
+ "collapsed": false
434
+ },
435
+ {
436
+ "type": "traces",
437
+ "title": "Recent Traces",
438
+ "description": "Recent visor.run traces from Tempo",
439
+ "targets": [
440
+ {
441
+ "datasource": { "type": "tempo", "uid": "${DS_TEMPO}" },
442
+ "queryType": "traceqlSearch",
443
+ "filters": [
444
+ { "id": "service-name", "tag": "service.name", "operator": "=", "value": ["visor"], "scope": "resource" },
445
+ { "id": "span-name", "tag": "name", "operator": "=", "value": ["visor.run"], "scope": "span" }
446
+ ],
447
+ "limit": 20
448
+ }
449
+ ],
450
+ "gridPos": { "h": 10, "w": 24, "x": 0, "y": 65 }
28
451
  }
29
- ],
30
- "version": 1,
31
- "editable": true
452
+ ]
32
453
  }
33
-