@probelabs/visor 0.1.173-ee → 0.1.174-ee
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/docs/dashboards/README.md +73 -26
- package/dist/docs/dashboards/grafana-visor-overview.json +435 -15
- package/dist/docs/telemetry-reference.md +387 -0
- package/dist/docs/telemetry-setup.md +2 -0
- package/dist/generated/config-schema.d.ts +277 -7
- package/dist/generated/config-schema.d.ts.map +1 -1
- package/dist/generated/config-schema.json +3803 -0
- package/dist/index.js +364 -30
- package/dist/sdk/{a2a-frontend-7CYN3X7M.mjs → a2a-frontend-FUJRKHJB.mjs} +3 -3
- package/dist/sdk/{check-provider-registry-ZMSJFQSU.mjs → check-provider-registry-53C2ZIXJ.mjs} +7 -7
- package/dist/sdk/{check-provider-registry-75O5XJMA.mjs → check-provider-registry-UPQNHHFF.mjs} +7 -7
- package/dist/sdk/{chunk-Y5MEQW2W.mjs → chunk-2PL2YH3B.mjs} +19 -19
- package/dist/sdk/{chunk-4TV2CVVI.mjs → chunk-34QX63WK.mjs} +16 -14
- package/dist/sdk/chunk-34QX63WK.mjs.map +1 -0
- package/dist/sdk/{chunk-2HXOGRAS.mjs → chunk-65SHRIQF.mjs} +3 -3
- package/dist/sdk/{chunk-2HXOGRAS.mjs.map → chunk-65SHRIQF.mjs.map} +1 -1
- package/dist/sdk/{chunk-VK7FUBBU.mjs → chunk-EFNNJIMY.mjs} +3 -3
- package/dist/sdk/{chunk-CMEYDK6S.mjs → chunk-GKSSG5IM.mjs} +19 -19
- package/dist/sdk/{chunk-HZEXCJGA.mjs → chunk-W4KCJM6J.mjs} +282 -8
- package/dist/sdk/chunk-W4KCJM6J.mjs.map +1 -0
- package/dist/sdk/{chunk-WYFQQ445.mjs → chunk-WJIV7MKY.mjs} +3 -3
- package/dist/sdk/{config-UXRHADSE.mjs → config-BVL3KFMB.mjs} +2 -2
- package/dist/sdk/{failure-condition-evaluator-SNR5XLGN.mjs → failure-condition-evaluator-DL6H57NX.mjs} +4 -4
- package/dist/sdk/{github-frontend-56UQTA47.mjs → github-frontend-F2YCPK6H.mjs} +4 -4
- package/dist/sdk/{host-QRGXXRDA.mjs → host-6TBS44ER.mjs} +3 -3
- package/dist/sdk/{host-TAGO66M6.mjs → host-LRWIKURZ.mjs} +3 -3
- package/dist/sdk/{metrics-FU2G5SZ2.mjs → metrics-JTOG2HNO.mjs} +2 -2
- package/dist/sdk/{routing-ZAUCS3HJ.mjs → routing-GF2CF3JT.mjs} +5 -5
- package/dist/sdk/{schedule-tool-LPBO3TNY.mjs → schedule-tool-5KDBDCFO.mjs} +7 -7
- package/dist/sdk/{schedule-tool-VRLX54J5.mjs → schedule-tool-UMDRCNO5.mjs} +7 -7
- package/dist/sdk/{schedule-tool-handler-A7YKDVLZ.mjs → schedule-tool-handler-5EPTHBLS.mjs} +7 -7
- package/dist/sdk/{schedule-tool-handler-KYUHU4JR.mjs → schedule-tool-handler-MUF5V36L.mjs} +7 -7
- package/dist/sdk/sdk.d.mts +137 -133
- package/dist/sdk/sdk.d.ts +137 -133
- package/dist/sdk/sdk.js +297 -21
- package/dist/sdk/sdk.js.map +1 -1
- package/dist/sdk/sdk.mjs +6 -6
- package/dist/sdk/{trace-helpers-UKMYHQIK.mjs → trace-helpers-FKM2MEDW.mjs} +3 -3
- package/dist/sdk/{workflow-check-provider-FXTRMKJ2.mjs → workflow-check-provider-EWMZEEES.mjs} +7 -7
- package/dist/sdk/{workflow-check-provider-VEOVTCVU.mjs → workflow-check-provider-RQUCBAYY.mjs} +7 -7
- package/dist/telemetry/metrics.d.ts.map +1 -1
- package/dist/types/config.d.ts +5 -4
- package/dist/types/config.d.ts.map +1 -1
- package/package.json +2 -2
- package/dist/sdk/chunk-4TV2CVVI.mjs.map +0 -1
- package/dist/sdk/chunk-HZEXCJGA.mjs.map +0 -1
- /package/dist/sdk/{a2a-frontend-7CYN3X7M.mjs.map → a2a-frontend-FUJRKHJB.mjs.map} +0 -0
- /package/dist/sdk/{check-provider-registry-75O5XJMA.mjs.map → check-provider-registry-53C2ZIXJ.mjs.map} +0 -0
- /package/dist/sdk/{check-provider-registry-ZMSJFQSU.mjs.map → check-provider-registry-UPQNHHFF.mjs.map} +0 -0
- /package/dist/sdk/{chunk-Y5MEQW2W.mjs.map → chunk-2PL2YH3B.mjs.map} +0 -0
- /package/dist/sdk/{chunk-VK7FUBBU.mjs.map → chunk-EFNNJIMY.mjs.map} +0 -0
- /package/dist/sdk/{chunk-CMEYDK6S.mjs.map → chunk-GKSSG5IM.mjs.map} +0 -0
- /package/dist/sdk/{chunk-WYFQQ445.mjs.map → chunk-WJIV7MKY.mjs.map} +0 -0
- /package/dist/sdk/{config-UXRHADSE.mjs.map → config-BVL3KFMB.mjs.map} +0 -0
- /package/dist/sdk/{failure-condition-evaluator-SNR5XLGN.mjs.map → failure-condition-evaluator-DL6H57NX.mjs.map} +0 -0
- /package/dist/sdk/{github-frontend-56UQTA47.mjs.map → github-frontend-F2YCPK6H.mjs.map} +0 -0
- /package/dist/sdk/{host-QRGXXRDA.mjs.map → host-6TBS44ER.mjs.map} +0 -0
- /package/dist/sdk/{host-TAGO66M6.mjs.map → host-LRWIKURZ.mjs.map} +0 -0
- /package/dist/sdk/{metrics-FU2G5SZ2.mjs.map → metrics-JTOG2HNO.mjs.map} +0 -0
- /package/dist/sdk/{routing-ZAUCS3HJ.mjs.map → routing-GF2CF3JT.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-LPBO3TNY.mjs.map → schedule-tool-5KDBDCFO.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-VRLX54J5.mjs.map → schedule-tool-UMDRCNO5.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-handler-A7YKDVLZ.mjs.map → schedule-tool-handler-5EPTHBLS.mjs.map} +0 -0
- /package/dist/sdk/{schedule-tool-handler-KYUHU4JR.mjs.map → schedule-tool-handler-MUF5V36L.mjs.map} +0 -0
- /package/dist/sdk/{trace-helpers-UKMYHQIK.mjs.map → trace-helpers-FKM2MEDW.mjs.map} +0 -0
- /package/dist/sdk/{workflow-check-provider-FXTRMKJ2.mjs.map → workflow-check-provider-EWMZEEES.mjs.map} +0 -0
- /package/dist/sdk/{workflow-check-provider-VEOVTCVU.mjs.map → workflow-check-provider-RQUCBAYY.mjs.map} +0 -0
|
@@ -1,37 +1,84 @@
|
|
|
1
|
-
# Dashboards for Visor
|
|
1
|
+
# Grafana Dashboards for Visor
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Pre-built Grafana dashboards for visualizing Visor telemetry data exported via OpenTelemetry.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Dashboards
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
### Visor Overview (`grafana-visor-overview.json`)
|
|
8
|
+
|
|
9
|
+
The main dashboard has five sections:
|
|
10
|
+
|
|
11
|
+
**Runs & Users** — Top-level stats and trends:
|
|
12
|
+
- Total runs, unique users, avg duration, success rate
|
|
13
|
+
- AI call totals and avg AI calls per run
|
|
14
|
+
- Runs over time by source (CLI/Slack/TUI)
|
|
15
|
+
- Run duration percentiles (P50/P95/P99)
|
|
16
|
+
- Tables: runs by user, runs by workflow
|
|
17
|
+
|
|
18
|
+
**Check Performance** — Per-check metrics:
|
|
19
|
+
- Check duration P95 by check ID
|
|
20
|
+
- Issues by severity over time
|
|
21
|
+
- Top 10 slowest checks (bar gauge)
|
|
22
|
+
- Issues distribution by check (pie chart)
|
|
23
|
+
|
|
24
|
+
**AI Provider** — AI usage analytics:
|
|
25
|
+
- AI calls over time by model
|
|
26
|
+
- AI calls per run distribution (P50/P95)
|
|
27
|
+
- AI calls by check (table)
|
|
28
|
+
- AI calls by model (pie chart)
|
|
29
|
+
|
|
30
|
+
**Failure Conditions** — Health signals:
|
|
31
|
+
- fail_if trigger rate by check and scope
|
|
32
|
+
- Active concurrent checks (live gauge)
|
|
33
|
+
- Diagram blocks emitted
|
|
34
|
+
|
|
35
|
+
**Traces** — Recent `visor.run` traces from Tempo with drill-down
|
|
36
|
+
|
|
37
|
+
### Visor Diagrams (`grafana-visor-diagrams.json`)
|
|
38
|
+
|
|
39
|
+
Lightweight dashboard for Mermaid diagram block telemetry.
|
|
12
40
|
|
|
13
41
|
## Setup
|
|
14
42
|
|
|
15
|
-
|
|
16
|
-
2. Configure your OTel Collector to receive OTLP traces/metrics and forward to Tempo/Prometheus.
|
|
17
|
-
3. Enable telemetry in Visor CI:
|
|
18
|
-
```bash
|
|
19
|
-
export VISOR_TELEMETRY_ENABLED=true
|
|
20
|
-
export VISOR_TELEMETRY_SINK=otlp
|
|
21
|
-
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=https://collector.example.com/v1/traces
|
|
22
|
-
export OTEL_EXPORTER_OTLP_HEADERS="Authorization=Bearer <token>"
|
|
23
|
-
```
|
|
24
|
-
4. Import the JSON dashboards into Grafana.
|
|
25
|
-
5. Update the data source UIDs in each dashboard to match your Tempo and Prometheus data sources (replace `PROM_DS` with your Prometheus data source UID).
|
|
43
|
+
### With Grafana LGTM (recommended for local dev)
|
|
26
44
|
|
|
27
|
-
|
|
45
|
+
```bash
|
|
46
|
+
docker run -d --name grafana-otel \
|
|
47
|
+
-p 3000:3000 -p 4317:4317 -p 4318:4318 \
|
|
48
|
+
-v grafana-otel-data:/data \
|
|
49
|
+
grafana/otel-lgtm:latest
|
|
50
|
+
```
|
|
28
51
|
|
|
29
|
-
|
|
30
|
-
- Metrics are emitted when the OTLP metrics exporter is configured.
|
|
31
|
-
- The dashboards use placeholder data source UIDs (`PROM_DS`) that need to be updated after import.
|
|
52
|
+
Grafana is available at `http://localhost:3000` (default login: admin/admin). Data sources are pre-configured.
|
|
32
53
|
|
|
33
|
-
|
|
54
|
+
### Import the Dashboard
|
|
34
55
|
|
|
35
|
-
|
|
36
|
-
|
|
56
|
+
1. Open Grafana → Dashboards → Import
|
|
57
|
+
2. Upload `grafana-visor-overview.json`
|
|
58
|
+
3. Select your Prometheus and Tempo data sources when prompted
|
|
59
|
+
4. Click Import
|
|
60
|
+
|
|
61
|
+
### With standalone Grafana
|
|
62
|
+
|
|
63
|
+
Update the data source UIDs in the JSON:
|
|
64
|
+
- Replace `${DS_PROMETHEUS}` references with your Prometheus data source UID
|
|
65
|
+
- Replace `${DS_TEMPO}` references with your Tempo data source UID
|
|
66
|
+
|
|
67
|
+
### Enable Visor Telemetry
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
export VISOR_TELEMETRY_ENABLED=true
|
|
71
|
+
export VISOR_TELEMETRY_SINK=otlp
|
|
72
|
+
export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Template Variables
|
|
76
|
+
|
|
77
|
+
The overview dashboard includes filter variables:
|
|
78
|
+
- **Source** — Filter by run source (`cli`, `slack`, `tui`)
|
|
79
|
+
- **Workflow** — Filter by workflow/check combination
|
|
80
|
+
|
|
81
|
+
## Related Documentation
|
|
37
82
|
|
|
83
|
+
- [Telemetry Reference](../telemetry-reference.md) — Complete list of all spans, metrics, and events
|
|
84
|
+
- [Telemetry Setup Guide](../telemetry-setup.md) — How to enable and configure telemetry
|
|
@@ -1,33 +1,453 @@
|
|
|
1
1
|
{
|
|
2
|
-
"__inputs": [
|
|
3
|
-
|
|
2
|
+
"__inputs": [
|
|
3
|
+
{
|
|
4
|
+
"name": "DS_PROMETHEUS",
|
|
5
|
+
"label": "Prometheus",
|
|
6
|
+
"description": "Prometheus data source for Visor metrics",
|
|
7
|
+
"type": "datasource",
|
|
8
|
+
"pluginId": "prometheus",
|
|
9
|
+
"pluginName": "Prometheus"
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"name": "DS_TEMPO",
|
|
13
|
+
"label": "Tempo",
|
|
14
|
+
"description": "Tempo data source for Visor traces",
|
|
15
|
+
"type": "datasource",
|
|
16
|
+
"pluginId": "tempo",
|
|
17
|
+
"pluginName": "Tempo"
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"__requires": [
|
|
21
|
+
{ "type": "grafana", "id": "grafana", "name": "Grafana", "version": "10.0.0" },
|
|
22
|
+
{ "type": "datasource", "id": "prometheus", "name": "Prometheus" },
|
|
23
|
+
{ "type": "datasource", "id": "tempo", "name": "Tempo" }
|
|
24
|
+
],
|
|
25
|
+
"title": "Visor — Overview",
|
|
26
|
+
"uid": "visor-overview",
|
|
27
|
+
"description": "Visor workflow execution overview: runs, users, AI calls, check performance, and issues.",
|
|
28
|
+
"tags": ["visor", "otel", "observability"],
|
|
4
29
|
"timezone": "browser",
|
|
30
|
+
"editable": true,
|
|
31
|
+
"version": 2,
|
|
32
|
+
"time": { "from": "now-24h", "to": "now" },
|
|
33
|
+
"refresh": "30s",
|
|
34
|
+
"templating": {
|
|
35
|
+
"list": [
|
|
36
|
+
{
|
|
37
|
+
"name": "source",
|
|
38
|
+
"label": "Source",
|
|
39
|
+
"type": "query",
|
|
40
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
41
|
+
"query": "label_values(visor_run_total, visor_run_source)",
|
|
42
|
+
"includeAll": true,
|
|
43
|
+
"current": { "text": "All", "value": "$__all" },
|
|
44
|
+
"refresh": 2,
|
|
45
|
+
"multi": true
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"name": "workflow",
|
|
49
|
+
"label": "Workflow",
|
|
50
|
+
"type": "query",
|
|
51
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
52
|
+
"query": "label_values(visor_run_total, visor_run_workflow)",
|
|
53
|
+
"includeAll": true,
|
|
54
|
+
"current": { "text": "All", "value": "$__all" },
|
|
55
|
+
"refresh": 2,
|
|
56
|
+
"multi": true
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
},
|
|
5
60
|
"panels": [
|
|
61
|
+
{
|
|
62
|
+
"type": "row",
|
|
63
|
+
"title": "Runs & Users",
|
|
64
|
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
|
|
65
|
+
"collapsed": false
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"type": "stat",
|
|
69
|
+
"title": "Total Runs",
|
|
70
|
+
"description": "Total workflow executions in the selected time range",
|
|
71
|
+
"targets": [
|
|
72
|
+
{
|
|
73
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
74
|
+
"expr": "sum(increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\"}[$__range]))",
|
|
75
|
+
"legendFormat": "Runs"
|
|
76
|
+
}
|
|
77
|
+
],
|
|
78
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }] } } },
|
|
79
|
+
"options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
80
|
+
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"type": "stat",
|
|
84
|
+
"title": "Unique Users",
|
|
85
|
+
"description": "Distinct user IDs that triggered runs",
|
|
86
|
+
"targets": [
|
|
87
|
+
{
|
|
88
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
89
|
+
"expr": "count(count by (visor_run_user_id) (visor_run_total{visor_run_source=~\"$source\", visor_run_user_id!=\"\"}))",
|
|
90
|
+
"legendFormat": "Users"
|
|
91
|
+
}
|
|
92
|
+
],
|
|
93
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "green", "value": null }] } } },
|
|
94
|
+
"options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
95
|
+
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }
|
|
96
|
+
},
|
|
97
|
+
{
|
|
98
|
+
"type": "stat",
|
|
99
|
+
"title": "Avg Run Duration",
|
|
100
|
+
"description": "Average workflow execution time",
|
|
101
|
+
"targets": [
|
|
102
|
+
{
|
|
103
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
104
|
+
"expr": "sum(increase(visor_run_duration_ms_sum{visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_duration_ms_count{visor_run_source=~\"$source\"}[$__range]))",
|
|
105
|
+
"legendFormat": "Avg"
|
|
106
|
+
}
|
|
107
|
+
],
|
|
108
|
+
"fieldConfig": { "defaults": { "unit": "ms", "thresholds": { "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 60000 }, { "color": "red", "value": 300000 }] } } },
|
|
109
|
+
"options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
110
|
+
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"type": "stat",
|
|
114
|
+
"title": "Success Rate",
|
|
115
|
+
"description": "Percentage of runs that completed successfully",
|
|
116
|
+
"targets": [
|
|
117
|
+
{
|
|
118
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
119
|
+
"expr": "sum(increase(visor_run_duration_ms_count{visor_run_success=\"true\", visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_duration_ms_count{visor_run_source=~\"$source\"}[$__range])) * 100",
|
|
120
|
+
"legendFormat": "Success %"
|
|
121
|
+
}
|
|
122
|
+
],
|
|
123
|
+
"fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100, "thresholds": { "steps": [{ "color": "red", "value": null }, { "color": "yellow", "value": 80 }, { "color": "green", "value": 95 }] } } },
|
|
124
|
+
"options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
125
|
+
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"type": "stat",
|
|
129
|
+
"title": "Total AI Calls",
|
|
130
|
+
"description": "Total AI provider invocations",
|
|
131
|
+
"targets": [
|
|
132
|
+
{
|
|
133
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
134
|
+
"expr": "sum(increase(visor_ai_call_total{visor_run_source=~\"$source\"}[$__range]))",
|
|
135
|
+
"legendFormat": "AI Calls"
|
|
136
|
+
}
|
|
137
|
+
],
|
|
138
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "purple", "value": null }] } } },
|
|
139
|
+
"options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
140
|
+
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }
|
|
141
|
+
},
|
|
142
|
+
{
|
|
143
|
+
"type": "stat",
|
|
144
|
+
"title": "Avg AI Calls / Run",
|
|
145
|
+
"description": "Average number of AI calls per workflow run",
|
|
146
|
+
"targets": [
|
|
147
|
+
{
|
|
148
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
149
|
+
"expr": "sum(increase(visor_run_ai_calls_sum{visor_run_source=~\"$source\"}[$__range])) / sum(increase(visor_run_ai_calls_count{visor_run_source=~\"$source\"}[$__range]))",
|
|
150
|
+
"legendFormat": "Avg AI/Run"
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "purple", "value": null }] } } },
|
|
154
|
+
"options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
155
|
+
"gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }
|
|
156
|
+
},
|
|
6
157
|
{
|
|
7
158
|
"type": "timeseries",
|
|
8
|
-
"title": "
|
|
159
|
+
"title": "Runs Over Time",
|
|
160
|
+
"description": "Run count over time, broken down by source",
|
|
161
|
+
"targets": [
|
|
162
|
+
{
|
|
163
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
164
|
+
"expr": "sum by (visor_run_source) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\"}[5m]))",
|
|
165
|
+
"legendFormat": "{{visor_run_source}}"
|
|
166
|
+
}
|
|
167
|
+
],
|
|
168
|
+
"fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
|
|
169
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 }
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"type": "timeseries",
|
|
173
|
+
"title": "Run Duration (P50 / P95 / P99)",
|
|
174
|
+
"description": "Run duration percentiles over time",
|
|
175
|
+
"targets": [
|
|
176
|
+
{
|
|
177
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
178
|
+
"expr": "histogram_quantile(0.50, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
|
|
179
|
+
"legendFormat": "P50"
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
183
|
+
"expr": "histogram_quantile(0.95, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
|
|
184
|
+
"legendFormat": "P95"
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
188
|
+
"expr": "histogram_quantile(0.99, sum(rate(visor_run_duration_ms_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
|
|
189
|
+
"legendFormat": "P99"
|
|
190
|
+
}
|
|
191
|
+
],
|
|
192
|
+
"fieldConfig": { "defaults": { "unit": "ms", "custom": { "fillOpacity": 10 } } },
|
|
193
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 }
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
"type": "table",
|
|
197
|
+
"title": "Runs by User",
|
|
198
|
+
"description": "Run counts and average duration per user",
|
|
9
199
|
"targets": [
|
|
10
200
|
{
|
|
11
|
-
"datasource": { "type": "prometheus", "uid": "
|
|
12
|
-
"expr": "
|
|
201
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
202
|
+
"expr": "sum by (visor_run_user_id, visor_run_user_name) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_user_id!=\"\"}[$__range]))",
|
|
203
|
+
"legendFormat": "",
|
|
204
|
+
"format": "table",
|
|
205
|
+
"instant": true
|
|
13
206
|
}
|
|
14
207
|
],
|
|
15
|
-
"fieldConfig": {
|
|
16
|
-
|
|
208
|
+
"fieldConfig": {
|
|
209
|
+
"overrides": [
|
|
210
|
+
{ "matcher": { "id": "byName", "options": "visor_run_user_name" }, "properties": [{ "id": "displayName", "value": "User" }] },
|
|
211
|
+
{ "matcher": { "id": "byName", "options": "visor_run_user_id" }, "properties": [{ "id": "displayName", "value": "User ID" }] },
|
|
212
|
+
{ "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "Runs" }] }
|
|
213
|
+
]
|
|
214
|
+
},
|
|
215
|
+
"options": { "sortBy": [{ "displayName": "Runs", "desc": true }] },
|
|
216
|
+
"transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
|
|
217
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 }
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
"type": "table",
|
|
221
|
+
"title": "Runs by Workflow",
|
|
222
|
+
"description": "Run counts per workflow/check combination",
|
|
223
|
+
"targets": [
|
|
224
|
+
{
|
|
225
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
226
|
+
"expr": "sum by (visor_run_workflow) (increase(visor_run_total{visor_run_source=~\"$source\", visor_run_workflow=~\"$workflow\", visor_run_workflow!=\"\"}[$__range]))",
|
|
227
|
+
"legendFormat": "",
|
|
228
|
+
"format": "table",
|
|
229
|
+
"instant": true
|
|
230
|
+
}
|
|
231
|
+
],
|
|
232
|
+
"fieldConfig": {
|
|
233
|
+
"overrides": [
|
|
234
|
+
{ "matcher": { "id": "byName", "options": "visor_run_workflow" }, "properties": [{ "id": "displayName", "value": "Workflow" }] },
|
|
235
|
+
{ "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "Runs" }] }
|
|
236
|
+
]
|
|
237
|
+
},
|
|
238
|
+
"options": { "sortBy": [{ "displayName": "Runs", "desc": true }] },
|
|
239
|
+
"transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
|
|
240
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 13 }
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
"type": "row",
|
|
244
|
+
"title": "Check Performance",
|
|
245
|
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 },
|
|
246
|
+
"collapsed": false
|
|
247
|
+
},
|
|
248
|
+
{
|
|
249
|
+
"type": "timeseries",
|
|
250
|
+
"title": "Check Duration by Check (P95)",
|
|
251
|
+
"description": "95th percentile check execution time per check ID",
|
|
252
|
+
"targets": [
|
|
253
|
+
{
|
|
254
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
255
|
+
"expr": "histogram_quantile(0.95, sum(rate(visor_check_duration_ms_bucket[5m])) by (le, visor_check_id))",
|
|
256
|
+
"legendFormat": "{{visor_check_id}}"
|
|
257
|
+
}
|
|
258
|
+
],
|
|
259
|
+
"fieldConfig": { "defaults": { "unit": "ms", "custom": { "fillOpacity": 10 } } },
|
|
260
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 }
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
"type": "timeseries",
|
|
264
|
+
"title": "Issues by Severity",
|
|
265
|
+
"description": "Issues produced per 5-minute window, broken down by severity",
|
|
266
|
+
"targets": [
|
|
267
|
+
{
|
|
268
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
269
|
+
"expr": "sum by (severity) (increase(visor_check_issues_total[5m]))",
|
|
270
|
+
"legendFormat": "{{severity}}"
|
|
271
|
+
}
|
|
272
|
+
],
|
|
273
|
+
"fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
|
|
274
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 }
|
|
275
|
+
},
|
|
276
|
+
{
|
|
277
|
+
"type": "bargauge",
|
|
278
|
+
"title": "Slowest Checks (Avg Duration)",
|
|
279
|
+
"description": "Average execution time per check, limited to the 10 slowest",
|
|
280
|
+
"targets": [
|
|
281
|
+
{
|
|
282
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
283
|
+
"expr": "topk(10, sum by (visor_check_id) (increase(visor_check_duration_ms_sum[$__range])) / sum by (visor_check_id) (increase(visor_check_duration_ms_count[$__range])))",
|
|
284
|
+
"legendFormat": "{{visor_check_id}}"
|
|
285
|
+
}
|
|
286
|
+
],
|
|
287
|
+
"fieldConfig": { "defaults": { "unit": "ms", "thresholds": { "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 30000 }, { "color": "red", "value": 120000 }] } } },
|
|
288
|
+
"options": { "orientation": "horizontal", "displayMode": "gradient" },
|
|
289
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 }
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
"type": "piechart",
|
|
293
|
+
"title": "Issues by Check",
|
|
294
|
+
"description": "Distribution of issues across checks",
|
|
295
|
+
"targets": [
|
|
296
|
+
{
|
|
297
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
298
|
+
"expr": "sum by (visor_check_id) (increase(visor_check_issues_total[$__range]))",
|
|
299
|
+
"legendFormat": "{{visor_check_id}}"
|
|
300
|
+
}
|
|
301
|
+
],
|
|
302
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 }
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
"type": "row",
|
|
306
|
+
"title": "AI Provider",
|
|
307
|
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 38 },
|
|
308
|
+
"collapsed": false
|
|
309
|
+
},
|
|
310
|
+
{
|
|
311
|
+
"type": "timeseries",
|
|
312
|
+
"title": "AI Calls Over Time",
|
|
313
|
+
"description": "AI provider calls per 5-minute window, broken down by model",
|
|
314
|
+
"targets": [
|
|
315
|
+
{
|
|
316
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
317
|
+
"expr": "sum by (visor_ai_model) (increase(visor_ai_call_total{visor_run_source=~\"$source\"}[5m]))",
|
|
318
|
+
"legendFormat": "{{visor_ai_model}}"
|
|
319
|
+
}
|
|
320
|
+
],
|
|
321
|
+
"fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } } } },
|
|
322
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 39 }
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
"type": "timeseries",
|
|
326
|
+
"title": "AI Calls per Run (Distribution)",
|
|
327
|
+
"description": "P50 and P95 of AI calls per run",
|
|
328
|
+
"targets": [
|
|
329
|
+
{
|
|
330
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
331
|
+
"expr": "histogram_quantile(0.50, sum(rate(visor_run_ai_calls_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
|
|
332
|
+
"legendFormat": "P50"
|
|
333
|
+
},
|
|
334
|
+
{
|
|
335
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
336
|
+
"expr": "histogram_quantile(0.95, sum(rate(visor_run_ai_calls_bucket{visor_run_source=~\"$source\"}[5m])) by (le))",
|
|
337
|
+
"legendFormat": "P95"
|
|
338
|
+
}
|
|
339
|
+
],
|
|
340
|
+
"fieldConfig": { "defaults": { "custom": { "fillOpacity": 10 } } },
|
|
341
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 39 }
|
|
342
|
+
},
|
|
343
|
+
{
|
|
344
|
+
"type": "table",
|
|
345
|
+
"title": "AI Calls by Check",
|
|
346
|
+
"description": "Which checks trigger the most AI calls",
|
|
347
|
+
"targets": [
|
|
348
|
+
{
|
|
349
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
350
|
+
"expr": "topk(20, sum by (visor_check_id) (increase(visor_ai_call_total[$__range])))",
|
|
351
|
+
"legendFormat": "",
|
|
352
|
+
"format": "table",
|
|
353
|
+
"instant": true
|
|
354
|
+
}
|
|
355
|
+
],
|
|
356
|
+
"fieldConfig": {
|
|
357
|
+
"overrides": [
|
|
358
|
+
{ "matcher": { "id": "byName", "options": "visor_check_id" }, "properties": [{ "id": "displayName", "value": "Check" }] },
|
|
359
|
+
{ "matcher": { "id": "byName", "options": "Value" }, "properties": [{ "id": "displayName", "value": "AI Calls" }] }
|
|
360
|
+
]
|
|
361
|
+
},
|
|
362
|
+
"options": { "sortBy": [{ "displayName": "AI Calls", "desc": true }] },
|
|
363
|
+
"transformations": [{ "id": "organize", "options": { "excludeByName": { "Time": true } } }],
|
|
364
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 47 }
|
|
365
|
+
},
|
|
366
|
+
{
|
|
367
|
+
"type": "piechart",
|
|
368
|
+
"title": "AI Calls by Model",
|
|
369
|
+
"description": "Distribution of AI calls across models",
|
|
370
|
+
"targets": [
|
|
371
|
+
{
|
|
372
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
373
|
+
"expr": "sum by (visor_ai_model) (increase(visor_ai_call_total[$__range]))",
|
|
374
|
+
"legendFormat": "{{visor_ai_model}}"
|
|
375
|
+
}
|
|
376
|
+
],
|
|
377
|
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 47 }
|
|
378
|
+
},
|
|
379
|
+
{
|
|
380
|
+
"type": "row",
|
|
381
|
+
"title": "Failure Conditions",
|
|
382
|
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 },
|
|
383
|
+
"collapsed": false
|
|
384
|
+
},
|
|
385
|
+
{
|
|
386
|
+
"type": "timeseries",
|
|
387
|
+
"title": "fail_if Triggered",
|
|
388
|
+
"description": "fail_if conditions triggered per 5-minute window, by check and scope",
|
|
389
|
+
"targets": [
|
|
390
|
+
{
|
|
391
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
392
|
+
"expr": "sum by (visor_check_id, scope) (increase(visor_fail_if_triggered_total[5m]))",
|
|
393
|
+
"legendFormat": "{{visor_check_id}} ({{scope}})"
|
|
394
|
+
}
|
|
395
|
+
],
|
|
396
|
+
"fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", "fillOpacity": 50 } } },
|
|
397
|
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 56 }
|
|
398
|
+
},
|
|
399
|
+
{
|
|
400
|
+
"type": "stat",
|
|
401
|
+
"title": "Active Checks (Now)",
|
|
402
|
+
"description": "Currently running checks",
|
|
403
|
+
"targets": [
|
|
404
|
+
{
|
|
405
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
406
|
+
"expr": "sum(visor_run_active_checks)",
|
|
407
|
+
"legendFormat": "Active"
|
|
408
|
+
}
|
|
409
|
+
],
|
|
410
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 }] } } },
|
|
411
|
+
"options": { "colorMode": "value", "graphMode": "area", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
412
|
+
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 56 }
|
|
17
413
|
},
|
|
18
414
|
{
|
|
19
415
|
"type": "stat",
|
|
20
|
-
"title": "
|
|
416
|
+
"title": "Diagram Blocks",
|
|
417
|
+
"description": "Total Mermaid diagram blocks emitted",
|
|
21
418
|
"targets": [
|
|
22
419
|
{
|
|
23
|
-
"datasource": { "type": "prometheus", "uid": "
|
|
24
|
-
"expr": "sum(
|
|
420
|
+
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
|
|
421
|
+
"expr": "sum(increase(visor_diagram_blocks_total[$__range]))",
|
|
422
|
+
"legendFormat": "Blocks"
|
|
25
423
|
}
|
|
26
424
|
],
|
|
27
|
-
"
|
|
425
|
+
"fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": "blue", "value": null }] } } },
|
|
426
|
+
"options": { "colorMode": "value", "graphMode": "none", "reduceOptions": { "calcs": ["lastNotNull"] } },
|
|
427
|
+
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 56 }
|
|
428
|
+
},
|
|
429
|
+
{
|
|
430
|
+
"type": "row",
|
|
431
|
+
"title": "Traces",
|
|
432
|
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 64 },
|
|
433
|
+
"collapsed": false
|
|
434
|
+
},
|
|
435
|
+
{
|
|
436
|
+
"type": "traces",
|
|
437
|
+
"title": "Recent Traces",
|
|
438
|
+
"description": "Recent visor.run traces from Tempo",
|
|
439
|
+
"targets": [
|
|
440
|
+
{
|
|
441
|
+
"datasource": { "type": "tempo", "uid": "${DS_TEMPO}" },
|
|
442
|
+
"queryType": "traceqlSearch",
|
|
443
|
+
"filters": [
|
|
444
|
+
{ "id": "service-name", "tag": "service.name", "operator": "=", "value": ["visor"], "scope": "resource" },
|
|
445
|
+
{ "id": "span-name", "tag": "name", "operator": "=", "value": ["visor.run"], "scope": "span" }
|
|
446
|
+
],
|
|
447
|
+
"limit": 20
|
|
448
|
+
}
|
|
449
|
+
],
|
|
450
|
+
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 65 }
|
|
28
451
|
}
|
|
29
|
-
]
|
|
30
|
-
"version": 1,
|
|
31
|
-
"editable": true
|
|
452
|
+
]
|
|
32
453
|
}
|
|
33
|
-
|