@emeryld/obs-stack 0.1.12 → 0.1.14

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -115,9 +115,9 @@ Each datasource now exposes a stable `uid` (`loki` and `tempo`) so the shipping
 
 We also preload three starter dashboards via `grafana/provisioning/dashboards/`. Each JSON file targets the Loki datasource (uid `loki`) so they work out of the box:
 
- - **Logs overview**: table-based log stream plus a severity breakout to spot noisy services that are emitting within the selected time range.
- - **Error spotlight**: graph of error rate per service, a recent error table, and a quick stat showing how many error events arrived over the past five minutes.
- - **Service telemetry**: log rate by logger namespace, the top namespaces by volume, and a traced-log ratio that highlights how many records still carry `trace_id`.
+ - **Logs overview**: log volume by log type, application log level breakdown, top application event names, and a recent log stream.
+ - **Error spotlight**: app error counts, request 5xx counts, schedule failures, an error-rate trend, and a recent error stream with top failing routes.
+ - **Service telemetry**: request throughput + latency percentiles, top request paths, cache hit ratio/latency, schedule status counts, and socket event volume.
 
 Edit the JSON in `grafana/provisioning/dashboards/files/` (or export updates from Grafana) and restart the stack or reload dashboards to see your changes automatically.
 
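The refreshed dashboards query Loki through structured log labels (`logType`, `level`, `status`, `operation`, `direction`) rather than the old `severityText` selectors, and each one exposes a multi-select `$service` template variable backed by `label_values({service_name!=""}, service_name)`. As one representative target (taken verbatim from the Errors Spotlight JSON in the diff below), the "App Errors (5m)" stat counts application-level error logs with a plain LogQL aggregation:

```logql
sum(count_over_time({service_name=~"$service", logType="application", level="error"}[5m]))
```

The remaining panels follow the same shape, swapping in selectors such as `logType="request", status=~"5.."` for HTTP failures, `logType="schedule"` for job runs, and `| unwrap durationMs` for the latency percentiles.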
@@ -1,209 +1,302 @@
1
1
  {
2
+ "id": null,
3
+ "uid": "errors-spotlight",
4
+ "title": "Errors Spotlight",
5
+ "tags": ["errors", "logs"],
6
+ "timezone": "browser",
7
+ "schemaVersion": 38,
8
+ "version": 1,
9
+ "refresh": "10s",
10
+ "time": {
11
+ "from": "now-6h",
12
+ "to": "now"
13
+ },
2
14
  "annotations": {
3
15
  "list": [
4
16
  {
5
17
  "builtIn": 1,
6
- "datasource": {
7
- "type": "grafana",
8
- "uid": "-- Grafana --"
9
- },
18
+ "datasource": "-- Grafana --",
10
19
  "enable": true,
11
20
  "hide": true,
12
- "iconColor": "rgba(0, 211, 255, 1)",
13
21
  "name": "Annotations & Alerts",
14
22
  "type": "dashboard"
15
23
  }
16
24
  ]
17
25
  },
18
- "editable": true,
19
- "gnetId": null,
20
- "graphTooltip": 0,
21
- "id": null,
22
- "links": [],
23
- "liveNow": false,
26
+ "templating": {
27
+ "list": [
28
+ {
29
+ "name": "service",
30
+ "type": "query",
31
+ "datasource": {
32
+ "type": "loki",
33
+ "uid": "loki"
34
+ },
35
+ "definition": "label_values({service_name!=\"\"}, service_name)",
36
+ "query": "label_values({service_name!=\"\"}, service_name)",
37
+ "refresh": 1,
38
+ "includeAll": true,
39
+ "multi": true,
40
+ "allValue": ".*",
41
+ "current": {
42
+ "text": "All",
43
+ "value": "$__all"
44
+ }
45
+ }
46
+ ]
47
+ },
24
48
  "panels": [
25
49
  {
50
+ "id": 1,
51
+ "type": "stat",
52
+ "title": "App Errors (5m)",
26
53
  "datasource": {
27
54
  "type": "loki",
28
55
  "uid": "loki"
29
56
  },
57
+ "gridPos": {
58
+ "h": 5,
59
+ "w": 8,
60
+ "x": 0,
61
+ "y": 0
62
+ },
30
63
  "fieldConfig": {
31
64
  "defaults": {
32
- "custom": {},
33
- "mappings": [],
34
- "thresholds": {
35
- "mode": "absolute",
36
- "steps": [
37
- {
38
- "color": "green",
39
- "value": null
40
- }
41
- ]
42
- },
43
65
  "unit": "short"
44
66
  },
45
67
  "overrides": []
46
68
  },
69
+ "options": {
70
+ "reduceOptions": {
71
+ "calcs": ["lastNotNull"],
72
+ "fields": "",
73
+ "values": false
74
+ },
75
+ "orientation": "auto",
76
+ "colorMode": "value",
77
+ "graphMode": "none",
78
+ "justifyMode": "auto"
79
+ },
80
+ "targets": [
81
+ {
82
+ "refId": "A",
83
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"application\", level=\"error\"}[5m]))",
84
+ "datasource": {
85
+ "type": "loki",
86
+ "uid": "loki"
87
+ }
88
+ }
89
+ ]
90
+ },
91
+ {
92
+ "id": 2,
93
+ "type": "stat",
94
+ "title": "Request 5xx (5m)",
95
+ "datasource": {
96
+ "type": "loki",
97
+ "uid": "loki"
98
+ },
47
99
  "gridPos": {
48
- "h": 8,
49
- "w": 12,
50
- "x": 0,
100
+ "h": 5,
101
+ "w": 8,
102
+ "x": 8,
51
103
  "y": 0
52
104
  },
53
- "id": 1,
105
+ "fieldConfig": {
106
+ "defaults": {
107
+ "unit": "short"
108
+ },
109
+ "overrides": []
110
+ },
54
111
  "options": {
55
- "legend": {
56
- "displayMode": "list",
57
- "placement": "bottom"
58
- }
112
+ "reduceOptions": {
113
+ "calcs": ["lastNotNull"],
114
+ "fields": "",
115
+ "values": false
116
+ },
117
+ "orientation": "auto",
118
+ "colorMode": "value",
119
+ "graphMode": "none",
120
+ "justifyMode": "auto"
59
121
  },
60
122
  "targets": [
61
123
  {
124
+ "refId": "A",
125
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[5m]))",
62
126
  "datasource": {
63
127
  "type": "loki",
64
128
  "uid": "loki"
65
- },
66
- "expr": "sum(rate({severityText=\"ERROR\", service_name=~\"$service_name\"}[1m])) by (service_name)",
67
- "queryType": "range",
68
- "refId": "A"
129
+ }
69
130
  }
70
- ],
71
- "title": "Error rate per service",
72
- "type": "timeseries"
131
+ ]
73
132
  },
74
133
  {
134
+ "id": 3,
135
+ "type": "stat",
136
+ "title": "Schedule Failures (15m)",
75
137
  "datasource": {
76
138
  "type": "loki",
77
139
  "uid": "loki"
78
140
  },
141
+ "gridPos": {
142
+ "h": 5,
143
+ "w": 8,
144
+ "x": 16,
145
+ "y": 0
146
+ },
79
147
  "fieldConfig": {
80
148
  "defaults": {
81
- "custom": {},
82
- "mappings": [],
83
- "thresholds": {
84
- "mode": "absolute",
85
- "steps": [
86
- {
87
- "color": "green",
88
- "value": null
89
- }
90
- ]
91
- }
149
+ "unit": "short"
92
150
  },
93
151
  "overrides": []
94
152
  },
95
- "gridPos": {
96
- "h": 8,
97
- "w": 12,
98
- "x": 0,
99
- "y": 8
100
- },
101
- "id": 2,
102
153
  "options": {
103
- "showHeader": true
154
+ "reduceOptions": {
155
+ "calcs": ["lastNotNull"],
156
+ "fields": "",
157
+ "values": false
158
+ },
159
+ "orientation": "auto",
160
+ "colorMode": "value",
161
+ "graphMode": "none",
162
+ "justifyMode": "auto"
104
163
  },
105
164
  "targets": [
106
165
  {
166
+ "refId": "A",
167
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}[15m]))",
107
168
  "datasource": {
108
169
  "type": "loki",
109
170
  "uid": "loki"
110
- },
111
- "expr": "{severityText=\"ERROR\", service_name=~\"$service_name\"}",
112
- "queryType": "range",
113
- "refId": "A"
171
+ }
114
172
  }
115
- ],
116
- "title": "Recent errors",
117
- "type": "table"
173
+ ]
118
174
  },
119
175
  {
176
+ "id": 4,
177
+ "type": "timeseries",
178
+ "title": "Error Rate by Type",
120
179
  "datasource": {
121
180
  "type": "loki",
122
181
  "uid": "loki"
123
182
  },
183
+ "gridPos": {
184
+ "h": 8,
185
+ "w": 24,
186
+ "x": 0,
187
+ "y": 5
188
+ },
124
189
  "fieldConfig": {
125
190
  "defaults": {
126
- "custom": {},
127
- "mappings": [],
128
191
  "unit": "short"
129
192
  },
130
193
  "overrides": []
131
194
  },
195
+ "options": {
196
+ "legend": {
197
+ "displayMode": "list",
198
+ "placement": "bottom"
199
+ },
200
+ "tooltip": {
201
+ "mode": "single"
202
+ }
203
+ },
204
+ "targets": [
205
+ {
206
+ "refId": "A",
207
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"application\", level=\"error\"}[5m]))",
208
+ "legendFormat": "application",
209
+ "datasource": {
210
+ "type": "loki",
211
+ "uid": "loki"
212
+ }
213
+ },
214
+ {
215
+ "refId": "B",
216
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[5m]))",
217
+ "legendFormat": "request",
218
+ "datasource": {
219
+ "type": "loki",
220
+ "uid": "loki"
221
+ }
222
+ },
223
+ {
224
+ "refId": "C",
225
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}[5m]))",
226
+ "legendFormat": "schedule",
227
+ "datasource": {
228
+ "type": "loki",
229
+ "uid": "loki"
230
+ }
231
+ }
232
+ ]
233
+ },
234
+ {
235
+ "id": 5,
236
+ "type": "logs",
237
+ "title": "Recent Errors",
238
+ "datasource": {
239
+ "type": "loki",
240
+ "uid": "loki"
241
+ },
132
242
  "gridPos": {
133
- "h": 4,
134
- "w": 12,
243
+ "h": 10,
244
+ "w": 24,
135
245
  "x": 0,
136
- "y": 16
246
+ "y": 13
137
247
  },
138
- "id": 3,
139
248
  "options": {
140
- "orientation": "horizontal",
141
- "reduceOptions": {
142
- "calcs": [
143
- "sum"
144
- ],
145
- "fields": "",
146
- "values": false
147
- },
148
- "textMode": "auto"
249
+ "dedupStrategy": "none",
250
+ "showLabels": true,
251
+ "showTime": true,
252
+ "sortOrder": "Descending",
253
+ "wrapLogMessage": true
149
254
  },
150
255
  "targets": [
151
256
  {
257
+ "refId": "A",
258
+ "expr": "{service_name=~\"$service\", logType=\"application\", level=\"error\"} or {service_name=~\"$service\", logType=\"request\", status=~\"5..\"} or {service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}",
152
259
  "datasource": {
153
260
  "type": "loki",
154
261
  "uid": "loki"
155
- },
156
- "expr": "sum(count_over_time({severityText=\"ERROR\", service_name=~\"$service_name\"}[5m]))",
157
- "queryType": "range",
158
- "refId": "A"
262
+ }
159
263
  }
160
- ],
161
- "title": "Errors in last 5 minutes",
162
- "type": "stat"
163
- }
164
- ],
165
- "refresh": "5s",
166
- "schemaVersion": 38,
167
- "style": "dark",
168
- "tags": [
169
- "errors",
170
- "alerts",
171
- "logs"
172
- ],
173
- "templating": {
174
- "list": [
175
- {
176
- "allValue": ".*",
177
- "datasource": {
178
- "type": "loki",
179
- "uid": "loki"
264
+ ]
265
+ },
266
+ {
267
+ "id": 6,
268
+ "type": "table",
269
+ "title": "Top Error Routes (1h)",
270
+ "datasource": {
271
+ "type": "loki",
272
+ "uid": "loki"
273
+ },
274
+ "gridPos": {
275
+ "h": 7,
276
+ "w": 24,
277
+ "x": 0,
278
+ "y": 23
279
+ },
280
+ "fieldConfig": {
281
+ "defaults": {
282
+ "unit": "short"
180
283
  },
181
- "definition": "label_values({service_name!=\"\"}, service_name)",
182
- "includeAll": true,
183
- "label": "Service",
184
- "multi": false,
185
- "name": "service_name",
186
- "options": [],
187
- "query": "label_values({service_name!=\"\"}, service_name)",
188
- "refresh": 2,
189
- "skipUrlSync": false,
190
- "type": "query",
191
- "useTags": false,
192
- "current": {
193
- "text": "All services",
194
- "value": ".*"
284
+ "overrides": []
285
+ },
286
+ "options": {
287
+ "showHeader": true
288
+ },
289
+ "targets": [
290
+ {
291
+ "refId": "A",
292
+ "expr": "topk(10, sum by (path, method) (count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[1h])))",
293
+ "legendFormat": "{{method}} {{path}}",
294
+ "datasource": {
295
+ "type": "loki",
296
+ "uid": "loki"
297
+ }
195
298
  }
196
- }
197
- ]
198
- },
199
- "time": {
200
- "from": "now-1h",
201
- "to": "now"
202
- },
203
- "timepicker": {},
204
- "timezone": "browser",
205
- "title": "Error spotlight",
206
- "uid": "error-spotlight",
207
- "version": 1,
208
- "weekStart": ""
299
+ ]
300
+ }
301
+ ]
209
302
  }
@@ -1,169 +1,213 @@
1
1
  {
2
+ "id": null,
3
+ "uid": "logs-overview",
4
+ "title": "Logs Overview",
5
+ "tags": ["logs"],
6
+ "timezone": "browser",
7
+ "schemaVersion": 38,
8
+ "version": 1,
9
+ "refresh": "10s",
10
+ "time": {
11
+ "from": "now-6h",
12
+ "to": "now"
13
+ },
2
14
  "annotations": {
3
15
  "list": [
4
16
  {
5
17
  "builtIn": 1,
6
- "datasource": {
7
- "type": "grafana",
8
- "uid": "-- Grafana --"
9
- },
18
+ "datasource": "-- Grafana --",
10
19
  "enable": true,
11
20
  "hide": true,
12
- "iconColor": "rgba(0, 211, 255, 1)",
13
21
  "name": "Annotations & Alerts",
14
22
  "type": "dashboard"
15
23
  }
16
24
  ]
17
25
  },
18
- "editable": true,
19
- "gnetId": null,
20
- "graphTooltip": 0,
21
- "id": null,
22
- "links": [],
23
- "liveNow": false,
26
+ "templating": {
27
+ "list": [
28
+ {
29
+ "name": "service",
30
+ "type": "query",
31
+ "datasource": {
32
+ "type": "loki",
33
+ "uid": "loki"
34
+ },
35
+ "definition": "label_values({service_name!=\"\"}, service_name)",
36
+ "query": "label_values({service_name!=\"\"}, service_name)",
37
+ "refresh": 1,
38
+ "includeAll": true,
39
+ "multi": true,
40
+ "allValue": ".*",
41
+ "current": {
42
+ "text": "All",
43
+ "value": "$__all"
44
+ }
45
+ },
46
+ {
47
+ "name": "logType",
48
+ "type": "query",
49
+ "datasource": {
50
+ "type": "loki",
51
+ "uid": "loki"
52
+ },
53
+ "definition": "label_values({logType!=\"\"}, logType)",
54
+ "query": "label_values({logType!=\"\"}, logType)",
55
+ "refresh": 1,
56
+ "includeAll": true,
57
+ "multi": true,
58
+ "allValue": ".*",
59
+ "current": {
60
+ "text": "All",
61
+ "value": "$__all"
62
+ }
63
+ }
64
+ ]
65
+ },
24
66
  "panels": [
25
67
  {
68
+ "id": 1,
69
+ "type": "timeseries",
70
+ "title": "Log Volume by Type",
26
71
  "datasource": {
27
72
  "type": "loki",
28
73
  "uid": "loki"
29
74
  },
75
+ "gridPos": {
76
+ "h": 8,
77
+ "w": 24,
78
+ "x": 0,
79
+ "y": 0
80
+ },
30
81
  "fieldConfig": {
31
82
  "defaults": {
32
- "custom": {},
33
- "mappings": [],
34
- "thresholds": {
35
- "mode": "absolute",
36
- "steps": [
37
- {
38
- "color": "green",
39
- "value": null
40
- }
41
- ]
42
- }
83
+ "unit": "short"
43
84
  },
44
85
  "overrides": []
45
86
  },
87
+ "options": {
88
+ "legend": {
89
+ "displayMode": "list",
90
+ "placement": "bottom"
91
+ },
92
+ "tooltip": {
93
+ "mode": "single"
94
+ }
95
+ },
96
+ "targets": [
97
+ {
98
+ "refId": "A",
99
+ "expr": "sum by (logType) (count_over_time({service_name=~\"$service\", logType=~\"$logType\"}[5m]))",
100
+ "legendFormat": "{{logType}}",
101
+ "datasource": {
102
+ "type": "loki",
103
+ "uid": "loki"
104
+ }
105
+ }
106
+ ]
107
+ },
108
+ {
109
+ "id": 2,
110
+ "type": "bargauge",
111
+ "title": "App Log Levels (15m)",
112
+ "datasource": {
113
+ "type": "loki",
114
+ "uid": "loki"
115
+ },
46
116
  "gridPos": {
47
- "h": 12,
117
+ "h": 6,
48
118
  "w": 12,
49
119
  "x": 0,
50
- "y": 0
120
+ "y": 8
121
+ },
122
+ "fieldConfig": {
123
+ "defaults": {
124
+ "unit": "short"
125
+ },
126
+ "overrides": []
51
127
  },
52
- "id": 1,
53
128
  "options": {
54
- "showLabels": true,
55
- "showTime": true,
56
- "wrapLogMessage": true
129
+ "displayMode": "basic",
130
+ "orientation": "horizontal",
131
+ "showUnfilled": true
57
132
  },
58
133
  "targets": [
59
134
  {
135
+ "refId": "A",
136
+ "expr": "sum by (level) (count_over_time({service_name=~\"$service\", logType=\"application\"}[15m]))",
137
+ "legendFormat": "{{level}}",
60
138
  "datasource": {
61
139
  "type": "loki",
62
140
  "uid": "loki"
63
- },
64
- "editorMode": "builder",
65
- "expr": "{service_name=~\"$service_name\"}",
66
- "queryType": "range",
67
- "refId": "A"
141
+ }
68
142
  }
69
- ],
70
- "title": "Log stream",
71
- "type": "logs"
143
+ ]
72
144
  },
73
145
  {
146
+ "id": 3,
147
+ "type": "table",
148
+ "title": "Top Application Events (1h)",
74
149
  "datasource": {
75
150
  "type": "loki",
76
151
  "uid": "loki"
77
152
  },
153
+ "gridPos": {
154
+ "h": 6,
155
+ "w": 12,
156
+ "x": 12,
157
+ "y": 8
158
+ },
78
159
  "fieldConfig": {
79
160
  "defaults": {
80
- "custom": {},
81
- "mappings": [],
82
- "thresholds": {
83
- "mode": "absolute",
84
- "steps": [
85
- {
86
- "color": "green",
87
- "value": null
88
- }
89
- ]
90
- },
91
161
  "unit": "short"
92
162
  },
93
163
  "overrides": []
94
164
  },
165
+ "options": {
166
+ "showHeader": true
167
+ },
168
+ "targets": [
169
+ {
170
+ "refId": "A",
171
+ "expr": "topk(10, sum by (name) (count_over_time({service_name=~\"$service\", logType=\"application\"}[1h])))",
172
+ "legendFormat": "{{name}}",
173
+ "datasource": {
174
+ "type": "loki",
175
+ "uid": "loki"
176
+ }
177
+ }
178
+ ]
179
+ },
180
+ {
181
+ "id": 4,
182
+ "type": "logs",
183
+ "title": "Recent Application Logs",
184
+ "datasource": {
185
+ "type": "loki",
186
+ "uid": "loki"
187
+ },
95
188
  "gridPos": {
96
- "h": 8,
97
- "w": 12,
189
+ "h": 10,
190
+ "w": 24,
98
191
  "x": 0,
99
- "y": 12
192
+ "y": 14
100
193
  },
101
- "id": 2,
102
194
  "options": {
103
- "legend": {
104
- "displayMode": "list",
105
- "placement": "bottom"
106
- },
107
- "tooltip": {
108
- "mode": "single"
109
- }
195
+ "dedupStrategy": "none",
196
+ "showLabels": true,
197
+ "showTime": true,
198
+ "sortOrder": "Descending",
199
+ "wrapLogMessage": true
110
200
  },
111
201
  "targets": [
112
202
  {
203
+ "refId": "A",
204
+ "expr": "{service_name=~\"$service\", logType=\"application\"}",
113
205
  "datasource": {
114
206
  "type": "loki",
115
207
  "uid": "loki"
116
- },
117
- "expr": "sum(rate({service_name=~\"$service_name\"}[5m])) by (severityText)",
118
- "queryType": "range",
119
- "refId": "A"
208
+ }
120
209
  }
121
- ],
122
- "title": "Log rate by severity",
123
- "type": "timeseries"
210
+ ]
124
211
  }
125
- ],
126
- "refresh": "15s",
127
- "schemaVersion": 38,
128
- "style": "dark",
129
- "tags": [
130
- "logs",
131
- "observability"
132
- ],
133
- "templating": {
134
- "list": [
135
- {
136
- "allValue": ".*",
137
- "datasource": {
138
- "type": "loki",
139
- "uid": "loki"
140
- },
141
- "definition": "label_values({service_name!=\"\"}, service_name)",
142
- "includeAll": true,
143
- "label": "Service",
144
- "multi": false,
145
- "name": "service_name",
146
- "options": [],
147
- "query": "label_values({service_name!=\"\"}, service_name)",
148
- "refresh": 2,
149
- "skipUrlSync": false,
150
- "type": "query",
151
- "useTags": false,
152
- "current": {
153
- "text": "All services",
154
- "value": ".*"
155
- }
156
- }
157
- ]
158
- },
159
- "time": {
160
- "from": "now-6h",
161
- "to": "now"
162
- },
163
- "timepicker": {},
164
- "timezone": "browser",
165
- "title": "Logs overview",
166
- "uid": "logs-overview",
167
- "version": 1,
168
- "weekStart": ""
212
+ ]
169
213
  }
@@ -1,56 +1,112 @@
1
1
  {
2
+ "id": null,
3
+ "uid": "service-telemetry",
4
+ "title": "Service Telemetry",
5
+ "tags": ["requests", "cache", "schedules", "sockets"],
6
+ "timezone": "browser",
7
+ "schemaVersion": 38,
8
+ "version": 1,
9
+ "refresh": "10s",
10
+ "time": {
11
+ "from": "now-6h",
12
+ "to": "now"
13
+ },
2
14
  "annotations": {
3
15
  "list": [
4
16
  {
5
17
  "builtIn": 1,
6
- "datasource": {
7
- "type": "grafana",
8
- "uid": "-- Grafana --"
9
- },
18
+ "datasource": "-- Grafana --",
10
19
  "enable": true,
11
20
  "hide": true,
12
- "iconColor": "rgba(0, 211, 255, 1)",
13
21
  "name": "Annotations & Alerts",
14
22
  "type": "dashboard"
15
23
  }
16
24
  ]
17
25
  },
18
- "editable": true,
19
- "gnetId": null,
20
- "graphTooltip": 0,
21
- "id": null,
22
- "links": [],
23
- "liveNow": false,
26
+ "templating": {
27
+ "list": [
28
+ {
29
+ "name": "service",
30
+ "type": "query",
31
+ "datasource": {
32
+ "type": "loki",
33
+ "uid": "loki"
34
+ },
35
+ "definition": "label_values({service_name!=\"\"}, service_name)",
36
+ "query": "label_values({service_name!=\"\"}, service_name)",
37
+ "refresh": 1,
38
+ "includeAll": true,
39
+ "multi": true,
40
+ "allValue": ".*",
41
+ "current": {
42
+ "text": "All",
43
+ "value": "$__all"
44
+ }
45
+ }
46
+ ]
47
+ },
24
48
  "panels": [
25
49
  {
50
+ "id": 1,
51
+ "type": "timeseries",
52
+ "title": "Request Throughput by Method",
26
53
  "datasource": {
27
54
  "type": "loki",
28
55
  "uid": "loki"
29
56
  },
57
+ "gridPos": {
58
+ "h": 7,
59
+ "w": 12,
60
+ "x": 0,
61
+ "y": 0
62
+ },
30
63
  "fieldConfig": {
31
64
  "defaults": {
32
- "custom": {},
33
- "mappings": [],
34
- "thresholds": {
35
- "mode": "absolute",
36
- "steps": [
37
- {
38
- "color": "green",
39
- "value": null
40
- }
41
- ]
42
- },
43
65
  "unit": "short"
44
66
  },
45
67
  "overrides": []
46
68
  },
69
+ "options": {
70
+ "legend": {
71
+ "displayMode": "list",
72
+ "placement": "bottom"
73
+ },
74
+ "tooltip": {
75
+ "mode": "single"
76
+ }
77
+ },
78
+ "targets": [
79
+ {
80
+ "refId": "A",
81
+ "expr": "sum by (method) (count_over_time({service_name=~\"$service\", logType=\"request\"}[5m]))",
82
+ "legendFormat": "{{method}}",
83
+ "datasource": {
84
+ "type": "loki",
85
+ "uid": "loki"
86
+ }
87
+ }
88
+ ]
89
+ },
90
+ {
91
+ "id": 2,
92
+ "type": "timeseries",
93
+ "title": "Request Latency (p50 / p95)",
94
+ "datasource": {
95
+ "type": "loki",
96
+ "uid": "loki"
97
+ },
47
98
  "gridPos": {
48
- "h": 8,
99
+ "h": 7,
49
100
  "w": 12,
50
- "x": 0,
101
+ "x": 12,
51
102
  "y": 0
52
103
  },
53
- "id": 1,
104
+ "fieldConfig": {
105
+ "defaults": {
106
+ "unit": "ms"
107
+ },
108
+ "overrides": []
109
+ },
54
110
  "options": {
55
111
  "legend": {
56
112
  "displayMode": "list",
@@ -62,150 +118,222 @@
62
118
  },
63
119
  "targets": [
64
120
  {
121
+ "refId": "A",
122
+ "expr": "quantile_over_time(0.5, {service_name=~\"$service\", logType=\"request\"} | unwrap durationMs [5m])",
123
+ "legendFormat": "p50",
124
+ "datasource": {
125
+ "type": "loki",
126
+ "uid": "loki"
127
+ }
128
+ },
129
+ {
130
+ "refId": "B",
131
+ "expr": "quantile_over_time(0.95, {service_name=~\"$service\", logType=\"request\"} | unwrap durationMs [5m])",
132
+ "legendFormat": "p95",
65
133
  "datasource": {
66
134
  "type": "loki",
67
135
  "uid": "loki"
68
- },
69
- "expr": "sum(rate({service_name=~\"$service_name\"}[5m])) by (logger_name)",
70
- "queryType": "range",
71
- "refId": "A"
136
+ }
72
137
  }
73
- ],
74
- "title": "Log rate by logger",
75
- "type": "timeseries"
138
+ ]
76
139
  },
77
140
  {
141
+ "id": 3,
142
+ "type": "table",
143
+ "title": "Top Request Paths (1h)",
78
144
  "datasource": {
79
145
  "type": "loki",
80
146
  "uid": "loki"
81
147
  },
148
+ "gridPos": {
149
+ "h": 7,
150
+ "w": 12,
151
+ "x": 0,
152
+ "y": 7
153
+ },
82
154
  "fieldConfig": {
83
155
  "defaults": {
84
- "custom": {},
85
- "mappings": [],
86
- "thresholds": {
87
- "mode": "absolute",
88
- "steps": [
89
- {
90
- "color": "green",
91
- "value": null
92
- }
93
- ]
94
- }
156
+ "unit": "short"
95
157
  },
96
158
  "overrides": []
97
159
  },
160
+ "options": {
161
+ "showHeader": true
162
+ },
163
+ "targets": [
164
+ {
165
+ "refId": "A",
166
+ "expr": "topk(10, sum by (path) (count_over_time({service_name=~\"$service\", logType=\"request\"}[1h])))",
167
+ "legendFormat": "{{path}}",
168
+ "datasource": {
169
+ "type": "loki",
170
+ "uid": "loki"
171
+ }
172
+ }
173
+ ]
174
+ },
175
+ {
176
+ "id": 4,
177
+ "type": "stat",
178
+ "title": "Cache Hit Ratio (5m)",
179
+ "datasource": {
180
+ "type": "loki",
181
+ "uid": "loki"
182
+ },
98
183
  "gridPos": {
99
- "h": 8,
100
- "w": 12,
101
- "x": 0,
102
- "y": 8
184
+ "h": 5,
185
+ "w": 6,
186
+ "x": 12,
187
+ "y": 7
188
+ },
189
+ "fieldConfig": {
190
+ "defaults": {
191
+ "unit": "percentunit",
192
+ "min": 0,
193
+ "max": 1
194
+ },
195
+ "overrides": []
103
196
  },
104
- "id": 2,
105
197
  "options": {
106
- "showHeader": true
198
+ "reduceOptions": {
199
+ "calcs": ["lastNotNull"],
200
+ "fields": "",
201
+ "values": false
202
+ },
203
+ "orientation": "auto",
204
+ "colorMode": "value",
205
+ "graphMode": "none",
206
+ "justifyMode": "auto"
107
207
  },
108
208
  "targets": [
109
209
  {
210
+ "refId": "A",
211
+ "expr": "sum(count_over_time({service_name=~\"$service\", logType=\"cache_trace\", operation=\"hit\"}[5m])) / sum(count_over_time({service_name=~\"$service\", logType=\"cache_trace\", operation=~\"hit|miss\"}[5m]))",
110
212
  "datasource": {
111
213
  "type": "loki",
112
214
  "uid": "loki"
113
- },
114
- "expr": "topk(10, sum by (logger_name) (count_over_time({service_name=~\"$service_name\"}[5m])))",
115
- "queryType": "range",
116
- "refId": "A"
215
+ }
117
216
  }
118
- ],
119
- "title": "Top log namespaces",
120
- "type": "table"
217
+ ]
121
218
  },
122
219
  {
220
+ "id": 5,
221
+ "type": "timeseries",
222
+ "title": "Cache Latency by Operation",
123
223
  "datasource": {
124
224
  "type": "loki",
125
225
  "uid": "loki"
126
226
  },
227
+ "gridPos": {
228
+ "h": 5,
229
+ "w": 6,
230
+ "x": 18,
231
+ "y": 7
232
+ },
127
233
  "fieldConfig": {
128
234
  "defaults": {
129
- "custom": {},
130
- "mappings": [],
131
- "unit": "percent"
235
+ "unit": "ms"
132
236
  },
133
237
  "overrides": []
134
238
  },
239
+ "options": {
240
+ "legend": {
241
+ "displayMode": "list",
242
+ "placement": "bottom"
243
+ },
244
+ "tooltip": {
245
+ "mode": "single"
246
+ }
247
+ },
248
+ "targets": [
249
+ {
250
+ "refId": "A",
251
+ "expr": "avg_over_time({service_name=~\"$service\", logType=\"cache_trace\"} | unwrap durationMs [5m])",
252
+ "legendFormat": "{{operation}}",
253
+ "datasource": {
254
+ "type": "loki",
255
+ "uid": "loki"
256
+ }
257
+ }
258
+ ]
259
+ },
260
+ {
261
+ "id": 6,
262
+ "type": "bargauge",
263
+ "title": "Schedule Status (15m)",
264
+ "datasource": {
265
+ "type": "loki",
266
+ "uid": "loki"
267
+ },
135
268
  "gridPos": {
136
- "h": 4,
269
+ "h": 6,
137
270
  "w": 12,
138
271
  "x": 0,
139
- "y": 16
272
+ "y": 14
273
+ },
274
+ "fieldConfig": {
275
+ "defaults": {
276
+ "unit": "short"
277
+ },
278
+ "overrides": []
140
279
  },
141
- "id": 3,
142
280
  "options": {
281
+ "displayMode": "basic",
143
282
  "orientation": "horizontal",
144
- "reduceOptions": {
145
- "calcs": [
146
- "mean"
147
- ],
148
- "fields": "",
149
- "values": false
150
- },
151
- "textMode": "auto"
283
+ "showUnfilled": true
152
284
  },
153
285
  "targets": [
154
286
  {
287
+ "refId": "A",
288
+ "expr": "sum by (status) (count_over_time({service_name=~\"$service\", logType=\"schedule\"}[15m]))",
289
+ "legendFormat": "{{status}}",
155
290
  "datasource": {
156
291
  "type": "loki",
157
292
  "uid": "loki"
158
- },
159
- "expr": "sum(count_over_time({trace_id!=\"\", service_name=~\"$service_name\"}[5m])) / sum(count_over_time({service_name=~\"$service_name\"}[5m])) * 100",
160
- "queryType": "range",
161
- "refId": "A"
293
+ }
162
294
  }
163
- ],
164
- "title": "Traced logs",
165
- "type": "stat"
166
- }
167
- ],
168
- "refresh": "10s",
169
- "schemaVersion": 38,
170
- "style": "dark",
171
- "tags": [
172
- "service",
173
- "telemetry"
174
- ],
175
- "templating": {
176
- "list": [
177
- {
178
- "allValue": ".*",
179
- "datasource": {
180
- "type": "loki",
181
- "uid": "loki"
295
+ ]
296
+ },
297
+ {
298
+ "id": 7,
299
+ "type": "timeseries",
300
+ "title": "Socket Events by Direction",
301
+ "datasource": {
302
+ "type": "loki",
303
+ "uid": "loki"
304
+ },
305
+ "gridPos": {
306
+ "h": 6,
307
+ "w": 12,
308
+ "x": 12,
309
+ "y": 14
310
+ },
311
+ "fieldConfig": {
312
+ "defaults": {
313
+ "unit": "short"
182
314
  },
183
- "definition": "label_values({service_name!=\"\"}, service_name)",
184
- "includeAll": true,
185
- "label": "Service",
186
- "multi": false,
187
- "name": "service_name",
188
- "options": [],
189
- "query": "label_values({service_name!=\"\"}, service_name)",
190
- "refresh": 2,
191
- "skipUrlSync": false,
192
- "type": "query",
193
- "useTags": false,
194
- "current": {
195
- "text": "All services",
196
- "value": ".*"
315
+ "overrides": []
316
+ },
317
+ "options": {
318
+ "legend": {
319
+ "displayMode": "list",
320
+ "placement": "bottom"
321
+ },
322
+ "tooltip": {
323
+ "mode": "single"
197
324
  }
198
- }
199
- ]
200
- },
201
- "time": {
202
- "from": "now-1h",
203
- "to": "now"
204
- },
205
- "timepicker": {},
206
- "timezone": "browser",
207
- "title": "Service telemetry",
208
- "uid": "service-telemetry",
209
- "version": 1,
210
- "weekStart": ""
325
+ },
326
+ "targets": [
327
+ {
328
+ "refId": "A",
329
+ "expr": "sum by (direction) (count_over_time({service_name=~\"$service\", logType=\"socket\"}[5m]))",
330
+ "legendFormat": "{{direction}}",
331
+ "datasource": {
332
+ "type": "loki",
333
+ "uid": "loki"
334
+ }
335
+ }
336
+ ]
337
+ }
338
+ ]
211
339
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@emeryld/obs-stack",
- "version": "0.1.12",
+ "version": "0.1.14",
  "description": "Docker Compose-based Grafana + Tempo + Loki + OpenTelemetry Collector stack",
  "type": "commonjs",
  "bin": {