@emeryld/obs-stack 0.1.16 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/configs/loki.yaml +1 -0
- package/configs/otel-collector.yaml +34 -5
- package/grafana/provisioning/dashboards/files/errors-spotlight.json +34 -156
- package/grafana/provisioning/dashboards/files/logs-overview.json +23 -101
- package/grafana/provisioning/dashboards/files/service-telemetry.json +43 -183
- package/grafana/provisioning/datasources/datasources.yaml +18 -0
- package/package.json +1 -1
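package/configs/loki.yaml
CHANGED
(The +1 -0 hunk for this file is not present in this extract.)
package/configs/otel-collector.yaml
CHANGED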
|
@@ -6,16 +6,44 @@ receivers:
|
|
|
6
6
|
|
|
7
7
|
processors:
|
|
8
8
|
batch:
|
|
9
|
-
timeout:
|
|
9
|
+
timeout: 5s
|
|
10
10
|
send_batch_size: 1024
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
# Normalize resource attrs into Loki-safe label keys.
|
|
13
|
+
# - OTEL uses `service.name` (contains a dot) which is not a valid Loki label name.
|
|
14
|
+
# - We copy it into `service_name`.
|
|
15
|
+
resource/normalize:
|
|
16
|
+
attributes:
|
|
17
|
+
- action: upsert
|
|
18
|
+
key: service_name
|
|
19
|
+
from_attribute: service.name
|
|
20
|
+
|
|
21
|
+
# Ensure the log body is consistently JSON and includes attributes + resource attrs.
|
|
22
|
+
# This makes Grafana/Loki `| json` + `unwrap` work reliably.
|
|
23
|
+
transform/logs_json:
|
|
24
|
+
log_statements:
|
|
25
|
+
- context: log
|
|
26
|
+
statements:
|
|
27
|
+
# If body is scalar, wrap it in a map
|
|
28
|
+
- set(body, {"message": body}) where IsString(body) or IsNumeric(body) or IsBool(body)
|
|
29
|
+
|
|
30
|
+
# Merge log attributes into the body so Grafana can parse them as fields
|
|
31
|
+
- merge_maps(body, attributes)
|
|
32
|
+
|
|
33
|
+
# Merge resource attributes too (service_name/environment/etc.)
|
|
34
|
+
- merge_maps(body, resource.attributes)
|
|
35
|
+
|
|
36
|
+
# Only label low-cardinality fields for Loki.
|
|
37
|
+
# Numeric/high-cardinality fields (durationMs, trace_id, span_id, ids, etc.)
|
|
38
|
+
# must remain in JSON body (query via `| json`).
|
|
39
|
+
attributes/loki_labels:
|
|
12
40
|
actions:
|
|
13
41
|
- action: insert
|
|
14
42
|
key: loki.resource.labels
|
|
15
|
-
value:
|
|
43
|
+
value: service_name,environment
|
|
16
44
|
- action: insert
|
|
17
45
|
key: loki.attribute.labels
|
|
18
|
-
value: logType,level,
|
|
46
|
+
value: logType,level,method,status,path,operation,direction
|
|
19
47
|
|
|
20
48
|
exporters:
|
|
21
49
|
otlp:
|
|
@@ -32,7 +60,8 @@ service:
|
|
|
32
60
|
receivers: [otlp]
|
|
33
61
|
processors: [batch]
|
|
34
62
|
exporters: [otlp]
|
|
63
|
+
|
|
35
64
|
logs:
|
|
36
65
|
receivers: [otlp]
|
|
37
|
-
processors: [attributes/
|
|
66
|
+
processors: [resource/normalize, transform/logs_json, attributes/loki_labels, batch]
|
|
38
67
|
exporters: [loki]
|
|
package/grafana/provisioning/dashboards/files/errors-spotlight.json
CHANGED
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"tags": ["errors", "logs"],
|
|
6
6
|
"timezone": "browser",
|
|
7
7
|
"schemaVersion": 38,
|
|
8
|
-
"version":
|
|
8
|
+
"version": 2,
|
|
9
9
|
"refresh": "10s",
|
|
10
10
|
"time": {
|
|
11
11
|
"from": "now-6h",
|
|
@@ -28,20 +28,14 @@
|
|
|
28
28
|
{
|
|
29
29
|
"name": "service",
|
|
30
30
|
"type": "query",
|
|
31
|
-
"datasource": {
|
|
32
|
-
"type": "loki",
|
|
33
|
-
"uid": "loki"
|
|
34
|
-
},
|
|
31
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
35
32
|
"definition": "label_values({service_name!=\"\"}, service_name)",
|
|
36
33
|
"query": "label_values({service_name!=\"\"}, service_name)",
|
|
37
34
|
"refresh": 1,
|
|
38
35
|
"includeAll": true,
|
|
39
36
|
"multi": true,
|
|
40
37
|
"allValue": ".+",
|
|
41
|
-
"current": {
|
|
42
|
-
"text": "All",
|
|
43
|
-
"value": "$__all"
|
|
44
|
-
}
|
|
38
|
+
"current": { "text": "All", "value": "$__all" }
|
|
45
39
|
}
|
|
46
40
|
]
|
|
47
41
|
},
|
|
@@ -50,28 +44,11 @@
|
|
|
50
44
|
"id": 1,
|
|
51
45
|
"type": "stat",
|
|
52
46
|
"title": "App Errors (5m)",
|
|
53
|
-
"datasource": {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
},
|
|
57
|
-
"gridPos": {
|
|
58
|
-
"h": 5,
|
|
59
|
-
"w": 8,
|
|
60
|
-
"x": 0,
|
|
61
|
-
"y": 0
|
|
62
|
-
},
|
|
63
|
-
"fieldConfig": {
|
|
64
|
-
"defaults": {
|
|
65
|
-
"unit": "short"
|
|
66
|
-
},
|
|
67
|
-
"overrides": []
|
|
68
|
-
},
|
|
47
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
48
|
+
"gridPos": { "h": 5, "w": 8, "x": 0, "y": 0 },
|
|
49
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
69
50
|
"options": {
|
|
70
|
-
"reduceOptions": {
|
|
71
|
-
"calcs": ["lastNotNull"],
|
|
72
|
-
"fields": "",
|
|
73
|
-
"values": false
|
|
74
|
-
},
|
|
51
|
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
75
52
|
"orientation": "auto",
|
|
76
53
|
"colorMode": "value",
|
|
77
54
|
"graphMode": "none",
|
|
@@ -81,10 +58,7 @@
|
|
|
81
58
|
{
|
|
82
59
|
"refId": "A",
|
|
83
60
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"application\", level=\"error\"}[5m]))",
|
|
84
|
-
"datasource": {
|
|
85
|
-
"type": "loki",
|
|
86
|
-
"uid": "loki"
|
|
87
|
-
}
|
|
61
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
88
62
|
}
|
|
89
63
|
]
|
|
90
64
|
},
|
|
@@ -92,28 +66,11 @@
|
|
|
92
66
|
"id": 2,
|
|
93
67
|
"type": "stat",
|
|
94
68
|
"title": "Request 5xx (5m)",
|
|
95
|
-
"datasource": {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
},
|
|
99
|
-
"gridPos": {
|
|
100
|
-
"h": 5,
|
|
101
|
-
"w": 8,
|
|
102
|
-
"x": 8,
|
|
103
|
-
"y": 0
|
|
104
|
-
},
|
|
105
|
-
"fieldConfig": {
|
|
106
|
-
"defaults": {
|
|
107
|
-
"unit": "short"
|
|
108
|
-
},
|
|
109
|
-
"overrides": []
|
|
110
|
-
},
|
|
69
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
70
|
+
"gridPos": { "h": 5, "w": 8, "x": 8, "y": 0 },
|
|
71
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
111
72
|
"options": {
|
|
112
|
-
"reduceOptions": {
|
|
113
|
-
"calcs": ["lastNotNull"],
|
|
114
|
-
"fields": "",
|
|
115
|
-
"values": false
|
|
116
|
-
},
|
|
73
|
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
117
74
|
"orientation": "auto",
|
|
118
75
|
"colorMode": "value",
|
|
119
76
|
"graphMode": "none",
|
|
@@ -123,10 +80,7 @@
|
|
|
123
80
|
{
|
|
124
81
|
"refId": "A",
|
|
125
82
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[5m]))",
|
|
126
|
-
"datasource": {
|
|
127
|
-
"type": "loki",
|
|
128
|
-
"uid": "loki"
|
|
129
|
-
}
|
|
83
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
130
84
|
}
|
|
131
85
|
]
|
|
132
86
|
},
|
|
@@ -134,28 +88,11 @@
|
|
|
134
88
|
"id": 3,
|
|
135
89
|
"type": "stat",
|
|
136
90
|
"title": "Schedule Failures (15m)",
|
|
137
|
-
"datasource": {
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
},
|
|
141
|
-
"gridPos": {
|
|
142
|
-
"h": 5,
|
|
143
|
-
"w": 8,
|
|
144
|
-
"x": 16,
|
|
145
|
-
"y": 0
|
|
146
|
-
},
|
|
147
|
-
"fieldConfig": {
|
|
148
|
-
"defaults": {
|
|
149
|
-
"unit": "short"
|
|
150
|
-
},
|
|
151
|
-
"overrides": []
|
|
152
|
-
},
|
|
91
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
92
|
+
"gridPos": { "h": 5, "w": 8, "x": 16, "y": 0 },
|
|
93
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
153
94
|
"options": {
|
|
154
|
-
"reduceOptions": {
|
|
155
|
-
"calcs": ["lastNotNull"],
|
|
156
|
-
"fields": "",
|
|
157
|
-
"values": false
|
|
158
|
-
},
|
|
95
|
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
159
96
|
"orientation": "auto",
|
|
160
97
|
"colorMode": "value",
|
|
161
98
|
"graphMode": "none",
|
|
@@ -165,10 +102,7 @@
|
|
|
165
102
|
{
|
|
166
103
|
"refId": "A",
|
|
167
104
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}[15m]))",
|
|
168
|
-
"datasource": {
|
|
169
|
-
"type": "loki",
|
|
170
|
-
"uid": "loki"
|
|
171
|
-
}
|
|
105
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
172
106
|
}
|
|
173
107
|
]
|
|
174
108
|
},
|
|
@@ -176,58 +110,31 @@
|
|
|
176
110
|
"id": 4,
|
|
177
111
|
"type": "timeseries",
|
|
178
112
|
"title": "Error Rate by Type",
|
|
179
|
-
"datasource": {
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
},
|
|
183
|
-
"gridPos": {
|
|
184
|
-
"h": 8,
|
|
185
|
-
"w": 24,
|
|
186
|
-
"x": 0,
|
|
187
|
-
"y": 5
|
|
188
|
-
},
|
|
189
|
-
"fieldConfig": {
|
|
190
|
-
"defaults": {
|
|
191
|
-
"unit": "short"
|
|
192
|
-
},
|
|
193
|
-
"overrides": []
|
|
194
|
-
},
|
|
113
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
114
|
+
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 5 },
|
|
115
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
195
116
|
"options": {
|
|
196
|
-
"legend": {
|
|
197
|
-
|
|
198
|
-
"placement": "bottom"
|
|
199
|
-
},
|
|
200
|
-
"tooltip": {
|
|
201
|
-
"mode": "single"
|
|
202
|
-
}
|
|
117
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
118
|
+
"tooltip": { "mode": "single" }
|
|
203
119
|
},
|
|
204
120
|
"targets": [
|
|
205
121
|
{
|
|
206
122
|
"refId": "A",
|
|
207
123
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"application\", level=\"error\"}[5m]))",
|
|
208
124
|
"legendFormat": "application",
|
|
209
|
-
"datasource": {
|
|
210
|
-
"type": "loki",
|
|
211
|
-
"uid": "loki"
|
|
212
|
-
}
|
|
125
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
213
126
|
},
|
|
214
127
|
{
|
|
215
128
|
"refId": "B",
|
|
216
129
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[5m]))",
|
|
217
130
|
"legendFormat": "request",
|
|
218
|
-
"datasource": {
|
|
219
|
-
"type": "loki",
|
|
220
|
-
"uid": "loki"
|
|
221
|
-
}
|
|
131
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
222
132
|
},
|
|
223
133
|
{
|
|
224
134
|
"refId": "C",
|
|
225
135
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}[5m]))",
|
|
226
136
|
"legendFormat": "schedule",
|
|
227
|
-
"datasource": {
|
|
228
|
-
"type": "loki",
|
|
229
|
-
"uid": "loki"
|
|
230
|
-
}
|
|
137
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
231
138
|
}
|
|
232
139
|
]
|
|
233
140
|
},
|
|
@@ -235,16 +142,8 @@
|
|
|
235
142
|
"id": 5,
|
|
236
143
|
"type": "logs",
|
|
237
144
|
"title": "Recent Errors",
|
|
238
|
-
"datasource": {
|
|
239
|
-
|
|
240
|
-
"uid": "loki"
|
|
241
|
-
},
|
|
242
|
-
"gridPos": {
|
|
243
|
-
"h": 10,
|
|
244
|
-
"w": 24,
|
|
245
|
-
"x": 0,
|
|
246
|
-
"y": 13
|
|
247
|
-
},
|
|
145
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
146
|
+
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 13 },
|
|
248
147
|
"options": {
|
|
249
148
|
"dedupStrategy": "none",
|
|
250
149
|
"showLabels": true,
|
|
@@ -256,10 +155,7 @@
|
|
|
256
155
|
{
|
|
257
156
|
"refId": "A",
|
|
258
157
|
"expr": "{service_name=~\"$service\", logType=\"application\", level=\"error\"} or {service_name=~\"$service\", logType=\"request\", status=~\"5..\"} or {service_name=~\"$service\", logType=\"schedule\", status=~\"(fail|error|timeout).*\"}",
|
|
259
|
-
"datasource": {
|
|
260
|
-
"type": "loki",
|
|
261
|
-
"uid": "loki"
|
|
262
|
-
}
|
|
158
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
263
159
|
}
|
|
264
160
|
]
|
|
265
161
|
},
|
|
@@ -267,34 +163,16 @@
|
|
|
267
163
|
"id": 6,
|
|
268
164
|
"type": "table",
|
|
269
165
|
"title": "Top Error Routes (1h)",
|
|
270
|
-
"datasource": {
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
},
|
|
274
|
-
"gridPos": {
|
|
275
|
-
"h": 7,
|
|
276
|
-
"w": 24,
|
|
277
|
-
"x": 0,
|
|
278
|
-
"y": 23
|
|
279
|
-
},
|
|
280
|
-
"fieldConfig": {
|
|
281
|
-
"defaults": {
|
|
282
|
-
"unit": "short"
|
|
283
|
-
},
|
|
284
|
-
"overrides": []
|
|
285
|
-
},
|
|
286
|
-
"options": {
|
|
287
|
-
"showHeader": true
|
|
288
|
-
},
|
|
166
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
167
|
+
"gridPos": { "h": 7, "w": 24, "x": 0, "y": 23 },
|
|
168
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
169
|
+
"options": { "showHeader": true },
|
|
289
170
|
"targets": [
|
|
290
171
|
{
|
|
291
172
|
"refId": "A",
|
|
292
173
|
"expr": "topk(10, sum by (path, method) (count_over_time({service_name=~\"$service\", logType=\"request\", status=~\"5..\"}[1h])))",
|
|
293
174
|
"legendFormat": "{{method}} {{path}}",
|
|
294
|
-
"datasource": {
|
|
295
|
-
"type": "loki",
|
|
296
|
-
"uid": "loki"
|
|
297
|
-
}
|
|
175
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
298
176
|
}
|
|
299
177
|
]
|
|
300
178
|
}
|
|
package/grafana/provisioning/dashboards/files/logs-overview.json
CHANGED
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"tags": ["logs"],
|
|
6
6
|
"timezone": "browser",
|
|
7
7
|
"schemaVersion": 38,
|
|
8
|
-
"version":
|
|
8
|
+
"version": 2,
|
|
9
9
|
"refresh": "10s",
|
|
10
10
|
"time": {
|
|
11
11
|
"from": "now-6h",
|
|
@@ -28,38 +28,26 @@
|
|
|
28
28
|
{
|
|
29
29
|
"name": "service",
|
|
30
30
|
"type": "query",
|
|
31
|
-
"datasource": {
|
|
32
|
-
"type": "loki",
|
|
33
|
-
"uid": "loki"
|
|
34
|
-
},
|
|
31
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
35
32
|
"definition": "label_values({service_name!=\"\"}, service_name)",
|
|
36
33
|
"query": "label_values({service_name!=\"\"}, service_name)",
|
|
37
34
|
"refresh": 1,
|
|
38
35
|
"includeAll": true,
|
|
39
36
|
"multi": true,
|
|
40
37
|
"allValue": ".+",
|
|
41
|
-
"current": {
|
|
42
|
-
"text": "All",
|
|
43
|
-
"value": "$__all"
|
|
44
|
-
}
|
|
38
|
+
"current": { "text": "All", "value": "$__all" }
|
|
45
39
|
},
|
|
46
40
|
{
|
|
47
41
|
"name": "logType",
|
|
48
42
|
"type": "query",
|
|
49
|
-
"datasource": {
|
|
50
|
-
"type": "loki",
|
|
51
|
-
"uid": "loki"
|
|
52
|
-
},
|
|
43
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
53
44
|
"definition": "label_values({logType!=\"\"}, logType)",
|
|
54
45
|
"query": "label_values({logType!=\"\"}, logType)",
|
|
55
46
|
"refresh": 1,
|
|
56
47
|
"includeAll": true,
|
|
57
48
|
"multi": true,
|
|
58
49
|
"allValue": ".+",
|
|
59
|
-
"current": {
|
|
60
|
-
"text": "All",
|
|
61
|
-
"value": "$__all"
|
|
62
|
-
}
|
|
50
|
+
"current": { "text": "All", "value": "$__all" }
|
|
63
51
|
}
|
|
64
52
|
]
|
|
65
53
|
},
|
|
@@ -68,40 +56,19 @@
|
|
|
68
56
|
"id": 1,
|
|
69
57
|
"type": "timeseries",
|
|
70
58
|
"title": "Log Volume by Type",
|
|
71
|
-
"datasource": {
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
},
|
|
75
|
-
"gridPos": {
|
|
76
|
-
"h": 8,
|
|
77
|
-
"w": 24,
|
|
78
|
-
"x": 0,
|
|
79
|
-
"y": 0
|
|
80
|
-
},
|
|
81
|
-
"fieldConfig": {
|
|
82
|
-
"defaults": {
|
|
83
|
-
"unit": "short"
|
|
84
|
-
},
|
|
85
|
-
"overrides": []
|
|
86
|
-
},
|
|
59
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
60
|
+
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 },
|
|
61
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
87
62
|
"options": {
|
|
88
|
-
"legend": {
|
|
89
|
-
|
|
90
|
-
"placement": "bottom"
|
|
91
|
-
},
|
|
92
|
-
"tooltip": {
|
|
93
|
-
"mode": "single"
|
|
94
|
-
}
|
|
63
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
64
|
+
"tooltip": { "mode": "single" }
|
|
95
65
|
},
|
|
96
66
|
"targets": [
|
|
97
67
|
{
|
|
98
68
|
"refId": "A",
|
|
99
69
|
"expr": "sum by (logType) (count_over_time({service_name=~\"$service\", logType=~\"$logType\"}[5m]))",
|
|
100
70
|
"legendFormat": "{{logType}}",
|
|
101
|
-
"datasource": {
|
|
102
|
-
"type": "loki",
|
|
103
|
-
"uid": "loki"
|
|
104
|
-
}
|
|
71
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
105
72
|
}
|
|
106
73
|
]
|
|
107
74
|
},
|
|
@@ -109,22 +76,9 @@
|
|
|
109
76
|
"id": 2,
|
|
110
77
|
"type": "bargauge",
|
|
111
78
|
"title": "App Log Levels (15m)",
|
|
112
|
-
"datasource": {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
},
|
|
116
|
-
"gridPos": {
|
|
117
|
-
"h": 6,
|
|
118
|
-
"w": 12,
|
|
119
|
-
"x": 0,
|
|
120
|
-
"y": 8
|
|
121
|
-
},
|
|
122
|
-
"fieldConfig": {
|
|
123
|
-
"defaults": {
|
|
124
|
-
"unit": "short"
|
|
125
|
-
},
|
|
126
|
-
"overrides": []
|
|
127
|
-
},
|
|
79
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
80
|
+
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 },
|
|
81
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
128
82
|
"options": {
|
|
129
83
|
"displayMode": "basic",
|
|
130
84
|
"orientation": "horizontal",
|
|
@@ -135,10 +89,7 @@
|
|
|
135
89
|
"refId": "A",
|
|
136
90
|
"expr": "sum by (level) (count_over_time({service_name=~\"$service\", logType=\"application\"}[15m]))",
|
|
137
91
|
"legendFormat": "{{level}}",
|
|
138
|
-
"datasource": {
|
|
139
|
-
"type": "loki",
|
|
140
|
-
"uid": "loki"
|
|
141
|
-
}
|
|
92
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
142
93
|
}
|
|
143
94
|
]
|
|
144
95
|
},
|
|
@@ -146,34 +97,16 @@
|
|
|
146
97
|
"id": 3,
|
|
147
98
|
"type": "table",
|
|
148
99
|
"title": "Top Application Events (1h)",
|
|
149
|
-
"datasource": {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
},
|
|
153
|
-
"gridPos": {
|
|
154
|
-
"h": 6,
|
|
155
|
-
"w": 12,
|
|
156
|
-
"x": 12,
|
|
157
|
-
"y": 8
|
|
158
|
-
},
|
|
159
|
-
"fieldConfig": {
|
|
160
|
-
"defaults": {
|
|
161
|
-
"unit": "short"
|
|
162
|
-
},
|
|
163
|
-
"overrides": []
|
|
164
|
-
},
|
|
165
|
-
"options": {
|
|
166
|
-
"showHeader": true
|
|
167
|
-
},
|
|
100
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
101
|
+
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 },
|
|
102
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
103
|
+
"options": { "showHeader": true },
|
|
168
104
|
"targets": [
|
|
169
105
|
{
|
|
170
106
|
"refId": "A",
|
|
171
107
|
"expr": "topk(10, sum by (name) (count_over_time({service_name=~\"$service\", logType=\"application\"}[1h])))",
|
|
172
108
|
"legendFormat": "{{name}}",
|
|
173
|
-
"datasource": {
|
|
174
|
-
"type": "loki",
|
|
175
|
-
"uid": "loki"
|
|
176
|
-
}
|
|
109
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
177
110
|
}
|
|
178
111
|
]
|
|
179
112
|
},
|
|
@@ -181,16 +114,8 @@
|
|
|
181
114
|
"id": 4,
|
|
182
115
|
"type": "logs",
|
|
183
116
|
"title": "Recent Application Logs",
|
|
184
|
-
"datasource": {
|
|
185
|
-
|
|
186
|
-
"uid": "loki"
|
|
187
|
-
},
|
|
188
|
-
"gridPos": {
|
|
189
|
-
"h": 10,
|
|
190
|
-
"w": 24,
|
|
191
|
-
"x": 0,
|
|
192
|
-
"y": 14
|
|
193
|
-
},
|
|
117
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
118
|
+
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
|
|
194
119
|
"options": {
|
|
195
120
|
"dedupStrategy": "none",
|
|
196
121
|
"showLabels": true,
|
|
@@ -202,10 +127,7 @@
|
|
|
202
127
|
{
|
|
203
128
|
"refId": "A",
|
|
204
129
|
"expr": "{service_name=~\"$service\", logType=\"application\"}",
|
|
205
|
-
"datasource": {
|
|
206
|
-
"type": "loki",
|
|
207
|
-
"uid": "loki"
|
|
208
|
-
}
|
|
130
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
209
131
|
}
|
|
210
132
|
]
|
|
211
133
|
}
|
|
package/grafana/provisioning/dashboards/files/service-telemetry.json
CHANGED
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"tags": ["requests", "cache", "schedules", "sockets"],
|
|
6
6
|
"timezone": "browser",
|
|
7
7
|
"schemaVersion": 38,
|
|
8
|
-
"version":
|
|
8
|
+
"version": 2,
|
|
9
9
|
"refresh": "10s",
|
|
10
10
|
"time": {
|
|
11
11
|
"from": "now-6h",
|
|
@@ -50,40 +50,19 @@
|
|
|
50
50
|
"id": 1,
|
|
51
51
|
"type": "timeseries",
|
|
52
52
|
"title": "Request Throughput by Method",
|
|
53
|
-
"datasource": {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
},
|
|
57
|
-
"gridPos": {
|
|
58
|
-
"h": 7,
|
|
59
|
-
"w": 12,
|
|
60
|
-
"x": 0,
|
|
61
|
-
"y": 0
|
|
62
|
-
},
|
|
63
|
-
"fieldConfig": {
|
|
64
|
-
"defaults": {
|
|
65
|
-
"unit": "short"
|
|
66
|
-
},
|
|
67
|
-
"overrides": []
|
|
68
|
-
},
|
|
53
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
54
|
+
"gridPos": { "h": 7, "w": 12, "x": 0, "y": 0 },
|
|
55
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
69
56
|
"options": {
|
|
70
|
-
"legend": {
|
|
71
|
-
|
|
72
|
-
"placement": "bottom"
|
|
73
|
-
},
|
|
74
|
-
"tooltip": {
|
|
75
|
-
"mode": "single"
|
|
76
|
-
}
|
|
57
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
58
|
+
"tooltip": { "mode": "single" }
|
|
77
59
|
},
|
|
78
60
|
"targets": [
|
|
79
61
|
{
|
|
80
62
|
"refId": "A",
|
|
81
63
|
"expr": "sum by (method) (count_over_time({service_name=~\"$service\", logType=\"request\"}[5m]))",
|
|
82
64
|
"legendFormat": "{{method}}",
|
|
83
|
-
"datasource": {
|
|
84
|
-
"type": "loki",
|
|
85
|
-
"uid": "loki"
|
|
86
|
-
}
|
|
65
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
87
66
|
}
|
|
88
67
|
]
|
|
89
68
|
},
|
|
@@ -91,49 +70,25 @@
|
|
|
91
70
|
"id": 2,
|
|
92
71
|
"type": "timeseries",
|
|
93
72
|
"title": "Request Latency (p50 / p95)",
|
|
94
|
-
"datasource": {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
},
|
|
98
|
-
"gridPos": {
|
|
99
|
-
"h": 7,
|
|
100
|
-
"w": 12,
|
|
101
|
-
"x": 12,
|
|
102
|
-
"y": 0
|
|
103
|
-
},
|
|
104
|
-
"fieldConfig": {
|
|
105
|
-
"defaults": {
|
|
106
|
-
"unit": "ms"
|
|
107
|
-
},
|
|
108
|
-
"overrides": []
|
|
109
|
-
},
|
|
73
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
74
|
+
"gridPos": { "h": 7, "w": 12, "x": 12, "y": 0 },
|
|
75
|
+
"fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
|
|
110
76
|
"options": {
|
|
111
|
-
"legend": {
|
|
112
|
-
|
|
113
|
-
"placement": "bottom"
|
|
114
|
-
},
|
|
115
|
-
"tooltip": {
|
|
116
|
-
"mode": "single"
|
|
117
|
-
}
|
|
77
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
78
|
+
"tooltip": { "mode": "single" }
|
|
118
79
|
},
|
|
119
80
|
"targets": [
|
|
120
81
|
{
|
|
121
82
|
"refId": "A",
|
|
122
|
-
"expr": "quantile_over_time(0.5, {service_name=~\"$service\", logType=\"request\"} | unwrap durationMs [5m])",
|
|
83
|
+
"expr": "quantile_over_time(0.5, {service_name=~\"$service\", logType=\"request\"} | json | unwrap durationMs [5m])",
|
|
123
84
|
"legendFormat": "p50",
|
|
124
|
-
"datasource": {
|
|
125
|
-
"type": "loki",
|
|
126
|
-
"uid": "loki"
|
|
127
|
-
}
|
|
85
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
128
86
|
},
|
|
129
87
|
{
|
|
130
88
|
"refId": "B",
|
|
131
|
-
"expr": "quantile_over_time(0.95, {service_name=~\"$service\", logType=\"request\"} | unwrap durationMs [5m])",
|
|
89
|
+
"expr": "quantile_over_time(0.95, {service_name=~\"$service\", logType=\"request\"} | json | unwrap durationMs [5m])",
|
|
132
90
|
"legendFormat": "p95",
|
|
133
|
-
"datasource": {
|
|
134
|
-
"type": "loki",
|
|
135
|
-
"uid": "loki"
|
|
136
|
-
}
|
|
91
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
137
92
|
}
|
|
138
93
|
]
|
|
139
94
|
},
|
|
@@ -141,34 +96,16 @@
|
|
|
141
96
|
"id": 3,
|
|
142
97
|
"type": "table",
|
|
143
98
|
"title": "Top Request Paths (1h)",
|
|
144
|
-
"datasource": {
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
},
|
|
148
|
-
"gridPos": {
|
|
149
|
-
"h": 7,
|
|
150
|
-
"w": 12,
|
|
151
|
-
"x": 0,
|
|
152
|
-
"y": 7
|
|
153
|
-
},
|
|
154
|
-
"fieldConfig": {
|
|
155
|
-
"defaults": {
|
|
156
|
-
"unit": "short"
|
|
157
|
-
},
|
|
158
|
-
"overrides": []
|
|
159
|
-
},
|
|
160
|
-
"options": {
|
|
161
|
-
"showHeader": true
|
|
162
|
-
},
|
|
99
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
100
|
+
"gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 },
|
|
101
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
102
|
+
"options": { "showHeader": true },
|
|
163
103
|
"targets": [
|
|
164
104
|
{
|
|
165
105
|
"refId": "A",
|
|
166
106
|
"expr": "topk(10, sum by (path) (count_over_time({service_name=~\"$service\", logType=\"request\"}[1h])))",
|
|
167
107
|
"legendFormat": "{{path}}",
|
|
168
|
-
"datasource": {
|
|
169
|
-
"type": "loki",
|
|
170
|
-
"uid": "loki"
|
|
171
|
-
}
|
|
108
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
172
109
|
}
|
|
173
110
|
]
|
|
174
111
|
},
|
|
@@ -176,30 +113,14 @@
|
|
|
176
113
|
"id": 4,
|
|
177
114
|
"type": "stat",
|
|
178
115
|
"title": "Cache Hit Ratio (5m)",
|
|
179
|
-
"datasource": {
|
|
180
|
-
|
|
181
|
-
"uid": "loki"
|
|
182
|
-
},
|
|
183
|
-
"gridPos": {
|
|
184
|
-
"h": 5,
|
|
185
|
-
"w": 6,
|
|
186
|
-
"x": 12,
|
|
187
|
-
"y": 7
|
|
188
|
-
},
|
|
116
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
117
|
+
"gridPos": { "h": 5, "w": 6, "x": 12, "y": 7 },
|
|
189
118
|
"fieldConfig": {
|
|
190
|
-
"defaults": {
|
|
191
|
-
"unit": "percentunit",
|
|
192
|
-
"min": 0,
|
|
193
|
-
"max": 1
|
|
194
|
-
},
|
|
119
|
+
"defaults": { "unit": "percentunit", "min": 0, "max": 1 },
|
|
195
120
|
"overrides": []
|
|
196
121
|
},
|
|
197
122
|
"options": {
|
|
198
|
-
"reduceOptions": {
|
|
199
|
-
"calcs": ["lastNotNull"],
|
|
200
|
-
"fields": "",
|
|
201
|
-
"values": false
|
|
202
|
-
},
|
|
123
|
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
|
|
203
124
|
"orientation": "auto",
|
|
204
125
|
"colorMode": "value",
|
|
205
126
|
"graphMode": "none",
|
|
@@ -209,10 +130,7 @@
|
|
|
209
130
|
{
|
|
210
131
|
"refId": "A",
|
|
211
132
|
"expr": "sum(count_over_time({service_name=~\"$service\", logType=\"cache_trace\", operation=\"hit\"}[5m])) / sum(count_over_time({service_name=~\"$service\", logType=\"cache_trace\", operation=~\"hit|miss\"}[5m]))",
|
|
212
|
-
"datasource": {
|
|
213
|
-
"type": "loki",
|
|
214
|
-
"uid": "loki"
|
|
215
|
-
}
|
|
133
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
216
134
|
}
|
|
217
135
|
]
|
|
218
136
|
},
|
|
@@ -220,40 +138,19 @@
|
|
|
220
138
|
"id": 5,
|
|
221
139
|
"type": "timeseries",
|
|
222
140
|
"title": "Cache Latency by Operation",
|
|
223
|
-
"datasource": {
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
},
|
|
227
|
-
"gridPos": {
|
|
228
|
-
"h": 5,
|
|
229
|
-
"w": 6,
|
|
230
|
-
"x": 18,
|
|
231
|
-
"y": 7
|
|
232
|
-
},
|
|
233
|
-
"fieldConfig": {
|
|
234
|
-
"defaults": {
|
|
235
|
-
"unit": "ms"
|
|
236
|
-
},
|
|
237
|
-
"overrides": []
|
|
238
|
-
},
|
|
141
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
142
|
+
"gridPos": { "h": 5, "w": 6, "x": 18, "y": 7 },
|
|
143
|
+
"fieldConfig": { "defaults": { "unit": "ms" }, "overrides": [] },
|
|
239
144
|
"options": {
|
|
240
|
-
"legend": {
|
|
241
|
-
|
|
242
|
-
"placement": "bottom"
|
|
243
|
-
},
|
|
244
|
-
"tooltip": {
|
|
245
|
-
"mode": "single"
|
|
246
|
-
}
|
|
145
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
146
|
+
"tooltip": { "mode": "single" }
|
|
247
147
|
},
|
|
248
148
|
"targets": [
|
|
249
149
|
{
|
|
250
150
|
"refId": "A",
|
|
251
|
-
"expr": "avg_over_time({service_name=~\"$service\", logType=\"cache_trace\"} | unwrap durationMs [5m])",
|
|
151
|
+
"expr": "avg by (operation) (avg_over_time({service_name=~\"$service\", logType=\"cache_trace\"} | json | unwrap durationMs [5m]))",
|
|
252
152
|
"legendFormat": "{{operation}}",
|
|
253
|
-
"datasource": {
|
|
254
|
-
"type": "loki",
|
|
255
|
-
"uid": "loki"
|
|
256
|
-
}
|
|
153
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
257
154
|
}
|
|
258
155
|
]
|
|
259
156
|
},
|
|
@@ -261,22 +158,9 @@
|
|
|
261
158
|
"id": 6,
|
|
262
159
|
"type": "bargauge",
|
|
263
160
|
"title": "Schedule Status (15m)",
|
|
264
|
-
"datasource": {
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
},
|
|
268
|
-
"gridPos": {
|
|
269
|
-
"h": 6,
|
|
270
|
-
"w": 12,
|
|
271
|
-
"x": 0,
|
|
272
|
-
"y": 14
|
|
273
|
-
},
|
|
274
|
-
"fieldConfig": {
|
|
275
|
-
"defaults": {
|
|
276
|
-
"unit": "short"
|
|
277
|
-
},
|
|
278
|
-
"overrides": []
|
|
279
|
-
},
|
|
161
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
162
|
+
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 14 },
|
|
163
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
280
164
|
"options": {
|
|
281
165
|
"displayMode": "basic",
|
|
282
166
|
"orientation": "horizontal",
|
|
@@ -287,10 +171,7 @@
|
|
|
287
171
|
"refId": "A",
|
|
288
172
|
"expr": "sum by (status) (count_over_time({service_name=~\"$service\", logType=\"schedule\"}[15m]))",
|
|
289
173
|
"legendFormat": "{{status}}",
|
|
290
|
-
"datasource": {
|
|
291
|
-
"type": "loki",
|
|
292
|
-
"uid": "loki"
|
|
293
|
-
}
|
|
174
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
294
175
|
}
|
|
295
176
|
]
|
|
296
177
|
},
|
|
@@ -298,40 +179,19 @@
|
|
|
298
179
|
"id": 7,
|
|
299
180
|
"type": "timeseries",
|
|
300
181
|
"title": "Socket Events by Direction",
|
|
301
|
-
"datasource": {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
},
|
|
305
|
-
"gridPos": {
|
|
306
|
-
"h": 6,
|
|
307
|
-
"w": 12,
|
|
308
|
-
"x": 12,
|
|
309
|
-
"y": 14
|
|
310
|
-
},
|
|
311
|
-
"fieldConfig": {
|
|
312
|
-
"defaults": {
|
|
313
|
-
"unit": "short"
|
|
314
|
-
},
|
|
315
|
-
"overrides": []
|
|
316
|
-
},
|
|
182
|
+
"datasource": { "type": "loki", "uid": "loki" },
|
|
183
|
+
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 14 },
|
|
184
|
+
"fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
|
|
317
185
|
"options": {
|
|
318
|
-
"legend": {
|
|
319
|
-
|
|
320
|
-
"placement": "bottom"
|
|
321
|
-
},
|
|
322
|
-
"tooltip": {
|
|
323
|
-
"mode": "single"
|
|
324
|
-
}
|
|
186
|
+
"legend": { "displayMode": "list", "placement": "bottom" },
|
|
187
|
+
"tooltip": { "mode": "single" }
|
|
325
188
|
},
|
|
326
189
|
"targets": [
|
|
327
190
|
{
|
|
328
191
|
"refId": "A",
|
|
329
192
|
"expr": "sum by (direction) (count_over_time({service_name=~\"$service\", logType=\"socket\"}[5m]))",
|
|
330
193
|
"legendFormat": "{{direction}}",
|
|
331
|
-
"datasource": {
|
|
332
|
-
"type": "loki",
|
|
333
|
-
"uid": "loki"
|
|
334
|
-
}
|
|
194
|
+
"datasource": { "type": "loki", "uid": "loki" }
|
|
335
195
|
}
|
|
336
196
|
]
|
|
337
197
|
}
|
|
package/grafana/provisioning/datasources/datasources.yaml
CHANGED
@@ -9,6 +9,16 @@ datasources:
|
|
|
9
9
|
isDefault: true
|
|
10
10
|
jsonData:
|
|
11
11
|
min_refresh_interval: "5s"
|
|
12
|
+
maxLines: 2000
|
|
13
|
+
|
|
14
|
+
# Make trace_id clickable from logs -> Tempo trace view.
|
|
15
|
+
# We emit JSON bodies; this regex extracts the 32-hex trace id from JSON.
|
|
16
|
+
derivedFields:
|
|
17
|
+
- name: trace_id
|
|
18
|
+
matcherRegex: "\"trace_id\"\\s*:\\s*\"([a-f0-9]{32})\""
|
|
19
|
+
datasourceUid: tempo
|
|
20
|
+
url: "$${__value.raw}"
|
|
21
|
+
|
|
12
22
|
- name: Tempo
|
|
13
23
|
uid: tempo
|
|
14
24
|
type: tempo
|
|
@@ -17,3 +27,11 @@ datasources:
|
|
|
17
27
|
url: http://tempo:3200
|
|
18
28
|
jsonData:
|
|
19
29
|
httpMode: "GET"
|
|
30
|
+
|
|
31
|
+
# Enable traces -> logs navigation (Tempo -> Loki)
|
|
32
|
+
tracesToLogsV2:
|
|
33
|
+
datasourceUid: loki
|
|
34
|
+
spanStartTimeShift: "-5m"
|
|
35
|
+
spanEndTimeShift: "5m"
|
|
36
|
+
# These are useful tags to carry over when present
|
|
37
|
+
tags: ["service_name", "service.name", "logType", "level"]
|