@oneuptime/common 10.2.15 → 10.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Server/API/DashboardAPI.ts +0 -6
- package/Server/Services/DockerHostService.ts +91 -0
- package/Server/Services/IncidentService.ts +60 -23
- package/Server/Services/KubernetesClusterService.ts +92 -0
- package/Types/Dashboard/DashboardComponents/DashboardValueComponent.ts +15 -0
- package/Types/Dashboard/DashboardTemplates.ts +260 -971
- package/Types/Dashboard/DashboardVariable.ts +0 -8
- package/UI/Components/Charts/Utils/DataPoint.ts +0 -0
- package/Utils/Dashboard/Components/DashboardValueComponent.ts +36 -2
- package/Utils/ValueFormatter.ts +57 -0
- package/build/dist/Server/API/DashboardAPI.js +0 -3
- package/build/dist/Server/API/DashboardAPI.js.map +1 -1
- package/build/dist/Server/Services/DockerHostService.js +73 -0
- package/build/dist/Server/Services/DockerHostService.js.map +1 -1
- package/build/dist/Server/Services/IncidentService.js +55 -18
- package/build/dist/Server/Services/IncidentService.js.map +1 -1
- package/build/dist/Server/Services/KubernetesClusterService.js +74 -0
- package/build/dist/Server/Services/KubernetesClusterService.js.map +1 -1
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js +14 -1
- package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js.map +1 -1
- package/build/dist/Types/Dashboard/DashboardTemplates.js +240 -928
- package/build/dist/Types/Dashboard/DashboardTemplates.js.map +1 -1
- package/build/dist/UI/Components/Charts/Utils/DataPoint.js +0 -0
- package/build/dist/UI/Components/Charts/Utils/DataPoint.js.map +1 -1
- package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js +31 -1
- package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js.map +1 -1
- package/build/dist/Utils/ValueFormatter.js +51 -0
- package/build/dist/Utils/ValueFormatter.js.map +1 -1
- package/package.json +1 -1
|
@@ -9,20 +9,23 @@ import IconProp from "../Icon/IconProp";
|
|
|
9
9
|
import MetricsAggregationType from "../Metrics/MetricsAggregationType";
|
|
10
10
|
import IncidentMetricType from "../Incident/IncidentMetricType";
|
|
11
11
|
import MonitorMetricType from "../Monitor/MonitorMetricType";
|
|
12
|
-
import SpanMetricType from "../Span/SpanMetricType";
|
|
13
|
-
import ExceptionMetricType from "../Exception/ExceptionMetricType";
|
|
14
|
-
import ProfileMetricType from "../Profile/ProfileMetricType";
|
|
15
12
|
import MetricDashboardMetricType from "../Metrics/MetricDashboardMetricType";
|
|
16
|
-
|
|
13
|
+
import { DashboardValueTrendDirection } from "./DashboardComponents/DashboardValueComponent";
|
|
14
|
+
|
|
15
|
+
/*
|
|
16
|
+
* Trace / Exception / Profiles entries are intentionally not in this
|
|
17
|
+
* enum: their metric catalogs (SpanMetricType, ExceptionMetricType,
|
|
18
|
+
* ProfileMetricType) define names that are not emitted anywhere in the
|
|
19
|
+
* codebase, so the templates only ever rendered empty widgets. Reach
|
|
20
|
+
* for the Logs / Traces / Exceptions pages directly until those metrics
|
|
21
|
+
* exist.
|
|
22
|
+
*/
|
|
17
23
|
export enum DashboardTemplateType {
|
|
18
24
|
Blank = "Blank",
|
|
19
25
|
Monitor = "Monitor",
|
|
20
26
|
Incident = "Incident",
|
|
21
27
|
Kubernetes = "Kubernetes",
|
|
22
28
|
Metrics = "Metrics",
|
|
23
|
-
Trace = "Trace",
|
|
24
|
-
Exception = "Exception",
|
|
25
|
-
Profiles = "Profiles",
|
|
26
29
|
}
|
|
27
30
|
|
|
28
31
|
export interface DashboardTemplate {
|
|
@@ -43,51 +46,30 @@ export const DashboardTemplates: Array<DashboardTemplate> = [
|
|
|
43
46
|
type: DashboardTemplateType.Monitor,
|
|
44
47
|
name: "Monitor Dashboard",
|
|
45
48
|
description:
|
|
46
|
-
"Response time, uptime,
|
|
49
|
+
"Response time, uptime, status codes, CPU/memory health gauges, and breakdown table for synthetic and server monitors.",
|
|
47
50
|
icon: IconProp.Heartbeat,
|
|
48
51
|
},
|
|
49
52
|
{
|
|
50
53
|
type: DashboardTemplateType.Incident,
|
|
51
54
|
name: "Incident Dashboard",
|
|
52
55
|
description:
|
|
53
|
-
"MTTR/MTTA gauges,
|
|
56
|
+
"Incident count, MTTR/MTTA gauges, duration trends, severity breakdown, time-in-state, and longest-incident tables.",
|
|
54
57
|
icon: IconProp.Alert,
|
|
55
58
|
},
|
|
56
59
|
{
|
|
57
60
|
type: DashboardTemplateType.Kubernetes,
|
|
58
61
|
name: "Kubernetes Dashboard",
|
|
59
62
|
description:
|
|
60
|
-
"CPU
|
|
63
|
+
"Pod/node CPU and memory averages, utilization gauges, live pod and node lists, network I/O, restarts, and cluster logs.",
|
|
61
64
|
icon: IconProp.Kubernetes,
|
|
62
65
|
},
|
|
63
66
|
{
|
|
64
67
|
type: DashboardTemplateType.Metrics,
|
|
65
68
|
name: "Metrics Dashboard",
|
|
66
69
|
description:
|
|
67
|
-
"HTTP request
|
|
70
|
+
"HTTP request rate, latency, error rate, CPU utilization gauge, memory usage, disk and network I/O, and runtime metrics.",
|
|
68
71
|
icon: IconProp.ChartBar,
|
|
69
72
|
},
|
|
70
|
-
{
|
|
71
|
-
type: DashboardTemplateType.Trace,
|
|
72
|
-
name: "Trace Dashboard",
|
|
73
|
-
description:
|
|
74
|
-
"Span throughput, latency percentiles, error rates, service health, status breakdown, and recent traces.",
|
|
75
|
-
icon: IconProp.Activity,
|
|
76
|
-
},
|
|
77
|
-
{
|
|
78
|
-
type: DashboardTemplateType.Exception,
|
|
79
|
-
name: "Exception Dashboard",
|
|
80
|
-
description:
|
|
81
|
-
"Exception counts, error rates, top exception types, resolution status, affected services, and logs.",
|
|
82
|
-
icon: IconProp.Bug,
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
type: DashboardTemplateType.Profiles,
|
|
86
|
-
name: "Profiles Dashboard",
|
|
87
|
-
description:
|
|
88
|
-
"CPU profiles, memory allocations, heap usage, thread counts, top functions by CPU time, and flamegraph data.",
|
|
89
|
-
icon: IconProp.Fire,
|
|
90
|
-
},
|
|
91
73
|
];
|
|
92
74
|
|
|
93
75
|
// -- Metric query config helpers --
|
|
@@ -166,6 +148,13 @@ function createValueComponent(data: {
|
|
|
166
148
|
left: number;
|
|
167
149
|
width: number;
|
|
168
150
|
metricConfig?: MetricConfig;
|
|
151
|
+
/*
|
|
152
|
+
* Per-widget override for the trend-arrow colour. Leave `undefined` to
|
|
153
|
+
* let the renderer apply its metric-name heuristic (incident counts,
|
|
154
|
+
* error rates, latency, CPU/memory usage flip the colour); set
|
|
155
|
+
* explicitly when the heuristic would guess wrong.
|
|
156
|
+
*/
|
|
157
|
+
trendDirection?: DashboardValueTrendDirection;
|
|
169
158
|
}): DashboardBaseComponent {
|
|
170
159
|
return {
|
|
171
160
|
_type: ObjectType.DashboardComponent,
|
|
@@ -187,6 +176,7 @@ function createValueComponent(data: {
|
|
|
187
176
|
groupBy: undefined,
|
|
188
177
|
},
|
|
189
178
|
},
|
|
179
|
+
trendDirection: data.trendDirection,
|
|
190
180
|
},
|
|
191
181
|
};
|
|
192
182
|
}
|
|
@@ -330,27 +320,56 @@ function createTableComponent(data: {
|
|
|
330
320
|
};
|
|
331
321
|
}
|
|
332
322
|
|
|
333
|
-
function
|
|
323
|
+
function createKubernetesPodListComponent(data: {
|
|
334
324
|
title: string;
|
|
335
325
|
top: number;
|
|
336
326
|
left: number;
|
|
337
327
|
width: number;
|
|
338
328
|
height: number;
|
|
339
329
|
maxRows?: number;
|
|
330
|
+
podPhases?: Array<string>;
|
|
340
331
|
}): DashboardBaseComponent {
|
|
341
332
|
return {
|
|
342
333
|
_type: ObjectType.DashboardComponent,
|
|
343
|
-
componentType: DashboardComponentType.
|
|
334
|
+
componentType: DashboardComponentType.KubernetesPodList,
|
|
344
335
|
componentId: ObjectID.generate(),
|
|
345
336
|
topInDashboardUnits: data.top,
|
|
346
337
|
leftInDashboardUnits: data.left,
|
|
347
338
|
widthInDashboardUnits: data.width,
|
|
348
339
|
heightInDashboardUnits: data.height,
|
|
349
340
|
minHeightInDashboardUnits: 3,
|
|
350
|
-
minWidthInDashboardUnits:
|
|
341
|
+
minWidthInDashboardUnits: 4,
|
|
342
|
+
arguments: {
|
|
343
|
+
title: data.title,
|
|
344
|
+
maxRows: data.maxRows ?? 20,
|
|
345
|
+
podPhases: data.podPhases,
|
|
346
|
+
},
|
|
347
|
+
};
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
function createKubernetesNodeListComponent(data: {
|
|
351
|
+
title: string;
|
|
352
|
+
top: number;
|
|
353
|
+
left: number;
|
|
354
|
+
width: number;
|
|
355
|
+
height: number;
|
|
356
|
+
maxRows?: number;
|
|
357
|
+
readinessFilter?: string;
|
|
358
|
+
}): DashboardBaseComponent {
|
|
359
|
+
return {
|
|
360
|
+
_type: ObjectType.DashboardComponent,
|
|
361
|
+
componentType: DashboardComponentType.KubernetesNodeList,
|
|
362
|
+
componentId: ObjectID.generate(),
|
|
363
|
+
topInDashboardUnits: data.top,
|
|
364
|
+
leftInDashboardUnits: data.left,
|
|
365
|
+
widthInDashboardUnits: data.width,
|
|
366
|
+
heightInDashboardUnits: data.height,
|
|
367
|
+
minHeightInDashboardUnits: 3,
|
|
368
|
+
minWidthInDashboardUnits: 4,
|
|
351
369
|
arguments: {
|
|
352
370
|
title: data.title,
|
|
353
371
|
maxRows: data.maxRows ?? 20,
|
|
372
|
+
readinessFilter: data.readinessFilter,
|
|
354
373
|
},
|
|
355
374
|
};
|
|
356
375
|
}
|
|
@@ -380,20 +399,35 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
|
|
|
380
399
|
aggregationType: MetricsAggregationType.Avg,
|
|
381
400
|
legendUnit: "ms",
|
|
382
401
|
},
|
|
402
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
383
403
|
}),
|
|
404
|
+
/*
|
|
405
|
+
* IsOnline is emitted as 0/1 with unit "" by MonitorMetricUtil, so
|
|
406
|
+
* `Avg` gives the uptime ratio in [0, 1] rather than a percent. We
|
|
407
|
+
* label the widget "Uptime (avg)" instead of "%" so the fractional
|
|
408
|
+
* display isn't misleading; flipping the storage to 0/100 + unit
|
|
409
|
+
* "%" would change criteria evaluation elsewhere in the codebase.
|
|
410
|
+
*/
|
|
384
411
|
createValueComponent({
|
|
385
|
-
title: "Uptime
|
|
412
|
+
title: "Uptime (avg)",
|
|
386
413
|
top: 1,
|
|
387
414
|
left: 3,
|
|
388
415
|
width: 3,
|
|
389
416
|
metricConfig: {
|
|
390
417
|
metricName: MonitorMetricType.IsOnline,
|
|
391
418
|
aggregationType: MetricsAggregationType.Avg,
|
|
392
|
-
legendUnit: "%",
|
|
393
419
|
},
|
|
420
|
+
trendDirection: DashboardValueTrendDirection.HigherIsBetter,
|
|
394
421
|
}),
|
|
422
|
+
/*
|
|
423
|
+
* ResponseStatusCode is the literal HTTP status code (200, 404,
|
|
424
|
+
* 503, …). `Count` over it returns the total number of checks the
|
|
425
|
+
* monitor ran, not the error rate — the original "Error Rate" label
|
|
426
|
+
* was misleading. Filtering to status >= 400 would require attribute
|
|
427
|
+
* filters that the template helper doesn't expose, so we relabel.
|
|
428
|
+
*/
|
|
395
429
|
createValueComponent({
|
|
396
|
-
title: "
|
|
430
|
+
title: "Total Checks",
|
|
397
431
|
top: 1,
|
|
398
432
|
left: 6,
|
|
399
433
|
width: 3,
|
|
@@ -401,6 +435,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
|
|
|
401
435
|
metricName: MonitorMetricType.ResponseStatusCode,
|
|
402
436
|
aggregationType: MetricsAggregationType.Count,
|
|
403
437
|
},
|
|
438
|
+
trendDirection: DashboardValueTrendDirection.HigherIsBetter,
|
|
404
439
|
}),
|
|
405
440
|
createValueComponent({
|
|
406
441
|
title: "Execution Time",
|
|
@@ -412,6 +447,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
|
|
|
412
447
|
aggregationType: MetricsAggregationType.Avg,
|
|
413
448
|
legendUnit: "ms",
|
|
414
449
|
},
|
|
450
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
415
451
|
}),
|
|
416
452
|
|
|
417
453
|
// Row 2-4: Charts
|
|
@@ -439,8 +475,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
|
|
|
439
475
|
metricConfig: {
|
|
440
476
|
metricName: MonitorMetricType.IsOnline,
|
|
441
477
|
aggregationType: MetricsAggregationType.Avg,
|
|
442
|
-
legend: "
|
|
443
|
-
legendUnit: "%",
|
|
478
|
+
legend: "Uptime Ratio",
|
|
444
479
|
},
|
|
445
480
|
}),
|
|
446
481
|
|
|
@@ -538,6 +573,21 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
|
|
|
538
573
|
}
|
|
539
574
|
|
|
540
575
|
function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
576
|
+
/*
|
|
577
|
+
* Incident metrics (TimeToResolve, TimeToAcknowledge, IncidentDuration,
|
|
578
|
+
* TimeInState, PostmortemCompletionTime) are emitted with unit
|
|
579
|
+
* "seconds" by IncidentService. Templates previously passed
|
|
580
|
+
* `legendUnit: "min"` to relabel the chart legend, but that bypassed
|
|
581
|
+
* ValueFormatter's scale-aware formatting and rendered raw seconds
|
|
582
|
+
* with a "Minutes" suffix (e.g. a 1-hour incident showed as
|
|
583
|
+
* "3600 Minutes"). Gauges were authored against an implicit minute
|
|
584
|
+
* scale (maxValue 120, threshold 60/90) and compared raw seconds
|
|
585
|
+
* against minutes, so any incident over ~2 minutes pinned the gauge.
|
|
586
|
+
*
|
|
587
|
+
* We now drop the legendUnit overrides — ValueFormatter scales
|
|
588
|
+
* `seconds` to sec/min/hr/days based on magnitude — and reauthor the
|
|
589
|
+
* gauge ranges in seconds so the 0-100% sweep is meaningful.
|
|
590
|
+
*/
|
|
541
591
|
const components: Array<DashboardBaseComponent> = [
|
|
542
592
|
// Row 0: Title
|
|
543
593
|
createTextComponent({
|
|
@@ -549,7 +599,7 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
549
599
|
isBold: true,
|
|
550
600
|
}),
|
|
551
601
|
|
|
552
|
-
// Row 1: Key incident metrics
|
|
602
|
+
// Row 1: Key incident metrics — every one is "higher = worse".
|
|
553
603
|
createValueComponent({
|
|
554
604
|
title: "Incident Count",
|
|
555
605
|
top: 1,
|
|
@@ -559,6 +609,7 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
559
609
|
metricName: IncidentMetricType.IncidentCount,
|
|
560
610
|
aggregationType: MetricsAggregationType.Sum,
|
|
561
611
|
},
|
|
612
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
562
613
|
}),
|
|
563
614
|
createValueComponent({
|
|
564
615
|
title: "MTTR",
|
|
@@ -568,8 +619,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
568
619
|
metricConfig: {
|
|
569
620
|
metricName: IncidentMetricType.TimeToResolve,
|
|
570
621
|
aggregationType: MetricsAggregationType.Avg,
|
|
571
|
-
legendUnit: "min",
|
|
572
622
|
},
|
|
623
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
573
624
|
}),
|
|
574
625
|
createValueComponent({
|
|
575
626
|
title: "MTTA",
|
|
@@ -579,8 +630,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
579
630
|
metricConfig: {
|
|
580
631
|
metricName: IncidentMetricType.TimeToAcknowledge,
|
|
581
632
|
aggregationType: MetricsAggregationType.Avg,
|
|
582
|
-
legendUnit: "min",
|
|
583
633
|
},
|
|
634
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
584
635
|
}),
|
|
585
636
|
createValueComponent({
|
|
586
637
|
title: "Avg Duration",
|
|
@@ -590,8 +641,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
590
641
|
metricConfig: {
|
|
591
642
|
metricName: IncidentMetricType.IncidentDuration,
|
|
592
643
|
aggregationType: MetricsAggregationType.Avg,
|
|
593
|
-
legendUnit: "min",
|
|
594
644
|
},
|
|
645
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
595
646
|
}),
|
|
596
647
|
|
|
597
648
|
// Row 2-4: Incident trends
|
|
@@ -619,7 +670,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
619
670
|
metricName: IncidentMetricType.IncidentDuration,
|
|
620
671
|
aggregationType: MetricsAggregationType.Avg,
|
|
621
672
|
legend: "Avg Duration",
|
|
622
|
-
legendUnit: "min",
|
|
623
673
|
},
|
|
624
674
|
}),
|
|
625
675
|
|
|
@@ -633,32 +683,37 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
633
683
|
isBold: true,
|
|
634
684
|
}),
|
|
635
685
|
|
|
636
|
-
|
|
686
|
+
/*
|
|
687
|
+
* Row 6-8: MTTR/MTTA gauges. Ranges and thresholds are now in
|
|
688
|
+
* seconds (matching the stored metric unit). Targets: MTTR full
|
|
689
|
+
* scale 2 hours (warn at 1 hour, critical at 1.5 hours); MTTA full
|
|
690
|
+
* scale 1 hour (warn at 15 min, critical at 30 min).
|
|
691
|
+
*/
|
|
637
692
|
createGaugeComponent({
|
|
638
|
-
title: "MTTR
|
|
693
|
+
title: "MTTR",
|
|
639
694
|
top: 6,
|
|
640
695
|
left: 0,
|
|
641
696
|
width: 3,
|
|
642
697
|
height: 3,
|
|
643
698
|
minValue: 0,
|
|
644
|
-
maxValue:
|
|
645
|
-
warningThreshold:
|
|
646
|
-
criticalThreshold:
|
|
699
|
+
maxValue: 7200,
|
|
700
|
+
warningThreshold: 3600,
|
|
701
|
+
criticalThreshold: 5400,
|
|
647
702
|
metricConfig: {
|
|
648
703
|
metricName: IncidentMetricType.TimeToResolve,
|
|
649
704
|
aggregationType: MetricsAggregationType.Avg,
|
|
650
705
|
},
|
|
651
706
|
}),
|
|
652
707
|
createGaugeComponent({
|
|
653
|
-
title: "MTTA
|
|
708
|
+
title: "MTTA",
|
|
654
709
|
top: 6,
|
|
655
710
|
left: 3,
|
|
656
711
|
width: 3,
|
|
657
712
|
height: 3,
|
|
658
713
|
minValue: 0,
|
|
659
|
-
maxValue:
|
|
660
|
-
warningThreshold:
|
|
661
|
-
criticalThreshold:
|
|
714
|
+
maxValue: 3600,
|
|
715
|
+
warningThreshold: 900,
|
|
716
|
+
criticalThreshold: 1800,
|
|
662
717
|
metricConfig: {
|
|
663
718
|
metricName: IncidentMetricType.TimeToAcknowledge,
|
|
664
719
|
aggregationType: MetricsAggregationType.Avg,
|
|
@@ -675,7 +730,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
675
730
|
metricName: IncidentMetricType.TimeToResolve,
|
|
676
731
|
aggregationType: MetricsAggregationType.Avg,
|
|
677
732
|
legend: "MTTR",
|
|
678
|
-
legendUnit: "min",
|
|
679
733
|
},
|
|
680
734
|
}),
|
|
681
735
|
|
|
@@ -714,7 +768,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
714
768
|
metricName: IncidentMetricType.TimeInState,
|
|
715
769
|
aggregationType: MetricsAggregationType.Avg,
|
|
716
770
|
legend: "Time in State",
|
|
717
|
-
legendUnit: "min",
|
|
718
771
|
},
|
|
719
772
|
}),
|
|
720
773
|
|
|
@@ -728,7 +781,13 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
728
781
|
isBold: true,
|
|
729
782
|
}),
|
|
730
783
|
|
|
731
|
-
|
|
784
|
+
/*
|
|
785
|
+
* Row 14-16: Operational tables. Logs / traces were removed from the
|
|
786
|
+
* Incident template because incident records are not log/trace
|
|
787
|
+
* sources — they're rows in Postgres. Surfacing unrelated cluster
|
|
788
|
+
* logs and trace lists alongside MTTR/MTTA was a UX miss; if a user
|
|
789
|
+
* wants those views they live on dedicated Trace / Log pages.
|
|
790
|
+
*/
|
|
732
791
|
createTableComponent({
|
|
733
792
|
title: "Incidents by Duration",
|
|
734
793
|
top: 14,
|
|
@@ -751,32 +810,42 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
|
|
|
751
810
|
aggregationType: MetricsAggregationType.Avg,
|
|
752
811
|
},
|
|
753
812
|
}),
|
|
754
|
-
|
|
755
|
-
// Row 17-19: Logs and traces
|
|
756
|
-
createLogStreamComponent({
|
|
757
|
-
title: "Recent Incident Logs",
|
|
758
|
-
top: 17,
|
|
759
|
-
left: 0,
|
|
760
|
-
width: 6,
|
|
761
|
-
height: 3,
|
|
762
|
-
}),
|
|
763
|
-
createTraceListComponent({
|
|
764
|
-
title: "Recent Traces",
|
|
765
|
-
top: 17,
|
|
766
|
-
left: 6,
|
|
767
|
-
width: 6,
|
|
768
|
-
height: 3,
|
|
769
|
-
}),
|
|
770
813
|
];
|
|
771
814
|
|
|
772
815
|
return {
|
|
773
816
|
_type: ObjectType.DashboardViewConfig,
|
|
774
817
|
components,
|
|
775
|
-
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits,
|
|
818
|
+
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 17),
|
|
776
819
|
};
|
|
777
820
|
}
|
|
778
821
|
|
|
779
822
|
function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
823
|
+
/*
|
|
824
|
+
* Layout notes:
|
|
825
|
+
*
|
|
826
|
+
* - "Pod Count" / "Node Ready" used to be Value widgets over k8s.pod.phase
|
|
827
|
+
* / k8s.node.condition_ready with `Sum` aggregation. Those metrics are
|
|
828
|
+
* per-resource gauges that re-emit `1` on every scrape, so summing
|
|
829
|
+
* across the dashboard window multiplied (pods * scrapes) and produced
|
|
830
|
+
* numbers in the hundreds for tiny clusters. The user-visible fix is
|
|
831
|
+
* to use the dedicated KubernetesPodList / KubernetesNodeList widgets
|
|
832
|
+
* below — they read the per-cluster snapshot in Postgres and show
|
|
833
|
+
* accurate counts in the widget header plus a live list of rows.
|
|
834
|
+
*
|
|
835
|
+
* - "Memory Utilization" used to be a 0-100 gauge over k8s.node.memory.usage,
|
|
836
|
+
* which is reported in bytes. A node with 8 GB of RAM produced a value
|
|
837
|
+
* in the 10^9 range against a 0-100 scale, so the gauge always pinned
|
|
838
|
+
* at the critical end with a meaningless absolute number. Without a
|
|
839
|
+
* first-class percent metric we replace it with a Value widget that
|
|
840
|
+
* renders the absolute usage via ValueFormatter (e.g. "8.3 GB").
|
|
841
|
+
*
|
|
842
|
+
* - CPU widgets use OTel's k8s.*.cpu.utilization, which the collector
|
|
843
|
+
* emits as a [0, 1] ratio with unit "1". DashboardValueComponent /
|
|
844
|
+
* DashboardGaugeComponent now scale that to a percent at render time
|
|
845
|
+
* when the metric name carries the `.utilization` suffix, so "0.05"
|
|
846
|
+
* reads as "5.00%" and gauge thresholds in the natural 0-100 scale work
|
|
847
|
+
* as expected.
|
|
848
|
+
*/
|
|
780
849
|
const components: Array<DashboardBaseComponent> = [
|
|
781
850
|
// Row 0: Title
|
|
782
851
|
createTextComponent({
|
|
@@ -788,48 +857,54 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
788
857
|
isBold: true,
|
|
789
858
|
}),
|
|
790
859
|
|
|
791
|
-
|
|
860
|
+
/*
|
|
861
|
+
* Row 1: Key cluster metrics — averages render with proper units via
|
|
862
|
+
* ValueFormatter (CPU utilization → "%", memory.usage → "MB"/"GB").
|
|
863
|
+
* All four are "higher = worse" (closer to capacity = bad).
|
|
864
|
+
*/
|
|
792
865
|
createValueComponent({
|
|
793
|
-
title: "CPU
|
|
866
|
+
title: "Pod CPU (avg)",
|
|
794
867
|
top: 1,
|
|
795
868
|
left: 0,
|
|
796
869
|
width: 3,
|
|
797
870
|
metricConfig: {
|
|
798
871
|
metricName: "k8s.pod.cpu.utilization",
|
|
799
872
|
aggregationType: MetricsAggregationType.Avg,
|
|
800
|
-
legendUnit: "%",
|
|
801
873
|
},
|
|
874
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
802
875
|
}),
|
|
803
876
|
createValueComponent({
|
|
804
|
-
title: "Memory
|
|
877
|
+
title: "Pod Memory (avg)",
|
|
805
878
|
top: 1,
|
|
806
879
|
left: 3,
|
|
807
880
|
width: 3,
|
|
808
881
|
metricConfig: {
|
|
809
882
|
metricName: "k8s.pod.memory.usage",
|
|
810
883
|
aggregationType: MetricsAggregationType.Avg,
|
|
811
|
-
legendUnit: "bytes",
|
|
812
884
|
},
|
|
885
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
813
886
|
}),
|
|
814
887
|
createValueComponent({
|
|
815
|
-
title: "
|
|
888
|
+
title: "Node CPU (avg)",
|
|
816
889
|
top: 1,
|
|
817
890
|
left: 6,
|
|
818
891
|
width: 3,
|
|
819
892
|
metricConfig: {
|
|
820
|
-
metricName: "k8s.
|
|
821
|
-
aggregationType: MetricsAggregationType.
|
|
893
|
+
metricName: "k8s.node.cpu.utilization",
|
|
894
|
+
aggregationType: MetricsAggregationType.Avg,
|
|
822
895
|
},
|
|
896
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
823
897
|
}),
|
|
824
898
|
createValueComponent({
|
|
825
|
-
title: "Node
|
|
899
|
+
title: "Node Memory (avg)",
|
|
826
900
|
top: 1,
|
|
827
901
|
left: 9,
|
|
828
902
|
width: 3,
|
|
829
903
|
metricConfig: {
|
|
830
|
-
metricName: "k8s.node.
|
|
831
|
-
aggregationType: MetricsAggregationType.
|
|
904
|
+
metricName: "k8s.node.memory.usage",
|
|
905
|
+
aggregationType: MetricsAggregationType.Avg,
|
|
832
906
|
},
|
|
907
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
833
908
|
}),
|
|
834
909
|
|
|
835
910
|
// Row 2-4: Resource usage charts
|
|
@@ -844,7 +919,6 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
844
919
|
metricName: "k8s.pod.cpu.utilization",
|
|
845
920
|
aggregationType: MetricsAggregationType.Avg,
|
|
846
921
|
legend: "CPU Utilization",
|
|
847
|
-
legendUnit: "%",
|
|
848
922
|
},
|
|
849
923
|
}),
|
|
850
924
|
createChartComponent({
|
|
@@ -858,13 +932,12 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
858
932
|
metricName: "k8s.pod.memory.usage",
|
|
859
933
|
aggregationType: MetricsAggregationType.Avg,
|
|
860
934
|
legend: "Memory Usage",
|
|
861
|
-
legendUnit: "bytes",
|
|
862
935
|
},
|
|
863
936
|
}),
|
|
864
937
|
|
|
865
938
|
// Row 5: Section header
|
|
866
939
|
createTextComponent({
|
|
867
|
-
text: "
|
|
940
|
+
text: "Cluster Resources",
|
|
868
941
|
top: 5,
|
|
869
942
|
left: 0,
|
|
870
943
|
width: 12,
|
|
@@ -872,81 +945,88 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
872
945
|
isBold: true,
|
|
873
946
|
}),
|
|
874
947
|
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
948
|
+
/*
|
|
949
|
+
* Row 6-9: Pod and node lists query the Postgres snapshot, so the
|
|
950
|
+
* header shows the true current count and the body shows live rows
|
|
951
|
+
* (replacing the broken Sum-of-gauge Value widgets).
|
|
952
|
+
*/
|
|
953
|
+
createKubernetesPodListComponent({
|
|
954
|
+
title: "Pods",
|
|
878
955
|
top: 6,
|
|
879
956
|
left: 0,
|
|
880
|
-
width:
|
|
881
|
-
height:
|
|
882
|
-
|
|
883
|
-
maxValue: 100,
|
|
884
|
-
warningThreshold: 70,
|
|
885
|
-
criticalThreshold: 90,
|
|
886
|
-
metricConfig: {
|
|
887
|
-
metricName: "k8s.node.cpu.utilization",
|
|
888
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
889
|
-
},
|
|
957
|
+
width: 6,
|
|
958
|
+
height: 4,
|
|
959
|
+
maxRows: 25,
|
|
890
960
|
}),
|
|
891
|
-
|
|
892
|
-
title: "
|
|
961
|
+
createKubernetesNodeListComponent({
|
|
962
|
+
title: "Nodes",
|
|
893
963
|
top: 6,
|
|
894
|
-
left:
|
|
895
|
-
width:
|
|
964
|
+
left: 6,
|
|
965
|
+
width: 6,
|
|
966
|
+
height: 4,
|
|
967
|
+
maxRows: 25,
|
|
968
|
+
}),
|
|
969
|
+
|
|
970
|
+
// Row 10: Section header
|
|
971
|
+
createTextComponent({
|
|
972
|
+
text: "Resource Health",
|
|
973
|
+
top: 10,
|
|
974
|
+
left: 0,
|
|
975
|
+
width: 12,
|
|
976
|
+
height: 1,
|
|
977
|
+
isBold: true,
|
|
978
|
+
}),
|
|
979
|
+
|
|
980
|
+
/*
|
|
981
|
+
* Row 11-13: CPU gauge (auto-scaled from [0,1] to percent), and the
|
|
982
|
+
* network throughput chart. The old "Memory Utilization" gauge over
|
|
983
|
+
* raw bytes is gone — see top-of-function comment.
|
|
984
|
+
*/
|
|
985
|
+
createGaugeComponent({
|
|
986
|
+
title: "Cluster CPU Utilization",
|
|
987
|
+
top: 11,
|
|
988
|
+
left: 0,
|
|
989
|
+
width: 4,
|
|
896
990
|
height: 3,
|
|
897
991
|
minValue: 0,
|
|
898
992
|
maxValue: 100,
|
|
899
993
|
warningThreshold: 70,
|
|
900
994
|
criticalThreshold: 90,
|
|
901
995
|
metricConfig: {
|
|
902
|
-
metricName: "k8s.node.
|
|
996
|
+
metricName: "k8s.node.cpu.utilization",
|
|
903
997
|
aggregationType: MetricsAggregationType.Avg,
|
|
904
998
|
},
|
|
905
999
|
}),
|
|
906
1000
|
createChartComponent({
|
|
907
|
-
title: "
|
|
908
|
-
chartType: DashboardChartType.
|
|
909
|
-
top:
|
|
910
|
-
left:
|
|
911
|
-
width:
|
|
1001
|
+
title: "Network I/O",
|
|
1002
|
+
chartType: DashboardChartType.Area,
|
|
1003
|
+
top: 11,
|
|
1004
|
+
left: 4,
|
|
1005
|
+
width: 8,
|
|
912
1006
|
height: 3,
|
|
913
1007
|
metricConfig: {
|
|
914
|
-
metricName: "k8s.pod.
|
|
1008
|
+
metricName: "k8s.pod.network.io",
|
|
915
1009
|
aggregationType: MetricsAggregationType.Sum,
|
|
916
|
-
legend: "
|
|
1010
|
+
legend: "Network I/O",
|
|
917
1011
|
},
|
|
918
1012
|
}),
|
|
919
1013
|
|
|
920
|
-
// Row
|
|
1014
|
+
// Row 14: Section header
|
|
921
1015
|
createTextComponent({
|
|
922
|
-
text: "Workload
|
|
923
|
-
top:
|
|
1016
|
+
text: "Workload Activity",
|
|
1017
|
+
top: 14,
|
|
924
1018
|
left: 0,
|
|
925
1019
|
width: 12,
|
|
926
1020
|
height: 1,
|
|
927
1021
|
isBold: true,
|
|
928
1022
|
}),
|
|
929
1023
|
|
|
930
|
-
// Row
|
|
931
|
-
createChartComponent({
|
|
932
|
-
title: "Network I/O",
|
|
933
|
-
chartType: DashboardChartType.Area,
|
|
934
|
-
top: 10,
|
|
935
|
-
left: 0,
|
|
936
|
-
width: 6,
|
|
937
|
-
height: 3,
|
|
938
|
-
metricConfig: {
|
|
939
|
-
metricName: "k8s.pod.network.io",
|
|
940
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
941
|
-
legend: "Network I/O",
|
|
942
|
-
legendUnit: "bytes",
|
|
943
|
-
},
|
|
944
|
-
}),
|
|
1024
|
+
// Row 15-17: Restarts and replicas
|
|
945
1025
|
createChartComponent({
|
|
946
1026
|
title: "Container Restarts Over Time",
|
|
947
1027
|
chartType: DashboardChartType.Bar,
|
|
948
|
-
top:
|
|
949
|
-
left:
|
|
1028
|
+
top: 15,
|
|
1029
|
+
left: 0,
|
|
950
1030
|
width: 6,
|
|
951
1031
|
height: 3,
|
|
952
1032
|
metricConfig: {
|
|
@@ -955,12 +1035,10 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
955
1035
|
legend: "Restarts",
|
|
956
1036
|
},
|
|
957
1037
|
}),
|
|
958
|
-
|
|
959
|
-
// Row 13-15: Table and logs
|
|
960
1038
|
createTableComponent({
|
|
961
1039
|
title: "Deployment Replicas",
|
|
962
|
-
top:
|
|
963
|
-
left:
|
|
1040
|
+
top: 15,
|
|
1041
|
+
left: 6,
|
|
964
1042
|
width: 6,
|
|
965
1043
|
height: 3,
|
|
966
1044
|
metricConfig: {
|
|
@@ -968,11 +1046,13 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
968
1046
|
aggregationType: MetricsAggregationType.Min,
|
|
969
1047
|
},
|
|
970
1048
|
}),
|
|
1049
|
+
|
|
1050
|
+
// Row 18-20: Logs
|
|
971
1051
|
createLogStreamComponent({
|
|
972
1052
|
title: "Cluster Logs",
|
|
973
|
-
top:
|
|
974
|
-
left:
|
|
975
|
-
width:
|
|
1053
|
+
top: 18,
|
|
1054
|
+
left: 0,
|
|
1055
|
+
width: 12,
|
|
976
1056
|
height: 3,
|
|
977
1057
|
}),
|
|
978
1058
|
];
|
|
@@ -980,11 +1060,32 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
|
|
|
980
1060
|
return {
|
|
981
1061
|
_type: ObjectType.DashboardViewConfig,
|
|
982
1062
|
components,
|
|
983
|
-
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits,
|
|
1063
|
+
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
|
|
984
1064
|
};
|
|
985
1065
|
}
|
|
986
1066
|
|
|
987
1067
|
function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
1068
|
+
/*
|
|
1069
|
+
* Layout notes:
|
|
1070
|
+
*
|
|
1071
|
+
* - `system.cpu.utilization` and `process.cpu.utilization` are OTel
|
|
1072
|
+
* ratio metrics with unit "1" reported in [0, 1]. DashboardValueComponent
|
|
1073
|
+
* / DashboardGaugeComponent scale these to a percent at render time
|
|
1074
|
+
* (see splitFormattedValue / isFractionScale), so the 0-100 gauge sweep
|
|
1075
|
+
* and the percent display work without any special template config.
|
|
1076
|
+
*
|
|
1077
|
+
* - `system.memory.usage` is reported in bytes. A previous "Memory Usage"
|
|
1078
|
+
* gauge compared bytes (10⁹ range) against a 0-100 sweep and pinned
|
|
1079
|
+
* critical for any sane workload. We swapped it for a Value widget that
|
|
1080
|
+
* renders the absolute usage via ValueFormatter (e.g. "8.3 GB"), since
|
|
1081
|
+
* there is no first-class memory-utilization percent metric in OTel's
|
|
1082
|
+
* default system instrumentation.
|
|
1083
|
+
*
|
|
1084
|
+
* - We also dropped explicit `legendUnit: "bytes"/"%"/"ms"` overrides
|
|
1085
|
+
* where they duplicated the stored MetricType unit — ValueFormatter
|
|
1086
|
+
* already auto-scales bytes/seconds/ms and renders ratio metrics as
|
|
1087
|
+
* percent. Overrides are kept only where they add useful aliasing.
|
|
1088
|
+
*/
|
|
988
1089
|
const components: Array<DashboardBaseComponent> = [
|
|
989
1090
|
// Row 0: Title
|
|
990
1091
|
createTextComponent({
|
|
@@ -996,7 +1097,11 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
996
1097
|
isBold: true,
|
|
997
1098
|
}),
|
|
998
1099
|
|
|
999
|
-
|
|
1100
|
+
/*
|
|
1101
|
+
* Row 1: Key HTTP metrics. Request volume rising is generally a
|
|
1102
|
+
* sign of activity (good); latency, errors, and active in-flight
|
|
1103
|
+
* requests rising signal saturation or trouble (bad).
|
|
1104
|
+
*/
|
|
1000
1105
|
createValueComponent({
|
|
1001
1106
|
title: "Request Rate",
|
|
1002
1107
|
top: 1,
|
|
@@ -1007,6 +1112,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1007
1112
|
aggregationType: MetricsAggregationType.Sum,
|
|
1008
1113
|
legendUnit: "req/s",
|
|
1009
1114
|
},
|
|
1115
|
+
trendDirection: DashboardValueTrendDirection.HigherIsBetter,
|
|
1010
1116
|
}),
|
|
1011
1117
|
createValueComponent({
|
|
1012
1118
|
title: "Avg Latency",
|
|
@@ -1018,6 +1124,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1018
1124
|
aggregationType: MetricsAggregationType.Avg,
|
|
1019
1125
|
legendUnit: "ms",
|
|
1020
1126
|
},
|
|
1127
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
1021
1128
|
}),
|
|
1022
1129
|
createValueComponent({
|
|
1023
1130
|
title: "Error Rate",
|
|
@@ -1029,6 +1136,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1029
1136
|
aggregationType: MetricsAggregationType.Avg,
|
|
1030
1137
|
legendUnit: "%",
|
|
1031
1138
|
},
|
|
1139
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
1032
1140
|
}),
|
|
1033
1141
|
createValueComponent({
|
|
1034
1142
|
title: "Active Requests",
|
|
@@ -1039,6 +1147,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1039
1147
|
metricName: MetricDashboardMetricType.HttpActiveRequests,
|
|
1040
1148
|
aggregationType: MetricsAggregationType.Avg,
|
|
1041
1149
|
},
|
|
1150
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
1042
1151
|
}),
|
|
1043
1152
|
|
|
1044
1153
|
// Row 2-4: HTTP request charts
|
|
@@ -1121,7 +1230,12 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1121
1230
|
isBold: true,
|
|
1122
1231
|
}),
|
|
1123
1232
|
|
|
1124
|
-
|
|
1233
|
+
/*
|
|
1234
|
+
* Row 10-12: System resource health. CPU has a percent gauge (auto-
|
|
1235
|
+
* scaled from [0, 1] ratio at render time); Memory has a Value widget
|
|
1236
|
+
* since `system.memory.usage` is bytes (auto-formatted to MB/GB) and
|
|
1237
|
+
* we don't have a first-class memory-utilization percent metric.
|
|
1238
|
+
*/
|
|
1125
1239
|
createGaugeComponent({
|
|
1126
1240
|
title: "CPU Utilization",
|
|
1127
1241
|
top: 10,
|
|
@@ -1137,20 +1251,16 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1137
1251
|
aggregationType: MetricsAggregationType.Avg,
|
|
1138
1252
|
},
|
|
1139
1253
|
}),
|
|
1140
|
-
|
|
1254
|
+
createValueComponent({
|
|
1141
1255
|
title: "Memory Usage",
|
|
1142
1256
|
top: 10,
|
|
1143
1257
|
left: 3,
|
|
1144
1258
|
width: 3,
|
|
1145
|
-
height: 3,
|
|
1146
|
-
minValue: 0,
|
|
1147
|
-
maxValue: 100,
|
|
1148
|
-
warningThreshold: 70,
|
|
1149
|
-
criticalThreshold: 90,
|
|
1150
1259
|
metricConfig: {
|
|
1151
1260
|
metricName: MetricDashboardMetricType.SystemMemoryUsage,
|
|
1152
1261
|
aggregationType: MetricsAggregationType.Avg,
|
|
1153
1262
|
},
|
|
1263
|
+
trendDirection: DashboardValueTrendDirection.HigherIsWorse,
|
|
1154
1264
|
}),
|
|
1155
1265
|
createChartComponent({
|
|
1156
1266
|
title: "CPU Usage Over Time",
|
|
@@ -1162,8 +1272,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1162
1272
|
metricConfig: {
|
|
1163
1273
|
metricName: MetricDashboardMetricType.SystemCpuUtilization,
|
|
1164
1274
|
aggregationType: MetricsAggregationType.Avg,
|
|
1165
|
-
legend: "CPU
|
|
1166
|
-
legendUnit: "%",
|
|
1275
|
+
legend: "CPU",
|
|
1167
1276
|
},
|
|
1168
1277
|
}),
|
|
1169
1278
|
|
|
@@ -1275,820 +1384,6 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
|
|
|
1275
1384
|
};
|
|
1276
1385
|
}
|
|
1277
1386
|
|
|
1278
|
-
function createTraceDashboardConfig(): DashboardViewConfig {
|
|
1279
|
-
const components: Array<DashboardBaseComponent> = [
|
|
1280
|
-
// Row 0: Title
|
|
1281
|
-
createTextComponent({
|
|
1282
|
-
text: "Trace Dashboard",
|
|
1283
|
-
top: 0,
|
|
1284
|
-
left: 0,
|
|
1285
|
-
width: 12,
|
|
1286
|
-
height: 1,
|
|
1287
|
-
isBold: true,
|
|
1288
|
-
}),
|
|
1289
|
-
|
|
1290
|
-
// Row 1: Key trace metrics
|
|
1291
|
-
createValueComponent({
|
|
1292
|
-
title: "Span Count",
|
|
1293
|
-
top: 1,
|
|
1294
|
-
left: 0,
|
|
1295
|
-
width: 3,
|
|
1296
|
-
metricConfig: {
|
|
1297
|
-
metricName: SpanMetricType.SpanCount,
|
|
1298
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1299
|
-
},
|
|
1300
|
-
}),
|
|
1301
|
-
createValueComponent({
|
|
1302
|
-
title: "Avg Duration",
|
|
1303
|
-
top: 1,
|
|
1304
|
-
left: 3,
|
|
1305
|
-
width: 3,
|
|
1306
|
-
metricConfig: {
|
|
1307
|
-
metricName: SpanMetricType.SpanDuration,
|
|
1308
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1309
|
-
legendUnit: "ms",
|
|
1310
|
-
},
|
|
1311
|
-
}),
|
|
1312
|
-
createValueComponent({
|
|
1313
|
-
title: "Error Rate",
|
|
1314
|
-
top: 1,
|
|
1315
|
-
left: 6,
|
|
1316
|
-
width: 3,
|
|
1317
|
-
metricConfig: {
|
|
1318
|
-
metricName: SpanMetricType.SpanErrorRate,
|
|
1319
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1320
|
-
legendUnit: "%",
|
|
1321
|
-
},
|
|
1322
|
-
}),
|
|
1323
|
-
createValueComponent({
|
|
1324
|
-
title: "Throughput",
|
|
1325
|
-
top: 1,
|
|
1326
|
-
left: 9,
|
|
1327
|
-
width: 3,
|
|
1328
|
-
metricConfig: {
|
|
1329
|
-
metricName: SpanMetricType.SpanThroughput,
|
|
1330
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1331
|
-
legendUnit: "req/s",
|
|
1332
|
-
},
|
|
1333
|
-
}),
|
|
1334
|
-
|
|
1335
|
-
// Row 2-4: Throughput and duration charts
|
|
1336
|
-
createChartComponent({
|
|
1337
|
-
title: "Span Throughput Over Time",
|
|
1338
|
-
chartType: DashboardChartType.Bar,
|
|
1339
|
-
top: 2,
|
|
1340
|
-
left: 0,
|
|
1341
|
-
width: 6,
|
|
1342
|
-
height: 3,
|
|
1343
|
-
metricConfig: {
|
|
1344
|
-
metricName: SpanMetricType.SpanCount,
|
|
1345
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1346
|
-
legend: "Spans",
|
|
1347
|
-
},
|
|
1348
|
-
}),
|
|
1349
|
-
createChartComponent({
|
|
1350
|
-
title: "Avg Span Duration Over Time",
|
|
1351
|
-
chartType: DashboardChartType.Line,
|
|
1352
|
-
top: 2,
|
|
1353
|
-
left: 6,
|
|
1354
|
-
width: 6,
|
|
1355
|
-
height: 3,
|
|
1356
|
-
metricConfig: {
|
|
1357
|
-
metricName: SpanMetricType.SpanDuration,
|
|
1358
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1359
|
-
legend: "Avg Duration",
|
|
1360
|
-
legendUnit: "ms",
|
|
1361
|
-
},
|
|
1362
|
-
}),
|
|
1363
|
-
|
|
1364
|
-
// Row 5: Section header
|
|
1365
|
-
createTextComponent({
|
|
1366
|
-
text: "Latency Percentiles",
|
|
1367
|
-
top: 5,
|
|
1368
|
-
left: 0,
|
|
1369
|
-
width: 12,
|
|
1370
|
-
height: 1,
|
|
1371
|
-
isBold: true,
|
|
1372
|
-
}),
|
|
1373
|
-
|
|
1374
|
-
// Row 6: Latency percentile values
|
|
1375
|
-
createValueComponent({
|
|
1376
|
-
title: "P50 Latency",
|
|
1377
|
-
top: 6,
|
|
1378
|
-
left: 0,
|
|
1379
|
-
width: 3,
|
|
1380
|
-
metricConfig: {
|
|
1381
|
-
metricName: SpanMetricType.SpanP50Duration,
|
|
1382
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1383
|
-
legendUnit: "ms",
|
|
1384
|
-
},
|
|
1385
|
-
}),
|
|
1386
|
-
createValueComponent({
|
|
1387
|
-
title: "P90 Latency",
|
|
1388
|
-
top: 6,
|
|
1389
|
-
left: 3,
|
|
1390
|
-
width: 3,
|
|
1391
|
-
metricConfig: {
|
|
1392
|
-
metricName: SpanMetricType.SpanP90Duration,
|
|
1393
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1394
|
-
legendUnit: "ms",
|
|
1395
|
-
},
|
|
1396
|
-
}),
|
|
1397
|
-
createValueComponent({
|
|
1398
|
-
title: "P95 Latency",
|
|
1399
|
-
top: 6,
|
|
1400
|
-
left: 6,
|
|
1401
|
-
width: 3,
|
|
1402
|
-
metricConfig: {
|
|
1403
|
-
metricName: SpanMetricType.SpanP95Duration,
|
|
1404
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1405
|
-
legendUnit: "ms",
|
|
1406
|
-
},
|
|
1407
|
-
}),
|
|
1408
|
-
createValueComponent({
|
|
1409
|
-
title: "P99 Latency",
|
|
1410
|
-
top: 6,
|
|
1411
|
-
left: 9,
|
|
1412
|
-
width: 3,
|
|
1413
|
-
metricConfig: {
|
|
1414
|
-
metricName: SpanMetricType.SpanP99Duration,
|
|
1415
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1416
|
-
legendUnit: "ms",
|
|
1417
|
-
},
|
|
1418
|
-
}),
|
|
1419
|
-
|
|
1420
|
-
// Row 7-9: Latency percentile charts
|
|
1421
|
-
createChartComponent({
|
|
1422
|
-
title: "Latency Percentiles Over Time",
|
|
1423
|
-
chartType: DashboardChartType.Line,
|
|
1424
|
-
top: 7,
|
|
1425
|
-
left: 0,
|
|
1426
|
-
width: 6,
|
|
1427
|
-
height: 3,
|
|
1428
|
-
metricConfig: {
|
|
1429
|
-
metricName: SpanMetricType.SpanP95Duration,
|
|
1430
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1431
|
-
legend: "P95 Latency",
|
|
1432
|
-
legendUnit: "ms",
|
|
1433
|
-
},
|
|
1434
|
-
}),
|
|
1435
|
-
createChartComponent({
|
|
1436
|
-
title: "Latency Distribution",
|
|
1437
|
-
chartType: DashboardChartType.Histogram,
|
|
1438
|
-
top: 7,
|
|
1439
|
-
left: 6,
|
|
1440
|
-
width: 6,
|
|
1441
|
-
height: 3,
|
|
1442
|
-
metricConfig: {
|
|
1443
|
-
metricName: SpanMetricType.SpanDuration,
|
|
1444
|
-
aggregationType: MetricsAggregationType.Count,
|
|
1445
|
-
legend: "Latency Distribution",
|
|
1446
|
-
legendUnit: "ms",
|
|
1447
|
-
},
|
|
1448
|
-
}),
|
|
1449
|
-
|
|
1450
|
-
// Row 10: Section header
|
|
1451
|
-
createTextComponent({
|
|
1452
|
-
text: "Error Analysis",
|
|
1453
|
-
top: 10,
|
|
1454
|
-
left: 0,
|
|
1455
|
-
width: 12,
|
|
1456
|
-
height: 1,
|
|
1457
|
-
isBold: true,
|
|
1458
|
-
}),
|
|
1459
|
-
|
|
1460
|
-
// Row 11-13: Error charts and status breakdown
|
|
1461
|
-
createGaugeComponent({
|
|
1462
|
-
title: "Error Rate",
|
|
1463
|
-
top: 11,
|
|
1464
|
-
left: 0,
|
|
1465
|
-
width: 3,
|
|
1466
|
-
height: 3,
|
|
1467
|
-
minValue: 0,
|
|
1468
|
-
maxValue: 100,
|
|
1469
|
-
warningThreshold: 5,
|
|
1470
|
-
criticalThreshold: 15,
|
|
1471
|
-
metricConfig: {
|
|
1472
|
-
metricName: SpanMetricType.SpanErrorRate,
|
|
1473
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1474
|
-
},
|
|
1475
|
-
}),
|
|
1476
|
-
createChartComponent({
|
|
1477
|
-
title: "Errors Over Time",
|
|
1478
|
-
chartType: DashboardChartType.Area,
|
|
1479
|
-
top: 11,
|
|
1480
|
-
left: 3,
|
|
1481
|
-
width: 6,
|
|
1482
|
-
height: 3,
|
|
1483
|
-
metricConfig: {
|
|
1484
|
-
metricName: SpanMetricType.SpanErrorCount,
|
|
1485
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1486
|
-
legend: "Errors",
|
|
1487
|
-
},
|
|
1488
|
-
}),
|
|
1489
|
-
createChartComponent({
|
|
1490
|
-
title: "Span Status Breakdown",
|
|
1491
|
-
chartType: DashboardChartType.Pie,
|
|
1492
|
-
top: 11,
|
|
1493
|
-
left: 9,
|
|
1494
|
-
width: 3,
|
|
1495
|
-
height: 3,
|
|
1496
|
-
metricConfig: {
|
|
1497
|
-
metricName: SpanMetricType.SpanStatusOk,
|
|
1498
|
-
aggregationType: MetricsAggregationType.Count,
|
|
1499
|
-
legend: "Status",
|
|
1500
|
-
},
|
|
1501
|
-
}),
|
|
1502
|
-
|
|
1503
|
-
// Row 14: Section header
|
|
1504
|
-
createTextComponent({
|
|
1505
|
-
text: "Trace Details",
|
|
1506
|
-
top: 14,
|
|
1507
|
-
left: 0,
|
|
1508
|
-
width: 12,
|
|
1509
|
-
height: 1,
|
|
1510
|
-
isBold: true,
|
|
1511
|
-
}),
|
|
1512
|
-
|
|
1513
|
-
// Row 15-17: Table of slowest spans and request rate
|
|
1514
|
-
createTableComponent({
|
|
1515
|
-
title: "Slowest Spans",
|
|
1516
|
-
top: 15,
|
|
1517
|
-
left: 0,
|
|
1518
|
-
width: 6,
|
|
1519
|
-
height: 3,
|
|
1520
|
-
metricConfig: {
|
|
1521
|
-
metricName: SpanMetricType.SpanDuration,
|
|
1522
|
-
aggregationType: MetricsAggregationType.Max,
|
|
1523
|
-
},
|
|
1524
|
-
}),
|
|
1525
|
-
createChartComponent({
|
|
1526
|
-
title: "Request Rate Over Time",
|
|
1527
|
-
chartType: DashboardChartType.StackedArea,
|
|
1528
|
-
top: 15,
|
|
1529
|
-
left: 6,
|
|
1530
|
-
width: 6,
|
|
1531
|
-
height: 3,
|
|
1532
|
-
metricConfig: {
|
|
1533
|
-
metricName: SpanMetricType.SpanRequestRate,
|
|
1534
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1535
|
-
legend: "Request Rate",
|
|
1536
|
-
legendUnit: "req/s",
|
|
1537
|
-
},
|
|
1538
|
-
}),
|
|
1539
|
-
|
|
1540
|
-
// Row 18-20: Recent traces and logs
|
|
1541
|
-
createTraceListComponent({
|
|
1542
|
-
title: "Recent Traces",
|
|
1543
|
-
top: 18,
|
|
1544
|
-
left: 0,
|
|
1545
|
-
width: 6,
|
|
1546
|
-
height: 3,
|
|
1547
|
-
}),
|
|
1548
|
-
createLogStreamComponent({
|
|
1549
|
-
title: "Related Logs",
|
|
1550
|
-
top: 18,
|
|
1551
|
-
left: 6,
|
|
1552
|
-
width: 6,
|
|
1553
|
-
height: 3,
|
|
1554
|
-
}),
|
|
1555
|
-
];
|
|
1556
|
-
|
|
1557
|
-
return {
|
|
1558
|
-
_type: ObjectType.DashboardViewConfig,
|
|
1559
|
-
components,
|
|
1560
|
-
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
|
|
1561
|
-
};
|
|
1562
|
-
}
|
|
1563
|
-
|
|
1564
|
-
function createExceptionDashboardConfig(): DashboardViewConfig {
|
|
1565
|
-
const components: Array<DashboardBaseComponent> = [
|
|
1566
|
-
// Row 0: Title
|
|
1567
|
-
createTextComponent({
|
|
1568
|
-
text: "Exception Dashboard",
|
|
1569
|
-
top: 0,
|
|
1570
|
-
left: 0,
|
|
1571
|
-
width: 12,
|
|
1572
|
-
height: 1,
|
|
1573
|
-
isBold: true,
|
|
1574
|
-
}),
|
|
1575
|
-
|
|
1576
|
-
// Row 1: Key exception metrics
|
|
1577
|
-
createValueComponent({
|
|
1578
|
-
title: "Total Exceptions",
|
|
1579
|
-
top: 1,
|
|
1580
|
-
left: 0,
|
|
1581
|
-
width: 3,
|
|
1582
|
-
metricConfig: {
|
|
1583
|
-
metricName: ExceptionMetricType.ExceptionCount,
|
|
1584
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1585
|
-
},
|
|
1586
|
-
}),
|
|
1587
|
-
createValueComponent({
|
|
1588
|
-
title: "Exception Rate",
|
|
1589
|
-
top: 1,
|
|
1590
|
-
left: 3,
|
|
1591
|
-
width: 3,
|
|
1592
|
-
metricConfig: {
|
|
1593
|
-
metricName: ExceptionMetricType.ExceptionRate,
|
|
1594
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1595
|
-
legendUnit: "/min",
|
|
1596
|
-
},
|
|
1597
|
-
}),
|
|
1598
|
-
createValueComponent({
|
|
1599
|
-
title: "Unresolved",
|
|
1600
|
-
top: 1,
|
|
1601
|
-
left: 6,
|
|
1602
|
-
width: 3,
|
|
1603
|
-
metricConfig: {
|
|
1604
|
-
metricName: ExceptionMetricType.UnresolvedExceptionCount,
|
|
1605
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1606
|
-
},
|
|
1607
|
-
}),
|
|
1608
|
-
createValueComponent({
|
|
1609
|
-
title: "Affected Services",
|
|
1610
|
-
top: 1,
|
|
1611
|
-
left: 9,
|
|
1612
|
-
width: 3,
|
|
1613
|
-
metricConfig: {
|
|
1614
|
-
metricName: ExceptionMetricType.ExceptionAffectedServiceCount,
|
|
1615
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1616
|
-
},
|
|
1617
|
-
}),
|
|
1618
|
-
|
|
1619
|
-
// Row 2-4: Exception trends
|
|
1620
|
-
createChartComponent({
|
|
1621
|
-
title: "Exceptions Over Time",
|
|
1622
|
-
chartType: DashboardChartType.Bar,
|
|
1623
|
-
top: 2,
|
|
1624
|
-
left: 0,
|
|
1625
|
-
width: 6,
|
|
1626
|
-
height: 3,
|
|
1627
|
-
metricConfig: {
|
|
1628
|
-
metricName: ExceptionMetricType.ExceptionCount,
|
|
1629
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1630
|
-
legend: "Exceptions",
|
|
1631
|
-
},
|
|
1632
|
-
}),
|
|
1633
|
-
createChartComponent({
|
|
1634
|
-
title: "Exception Rate Over Time",
|
|
1635
|
-
chartType: DashboardChartType.Line,
|
|
1636
|
-
top: 2,
|
|
1637
|
-
left: 6,
|
|
1638
|
-
width: 6,
|
|
1639
|
-
height: 3,
|
|
1640
|
-
metricConfig: {
|
|
1641
|
-
metricName: ExceptionMetricType.ExceptionRate,
|
|
1642
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1643
|
-
legend: "Exception Rate",
|
|
1644
|
-
legendUnit: "/min",
|
|
1645
|
-
},
|
|
1646
|
-
}),
|
|
1647
|
-
|
|
1648
|
-
// Row 5: Section header
|
|
1649
|
-
createTextComponent({
|
|
1650
|
-
text: "Exception Breakdown",
|
|
1651
|
-
top: 5,
|
|
1652
|
-
left: 0,
|
|
1653
|
-
width: 12,
|
|
1654
|
-
height: 1,
|
|
1655
|
-
isBold: true,
|
|
1656
|
-
}),
|
|
1657
|
-
|
|
1658
|
-
// Row 6-8: Exception type and service breakdown
|
|
1659
|
-
createChartComponent({
|
|
1660
|
-
title: "Exceptions by Type",
|
|
1661
|
-
chartType: DashboardChartType.Pie,
|
|
1662
|
-
top: 6,
|
|
1663
|
-
left: 0,
|
|
1664
|
-
width: 6,
|
|
1665
|
-
height: 3,
|
|
1666
|
-
metricConfig: {
|
|
1667
|
-
metricName: ExceptionMetricType.ExceptionCountByType,
|
|
1668
|
-
aggregationType: MetricsAggregationType.Count,
|
|
1669
|
-
legend: "Exception Type",
|
|
1670
|
-
},
|
|
1671
|
-
}),
|
|
1672
|
-
createChartComponent({
|
|
1673
|
-
title: "Exceptions by Service",
|
|
1674
|
-
chartType: DashboardChartType.Bar,
|
|
1675
|
-
top: 6,
|
|
1676
|
-
left: 6,
|
|
1677
|
-
width: 6,
|
|
1678
|
-
height: 3,
|
|
1679
|
-
metricConfig: {
|
|
1680
|
-
metricName: ExceptionMetricType.ExceptionCountByService,
|
|
1681
|
-
aggregationType: MetricsAggregationType.Count,
|
|
1682
|
-
legend: "Service",
|
|
1683
|
-
},
|
|
1684
|
-
}),
|
|
1685
|
-
|
|
1686
|
-
// Row 9: Section header
|
|
1687
|
-
createTextComponent({
|
|
1688
|
-
text: "Resolution Status",
|
|
1689
|
-
top: 9,
|
|
1690
|
-
left: 0,
|
|
1691
|
-
width: 12,
|
|
1692
|
-
height: 1,
|
|
1693
|
-
isBold: true,
|
|
1694
|
-
}),
|
|
1695
|
-
|
|
1696
|
-
// Row 10-12: Resolution gauges and resolution trends
|
|
1697
|
-
createGaugeComponent({
|
|
1698
|
-
title: "Unresolved Exceptions",
|
|
1699
|
-
top: 10,
|
|
1700
|
-
left: 0,
|
|
1701
|
-
width: 3,
|
|
1702
|
-
height: 3,
|
|
1703
|
-
minValue: 0,
|
|
1704
|
-
maxValue: 100,
|
|
1705
|
-
warningThreshold: 25,
|
|
1706
|
-
criticalThreshold: 50,
|
|
1707
|
-
metricConfig: {
|
|
1708
|
-
metricName: ExceptionMetricType.UnresolvedExceptionCount,
|
|
1709
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1710
|
-
},
|
|
1711
|
-
}),
|
|
1712
|
-
createGaugeComponent({
|
|
1713
|
-
title: "Muted Exceptions",
|
|
1714
|
-
top: 10,
|
|
1715
|
-
left: 3,
|
|
1716
|
-
width: 3,
|
|
1717
|
-
height: 3,
|
|
1718
|
-
minValue: 0,
|
|
1719
|
-
maxValue: 100,
|
|
1720
|
-
metricConfig: {
|
|
1721
|
-
metricName: ExceptionMetricType.MutedExceptionCount,
|
|
1722
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1723
|
-
},
|
|
1724
|
-
}),
|
|
1725
|
-
createChartComponent({
|
|
1726
|
-
title: "Resolution Status Over Time",
|
|
1727
|
-
chartType: DashboardChartType.StackedArea,
|
|
1728
|
-
top: 10,
|
|
1729
|
-
left: 6,
|
|
1730
|
-
width: 6,
|
|
1731
|
-
height: 3,
|
|
1732
|
-
metricConfig: {
|
|
1733
|
-
metricName: ExceptionMetricType.ResolvedExceptionCount,
|
|
1734
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1735
|
-
legend: "Resolved",
|
|
1736
|
-
},
|
|
1737
|
-
}),
|
|
1738
|
-
|
|
1739
|
-
// Row 13: Section header
|
|
1740
|
-
createTextComponent({
|
|
1741
|
-
text: "Exception Recurrence",
|
|
1742
|
-
top: 13,
|
|
1743
|
-
left: 0,
|
|
1744
|
-
width: 12,
|
|
1745
|
-
height: 1,
|
|
1746
|
-
isBold: true,
|
|
1747
|
-
}),
|
|
1748
|
-
|
|
1749
|
-
// Row 14-16: Occurrence trends and top exceptions table
|
|
1750
|
-
createChartComponent({
|
|
1751
|
-
title: "Exception Occurrences Over Time",
|
|
1752
|
-
chartType: DashboardChartType.Heatmap,
|
|
1753
|
-
top: 14,
|
|
1754
|
-
left: 0,
|
|
1755
|
-
width: 6,
|
|
1756
|
-
height: 3,
|
|
1757
|
-
metricConfig: {
|
|
1758
|
-
metricName: ExceptionMetricType.ExceptionOccurrenceCount,
|
|
1759
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1760
|
-
legend: "Occurrences",
|
|
1761
|
-
},
|
|
1762
|
-
}),
|
|
1763
|
-
createTableComponent({
|
|
1764
|
-
title: "Top Exceptions by Occurrence",
|
|
1765
|
-
top: 14,
|
|
1766
|
-
left: 6,
|
|
1767
|
-
width: 6,
|
|
1768
|
-
height: 3,
|
|
1769
|
-
metricConfig: {
|
|
1770
|
-
metricName: ExceptionMetricType.ExceptionOccurrenceCount,
|
|
1771
|
-
aggregationType: MetricsAggregationType.Max,
|
|
1772
|
-
},
|
|
1773
|
-
}),
|
|
1774
|
-
|
|
1775
|
-
// Row 17: Section header
|
|
1776
|
-
createTextComponent({
|
|
1777
|
-
text: "Exception Details",
|
|
1778
|
-
top: 17,
|
|
1779
|
-
left: 0,
|
|
1780
|
-
width: 12,
|
|
1781
|
-
height: 1,
|
|
1782
|
-
isBold: true,
|
|
1783
|
-
}),
|
|
1784
|
-
|
|
1785
|
-
// Row 18-20: Logs and traces
|
|
1786
|
-
createLogStreamComponent({
|
|
1787
|
-
title: "Exception Logs",
|
|
1788
|
-
top: 18,
|
|
1789
|
-
left: 0,
|
|
1790
|
-
width: 6,
|
|
1791
|
-
height: 3,
|
|
1792
|
-
}),
|
|
1793
|
-
createTraceListComponent({
|
|
1794
|
-
title: "Related Traces",
|
|
1795
|
-
top: 18,
|
|
1796
|
-
left: 6,
|
|
1797
|
-
width: 6,
|
|
1798
|
-
height: 3,
|
|
1799
|
-
}),
|
|
1800
|
-
];
|
|
1801
|
-
|
|
1802
|
-
return {
|
|
1803
|
-
_type: ObjectType.DashboardViewConfig,
|
|
1804
|
-
components,
|
|
1805
|
-
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
|
|
1806
|
-
};
|
|
1807
|
-
}
|
|
1808
|
-
|
|
1809
|
-
function createProfilesDashboardConfig(): DashboardViewConfig {
|
|
1810
|
-
const components: Array<DashboardBaseComponent> = [
|
|
1811
|
-
// Row 0: Title
|
|
1812
|
-
createTextComponent({
|
|
1813
|
-
text: "Profiles Dashboard",
|
|
1814
|
-
top: 0,
|
|
1815
|
-
left: 0,
|
|
1816
|
-
width: 12,
|
|
1817
|
-
height: 1,
|
|
1818
|
-
isBold: true,
|
|
1819
|
-
}),
|
|
1820
|
-
|
|
1821
|
-
// Row 1: Key profile metrics
|
|
1822
|
-
createValueComponent({
|
|
1823
|
-
title: "Profile Count",
|
|
1824
|
-
top: 1,
|
|
1825
|
-
left: 0,
|
|
1826
|
-
width: 3,
|
|
1827
|
-
metricConfig: {
|
|
1828
|
-
metricName: ProfileMetricType.ProfileCount,
|
|
1829
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1830
|
-
},
|
|
1831
|
-
}),
|
|
1832
|
-
createValueComponent({
|
|
1833
|
-
title: "CPU Profile Duration",
|
|
1834
|
-
top: 1,
|
|
1835
|
-
left: 3,
|
|
1836
|
-
width: 3,
|
|
1837
|
-
metricConfig: {
|
|
1838
|
-
metricName: ProfileMetricType.CpuProfileDuration,
|
|
1839
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1840
|
-
legendUnit: "ms",
|
|
1841
|
-
},
|
|
1842
|
-
}),
|
|
1843
|
-
createValueComponent({
|
|
1844
|
-
title: "Memory Allocations",
|
|
1845
|
-
top: 1,
|
|
1846
|
-
left: 6,
|
|
1847
|
-
width: 3,
|
|
1848
|
-
metricConfig: {
|
|
1849
|
-
metricName: ProfileMetricType.MemoryAllocationCount,
|
|
1850
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1851
|
-
},
|
|
1852
|
-
}),
|
|
1853
|
-
createValueComponent({
|
|
1854
|
-
title: "Thread Count",
|
|
1855
|
-
top: 1,
|
|
1856
|
-
left: 9,
|
|
1857
|
-
width: 3,
|
|
1858
|
-
metricConfig: {
|
|
1859
|
-
metricName: ProfileMetricType.ThreadCount,
|
|
1860
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1861
|
-
},
|
|
1862
|
-
}),
|
|
1863
|
-
|
|
1864
|
-
// Row 2-4: CPU profile charts
|
|
1865
|
-
createChartComponent({
|
|
1866
|
-
title: "CPU Profile Duration Over Time",
|
|
1867
|
-
chartType: DashboardChartType.Line,
|
|
1868
|
-
top: 2,
|
|
1869
|
-
left: 0,
|
|
1870
|
-
width: 6,
|
|
1871
|
-
height: 3,
|
|
1872
|
-
metricConfig: {
|
|
1873
|
-
metricName: ProfileMetricType.CpuProfileDuration,
|
|
1874
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1875
|
-
legend: "CPU Duration",
|
|
1876
|
-
legendUnit: "ms",
|
|
1877
|
-
},
|
|
1878
|
-
}),
|
|
1879
|
-
createChartComponent({
|
|
1880
|
-
title: "CPU Sample Count Over Time",
|
|
1881
|
-
chartType: DashboardChartType.Bar,
|
|
1882
|
-
top: 2,
|
|
1883
|
-
left: 6,
|
|
1884
|
-
width: 6,
|
|
1885
|
-
height: 3,
|
|
1886
|
-
metricConfig: {
|
|
1887
|
-
metricName: ProfileMetricType.CpuProfileSampleCount,
|
|
1888
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1889
|
-
legend: "CPU Samples",
|
|
1890
|
-
},
|
|
1891
|
-
}),
|
|
1892
|
-
|
|
1893
|
-
// Row 5: Section header
|
|
1894
|
-
createTextComponent({
|
|
1895
|
-
text: "Memory Profiling",
|
|
1896
|
-
top: 5,
|
|
1897
|
-
left: 0,
|
|
1898
|
-
width: 12,
|
|
1899
|
-
height: 1,
|
|
1900
|
-
isBold: true,
|
|
1901
|
-
}),
|
|
1902
|
-
|
|
1903
|
-
// Row 6-8: Memory gauges and allocation charts
|
|
1904
|
-
createGaugeComponent({
|
|
1905
|
-
title: "Heap Usage",
|
|
1906
|
-
top: 6,
|
|
1907
|
-
left: 0,
|
|
1908
|
-
width: 3,
|
|
1909
|
-
height: 3,
|
|
1910
|
-
minValue: 0,
|
|
1911
|
-
maxValue: 100,
|
|
1912
|
-
warningThreshold: 70,
|
|
1913
|
-
criticalThreshold: 90,
|
|
1914
|
-
metricConfig: {
|
|
1915
|
-
metricName: ProfileMetricType.HeapUsage,
|
|
1916
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1917
|
-
},
|
|
1918
|
-
}),
|
|
1919
|
-
createChartComponent({
|
|
1920
|
-
title: "Memory Allocation Size Over Time",
|
|
1921
|
-
chartType: DashboardChartType.Area,
|
|
1922
|
-
top: 6,
|
|
1923
|
-
left: 3,
|
|
1924
|
-
width: 6,
|
|
1925
|
-
height: 3,
|
|
1926
|
-
metricConfig: {
|
|
1927
|
-
metricName: ProfileMetricType.MemoryAllocationSize,
|
|
1928
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1929
|
-
legend: "Allocation Size",
|
|
1930
|
-
legendUnit: "bytes",
|
|
1931
|
-
},
|
|
1932
|
-
}),
|
|
1933
|
-
createGaugeComponent({
|
|
1934
|
-
title: "Thread Count",
|
|
1935
|
-
top: 6,
|
|
1936
|
-
left: 9,
|
|
1937
|
-
width: 3,
|
|
1938
|
-
height: 3,
|
|
1939
|
-
minValue: 0,
|
|
1940
|
-
maxValue: 500,
|
|
1941
|
-
warningThreshold: 200,
|
|
1942
|
-
criticalThreshold: 400,
|
|
1943
|
-
metricConfig: {
|
|
1944
|
-
metricName: ProfileMetricType.ThreadCount,
|
|
1945
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1946
|
-
},
|
|
1947
|
-
}),
|
|
1948
|
-
|
|
1949
|
-
// Row 9: Section header
|
|
1950
|
-
createTextComponent({
|
|
1951
|
-
text: "Allocation Trends",
|
|
1952
|
-
top: 9,
|
|
1953
|
-
left: 0,
|
|
1954
|
-
width: 12,
|
|
1955
|
-
height: 1,
|
|
1956
|
-
isBold: true,
|
|
1957
|
-
}),
|
|
1958
|
-
|
|
1959
|
-
// Row 10-12: Allocation count trends and heap trends
|
|
1960
|
-
createChartComponent({
|
|
1961
|
-
title: "Memory Allocation Count Over Time",
|
|
1962
|
-
chartType: DashboardChartType.Bar,
|
|
1963
|
-
top: 10,
|
|
1964
|
-
left: 0,
|
|
1965
|
-
width: 6,
|
|
1966
|
-
height: 3,
|
|
1967
|
-
metricConfig: {
|
|
1968
|
-
metricName: ProfileMetricType.MemoryAllocationCount,
|
|
1969
|
-
aggregationType: MetricsAggregationType.Sum,
|
|
1970
|
-
legend: "Allocations",
|
|
1971
|
-
},
|
|
1972
|
-
}),
|
|
1973
|
-
createChartComponent({
|
|
1974
|
-
title: "Heap Usage Over Time",
|
|
1975
|
-
chartType: DashboardChartType.Area,
|
|
1976
|
-
top: 10,
|
|
1977
|
-
left: 6,
|
|
1978
|
-
width: 6,
|
|
1979
|
-
height: 3,
|
|
1980
|
-
metricConfig: {
|
|
1981
|
-
metricName: ProfileMetricType.HeapUsage,
|
|
1982
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
1983
|
-
legend: "Heap",
|
|
1984
|
-
legendUnit: "bytes",
|
|
1985
|
-
},
|
|
1986
|
-
}),
|
|
1987
|
-
|
|
1988
|
-
// Row 13: Section header
|
|
1989
|
-
createTextComponent({
|
|
1990
|
-
text: "Runtime & Concurrency",
|
|
1991
|
-
top: 13,
|
|
1992
|
-
left: 0,
|
|
1993
|
-
width: 12,
|
|
1994
|
-
height: 1,
|
|
1995
|
-
isBold: true,
|
|
1996
|
-
}),
|
|
1997
|
-
|
|
1998
|
-
// Row 14-16: Wall clock, goroutines/threads, sample rate
|
|
1999
|
-
createChartComponent({
|
|
2000
|
-
title: "Wall Clock Duration Over Time",
|
|
2001
|
-
chartType: DashboardChartType.Line,
|
|
2002
|
-
top: 14,
|
|
2003
|
-
left: 0,
|
|
2004
|
-
width: 6,
|
|
2005
|
-
height: 3,
|
|
2006
|
-
metricConfig: {
|
|
2007
|
-
metricName: ProfileMetricType.WallClockDuration,
|
|
2008
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
2009
|
-
legend: "Wall Clock",
|
|
2010
|
-
legendUnit: "ms",
|
|
2011
|
-
},
|
|
2012
|
-
}),
|
|
2013
|
-
createChartComponent({
|
|
2014
|
-
title: "Goroutine / Thread Count Over Time",
|
|
2015
|
-
chartType: DashboardChartType.StackedArea,
|
|
2016
|
-
top: 14,
|
|
2017
|
-
left: 6,
|
|
2018
|
-
width: 6,
|
|
2019
|
-
height: 3,
|
|
2020
|
-
metricConfig: {
|
|
2021
|
-
metricName: ProfileMetricType.GoroutineCount,
|
|
2022
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
2023
|
-
legend: "Goroutines / Threads",
|
|
2024
|
-
},
|
|
2025
|
-
}),
|
|
2026
|
-
|
|
2027
|
-
// Row 17: Section header
|
|
2028
|
-
createTextComponent({
|
|
2029
|
-
text: "Hot Functions",
|
|
2030
|
-
top: 17,
|
|
2031
|
-
left: 0,
|
|
2032
|
-
width: 12,
|
|
2033
|
-
height: 1,
|
|
2034
|
-
isBold: true,
|
|
2035
|
-
}),
|
|
2036
|
-
|
|
2037
|
-
// Row 18-20: Top functions tables
|
|
2038
|
-
createTableComponent({
|
|
2039
|
-
title: "Top Functions by CPU Time",
|
|
2040
|
-
top: 18,
|
|
2041
|
-
left: 0,
|
|
2042
|
-
width: 6,
|
|
2043
|
-
height: 3,
|
|
2044
|
-
metricConfig: {
|
|
2045
|
-
metricName: ProfileMetricType.TopFunctionCpuTime,
|
|
2046
|
-
aggregationType: MetricsAggregationType.Max,
|
|
2047
|
-
},
|
|
2048
|
-
}),
|
|
2049
|
-
createTableComponent({
|
|
2050
|
-
title: "Top Functions by Allocations",
|
|
2051
|
-
top: 18,
|
|
2052
|
-
left: 6,
|
|
2053
|
-
width: 6,
|
|
2054
|
-
height: 3,
|
|
2055
|
-
metricConfig: {
|
|
2056
|
-
metricName: ProfileMetricType.TopFunctionAllocations,
|
|
2057
|
-
aggregationType: MetricsAggregationType.Max,
|
|
2058
|
-
},
|
|
2059
|
-
}),
|
|
2060
|
-
|
|
2061
|
-
// Row 21-23: Profile sample rate and logs
|
|
2062
|
-
createChartComponent({
|
|
2063
|
-
title: "Profile Sample Rate Over Time",
|
|
2064
|
-
chartType: DashboardChartType.Line,
|
|
2065
|
-
top: 21,
|
|
2066
|
-
left: 0,
|
|
2067
|
-
width: 6,
|
|
2068
|
-
height: 3,
|
|
2069
|
-
metricConfig: {
|
|
2070
|
-
metricName: ProfileMetricType.ProfileSampleRate,
|
|
2071
|
-
aggregationType: MetricsAggregationType.Avg,
|
|
2072
|
-
legend: "Sample Rate",
|
|
2073
|
-
legendUnit: "samples/s",
|
|
2074
|
-
},
|
|
2075
|
-
}),
|
|
2076
|
-
createLogStreamComponent({
|
|
2077
|
-
title: "Related Logs",
|
|
2078
|
-
top: 21,
|
|
2079
|
-
left: 6,
|
|
2080
|
-
width: 6,
|
|
2081
|
-
height: 3,
|
|
2082
|
-
}),
|
|
2083
|
-
];
|
|
2084
|
-
|
|
2085
|
-
return {
|
|
2086
|
-
_type: ObjectType.DashboardViewConfig,
|
|
2087
|
-
components,
|
|
2088
|
-
heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 24),
|
|
2089
|
-
};
|
|
2090
|
-
}
|
|
2091
|
-
|
|
2092
1387
|
export function getTemplateConfig(
|
|
2093
1388
|
type: DashboardTemplateType,
|
|
2094
1389
|
): DashboardViewConfig | null {
|
|
@@ -2101,12 +1396,6 @@ export function getTemplateConfig(
|
|
|
2101
1396
|
return createKubernetesDashboardConfig();
|
|
2102
1397
|
case DashboardTemplateType.Metrics:
|
|
2103
1398
|
return createMetricsDashboardConfig();
|
|
2104
|
-
case DashboardTemplateType.Trace:
|
|
2105
|
-
return createTraceDashboardConfig();
|
|
2106
|
-
case DashboardTemplateType.Exception:
|
|
2107
|
-
return createExceptionDashboardConfig();
|
|
2108
|
-
case DashboardTemplateType.Profiles:
|
|
2109
|
-
return createProfilesDashboardConfig();
|
|
2110
1399
|
case DashboardTemplateType.Blank:
|
|
2111
1400
|
return null;
|
|
2112
1401
|
}
|