@oneuptime/common 10.2.15 → 10.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/Server/API/DashboardAPI.ts +0 -6
  2. package/Server/Services/DockerHostService.ts +91 -0
  3. package/Server/Services/IncidentService.ts +60 -23
  4. package/Server/Services/KubernetesClusterService.ts +92 -0
  5. package/Types/Dashboard/DashboardComponents/DashboardValueComponent.ts +15 -0
  6. package/Types/Dashboard/DashboardTemplates.ts +260 -971
  7. package/Types/Dashboard/DashboardVariable.ts +0 -8
  8. package/UI/Components/Charts/Utils/DataPoint.ts +0 -0
  9. package/Utils/Dashboard/Components/DashboardValueComponent.ts +36 -2
  10. package/Utils/ValueFormatter.ts +57 -0
  11. package/build/dist/Server/API/DashboardAPI.js +0 -3
  12. package/build/dist/Server/API/DashboardAPI.js.map +1 -1
  13. package/build/dist/Server/Services/DockerHostService.js +73 -0
  14. package/build/dist/Server/Services/DockerHostService.js.map +1 -1
  15. package/build/dist/Server/Services/IncidentService.js +55 -18
  16. package/build/dist/Server/Services/IncidentService.js.map +1 -1
  17. package/build/dist/Server/Services/KubernetesClusterService.js +74 -0
  18. package/build/dist/Server/Services/KubernetesClusterService.js.map +1 -1
  19. package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js +14 -1
  20. package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js.map +1 -1
  21. package/build/dist/Types/Dashboard/DashboardTemplates.js +240 -928
  22. package/build/dist/Types/Dashboard/DashboardTemplates.js.map +1 -1
  23. package/build/dist/UI/Components/Charts/Utils/DataPoint.js +0 -0
  24. package/build/dist/UI/Components/Charts/Utils/DataPoint.js.map +1 -1
  25. package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js +31 -1
  26. package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js.map +1 -1
  27. package/build/dist/Utils/ValueFormatter.js +51 -0
  28. package/build/dist/Utils/ValueFormatter.js.map +1 -1
  29. package/package.json +1 -1
@@ -9,20 +9,23 @@ import IconProp from "../Icon/IconProp";
9
9
  import MetricsAggregationType from "../Metrics/MetricsAggregationType";
10
10
  import IncidentMetricType from "../Incident/IncidentMetricType";
11
11
  import MonitorMetricType from "../Monitor/MonitorMetricType";
12
- import SpanMetricType from "../Span/SpanMetricType";
13
- import ExceptionMetricType from "../Exception/ExceptionMetricType";
14
- import ProfileMetricType from "../Profile/ProfileMetricType";
15
12
  import MetricDashboardMetricType from "../Metrics/MetricDashboardMetricType";
16
-
13
+ import { DashboardValueTrendDirection } from "./DashboardComponents/DashboardValueComponent";
14
+
15
+ /*
16
+ * Trace / Exception / Profiles entries are intentionally not in this
17
+ * enum: their metric catalogs (SpanMetricType, ExceptionMetricType,
18
+ * ProfileMetricType) define names that are not emitted anywhere in the
19
+ * codebase, so the templates only ever rendered empty widgets. Reach
20
+ * for the Logs / Traces / Exceptions pages directly until those metrics
21
+ * exist.
22
+ */
17
23
  export enum DashboardTemplateType {
18
24
  Blank = "Blank",
19
25
  Monitor = "Monitor",
20
26
  Incident = "Incident",
21
27
  Kubernetes = "Kubernetes",
22
28
  Metrics = "Metrics",
23
- Trace = "Trace",
24
- Exception = "Exception",
25
- Profiles = "Profiles",
26
29
  }
27
30
 
28
31
  export interface DashboardTemplate {
@@ -43,51 +46,30 @@ export const DashboardTemplates: Array<DashboardTemplate> = [
43
46
  type: DashboardTemplateType.Monitor,
44
47
  name: "Monitor Dashboard",
45
48
  description:
46
- "Response time, uptime, error rate, throughput charts, health gauges, and logs.",
49
+ "Response time, uptime, status codes, CPU/memory health gauges, and breakdown table for synthetic and server monitors.",
47
50
  icon: IconProp.Heartbeat,
48
51
  },
49
52
  {
50
53
  type: DashboardTemplateType.Incident,
51
54
  name: "Incident Dashboard",
52
55
  description:
53
- "MTTR/MTTA gauges, incident trends, severity breakdown, duration tables, logs, and traces.",
56
+ "Incident count, MTTR/MTTA gauges, duration trends, severity breakdown, time-in-state, and longest-incident tables.",
54
57
  icon: IconProp.Alert,
55
58
  },
56
59
  {
57
60
  type: DashboardTemplateType.Kubernetes,
58
61
  name: "Kubernetes Dashboard",
59
62
  description:
60
- "CPU/memory gauges, pod and node metrics, network I/O, restart trends, and cluster logs.",
63
+ "Pod/node CPU and memory averages, utilization gauges, live pod and node lists, network I/O, restarts, and cluster logs.",
61
64
  icon: IconProp.Kubernetes,
62
65
  },
63
66
  {
64
67
  type: DashboardTemplateType.Metrics,
65
68
  name: "Metrics Dashboard",
66
69
  description:
67
- "HTTP request rates, latency percentiles, error rates, system resource usage, and custom application metrics.",
70
+ "HTTP request rate, latency, error rate, CPU utilization gauge, memory usage, disk and network I/O, and runtime metrics.",
68
71
  icon: IconProp.ChartBar,
69
72
  },
70
- {
71
- type: DashboardTemplateType.Trace,
72
- name: "Trace Dashboard",
73
- description:
74
- "Span throughput, latency percentiles, error rates, service health, status breakdown, and recent traces.",
75
- icon: IconProp.Activity,
76
- },
77
- {
78
- type: DashboardTemplateType.Exception,
79
- name: "Exception Dashboard",
80
- description:
81
- "Exception counts, error rates, top exception types, resolution status, affected services, and logs.",
82
- icon: IconProp.Bug,
83
- },
84
- {
85
- type: DashboardTemplateType.Profiles,
86
- name: "Profiles Dashboard",
87
- description:
88
- "CPU profiles, memory allocations, heap usage, thread counts, top functions by CPU time, and flamegraph data.",
89
- icon: IconProp.Fire,
90
- },
91
73
  ];
92
74
 
93
75
  // -- Metric query config helpers --
@@ -166,6 +148,13 @@ function createValueComponent(data: {
166
148
  left: number;
167
149
  width: number;
168
150
  metricConfig?: MetricConfig;
151
+ /*
152
+ * Per-widget override for the trend-arrow colour. Leave `undefined` to
153
+ * let the renderer apply its metric-name heuristic (incident counts,
154
+ * error rates, latency, CPU/memory usage flip the colour); set
155
+ * explicitly when the heuristic would guess wrong.
156
+ */
157
+ trendDirection?: DashboardValueTrendDirection;
169
158
  }): DashboardBaseComponent {
170
159
  return {
171
160
  _type: ObjectType.DashboardComponent,
@@ -187,6 +176,7 @@ function createValueComponent(data: {
187
176
  groupBy: undefined,
188
177
  },
189
178
  },
179
+ trendDirection: data.trendDirection,
190
180
  },
191
181
  };
192
182
  }
@@ -330,27 +320,56 @@ function createTableComponent(data: {
330
320
  };
331
321
  }
332
322
 
333
- function createTraceListComponent(data: {
323
+ function createKubernetesPodListComponent(data: {
334
324
  title: string;
335
325
  top: number;
336
326
  left: number;
337
327
  width: number;
338
328
  height: number;
339
329
  maxRows?: number;
330
+ podPhases?: Array<string>;
340
331
  }): DashboardBaseComponent {
341
332
  return {
342
333
  _type: ObjectType.DashboardComponent,
343
- componentType: DashboardComponentType.TraceList,
334
+ componentType: DashboardComponentType.KubernetesPodList,
344
335
  componentId: ObjectID.generate(),
345
336
  topInDashboardUnits: data.top,
346
337
  leftInDashboardUnits: data.left,
347
338
  widthInDashboardUnits: data.width,
348
339
  heightInDashboardUnits: data.height,
349
340
  minHeightInDashboardUnits: 3,
350
- minWidthInDashboardUnits: 6,
341
+ minWidthInDashboardUnits: 4,
342
+ arguments: {
343
+ title: data.title,
344
+ maxRows: data.maxRows ?? 20,
345
+ podPhases: data.podPhases,
346
+ },
347
+ };
348
+ }
349
+
350
+ function createKubernetesNodeListComponent(data: {
351
+ title: string;
352
+ top: number;
353
+ left: number;
354
+ width: number;
355
+ height: number;
356
+ maxRows?: number;
357
+ readinessFilter?: string;
358
+ }): DashboardBaseComponent {
359
+ return {
360
+ _type: ObjectType.DashboardComponent,
361
+ componentType: DashboardComponentType.KubernetesNodeList,
362
+ componentId: ObjectID.generate(),
363
+ topInDashboardUnits: data.top,
364
+ leftInDashboardUnits: data.left,
365
+ widthInDashboardUnits: data.width,
366
+ heightInDashboardUnits: data.height,
367
+ minHeightInDashboardUnits: 3,
368
+ minWidthInDashboardUnits: 4,
351
369
  arguments: {
352
370
  title: data.title,
353
371
  maxRows: data.maxRows ?? 20,
372
+ readinessFilter: data.readinessFilter,
354
373
  },
355
374
  };
356
375
  }
@@ -380,20 +399,35 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
380
399
  aggregationType: MetricsAggregationType.Avg,
381
400
  legendUnit: "ms",
382
401
  },
402
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
383
403
  }),
404
+ /*
405
+ * IsOnline is emitted as 0/1 with unit "" by MonitorMetricUtil, so
406
+ * `Avg` gives the uptime ratio in [0, 1] rather than a percent. We
407
+ * label the widget "Uptime (avg)" instead of "Uptime %" so the fractional
408
+ * display isn't misleading; flipping the storage to 0/100 + unit
409
+ * "%" would change criteria evaluation elsewhere in the codebase.
410
+ */
384
411
  createValueComponent({
385
- title: "Uptime %",
412
+ title: "Uptime (avg)",
386
413
  top: 1,
387
414
  left: 3,
388
415
  width: 3,
389
416
  metricConfig: {
390
417
  metricName: MonitorMetricType.IsOnline,
391
418
  aggregationType: MetricsAggregationType.Avg,
392
- legendUnit: "%",
393
419
  },
420
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
394
421
  }),
422
+ /*
423
+ * ResponseStatusCode is the literal HTTP status code (200, 404,
424
+ * 503, …). `Count` over it returns the total number of checks the
425
+ * monitor ran, not the error rate — the original "Error Rate" label
426
+ * was misleading. Filtering to status >= 400 would require attribute
427
+ * filters that the template helper doesn't expose, so we relabel.
428
+ */
395
429
  createValueComponent({
396
- title: "Error Rate",
430
+ title: "Total Checks",
397
431
  top: 1,
398
432
  left: 6,
399
433
  width: 3,
@@ -401,6 +435,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
401
435
  metricName: MonitorMetricType.ResponseStatusCode,
402
436
  aggregationType: MetricsAggregationType.Count,
403
437
  },
438
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
404
439
  }),
405
440
  createValueComponent({
406
441
  title: "Execution Time",
@@ -412,6 +447,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
412
447
  aggregationType: MetricsAggregationType.Avg,
413
448
  legendUnit: "ms",
414
449
  },
450
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
415
451
  }),
416
452
 
417
453
  // Row 2-4: Charts
@@ -439,8 +475,7 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
439
475
  metricConfig: {
440
476
  metricName: MonitorMetricType.IsOnline,
441
477
  aggregationType: MetricsAggregationType.Avg,
442
- legend: "Online Status",
443
- legendUnit: "%",
478
+ legend: "Uptime Ratio",
444
479
  },
445
480
  }),
446
481
 
@@ -538,6 +573,21 @@ function createMonitorDashboardConfig(): DashboardViewConfig {
538
573
  }
539
574
 
540
575
  function createIncidentDashboardConfig(): DashboardViewConfig {
576
+ /*
577
+ * Incident metrics (TimeToResolve, TimeToAcknowledge, IncidentDuration,
578
+ * TimeInState, PostmortemCompletionTime) are emitted with unit
579
+ * "seconds" by IncidentService. Templates previously passed
580
+ * `legendUnit: "min"` to relabel the chart legend, but that bypassed
581
+ * ValueFormatter's scale-aware formatting and rendered raw seconds
582
+ * with a "Minutes" suffix (e.g. a 1-hour incident showed as
583
+ * "3600 Minutes"). Gauges were authored against an implicit minute
584
+ * scale (maxValue 120, threshold 60/90) and compared raw seconds
585
+ * against minutes, so any incident over ~2 minutes pinned the gauge.
586
+ *
587
+ * We now drop the legendUnit overrides — ValueFormatter scales
588
+ * `seconds` to sec/min/hr/days based on magnitude — and reauthor the
589
+ * gauge ranges in seconds so the 0-100% sweep is meaningful.
590
+ */
541
591
  const components: Array<DashboardBaseComponent> = [
542
592
  // Row 0: Title
543
593
  createTextComponent({
@@ -549,7 +599,7 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
549
599
  isBold: true,
550
600
  }),
551
601
 
552
- // Row 1: Key incident metrics
602
+ // Row 1: Key incident metrics — every one is "higher = worse".
553
603
  createValueComponent({
554
604
  title: "Incident Count",
555
605
  top: 1,
@@ -559,6 +609,7 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
559
609
  metricName: IncidentMetricType.IncidentCount,
560
610
  aggregationType: MetricsAggregationType.Sum,
561
611
  },
612
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
562
613
  }),
563
614
  createValueComponent({
564
615
  title: "MTTR",
@@ -568,8 +619,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
568
619
  metricConfig: {
569
620
  metricName: IncidentMetricType.TimeToResolve,
570
621
  aggregationType: MetricsAggregationType.Avg,
571
- legendUnit: "min",
572
622
  },
623
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
573
624
  }),
574
625
  createValueComponent({
575
626
  title: "MTTA",
@@ -579,8 +630,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
579
630
  metricConfig: {
580
631
  metricName: IncidentMetricType.TimeToAcknowledge,
581
632
  aggregationType: MetricsAggregationType.Avg,
582
- legendUnit: "min",
583
633
  },
634
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
584
635
  }),
585
636
  createValueComponent({
586
637
  title: "Avg Duration",
@@ -590,8 +641,8 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
590
641
  metricConfig: {
591
642
  metricName: IncidentMetricType.IncidentDuration,
592
643
  aggregationType: MetricsAggregationType.Avg,
593
- legendUnit: "min",
594
644
  },
645
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
595
646
  }),
596
647
 
597
648
  // Row 2-4: Incident trends
@@ -619,7 +670,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
619
670
  metricName: IncidentMetricType.IncidentDuration,
620
671
  aggregationType: MetricsAggregationType.Avg,
621
672
  legend: "Avg Duration",
622
- legendUnit: "min",
623
673
  },
624
674
  }),
625
675
 
@@ -633,32 +683,37 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
633
683
  isBold: true,
634
684
  }),
635
685
 
636
- // Row 6-8: Gauges for MTTR/MTTA and resolution chart
686
+ /*
687
+ * Row 6-8: MTTR/MTTA gauges. Ranges and thresholds are now in
688
+ * seconds (matching the stored metric unit). Targets: MTTR full
689
+ * scale 2 hours (warn at 1 hour, critical at 1.5 hours); MTTA full
690
+ * scale 1 hour (warn at 15 min, critical at 30 min).
691
+ */
637
692
  createGaugeComponent({
638
- title: "MTTR (minutes)",
693
+ title: "MTTR",
639
694
  top: 6,
640
695
  left: 0,
641
696
  width: 3,
642
697
  height: 3,
643
698
  minValue: 0,
644
- maxValue: 120,
645
- warningThreshold: 60,
646
- criticalThreshold: 90,
699
+ maxValue: 7200,
700
+ warningThreshold: 3600,
701
+ criticalThreshold: 5400,
647
702
  metricConfig: {
648
703
  metricName: IncidentMetricType.TimeToResolve,
649
704
  aggregationType: MetricsAggregationType.Avg,
650
705
  },
651
706
  }),
652
707
  createGaugeComponent({
653
- title: "MTTA (minutes)",
708
+ title: "MTTA",
654
709
  top: 6,
655
710
  left: 3,
656
711
  width: 3,
657
712
  height: 3,
658
713
  minValue: 0,
659
- maxValue: 60,
660
- warningThreshold: 15,
661
- criticalThreshold: 30,
714
+ maxValue: 3600,
715
+ warningThreshold: 900,
716
+ criticalThreshold: 1800,
662
717
  metricConfig: {
663
718
  metricName: IncidentMetricType.TimeToAcknowledge,
664
719
  aggregationType: MetricsAggregationType.Avg,
@@ -675,7 +730,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
675
730
  metricName: IncidentMetricType.TimeToResolve,
676
731
  aggregationType: MetricsAggregationType.Avg,
677
732
  legend: "MTTR",
678
- legendUnit: "min",
679
733
  },
680
734
  }),
681
735
 
@@ -714,7 +768,6 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
714
768
  metricName: IncidentMetricType.TimeInState,
715
769
  aggregationType: MetricsAggregationType.Avg,
716
770
  legend: "Time in State",
717
- legendUnit: "min",
718
771
  },
719
772
  }),
720
773
 
@@ -728,7 +781,13 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
728
781
  isBold: true,
729
782
  }),
730
783
 
731
- // Row 14-16: Tables
784
+ /*
785
+ * Row 14-16: Operational tables. Logs / traces were removed from the
786
+ * Incident template because incident records are not log/trace
787
+ * sources — they're rows in Postgres. Surfacing unrelated cluster
788
+ * logs and trace lists alongside MTTR/MTTA was a UX miss; if a user
789
+ * wants those views they live on dedicated Trace / Log pages.
790
+ */
732
791
  createTableComponent({
733
792
  title: "Incidents by Duration",
734
793
  top: 14,
@@ -751,32 +810,42 @@ function createIncidentDashboardConfig(): DashboardViewConfig {
751
810
  aggregationType: MetricsAggregationType.Avg,
752
811
  },
753
812
  }),
754
-
755
- // Row 17-19: Logs and traces
756
- createLogStreamComponent({
757
- title: "Recent Incident Logs",
758
- top: 17,
759
- left: 0,
760
- width: 6,
761
- height: 3,
762
- }),
763
- createTraceListComponent({
764
- title: "Recent Traces",
765
- top: 17,
766
- left: 6,
767
- width: 6,
768
- height: 3,
769
- }),
770
813
  ];
771
814
 
772
815
  return {
773
816
  _type: ObjectType.DashboardViewConfig,
774
817
  components,
775
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 20),
818
+ heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 17),
776
819
  };
777
820
  }
778
821
 
779
822
  function createKubernetesDashboardConfig(): DashboardViewConfig {
823
+ /*
824
+ * Layout notes:
825
+ *
826
+ * - "Pod Count" / "Node Ready" used to be Value widgets over k8s.pod.phase
827
+ * / k8s.node.condition_ready with `Sum` aggregation. Those metrics are
828
+ * per-resource gauges that re-emit `1` on every scrape, so summing
829
+ * across the dashboard window yielded roughly pods * scrapes and produced
830
+ * numbers in the hundreds for tiny clusters. The user-visible fix is
831
+ * to use the dedicated KubernetesPodList / KubernetesNodeList widgets
832
+ * below — they read the per-cluster snapshot in Postgres and show
833
+ * accurate counts in the widget header plus a live list of rows.
834
+ *
835
+ * - "Memory Utilization" used to be a 0-100 gauge over k8s.node.memory.usage,
836
+ * which is reported in bytes. A node with 8 GB of RAM produced a value
837
+ * in the 10^9 range against a 0-100 scale, so the gauge always pinned
838
+ * at the critical end with a meaningless absolute number. Without a
839
+ * first-class percent metric we replace it with a Value widget that
840
+ * renders the absolute usage via ValueFormatter (e.g. "8.3 GB").
841
+ *
842
+ * - CPU widgets use OTel's k8s.*.cpu.utilization, which the collector
843
+ * emits as a [0, 1] ratio with unit "1". DashboardValueComponent /
844
+ * DashboardGaugeComponent now scale that to a percent at render time
845
+ * when the metric name carries the `.utilization` suffix, so "0.05"
846
+ * reads as "5.00%" and gauge thresholds in the natural 0-100 scale work
847
+ * as expected.
848
+ */
780
849
  const components: Array<DashboardBaseComponent> = [
781
850
  // Row 0: Title
782
851
  createTextComponent({
@@ -788,48 +857,54 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
788
857
  isBold: true,
789
858
  }),
790
859
 
791
- // Row 1: Key cluster metrics
860
+ /*
861
+ * Row 1: Key cluster metrics — averages render with proper units via
862
+ * ValueFormatter (CPU utilization → "%", memory.usage → "MB"/"GB").
863
+ * All four are "higher = worse" (closer to capacity = bad).
864
+ */
792
865
  createValueComponent({
793
- title: "CPU Usage",
866
+ title: "Pod CPU (avg)",
794
867
  top: 1,
795
868
  left: 0,
796
869
  width: 3,
797
870
  metricConfig: {
798
871
  metricName: "k8s.pod.cpu.utilization",
799
872
  aggregationType: MetricsAggregationType.Avg,
800
- legendUnit: "%",
801
873
  },
874
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
802
875
  }),
803
876
  createValueComponent({
804
- title: "Memory Usage",
877
+ title: "Pod Memory (avg)",
805
878
  top: 1,
806
879
  left: 3,
807
880
  width: 3,
808
881
  metricConfig: {
809
882
  metricName: "k8s.pod.memory.usage",
810
883
  aggregationType: MetricsAggregationType.Avg,
811
- legendUnit: "bytes",
812
884
  },
885
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
813
886
  }),
814
887
  createValueComponent({
815
- title: "Pod Count",
888
+ title: "Node CPU (avg)",
816
889
  top: 1,
817
890
  left: 6,
818
891
  width: 3,
819
892
  metricConfig: {
820
- metricName: "k8s.pod.phase",
821
- aggregationType: MetricsAggregationType.Sum,
893
+ metricName: "k8s.node.cpu.utilization",
894
+ aggregationType: MetricsAggregationType.Avg,
822
895
  },
896
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
823
897
  }),
824
898
  createValueComponent({
825
- title: "Node Ready",
899
+ title: "Node Memory (avg)",
826
900
  top: 1,
827
901
  left: 9,
828
902
  width: 3,
829
903
  metricConfig: {
830
- metricName: "k8s.node.condition_ready",
831
- aggregationType: MetricsAggregationType.Sum,
904
+ metricName: "k8s.node.memory.usage",
905
+ aggregationType: MetricsAggregationType.Avg,
832
906
  },
907
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
833
908
  }),
834
909
 
835
910
  // Row 2-4: Resource usage charts
@@ -844,7 +919,6 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
844
919
  metricName: "k8s.pod.cpu.utilization",
845
920
  aggregationType: MetricsAggregationType.Avg,
846
921
  legend: "CPU Utilization",
847
- legendUnit: "%",
848
922
  },
849
923
  }),
850
924
  createChartComponent({
@@ -858,13 +932,12 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
858
932
  metricName: "k8s.pod.memory.usage",
859
933
  aggregationType: MetricsAggregationType.Avg,
860
934
  legend: "Memory Usage",
861
- legendUnit: "bytes",
862
935
  },
863
936
  }),
864
937
 
865
938
  // Row 5: Section header
866
939
  createTextComponent({
867
- text: "Resource Health",
940
+ text: "Cluster Resources",
868
941
  top: 5,
869
942
  left: 0,
870
943
  width: 12,
@@ -872,81 +945,88 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
872
945
  isBold: true,
873
946
  }),
874
947
 
875
- // Row 6-8: Gauges and pod chart
876
- createGaugeComponent({
877
- title: "CPU Utilization",
948
+ /*
949
+ * Row 6-9: Pod and node lists query the Postgres snapshot, so the
950
+ * header shows the true current count and the body shows live rows
951
+ * (replacing the broken Sum-of-gauge Value widgets).
952
+ */
953
+ createKubernetesPodListComponent({
954
+ title: "Pods",
878
955
  top: 6,
879
956
  left: 0,
880
- width: 3,
881
- height: 3,
882
- minValue: 0,
883
- maxValue: 100,
884
- warningThreshold: 70,
885
- criticalThreshold: 90,
886
- metricConfig: {
887
- metricName: "k8s.node.cpu.utilization",
888
- aggregationType: MetricsAggregationType.Avg,
889
- },
957
+ width: 6,
958
+ height: 4,
959
+ maxRows: 25,
890
960
  }),
891
- createGaugeComponent({
892
- title: "Memory Utilization",
961
+ createKubernetesNodeListComponent({
962
+ title: "Nodes",
893
963
  top: 6,
894
- left: 3,
895
- width: 3,
964
+ left: 6,
965
+ width: 6,
966
+ height: 4,
967
+ maxRows: 25,
968
+ }),
969
+
970
+ // Row 10: Section header
971
+ createTextComponent({
972
+ text: "Resource Health",
973
+ top: 10,
974
+ left: 0,
975
+ width: 12,
976
+ height: 1,
977
+ isBold: true,
978
+ }),
979
+
980
+ /*
981
+ * Row 11-13: CPU gauge (auto-scaled from [0,1] to percent), and the
982
+ * network throughput chart. The old "Memory Utilization" gauge over
983
+ * raw bytes is gone — see top-of-function comment.
984
+ */
985
+ createGaugeComponent({
986
+ title: "Cluster CPU Utilization",
987
+ top: 11,
988
+ left: 0,
989
+ width: 4,
896
990
  height: 3,
897
991
  minValue: 0,
898
992
  maxValue: 100,
899
993
  warningThreshold: 70,
900
994
  criticalThreshold: 90,
901
995
  metricConfig: {
902
- metricName: "k8s.node.memory.usage",
996
+ metricName: "k8s.node.cpu.utilization",
903
997
  aggregationType: MetricsAggregationType.Avg,
904
998
  },
905
999
  }),
906
1000
  createChartComponent({
907
- title: "Pod Count Over Time",
908
- chartType: DashboardChartType.StackedArea,
909
- top: 6,
910
- left: 6,
911
- width: 6,
1001
+ title: "Network I/O",
1002
+ chartType: DashboardChartType.Area,
1003
+ top: 11,
1004
+ left: 4,
1005
+ width: 8,
912
1006
  height: 3,
913
1007
  metricConfig: {
914
- metricName: "k8s.pod.phase",
1008
+ metricName: "k8s.pod.network.io",
915
1009
  aggregationType: MetricsAggregationType.Sum,
916
- legend: "Pods",
1010
+ legend: "Network I/O",
917
1011
  },
918
1012
  }),
919
1013
 
920
- // Row 9: Section header
1014
+ // Row 14: Section header
921
1015
  createTextComponent({
922
- text: "Workload Details",
923
- top: 9,
1016
+ text: "Workload Activity",
1017
+ top: 14,
924
1018
  left: 0,
925
1019
  width: 12,
926
1020
  height: 1,
927
1021
  isBold: true,
928
1022
  }),
929
1023
 
930
- // Row 10-12: Network, restarts
931
- createChartComponent({
932
- title: "Network I/O",
933
- chartType: DashboardChartType.Area,
934
- top: 10,
935
- left: 0,
936
- width: 6,
937
- height: 3,
938
- metricConfig: {
939
- metricName: "k8s.pod.network.io",
940
- aggregationType: MetricsAggregationType.Sum,
941
- legend: "Network I/O",
942
- legendUnit: "bytes",
943
- },
944
- }),
1024
+ // Row 15-17: Restarts and replicas
945
1025
  createChartComponent({
946
1026
  title: "Container Restarts Over Time",
947
1027
  chartType: DashboardChartType.Bar,
948
- top: 10,
949
- left: 6,
1028
+ top: 15,
1029
+ left: 0,
950
1030
  width: 6,
951
1031
  height: 3,
952
1032
  metricConfig: {
@@ -955,12 +1035,10 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
955
1035
  legend: "Restarts",
956
1036
  },
957
1037
  }),
958
-
959
- // Row 13-15: Table and logs
960
1038
  createTableComponent({
961
1039
  title: "Deployment Replicas",
962
- top: 13,
963
- left: 0,
1040
+ top: 15,
1041
+ left: 6,
964
1042
  width: 6,
965
1043
  height: 3,
966
1044
  metricConfig: {
@@ -968,11 +1046,13 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
968
1046
  aggregationType: MetricsAggregationType.Min,
969
1047
  },
970
1048
  }),
1049
+
1050
+ // Row 18-20: Logs
971
1051
  createLogStreamComponent({
972
1052
  title: "Cluster Logs",
973
- top: 13,
974
- left: 6,
975
- width: 6,
1053
+ top: 18,
1054
+ left: 0,
1055
+ width: 12,
976
1056
  height: 3,
977
1057
  }),
978
1058
  ];
@@ -980,11 +1060,32 @@ function createKubernetesDashboardConfig(): DashboardViewConfig {
980
1060
  return {
981
1061
  _type: ObjectType.DashboardViewConfig,
982
1062
  components,
983
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 16),
1063
+ heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
984
1064
  };
985
1065
  }
986
1066
 
987
1067
  function createMetricsDashboardConfig(): DashboardViewConfig {
1068
+ /*
1069
+ * Layout notes:
1070
+ *
1071
+ * - `system.cpu.utilization` and `process.cpu.utilization` are OTel
1072
+ * ratio metrics with unit "1" reported in [0, 1]. DashboardValueComponent
1073
+ * / DashboardGaugeComponent scale these to a percent at render time
1074
+ * (see splitFormattedValue / isFractionScale), so the 0-100 gauge sweep
1075
+ * and the percent display work without any special template config.
1076
+ *
1077
+ * - `system.memory.usage` is reported in bytes. A previous "Memory Usage"
1078
+ * gauge compared bytes (10⁹ range) against a 0-100 sweep and pinned
1079
+ * critical for any sane workload. We swapped it for a Value widget that
1080
+ * renders the absolute usage via ValueFormatter (e.g. "8.3 GB"), since
1081
+ * there is no first-class memory-utilization percent metric in OTel's
1082
+ * default system instrumentation.
1083
+ *
1084
+ * - We also dropped explicit `legendUnit: "bytes"/"%"/"ms"` overrides
1085
+ * where they duplicated the stored MetricType unit — ValueFormatter
1086
+ * already auto-scales bytes/seconds/ms and renders ratio metrics as
1087
+ * percent. We keep overrides only where they add useful aliasing.
1088
+ */
988
1089
  const components: Array<DashboardBaseComponent> = [
989
1090
  // Row 0: Title
990
1091
  createTextComponent({
@@ -996,7 +1097,11 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
996
1097
  isBold: true,
997
1098
  }),
998
1099
 
999
- // Row 1: Key HTTP metrics
1100
+ /*
1101
+ * Row 1: Key HTTP metrics. Request volume rising is generally a
1102
+ * sign of activity (good); latency, errors, and active in-flight
1103
+ * requests rising signal saturation or trouble (bad).
1104
+ */
1000
1105
  createValueComponent({
1001
1106
  title: "Request Rate",
1002
1107
  top: 1,
@@ -1007,6 +1112,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1007
1112
  aggregationType: MetricsAggregationType.Sum,
1008
1113
  legendUnit: "req/s",
1009
1114
  },
1115
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
1010
1116
  }),
1011
1117
  createValueComponent({
1012
1118
  title: "Avg Latency",
@@ -1018,6 +1124,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1018
1124
  aggregationType: MetricsAggregationType.Avg,
1019
1125
  legendUnit: "ms",
1020
1126
  },
1127
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
1021
1128
  }),
1022
1129
  createValueComponent({
1023
1130
  title: "Error Rate",
@@ -1029,6 +1136,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1029
1136
  aggregationType: MetricsAggregationType.Avg,
1030
1137
  legendUnit: "%",
1031
1138
  },
1139
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
1032
1140
  }),
1033
1141
  createValueComponent({
1034
1142
  title: "Active Requests",
@@ -1039,6 +1147,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1039
1147
  metricName: MetricDashboardMetricType.HttpActiveRequests,
1040
1148
  aggregationType: MetricsAggregationType.Avg,
1041
1149
  },
1150
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
1042
1151
  }),
1043
1152
 
1044
1153
  // Row 2-4: HTTP request charts
@@ -1121,7 +1230,12 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1121
1230
  isBold: true,
1122
1231
  }),
1123
1232
 
1124
- // Row 10-12: System resource gauges and charts
1233
+ /*
1234
+ * Row 10-12: System resource health. CPU has a percent gauge (auto-
1235
+ * scaled from [0, 1] ratio at render time); Memory has a Value widget
1236
+ * since `system.memory.usage` is bytes (auto-formatted to MB/GB) and
1237
+ * we don't have a first-class memory-utilization percent metric.
1238
+ */
1125
1239
  createGaugeComponent({
1126
1240
  title: "CPU Utilization",
1127
1241
  top: 10,
@@ -1137,20 +1251,16 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1137
1251
  aggregationType: MetricsAggregationType.Avg,
1138
1252
  },
1139
1253
  }),
1140
- createGaugeComponent({
1254
+ createValueComponent({
1141
1255
  title: "Memory Usage",
1142
1256
  top: 10,
1143
1257
  left: 3,
1144
1258
  width: 3,
1145
- height: 3,
1146
- minValue: 0,
1147
- maxValue: 100,
1148
- warningThreshold: 70,
1149
- criticalThreshold: 90,
1150
1259
  metricConfig: {
1151
1260
  metricName: MetricDashboardMetricType.SystemMemoryUsage,
1152
1261
  aggregationType: MetricsAggregationType.Avg,
1153
1262
  },
1263
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
1154
1264
  }),
1155
1265
  createChartComponent({
1156
1266
  title: "CPU Usage Over Time",
@@ -1162,8 +1272,7 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1162
1272
  metricConfig: {
1163
1273
  metricName: MetricDashboardMetricType.SystemCpuUtilization,
1164
1274
  aggregationType: MetricsAggregationType.Avg,
1165
- legend: "CPU %",
1166
- legendUnit: "%",
1275
+ legend: "CPU",
1167
1276
  },
1168
1277
  }),
1169
1278
 
@@ -1275,820 +1384,6 @@ function createMetricsDashboardConfig(): DashboardViewConfig {
1275
1384
  };
1276
1385
  }
1277
1386
 
1278
- function createTraceDashboardConfig(): DashboardViewConfig {
1279
- const components: Array<DashboardBaseComponent> = [
1280
- // Row 0: Title
1281
- createTextComponent({
1282
- text: "Trace Dashboard",
1283
- top: 0,
1284
- left: 0,
1285
- width: 12,
1286
- height: 1,
1287
- isBold: true,
1288
- }),
1289
-
1290
- // Row 1: Key trace metrics
1291
- createValueComponent({
1292
- title: "Span Count",
1293
- top: 1,
1294
- left: 0,
1295
- width: 3,
1296
- metricConfig: {
1297
- metricName: SpanMetricType.SpanCount,
1298
- aggregationType: MetricsAggregationType.Sum,
1299
- },
1300
- }),
1301
- createValueComponent({
1302
- title: "Avg Duration",
1303
- top: 1,
1304
- left: 3,
1305
- width: 3,
1306
- metricConfig: {
1307
- metricName: SpanMetricType.SpanDuration,
1308
- aggregationType: MetricsAggregationType.Avg,
1309
- legendUnit: "ms",
1310
- },
1311
- }),
1312
- createValueComponent({
1313
- title: "Error Rate",
1314
- top: 1,
1315
- left: 6,
1316
- width: 3,
1317
- metricConfig: {
1318
- metricName: SpanMetricType.SpanErrorRate,
1319
- aggregationType: MetricsAggregationType.Avg,
1320
- legendUnit: "%",
1321
- },
1322
- }),
1323
- createValueComponent({
1324
- title: "Throughput",
1325
- top: 1,
1326
- left: 9,
1327
- width: 3,
1328
- metricConfig: {
1329
- metricName: SpanMetricType.SpanThroughput,
1330
- aggregationType: MetricsAggregationType.Sum,
1331
- legendUnit: "req/s",
1332
- },
1333
- }),
1334
-
1335
- // Row 2-4: Throughput and duration charts
1336
- createChartComponent({
1337
- title: "Span Throughput Over Time",
1338
- chartType: DashboardChartType.Bar,
1339
- top: 2,
1340
- left: 0,
1341
- width: 6,
1342
- height: 3,
1343
- metricConfig: {
1344
- metricName: SpanMetricType.SpanCount,
1345
- aggregationType: MetricsAggregationType.Sum,
1346
- legend: "Spans",
1347
- },
1348
- }),
1349
- createChartComponent({
1350
- title: "Avg Span Duration Over Time",
1351
- chartType: DashboardChartType.Line,
1352
- top: 2,
1353
- left: 6,
1354
- width: 6,
1355
- height: 3,
1356
- metricConfig: {
1357
- metricName: SpanMetricType.SpanDuration,
1358
- aggregationType: MetricsAggregationType.Avg,
1359
- legend: "Avg Duration",
1360
- legendUnit: "ms",
1361
- },
1362
- }),
1363
-
1364
- // Row 5: Section header
1365
- createTextComponent({
1366
- text: "Latency Percentiles",
1367
- top: 5,
1368
- left: 0,
1369
- width: 12,
1370
- height: 1,
1371
- isBold: true,
1372
- }),
1373
-
1374
- // Row 6: Latency percentile values
1375
- createValueComponent({
1376
- title: "P50 Latency",
1377
- top: 6,
1378
- left: 0,
1379
- width: 3,
1380
- metricConfig: {
1381
- metricName: SpanMetricType.SpanP50Duration,
1382
- aggregationType: MetricsAggregationType.Avg,
1383
- legendUnit: "ms",
1384
- },
1385
- }),
1386
- createValueComponent({
1387
- title: "P90 Latency",
1388
- top: 6,
1389
- left: 3,
1390
- width: 3,
1391
- metricConfig: {
1392
- metricName: SpanMetricType.SpanP90Duration,
1393
- aggregationType: MetricsAggregationType.Avg,
1394
- legendUnit: "ms",
1395
- },
1396
- }),
1397
- createValueComponent({
1398
- title: "P95 Latency",
1399
- top: 6,
1400
- left: 6,
1401
- width: 3,
1402
- metricConfig: {
1403
- metricName: SpanMetricType.SpanP95Duration,
1404
- aggregationType: MetricsAggregationType.Avg,
1405
- legendUnit: "ms",
1406
- },
1407
- }),
1408
- createValueComponent({
1409
- title: "P99 Latency",
1410
- top: 6,
1411
- left: 9,
1412
- width: 3,
1413
- metricConfig: {
1414
- metricName: SpanMetricType.SpanP99Duration,
1415
- aggregationType: MetricsAggregationType.Avg,
1416
- legendUnit: "ms",
1417
- },
1418
- }),
1419
-
1420
- // Row 7-9: Latency percentile charts
1421
- createChartComponent({
1422
- title: "Latency Percentiles Over Time",
1423
- chartType: DashboardChartType.Line,
1424
- top: 7,
1425
- left: 0,
1426
- width: 6,
1427
- height: 3,
1428
- metricConfig: {
1429
- metricName: SpanMetricType.SpanP95Duration,
1430
- aggregationType: MetricsAggregationType.Avg,
1431
- legend: "P95 Latency",
1432
- legendUnit: "ms",
1433
- },
1434
- }),
1435
- createChartComponent({
1436
- title: "Latency Distribution",
1437
- chartType: DashboardChartType.Histogram,
1438
- top: 7,
1439
- left: 6,
1440
- width: 6,
1441
- height: 3,
1442
- metricConfig: {
1443
- metricName: SpanMetricType.SpanDuration,
1444
- aggregationType: MetricsAggregationType.Count,
1445
- legend: "Latency Distribution",
1446
- legendUnit: "ms",
1447
- },
1448
- }),
1449
-
1450
- // Row 10: Section header
1451
- createTextComponent({
1452
- text: "Error Analysis",
1453
- top: 10,
1454
- left: 0,
1455
- width: 12,
1456
- height: 1,
1457
- isBold: true,
1458
- }),
1459
-
1460
- // Row 11-13: Error charts and status breakdown
1461
- createGaugeComponent({
1462
- title: "Error Rate",
1463
- top: 11,
1464
- left: 0,
1465
- width: 3,
1466
- height: 3,
1467
- minValue: 0,
1468
- maxValue: 100,
1469
- warningThreshold: 5,
1470
- criticalThreshold: 15,
1471
- metricConfig: {
1472
- metricName: SpanMetricType.SpanErrorRate,
1473
- aggregationType: MetricsAggregationType.Avg,
1474
- },
1475
- }),
1476
- createChartComponent({
1477
- title: "Errors Over Time",
1478
- chartType: DashboardChartType.Area,
1479
- top: 11,
1480
- left: 3,
1481
- width: 6,
1482
- height: 3,
1483
- metricConfig: {
1484
- metricName: SpanMetricType.SpanErrorCount,
1485
- aggregationType: MetricsAggregationType.Sum,
1486
- legend: "Errors",
1487
- },
1488
- }),
1489
- createChartComponent({
1490
- title: "Span Status Breakdown",
1491
- chartType: DashboardChartType.Pie,
1492
- top: 11,
1493
- left: 9,
1494
- width: 3,
1495
- height: 3,
1496
- metricConfig: {
1497
- metricName: SpanMetricType.SpanStatusOk,
1498
- aggregationType: MetricsAggregationType.Count,
1499
- legend: "Status",
1500
- },
1501
- }),
1502
-
1503
- // Row 14: Section header
1504
- createTextComponent({
1505
- text: "Trace Details",
1506
- top: 14,
1507
- left: 0,
1508
- width: 12,
1509
- height: 1,
1510
- isBold: true,
1511
- }),
1512
-
1513
- // Row 15-17: Table of slowest spans and request rate
1514
- createTableComponent({
1515
- title: "Slowest Spans",
1516
- top: 15,
1517
- left: 0,
1518
- width: 6,
1519
- height: 3,
1520
- metricConfig: {
1521
- metricName: SpanMetricType.SpanDuration,
1522
- aggregationType: MetricsAggregationType.Max,
1523
- },
1524
- }),
1525
- createChartComponent({
1526
- title: "Request Rate Over Time",
1527
- chartType: DashboardChartType.StackedArea,
1528
- top: 15,
1529
- left: 6,
1530
- width: 6,
1531
- height: 3,
1532
- metricConfig: {
1533
- metricName: SpanMetricType.SpanRequestRate,
1534
- aggregationType: MetricsAggregationType.Sum,
1535
- legend: "Request Rate",
1536
- legendUnit: "req/s",
1537
- },
1538
- }),
1539
-
1540
- // Row 18-20: Recent traces and logs
1541
- createTraceListComponent({
1542
- title: "Recent Traces",
1543
- top: 18,
1544
- left: 0,
1545
- width: 6,
1546
- height: 3,
1547
- }),
1548
- createLogStreamComponent({
1549
- title: "Related Logs",
1550
- top: 18,
1551
- left: 6,
1552
- width: 6,
1553
- height: 3,
1554
- }),
1555
- ];
1556
-
1557
- return {
1558
- _type: ObjectType.DashboardViewConfig,
1559
- components,
1560
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
1561
- };
1562
- }
1563
-
1564
- function createExceptionDashboardConfig(): DashboardViewConfig {
1565
- const components: Array<DashboardBaseComponent> = [
1566
- // Row 0: Title
1567
- createTextComponent({
1568
- text: "Exception Dashboard",
1569
- top: 0,
1570
- left: 0,
1571
- width: 12,
1572
- height: 1,
1573
- isBold: true,
1574
- }),
1575
-
1576
- // Row 1: Key exception metrics
1577
- createValueComponent({
1578
- title: "Total Exceptions",
1579
- top: 1,
1580
- left: 0,
1581
- width: 3,
1582
- metricConfig: {
1583
- metricName: ExceptionMetricType.ExceptionCount,
1584
- aggregationType: MetricsAggregationType.Sum,
1585
- },
1586
- }),
1587
- createValueComponent({
1588
- title: "Exception Rate",
1589
- top: 1,
1590
- left: 3,
1591
- width: 3,
1592
- metricConfig: {
1593
- metricName: ExceptionMetricType.ExceptionRate,
1594
- aggregationType: MetricsAggregationType.Avg,
1595
- legendUnit: "/min",
1596
- },
1597
- }),
1598
- createValueComponent({
1599
- title: "Unresolved",
1600
- top: 1,
1601
- left: 6,
1602
- width: 3,
1603
- metricConfig: {
1604
- metricName: ExceptionMetricType.UnresolvedExceptionCount,
1605
- aggregationType: MetricsAggregationType.Sum,
1606
- },
1607
- }),
1608
- createValueComponent({
1609
- title: "Affected Services",
1610
- top: 1,
1611
- left: 9,
1612
- width: 3,
1613
- metricConfig: {
1614
- metricName: ExceptionMetricType.ExceptionAffectedServiceCount,
1615
- aggregationType: MetricsAggregationType.Sum,
1616
- },
1617
- }),
1618
-
1619
- // Row 2-4: Exception trends
1620
- createChartComponent({
1621
- title: "Exceptions Over Time",
1622
- chartType: DashboardChartType.Bar,
1623
- top: 2,
1624
- left: 0,
1625
- width: 6,
1626
- height: 3,
1627
- metricConfig: {
1628
- metricName: ExceptionMetricType.ExceptionCount,
1629
- aggregationType: MetricsAggregationType.Sum,
1630
- legend: "Exceptions",
1631
- },
1632
- }),
1633
- createChartComponent({
1634
- title: "Exception Rate Over Time",
1635
- chartType: DashboardChartType.Line,
1636
- top: 2,
1637
- left: 6,
1638
- width: 6,
1639
- height: 3,
1640
- metricConfig: {
1641
- metricName: ExceptionMetricType.ExceptionRate,
1642
- aggregationType: MetricsAggregationType.Avg,
1643
- legend: "Exception Rate",
1644
- legendUnit: "/min",
1645
- },
1646
- }),
1647
-
1648
- // Row 5: Section header
1649
- createTextComponent({
1650
- text: "Exception Breakdown",
1651
- top: 5,
1652
- left: 0,
1653
- width: 12,
1654
- height: 1,
1655
- isBold: true,
1656
- }),
1657
-
1658
- // Row 6-8: Exception type and service breakdown
1659
- createChartComponent({
1660
- title: "Exceptions by Type",
1661
- chartType: DashboardChartType.Pie,
1662
- top: 6,
1663
- left: 0,
1664
- width: 6,
1665
- height: 3,
1666
- metricConfig: {
1667
- metricName: ExceptionMetricType.ExceptionCountByType,
1668
- aggregationType: MetricsAggregationType.Count,
1669
- legend: "Exception Type",
1670
- },
1671
- }),
1672
- createChartComponent({
1673
- title: "Exceptions by Service",
1674
- chartType: DashboardChartType.Bar,
1675
- top: 6,
1676
- left: 6,
1677
- width: 6,
1678
- height: 3,
1679
- metricConfig: {
1680
- metricName: ExceptionMetricType.ExceptionCountByService,
1681
- aggregationType: MetricsAggregationType.Count,
1682
- legend: "Service",
1683
- },
1684
- }),
1685
-
1686
- // Row 9: Section header
1687
- createTextComponent({
1688
- text: "Resolution Status",
1689
- top: 9,
1690
- left: 0,
1691
- width: 12,
1692
- height: 1,
1693
- isBold: true,
1694
- }),
1695
-
1696
- // Row 10-12: Resolution gauges and resolution trends
1697
- createGaugeComponent({
1698
- title: "Unresolved Exceptions",
1699
- top: 10,
1700
- left: 0,
1701
- width: 3,
1702
- height: 3,
1703
- minValue: 0,
1704
- maxValue: 100,
1705
- warningThreshold: 25,
1706
- criticalThreshold: 50,
1707
- metricConfig: {
1708
- metricName: ExceptionMetricType.UnresolvedExceptionCount,
1709
- aggregationType: MetricsAggregationType.Sum,
1710
- },
1711
- }),
1712
- createGaugeComponent({
1713
- title: "Muted Exceptions",
1714
- top: 10,
1715
- left: 3,
1716
- width: 3,
1717
- height: 3,
1718
- minValue: 0,
1719
- maxValue: 100,
1720
- metricConfig: {
1721
- metricName: ExceptionMetricType.MutedExceptionCount,
1722
- aggregationType: MetricsAggregationType.Sum,
1723
- },
1724
- }),
1725
- createChartComponent({
1726
- title: "Resolution Status Over Time",
1727
- chartType: DashboardChartType.StackedArea,
1728
- top: 10,
1729
- left: 6,
1730
- width: 6,
1731
- height: 3,
1732
- metricConfig: {
1733
- metricName: ExceptionMetricType.ResolvedExceptionCount,
1734
- aggregationType: MetricsAggregationType.Sum,
1735
- legend: "Resolved",
1736
- },
1737
- }),
1738
-
1739
- // Row 13: Section header
1740
- createTextComponent({
1741
- text: "Exception Recurrence",
1742
- top: 13,
1743
- left: 0,
1744
- width: 12,
1745
- height: 1,
1746
- isBold: true,
1747
- }),
1748
-
1749
- // Row 14-16: Occurrence trends and top exceptions table
1750
- createChartComponent({
1751
- title: "Exception Occurrences Over Time",
1752
- chartType: DashboardChartType.Heatmap,
1753
- top: 14,
1754
- left: 0,
1755
- width: 6,
1756
- height: 3,
1757
- metricConfig: {
1758
- metricName: ExceptionMetricType.ExceptionOccurrenceCount,
1759
- aggregationType: MetricsAggregationType.Sum,
1760
- legend: "Occurrences",
1761
- },
1762
- }),
1763
- createTableComponent({
1764
- title: "Top Exceptions by Occurrence",
1765
- top: 14,
1766
- left: 6,
1767
- width: 6,
1768
- height: 3,
1769
- metricConfig: {
1770
- metricName: ExceptionMetricType.ExceptionOccurrenceCount,
1771
- aggregationType: MetricsAggregationType.Max,
1772
- },
1773
- }),
1774
-
1775
- // Row 17: Section header
1776
- createTextComponent({
1777
- text: "Exception Details",
1778
- top: 17,
1779
- left: 0,
1780
- width: 12,
1781
- height: 1,
1782
- isBold: true,
1783
- }),
1784
-
1785
- // Row 18-20: Logs and traces
1786
- createLogStreamComponent({
1787
- title: "Exception Logs",
1788
- top: 18,
1789
- left: 0,
1790
- width: 6,
1791
- height: 3,
1792
- }),
1793
- createTraceListComponent({
1794
- title: "Related Traces",
1795
- top: 18,
1796
- left: 6,
1797
- width: 6,
1798
- height: 3,
1799
- }),
1800
- ];
1801
-
1802
- return {
1803
- _type: ObjectType.DashboardViewConfig,
1804
- components,
1805
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
1806
- };
1807
- }
1808
-
1809
- function createProfilesDashboardConfig(): DashboardViewConfig {
1810
- const components: Array<DashboardBaseComponent> = [
1811
- // Row 0: Title
1812
- createTextComponent({
1813
- text: "Profiles Dashboard",
1814
- top: 0,
1815
- left: 0,
1816
- width: 12,
1817
- height: 1,
1818
- isBold: true,
1819
- }),
1820
-
1821
- // Row 1: Key profile metrics
1822
- createValueComponent({
1823
- title: "Profile Count",
1824
- top: 1,
1825
- left: 0,
1826
- width: 3,
1827
- metricConfig: {
1828
- metricName: ProfileMetricType.ProfileCount,
1829
- aggregationType: MetricsAggregationType.Sum,
1830
- },
1831
- }),
1832
- createValueComponent({
1833
- title: "CPU Profile Duration",
1834
- top: 1,
1835
- left: 3,
1836
- width: 3,
1837
- metricConfig: {
1838
- metricName: ProfileMetricType.CpuProfileDuration,
1839
- aggregationType: MetricsAggregationType.Sum,
1840
- legendUnit: "ms",
1841
- },
1842
- }),
1843
- createValueComponent({
1844
- title: "Memory Allocations",
1845
- top: 1,
1846
- left: 6,
1847
- width: 3,
1848
- metricConfig: {
1849
- metricName: ProfileMetricType.MemoryAllocationCount,
1850
- aggregationType: MetricsAggregationType.Sum,
1851
- },
1852
- }),
1853
- createValueComponent({
1854
- title: "Thread Count",
1855
- top: 1,
1856
- left: 9,
1857
- width: 3,
1858
- metricConfig: {
1859
- metricName: ProfileMetricType.ThreadCount,
1860
- aggregationType: MetricsAggregationType.Avg,
1861
- },
1862
- }),
1863
-
1864
- // Row 2-4: CPU profile charts
1865
- createChartComponent({
1866
- title: "CPU Profile Duration Over Time",
1867
- chartType: DashboardChartType.Line,
1868
- top: 2,
1869
- left: 0,
1870
- width: 6,
1871
- height: 3,
1872
- metricConfig: {
1873
- metricName: ProfileMetricType.CpuProfileDuration,
1874
- aggregationType: MetricsAggregationType.Avg,
1875
- legend: "CPU Duration",
1876
- legendUnit: "ms",
1877
- },
1878
- }),
1879
- createChartComponent({
1880
- title: "CPU Sample Count Over Time",
1881
- chartType: DashboardChartType.Bar,
1882
- top: 2,
1883
- left: 6,
1884
- width: 6,
1885
- height: 3,
1886
- metricConfig: {
1887
- metricName: ProfileMetricType.CpuProfileSampleCount,
1888
- aggregationType: MetricsAggregationType.Sum,
1889
- legend: "CPU Samples",
1890
- },
1891
- }),
1892
-
1893
- // Row 5: Section header
1894
- createTextComponent({
1895
- text: "Memory Profiling",
1896
- top: 5,
1897
- left: 0,
1898
- width: 12,
1899
- height: 1,
1900
- isBold: true,
1901
- }),
1902
-
1903
- // Row 6-8: Memory gauges and allocation charts
1904
- createGaugeComponent({
1905
- title: "Heap Usage",
1906
- top: 6,
1907
- left: 0,
1908
- width: 3,
1909
- height: 3,
1910
- minValue: 0,
1911
- maxValue: 100,
1912
- warningThreshold: 70,
1913
- criticalThreshold: 90,
1914
- metricConfig: {
1915
- metricName: ProfileMetricType.HeapUsage,
1916
- aggregationType: MetricsAggregationType.Avg,
1917
- },
1918
- }),
1919
- createChartComponent({
1920
- title: "Memory Allocation Size Over Time",
1921
- chartType: DashboardChartType.Area,
1922
- top: 6,
1923
- left: 3,
1924
- width: 6,
1925
- height: 3,
1926
- metricConfig: {
1927
- metricName: ProfileMetricType.MemoryAllocationSize,
1928
- aggregationType: MetricsAggregationType.Sum,
1929
- legend: "Allocation Size",
1930
- legendUnit: "bytes",
1931
- },
1932
- }),
1933
- createGaugeComponent({
1934
- title: "Thread Count",
1935
- top: 6,
1936
- left: 9,
1937
- width: 3,
1938
- height: 3,
1939
- minValue: 0,
1940
- maxValue: 500,
1941
- warningThreshold: 200,
1942
- criticalThreshold: 400,
1943
- metricConfig: {
1944
- metricName: ProfileMetricType.ThreadCount,
1945
- aggregationType: MetricsAggregationType.Avg,
1946
- },
1947
- }),
1948
-
1949
- // Row 9: Section header
1950
- createTextComponent({
1951
- text: "Allocation Trends",
1952
- top: 9,
1953
- left: 0,
1954
- width: 12,
1955
- height: 1,
1956
- isBold: true,
1957
- }),
1958
-
1959
- // Row 10-12: Allocation count trends and heap trends
1960
- createChartComponent({
1961
- title: "Memory Allocation Count Over Time",
1962
- chartType: DashboardChartType.Bar,
1963
- top: 10,
1964
- left: 0,
1965
- width: 6,
1966
- height: 3,
1967
- metricConfig: {
1968
- metricName: ProfileMetricType.MemoryAllocationCount,
1969
- aggregationType: MetricsAggregationType.Sum,
1970
- legend: "Allocations",
1971
- },
1972
- }),
1973
- createChartComponent({
1974
- title: "Heap Usage Over Time",
1975
- chartType: DashboardChartType.Area,
1976
- top: 10,
1977
- left: 6,
1978
- width: 6,
1979
- height: 3,
1980
- metricConfig: {
1981
- metricName: ProfileMetricType.HeapUsage,
1982
- aggregationType: MetricsAggregationType.Avg,
1983
- legend: "Heap",
1984
- legendUnit: "bytes",
1985
- },
1986
- }),
1987
-
1988
- // Row 13: Section header
1989
- createTextComponent({
1990
- text: "Runtime & Concurrency",
1991
- top: 13,
1992
- left: 0,
1993
- width: 12,
1994
- height: 1,
1995
- isBold: true,
1996
- }),
1997
-
1998
- // Row 14-16: Wall clock, goroutines/threads, sample rate
1999
- createChartComponent({
2000
- title: "Wall Clock Duration Over Time",
2001
- chartType: DashboardChartType.Line,
2002
- top: 14,
2003
- left: 0,
2004
- width: 6,
2005
- height: 3,
2006
- metricConfig: {
2007
- metricName: ProfileMetricType.WallClockDuration,
2008
- aggregationType: MetricsAggregationType.Avg,
2009
- legend: "Wall Clock",
2010
- legendUnit: "ms",
2011
- },
2012
- }),
2013
- createChartComponent({
2014
- title: "Goroutine / Thread Count Over Time",
2015
- chartType: DashboardChartType.StackedArea,
2016
- top: 14,
2017
- left: 6,
2018
- width: 6,
2019
- height: 3,
2020
- metricConfig: {
2021
- metricName: ProfileMetricType.GoroutineCount,
2022
- aggregationType: MetricsAggregationType.Avg,
2023
- legend: "Goroutines / Threads",
2024
- },
2025
- }),
2026
-
2027
- // Row 17: Section header
2028
- createTextComponent({
2029
- text: "Hot Functions",
2030
- top: 17,
2031
- left: 0,
2032
- width: 12,
2033
- height: 1,
2034
- isBold: true,
2035
- }),
2036
-
2037
- // Row 18-20: Top functions tables
2038
- createTableComponent({
2039
- title: "Top Functions by CPU Time",
2040
- top: 18,
2041
- left: 0,
2042
- width: 6,
2043
- height: 3,
2044
- metricConfig: {
2045
- metricName: ProfileMetricType.TopFunctionCpuTime,
2046
- aggregationType: MetricsAggregationType.Max,
2047
- },
2048
- }),
2049
- createTableComponent({
2050
- title: "Top Functions by Allocations",
2051
- top: 18,
2052
- left: 6,
2053
- width: 6,
2054
- height: 3,
2055
- metricConfig: {
2056
- metricName: ProfileMetricType.TopFunctionAllocations,
2057
- aggregationType: MetricsAggregationType.Max,
2058
- },
2059
- }),
2060
-
2061
- // Row 21-23: Profile sample rate and logs
2062
- createChartComponent({
2063
- title: "Profile Sample Rate Over Time",
2064
- chartType: DashboardChartType.Line,
2065
- top: 21,
2066
- left: 0,
2067
- width: 6,
2068
- height: 3,
2069
- metricConfig: {
2070
- metricName: ProfileMetricType.ProfileSampleRate,
2071
- aggregationType: MetricsAggregationType.Avg,
2072
- legend: "Sample Rate",
2073
- legendUnit: "samples/s",
2074
- },
2075
- }),
2076
- createLogStreamComponent({
2077
- title: "Related Logs",
2078
- top: 21,
2079
- left: 6,
2080
- width: 6,
2081
- height: 3,
2082
- }),
2083
- ];
2084
-
2085
- return {
2086
- _type: ObjectType.DashboardViewConfig,
2087
- components,
2088
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 24),
2089
- };
2090
- }
2091
-
2092
1387
  export function getTemplateConfig(
2093
1388
  type: DashboardTemplateType,
2094
1389
  ): DashboardViewConfig | null {
@@ -2101,12 +1396,6 @@ export function getTemplateConfig(
2101
1396
  return createKubernetesDashboardConfig();
2102
1397
  case DashboardTemplateType.Metrics:
2103
1398
  return createMetricsDashboardConfig();
2104
- case DashboardTemplateType.Trace:
2105
- return createTraceDashboardConfig();
2106
- case DashboardTemplateType.Exception:
2107
- return createExceptionDashboardConfig();
2108
- case DashboardTemplateType.Profiles:
2109
- return createProfilesDashboardConfig();
2110
1399
  case DashboardTemplateType.Blank:
2111
1400
  return null;
2112
1401
  }