@oneuptime/common 10.2.15 → 10.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/Server/API/DashboardAPI.ts +0 -6
  2. package/Server/Services/DockerHostService.ts +91 -0
  3. package/Server/Services/IncidentService.ts +60 -23
  4. package/Server/Services/KubernetesClusterService.ts +92 -0
  5. package/Types/Dashboard/DashboardComponents/DashboardValueComponent.ts +15 -0
  6. package/Types/Dashboard/DashboardTemplates.ts +260 -971
  7. package/Types/Dashboard/DashboardVariable.ts +0 -8
  8. package/UI/Components/Charts/Utils/DataPoint.ts +0 -0
  9. package/Utils/Dashboard/Components/DashboardValueComponent.ts +36 -2
  10. package/Utils/ValueFormatter.ts +57 -0
  11. package/build/dist/Server/API/DashboardAPI.js +0 -3
  12. package/build/dist/Server/API/DashboardAPI.js.map +1 -1
  13. package/build/dist/Server/Services/DockerHostService.js +73 -0
  14. package/build/dist/Server/Services/DockerHostService.js.map +1 -1
  15. package/build/dist/Server/Services/IncidentService.js +55 -18
  16. package/build/dist/Server/Services/IncidentService.js.map +1 -1
  17. package/build/dist/Server/Services/KubernetesClusterService.js +74 -0
  18. package/build/dist/Server/Services/KubernetesClusterService.js.map +1 -1
  19. package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js +14 -1
  20. package/build/dist/Types/Dashboard/DashboardComponents/DashboardValueComponent.js.map +1 -1
  21. package/build/dist/Types/Dashboard/DashboardTemplates.js +240 -928
  22. package/build/dist/Types/Dashboard/DashboardTemplates.js.map +1 -1
  23. package/build/dist/UI/Components/Charts/Utils/DataPoint.js +0 -0
  24. package/build/dist/UI/Components/Charts/Utils/DataPoint.js.map +1 -1
  25. package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js +31 -1
  26. package/build/dist/Utils/Dashboard/Components/DashboardValueComponent.js.map +1 -1
  27. package/build/dist/Utils/ValueFormatter.js +51 -0
  28. package/build/dist/Utils/ValueFormatter.js.map +1 -1
  29. package/package.json +1 -1
@@ -7,10 +7,16 @@ import IconProp from "../Icon/IconProp";
7
7
  import MetricsAggregationType from "../Metrics/MetricsAggregationType";
8
8
  import IncidentMetricType from "../Incident/IncidentMetricType";
9
9
  import MonitorMetricType from "../Monitor/MonitorMetricType";
10
- import SpanMetricType from "../Span/SpanMetricType";
11
- import ExceptionMetricType from "../Exception/ExceptionMetricType";
12
- import ProfileMetricType from "../Profile/ProfileMetricType";
13
10
  import MetricDashboardMetricType from "../Metrics/MetricDashboardMetricType";
11
+ import { DashboardValueTrendDirection } from "./DashboardComponents/DashboardValueComponent";
12
+ /*
13
+ * Trace / Exception / Profiles entries are intentionally not in this
14
+ * enum: their metric catalogs (SpanMetricType, ExceptionMetricType,
15
+ * ProfileMetricType) define names that are not emitted anywhere in the
16
+ * codebase, so the templates only ever rendered empty widgets. Reach
17
+ * for the Logs / Traces / Exceptions pages directly until those metrics
18
+ * exist.
19
+ */
14
20
  export var DashboardTemplateType;
15
21
  (function (DashboardTemplateType) {
16
22
  DashboardTemplateType["Blank"] = "Blank";
@@ -18,9 +24,6 @@ export var DashboardTemplateType;
18
24
  DashboardTemplateType["Incident"] = "Incident";
19
25
  DashboardTemplateType["Kubernetes"] = "Kubernetes";
20
26
  DashboardTemplateType["Metrics"] = "Metrics";
21
- DashboardTemplateType["Trace"] = "Trace";
22
- DashboardTemplateType["Exception"] = "Exception";
23
- DashboardTemplateType["Profiles"] = "Profiles";
24
27
  })(DashboardTemplateType || (DashboardTemplateType = {}));
25
28
  export const DashboardTemplates = [
26
29
  {
@@ -32,45 +35,27 @@ export const DashboardTemplates = [
32
35
  {
33
36
  type: DashboardTemplateType.Monitor,
34
37
  name: "Monitor Dashboard",
35
- description: "Response time, uptime, error rate, throughput charts, health gauges, and logs.",
38
+ description: "Response time, uptime, status codes, CPU/memory health gauges, and breakdown table for synthetic and server monitors.",
36
39
  icon: IconProp.Heartbeat,
37
40
  },
38
41
  {
39
42
  type: DashboardTemplateType.Incident,
40
43
  name: "Incident Dashboard",
41
- description: "MTTR/MTTA gauges, incident trends, severity breakdown, duration tables, logs, and traces.",
44
+ description: "Incident count, MTTR/MTTA gauges, duration trends, severity breakdown, time-in-state, and longest-incident tables.",
42
45
  icon: IconProp.Alert,
43
46
  },
44
47
  {
45
48
  type: DashboardTemplateType.Kubernetes,
46
49
  name: "Kubernetes Dashboard",
47
- description: "CPU/memory gauges, pod and node metrics, network I/O, restart trends, and cluster logs.",
50
+ description: "Pod/node CPU and memory averages, utilization gauges, live pod and node lists, network I/O, restarts, and cluster logs.",
48
51
  icon: IconProp.Kubernetes,
49
52
  },
50
53
  {
51
54
  type: DashboardTemplateType.Metrics,
52
55
  name: "Metrics Dashboard",
53
- description: "HTTP request rates, latency percentiles, error rates, system resource usage, and custom application metrics.",
56
+ description: "HTTP request rate, latency, error rate, CPU utilization gauge, memory usage, disk and network I/O, and runtime metrics.",
54
57
  icon: IconProp.ChartBar,
55
58
  },
56
- {
57
- type: DashboardTemplateType.Trace,
58
- name: "Trace Dashboard",
59
- description: "Span throughput, latency percentiles, error rates, service health, status breakdown, and recent traces.",
60
- icon: IconProp.Activity,
61
- },
62
- {
63
- type: DashboardTemplateType.Exception,
64
- name: "Exception Dashboard",
65
- description: "Exception counts, error rates, top exception types, resolution status, affected services, and logs.",
66
- icon: IconProp.Bug,
67
- },
68
- {
69
- type: DashboardTemplateType.Profiles,
70
- name: "Profiles Dashboard",
71
- description: "CPU profiles, memory allocations, heap usage, thread counts, top functions by CPU time, and flamegraph data.",
72
- icon: IconProp.Fire,
73
- },
74
59
  ];
75
60
  function buildMetricQueryConfig(config) {
76
61
  var _a, _b;
@@ -145,6 +130,7 @@ function createValueComponent(data) {
145
130
  groupBy: undefined,
146
131
  },
147
132
  },
133
+ trendDirection: data.trendDirection,
148
134
  },
149
135
  };
150
136
  }
@@ -252,21 +238,41 @@ function createTableComponent(data) {
252
238
  },
253
239
  };
254
240
  }
255
- function createTraceListComponent(data) {
241
+ function createKubernetesPodListComponent(data) {
256
242
  var _a;
257
243
  return {
258
244
  _type: ObjectType.DashboardComponent,
259
- componentType: DashboardComponentType.TraceList,
245
+ componentType: DashboardComponentType.KubernetesPodList,
260
246
  componentId: ObjectID.generate(),
261
247
  topInDashboardUnits: data.top,
262
248
  leftInDashboardUnits: data.left,
263
249
  widthInDashboardUnits: data.width,
264
250
  heightInDashboardUnits: data.height,
265
251
  minHeightInDashboardUnits: 3,
266
- minWidthInDashboardUnits: 6,
252
+ minWidthInDashboardUnits: 4,
253
+ arguments: {
254
+ title: data.title,
255
+ maxRows: (_a = data.maxRows) !== null && _a !== void 0 ? _a : 20,
256
+ podPhases: data.podPhases,
257
+ },
258
+ };
259
+ }
260
+ function createKubernetesNodeListComponent(data) {
261
+ var _a;
262
+ return {
263
+ _type: ObjectType.DashboardComponent,
264
+ componentType: DashboardComponentType.KubernetesNodeList,
265
+ componentId: ObjectID.generate(),
266
+ topInDashboardUnits: data.top,
267
+ leftInDashboardUnits: data.left,
268
+ widthInDashboardUnits: data.width,
269
+ heightInDashboardUnits: data.height,
270
+ minHeightInDashboardUnits: 3,
271
+ minWidthInDashboardUnits: 4,
267
272
  arguments: {
268
273
  title: data.title,
269
274
  maxRows: (_a = data.maxRows) !== null && _a !== void 0 ? _a : 20,
275
+ readinessFilter: data.readinessFilter,
270
276
  },
271
277
  };
272
278
  }
@@ -293,20 +299,35 @@ function createMonitorDashboardConfig() {
293
299
  aggregationType: MetricsAggregationType.Avg,
294
300
  legendUnit: "ms",
295
301
  },
302
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
296
303
  }),
304
+ /*
305
+ * IsOnline is emitted as 0/1 with unit "" by MonitorMetricUtil, so
306
+ * `Avg` gives the uptime ratio in [0, 1] rather than a percent. We
307
+ * label the widget "Uptime (avg)" instead of "%" so the fractional
308
+ * display isn't misleading; flipping the storage to 0/100 + unit
309
+ * "%" would change criteria evaluation elsewhere in the codebase.
310
+ */
297
311
  createValueComponent({
298
- title: "Uptime %",
312
+ title: "Uptime (avg)",
299
313
  top: 1,
300
314
  left: 3,
301
315
  width: 3,
302
316
  metricConfig: {
303
317
  metricName: MonitorMetricType.IsOnline,
304
318
  aggregationType: MetricsAggregationType.Avg,
305
- legendUnit: "%",
306
319
  },
320
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
307
321
  }),
322
+ /*
323
+ * ResponseStatusCode is the literal HTTP status code (200, 404,
324
+ * 503, …). `Count` over it returns the total number of checks the
325
+ * monitor ran, not the error rate — the original "Error Rate" label
326
+ * was misleading. Filtering to status >= 400 would require attribute
327
+ * filters that the template helper doesn't expose, so we relabel.
328
+ */
308
329
  createValueComponent({
309
- title: "Error Rate",
330
+ title: "Total Checks",
310
331
  top: 1,
311
332
  left: 6,
312
333
  width: 3,
@@ -314,6 +335,7 @@ function createMonitorDashboardConfig() {
314
335
  metricName: MonitorMetricType.ResponseStatusCode,
315
336
  aggregationType: MetricsAggregationType.Count,
316
337
  },
338
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
317
339
  }),
318
340
  createValueComponent({
319
341
  title: "Execution Time",
@@ -325,6 +347,7 @@ function createMonitorDashboardConfig() {
325
347
  aggregationType: MetricsAggregationType.Avg,
326
348
  legendUnit: "ms",
327
349
  },
350
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
328
351
  }),
329
352
  // Row 2-4: Charts
330
353
  createChartComponent({
@@ -351,8 +374,7 @@ function createMonitorDashboardConfig() {
351
374
  metricConfig: {
352
375
  metricName: MonitorMetricType.IsOnline,
353
376
  aggregationType: MetricsAggregationType.Avg,
354
- legend: "Online Status",
355
- legendUnit: "%",
377
+ legend: "Uptime Ratio",
356
378
  },
357
379
  }),
358
380
  // Row 5: Section header
@@ -444,6 +466,21 @@ function createMonitorDashboardConfig() {
444
466
  };
445
467
  }
446
468
  function createIncidentDashboardConfig() {
469
+ /*
470
+ * Incident metrics (TimeToResolve, TimeToAcknowledge, IncidentDuration,
471
+ * TimeInState, PostmortemCompletionTime) are emitted with unit
472
+ * "seconds" by IncidentService. Templates previously passed
473
+ * `legendUnit: "min"` to relabel the chart legend, but that bypassed
474
+ * ValueFormatter's scale-aware formatting and rendered raw seconds
475
+ * with a "Minutes" suffix (e.g. a 1-hour incident showed as
476
+ * "3600 Minutes"). Gauges were authored against an implicit minute
477
+ * scale (maxValue 120, threshold 60/90) and compared raw seconds
478
+ * against minutes, so any incident over ~2 minutes pinned the gauge.
479
+ *
480
+ * We now drop the legendUnit overrides — ValueFormatter scales
481
+ * `seconds` to sec/min/hr/days based on magnitude — and reauthor the
482
+ * gauge ranges in seconds so the 0-100% sweep is meaningful.
483
+ */
447
484
  const components = [
448
485
  // Row 0: Title
449
486
  createTextComponent({
@@ -454,7 +491,7 @@ function createIncidentDashboardConfig() {
454
491
  height: 1,
455
492
  isBold: true,
456
493
  }),
457
- // Row 1: Key incident metrics
494
+ // Row 1: Key incident metrics — every one is "higher = worse".
458
495
  createValueComponent({
459
496
  title: "Incident Count",
460
497
  top: 1,
@@ -464,6 +501,7 @@ function createIncidentDashboardConfig() {
464
501
  metricName: IncidentMetricType.IncidentCount,
465
502
  aggregationType: MetricsAggregationType.Sum,
466
503
  },
504
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
467
505
  }),
468
506
  createValueComponent({
469
507
  title: "MTTR",
@@ -473,8 +511,8 @@ function createIncidentDashboardConfig() {
473
511
  metricConfig: {
474
512
  metricName: IncidentMetricType.TimeToResolve,
475
513
  aggregationType: MetricsAggregationType.Avg,
476
- legendUnit: "min",
477
514
  },
515
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
478
516
  }),
479
517
  createValueComponent({
480
518
  title: "MTTA",
@@ -484,8 +522,8 @@ function createIncidentDashboardConfig() {
484
522
  metricConfig: {
485
523
  metricName: IncidentMetricType.TimeToAcknowledge,
486
524
  aggregationType: MetricsAggregationType.Avg,
487
- legendUnit: "min",
488
525
  },
526
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
489
527
  }),
490
528
  createValueComponent({
491
529
  title: "Avg Duration",
@@ -495,8 +533,8 @@ function createIncidentDashboardConfig() {
495
533
  metricConfig: {
496
534
  metricName: IncidentMetricType.IncidentDuration,
497
535
  aggregationType: MetricsAggregationType.Avg,
498
- legendUnit: "min",
499
536
  },
537
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
500
538
  }),
501
539
  // Row 2-4: Incident trends
502
540
  createChartComponent({
@@ -523,7 +561,6 @@ function createIncidentDashboardConfig() {
523
561
  metricName: IncidentMetricType.IncidentDuration,
524
562
  aggregationType: MetricsAggregationType.Avg,
525
563
  legend: "Avg Duration",
526
- legendUnit: "min",
527
564
  },
528
565
  }),
529
566
  // Row 5: Section header
@@ -535,32 +572,37 @@ function createIncidentDashboardConfig() {
535
572
  height: 1,
536
573
  isBold: true,
537
574
  }),
538
- // Row 6-8: Gauges for MTTR/MTTA and resolution chart
575
+ /*
576
+ * Row 6-8: MTTR/MTTA gauges. Ranges and thresholds are now in
577
+ * seconds (matching the stored metric unit). Targets: MTTR full
578
+ * scale 2 hours (warn at 1 hour, critical at 1.5 hours); MTTA full
579
+ * scale 1 hour (warn at 15 min, critical at 30 min).
580
+ */
539
581
  createGaugeComponent({
540
- title: "MTTR (minutes)",
582
+ title: "MTTR",
541
583
  top: 6,
542
584
  left: 0,
543
585
  width: 3,
544
586
  height: 3,
545
587
  minValue: 0,
546
- maxValue: 120,
547
- warningThreshold: 60,
548
- criticalThreshold: 90,
588
+ maxValue: 7200,
589
+ warningThreshold: 3600,
590
+ criticalThreshold: 5400,
549
591
  metricConfig: {
550
592
  metricName: IncidentMetricType.TimeToResolve,
551
593
  aggregationType: MetricsAggregationType.Avg,
552
594
  },
553
595
  }),
554
596
  createGaugeComponent({
555
- title: "MTTA (minutes)",
597
+ title: "MTTA",
556
598
  top: 6,
557
599
  left: 3,
558
600
  width: 3,
559
601
  height: 3,
560
602
  minValue: 0,
561
- maxValue: 60,
562
- warningThreshold: 15,
563
- criticalThreshold: 30,
603
+ maxValue: 3600,
604
+ warningThreshold: 900,
605
+ criticalThreshold: 1800,
564
606
  metricConfig: {
565
607
  metricName: IncidentMetricType.TimeToAcknowledge,
566
608
  aggregationType: MetricsAggregationType.Avg,
@@ -577,7 +619,6 @@ function createIncidentDashboardConfig() {
577
619
  metricName: IncidentMetricType.TimeToResolve,
578
620
  aggregationType: MetricsAggregationType.Avg,
579
621
  legend: "MTTR",
580
- legendUnit: "min",
581
622
  },
582
623
  }),
583
624
  // Row 9: Section header
@@ -614,7 +655,6 @@ function createIncidentDashboardConfig() {
614
655
  metricName: IncidentMetricType.TimeInState,
615
656
  aggregationType: MetricsAggregationType.Avg,
616
657
  legend: "Time in State",
617
- legendUnit: "min",
618
658
  },
619
659
  }),
620
660
  // Row 13: Section header
@@ -626,7 +666,13 @@ function createIncidentDashboardConfig() {
626
666
  height: 1,
627
667
  isBold: true,
628
668
  }),
629
- // Row 14-16: Tables
669
+ /*
670
+ * Row 14-16: Operational tables. Logs / traces were removed from the
671
+ * Incident template because incident records are not log/trace
672
+ * sources — they're rows in Postgres. Surfacing unrelated cluster
673
+ * logs and trace lists alongside MTTR/MTTA was a UX miss; if a user
674
+ * wants those views they live on dedicated Trace / Log pages.
675
+ */
630
676
  createTableComponent({
631
677
  title: "Incidents by Duration",
632
678
  top: 14,
@@ -649,29 +695,40 @@ function createIncidentDashboardConfig() {
649
695
  aggregationType: MetricsAggregationType.Avg,
650
696
  },
651
697
  }),
652
- // Row 17-19: Logs and traces
653
- createLogStreamComponent({
654
- title: "Recent Incident Logs",
655
- top: 17,
656
- left: 0,
657
- width: 6,
658
- height: 3,
659
- }),
660
- createTraceListComponent({
661
- title: "Recent Traces",
662
- top: 17,
663
- left: 6,
664
- width: 6,
665
- height: 3,
666
- }),
667
698
  ];
668
699
  return {
669
700
  _type: ObjectType.DashboardViewConfig,
670
701
  components,
671
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 20),
702
+ heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 17),
672
703
  };
673
704
  }
674
705
  function createKubernetesDashboardConfig() {
706
+ /*
707
+ * Layout notes:
708
+ *
709
+ * - "Pod Count" / "Node Ready" used to be Value widgets over k8s.pod.phase
710
+ * / k8s.node.condition_ready with `Sum` aggregation. Those metrics are
711
+ * per-resource gauges that re-emit `1` on every scrape, so summing
712
+ * across the dashboard window multiplied (pods * scrapes) and produced
713
+ * numbers in the hundreds for tiny clusters. The user-visible fix is
714
+ * to use the dedicated KubernetesPodList / KubernetesNodeList widgets
715
+ * below — they read the per-cluster snapshot in Postgres and show
716
+ * accurate counts in the widget header plus a live list of rows.
717
+ *
718
+ * - "Memory Utilization" used to be a 0-100 gauge over k8s.node.memory.usage,
719
+ * which is reported in bytes. A node with 8 GB of RAM produced a value
720
+ * in the 10^9 range against a 0-100 scale, so the gauge always pinned
721
+ * at the critical end with a meaningless absolute number. Without a
722
+ * first-class percent metric we replace it with a Value widget that
723
+ * renders the absolute usage via ValueFormatter (e.g. "8.3 GB").
724
+ *
725
+ * - CPU widgets use OTel's k8s.*.cpu.utilization, which the collector
726
+ * emits as a [0, 1] ratio with unit "1". DashboardValueComponent /
727
+ * DashboardGaugeComponent now scale that to a percent at render time
728
+ * when the metric name carries the `.utilization` suffix, so "0.05"
729
+ * reads as "5.00%" and gauge thresholds in the natural 0-100 scale work
730
+ * as expected.
731
+ */
675
732
  const components = [
676
733
  // Row 0: Title
677
734
  createTextComponent({
@@ -682,48 +739,54 @@ function createKubernetesDashboardConfig() {
682
739
  height: 1,
683
740
  isBold: true,
684
741
  }),
685
- // Row 1: Key cluster metrics
742
+ /*
743
+ * Row 1: Key cluster metrics — averages render with proper units via
744
+ * ValueFormatter (CPU utilization → "%", memory.usage → "MB"/"GB").
745
+ * All four are "higher = worse" (closer to capacity = bad).
746
+ */
686
747
  createValueComponent({
687
- title: "CPU Usage",
748
+ title: "Pod CPU (avg)",
688
749
  top: 1,
689
750
  left: 0,
690
751
  width: 3,
691
752
  metricConfig: {
692
753
  metricName: "k8s.pod.cpu.utilization",
693
754
  aggregationType: MetricsAggregationType.Avg,
694
- legendUnit: "%",
695
755
  },
756
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
696
757
  }),
697
758
  createValueComponent({
698
- title: "Memory Usage",
759
+ title: "Pod Memory (avg)",
699
760
  top: 1,
700
761
  left: 3,
701
762
  width: 3,
702
763
  metricConfig: {
703
764
  metricName: "k8s.pod.memory.usage",
704
765
  aggregationType: MetricsAggregationType.Avg,
705
- legendUnit: "bytes",
706
766
  },
767
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
707
768
  }),
708
769
  createValueComponent({
709
- title: "Pod Count",
770
+ title: "Node CPU (avg)",
710
771
  top: 1,
711
772
  left: 6,
712
773
  width: 3,
713
774
  metricConfig: {
714
- metricName: "k8s.pod.phase",
715
- aggregationType: MetricsAggregationType.Sum,
775
+ metricName: "k8s.node.cpu.utilization",
776
+ aggregationType: MetricsAggregationType.Avg,
716
777
  },
778
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
717
779
  }),
718
780
  createValueComponent({
719
- title: "Node Ready",
781
+ title: "Node Memory (avg)",
720
782
  top: 1,
721
783
  left: 9,
722
784
  width: 3,
723
785
  metricConfig: {
724
- metricName: "k8s.node.condition_ready",
725
- aggregationType: MetricsAggregationType.Sum,
786
+ metricName: "k8s.node.memory.usage",
787
+ aggregationType: MetricsAggregationType.Avg,
726
788
  },
789
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
727
790
  }),
728
791
  // Row 2-4: Resource usage charts
729
792
  createChartComponent({
@@ -737,7 +800,6 @@ function createKubernetesDashboardConfig() {
737
800
  metricName: "k8s.pod.cpu.utilization",
738
801
  aggregationType: MetricsAggregationType.Avg,
739
802
  legend: "CPU Utilization",
740
- legendUnit: "%",
741
803
  },
742
804
  }),
743
805
  createChartComponent({
@@ -751,91 +813,95 @@ function createKubernetesDashboardConfig() {
751
813
  metricName: "k8s.pod.memory.usage",
752
814
  aggregationType: MetricsAggregationType.Avg,
753
815
  legend: "Memory Usage",
754
- legendUnit: "bytes",
755
816
  },
756
817
  }),
757
818
  // Row 5: Section header
758
819
  createTextComponent({
759
- text: "Resource Health",
820
+ text: "Cluster Resources",
760
821
  top: 5,
761
822
  left: 0,
762
823
  width: 12,
763
824
  height: 1,
764
825
  isBold: true,
765
826
  }),
766
- // Row 6-8: Gauges and pod chart
767
- createGaugeComponent({
768
- title: "CPU Utilization",
827
+ /*
828
+ * Row 6-9: Pod and node lists query the Postgres snapshot, so the
829
+ * header shows the true current count and the body shows live rows
830
+ * (replacing the broken Sum-of-gauge Value widgets).
831
+ */
832
+ createKubernetesPodListComponent({
833
+ title: "Pods",
769
834
  top: 6,
770
835
  left: 0,
771
- width: 3,
772
- height: 3,
773
- minValue: 0,
774
- maxValue: 100,
775
- warningThreshold: 70,
776
- criticalThreshold: 90,
777
- metricConfig: {
778
- metricName: "k8s.node.cpu.utilization",
779
- aggregationType: MetricsAggregationType.Avg,
780
- },
836
+ width: 6,
837
+ height: 4,
838
+ maxRows: 25,
781
839
  }),
782
- createGaugeComponent({
783
- title: "Memory Utilization",
840
+ createKubernetesNodeListComponent({
841
+ title: "Nodes",
784
842
  top: 6,
785
- left: 3,
786
- width: 3,
843
+ left: 6,
844
+ width: 6,
845
+ height: 4,
846
+ maxRows: 25,
847
+ }),
848
+ // Row 10: Section header
849
+ createTextComponent({
850
+ text: "Resource Health",
851
+ top: 10,
852
+ left: 0,
853
+ width: 12,
854
+ height: 1,
855
+ isBold: true,
856
+ }),
857
+ /*
858
+ * Row 11-13: CPU gauge (auto-scaled from [0,1] to percent), and the
859
+ * network throughput chart. The old "Memory Utilization" gauge over
860
+ * raw bytes is gone — see top-of-function comment.
861
+ */
862
+ createGaugeComponent({
863
+ title: "Cluster CPU Utilization",
864
+ top: 11,
865
+ left: 0,
866
+ width: 4,
787
867
  height: 3,
788
868
  minValue: 0,
789
869
  maxValue: 100,
790
870
  warningThreshold: 70,
791
871
  criticalThreshold: 90,
792
872
  metricConfig: {
793
- metricName: "k8s.node.memory.usage",
873
+ metricName: "k8s.node.cpu.utilization",
794
874
  aggregationType: MetricsAggregationType.Avg,
795
875
  },
796
876
  }),
797
877
  createChartComponent({
798
- title: "Pod Count Over Time",
799
- chartType: DashboardChartType.StackedArea,
800
- top: 6,
801
- left: 6,
802
- width: 6,
878
+ title: "Network I/O",
879
+ chartType: DashboardChartType.Area,
880
+ top: 11,
881
+ left: 4,
882
+ width: 8,
803
883
  height: 3,
804
884
  metricConfig: {
805
- metricName: "k8s.pod.phase",
885
+ metricName: "k8s.pod.network.io",
806
886
  aggregationType: MetricsAggregationType.Sum,
807
- legend: "Pods",
887
+ legend: "Network I/O",
808
888
  },
809
889
  }),
810
- // Row 9: Section header
890
+ // Row 14: Section header
811
891
  createTextComponent({
812
- text: "Workload Details",
813
- top: 9,
892
+ text: "Workload Activity",
893
+ top: 14,
814
894
  left: 0,
815
895
  width: 12,
816
896
  height: 1,
817
897
  isBold: true,
818
898
  }),
819
- // Row 10-12: Network, restarts
820
- createChartComponent({
821
- title: "Network I/O",
822
- chartType: DashboardChartType.Area,
823
- top: 10,
824
- left: 0,
825
- width: 6,
826
- height: 3,
827
- metricConfig: {
828
- metricName: "k8s.pod.network.io",
829
- aggregationType: MetricsAggregationType.Sum,
830
- legend: "Network I/O",
831
- legendUnit: "bytes",
832
- },
833
- }),
899
+ // Row 15-17: Restarts and replicas
834
900
  createChartComponent({
835
901
  title: "Container Restarts Over Time",
836
902
  chartType: DashboardChartType.Bar,
837
- top: 10,
838
- left: 6,
903
+ top: 15,
904
+ left: 0,
839
905
  width: 6,
840
906
  height: 3,
841
907
  metricConfig: {
@@ -844,11 +910,10 @@ function createKubernetesDashboardConfig() {
844
910
  legend: "Restarts",
845
911
  },
846
912
  }),
847
- // Row 13-15: Table and logs
848
913
  createTableComponent({
849
914
  title: "Deployment Replicas",
850
- top: 13,
851
- left: 0,
915
+ top: 15,
916
+ left: 6,
852
917
  width: 6,
853
918
  height: 3,
854
919
  metricConfig: {
@@ -856,21 +921,43 @@ function createKubernetesDashboardConfig() {
856
921
  aggregationType: MetricsAggregationType.Min,
857
922
  },
858
923
  }),
924
+ // Row 18-20: Logs
859
925
  createLogStreamComponent({
860
926
  title: "Cluster Logs",
861
- top: 13,
862
- left: 6,
863
- width: 6,
927
+ top: 18,
928
+ left: 0,
929
+ width: 12,
864
930
  height: 3,
865
931
  }),
866
932
  ];
867
933
  return {
868
934
  _type: ObjectType.DashboardViewConfig,
869
935
  components,
870
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 16),
936
+ heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
871
937
  };
872
938
  }
873
939
  function createMetricsDashboardConfig() {
940
+ /*
941
+ * Layout notes:
942
+ *
943
+ * - `system.cpu.utilization` and `process.cpu.utilization` are OTel
944
+ * ratio metrics with unit "1" reported in [0, 1]. DashboardValueComponent
945
+ * / DashboardGaugeComponent scale these to a percent at render time
946
+ * (see splitFormattedValue / isFractionScale), so the 0-100 gauge sweep
947
+ * and the percent display work without any special template config.
948
+ *
949
+ * - `system.memory.usage` is reported in bytes. A previous "Memory Usage"
950
+ * gauge compared bytes (10⁹ range) against a 0-100 sweep and pinned
951
+ * critical for any sane workload. We swapped it for a Value widget that
952
+ * renders the absolute usage via ValueFormatter (e.g. "8.3 GB"), since
953
+ * there is no first-class memory-utilization percent metric in OTel's
954
+ * default system instrumentation.
955
+ *
956
+ * - We also dropped explicit `legendUnit: "bytes"/"%"/"ms"` overrides
957
+ * where they duplicated the stored MetricType unit — ValueFormatter
958
+ * already auto-scales bytes/seconds/ms and renders ratio metrics as
959
+ * percent. Keeping overrides only when they add useful aliasing.
960
+ */
874
961
  const components = [
875
962
  // Row 0: Title
876
963
  createTextComponent({
@@ -881,7 +968,11 @@ function createMetricsDashboardConfig() {
881
968
  height: 1,
882
969
  isBold: true,
883
970
  }),
884
- // Row 1: Key HTTP metrics
971
+ /*
972
+ * Row 1: Key HTTP metrics. Request volume rising is generally a
973
+ * sign of activity (good); latency, errors, and active in-flight
974
+ * requests rising signal saturation or trouble (bad).
975
+ */
885
976
  createValueComponent({
886
977
  title: "Request Rate",
887
978
  top: 1,
@@ -892,6 +983,7 @@ function createMetricsDashboardConfig() {
892
983
  aggregationType: MetricsAggregationType.Sum,
893
984
  legendUnit: "req/s",
894
985
  },
986
+ trendDirection: DashboardValueTrendDirection.HigherIsBetter,
895
987
  }),
896
988
  createValueComponent({
897
989
  title: "Avg Latency",
@@ -903,6 +995,7 @@ function createMetricsDashboardConfig() {
903
995
  aggregationType: MetricsAggregationType.Avg,
904
996
  legendUnit: "ms",
905
997
  },
998
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
906
999
  }),
907
1000
  createValueComponent({
908
1001
  title: "Error Rate",
@@ -914,6 +1007,7 @@ function createMetricsDashboardConfig() {
914
1007
  aggregationType: MetricsAggregationType.Avg,
915
1008
  legendUnit: "%",
916
1009
  },
1010
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
917
1011
  }),
918
1012
  createValueComponent({
919
1013
  title: "Active Requests",
@@ -924,6 +1018,7 @@ function createMetricsDashboardConfig() {
924
1018
  metricName: MetricDashboardMetricType.HttpActiveRequests,
925
1019
  aggregationType: MetricsAggregationType.Avg,
926
1020
  },
1021
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
927
1022
  }),
928
1023
  // Row 2-4: HTTP request charts
929
1024
  createChartComponent({
@@ -1001,7 +1096,12 @@ function createMetricsDashboardConfig() {
1001
1096
  height: 1,
1002
1097
  isBold: true,
1003
1098
  }),
1004
- // Row 10-12: System resource gauges and charts
1099
+ /*
1100
+ * Row 10-12: System resource health. CPU has a percent gauge (auto-
1101
+ * scaled from [0, 1] ratio at render time); Memory has a Value widget
1102
+ * since `system.memory.usage` is bytes (auto-formatted to MB/GB) and
1103
+ * we don't have a first-class memory-utilization percent metric.
1104
+ */
1005
1105
  createGaugeComponent({
1006
1106
  title: "CPU Utilization",
1007
1107
  top: 10,
@@ -1017,20 +1117,16 @@ function createMetricsDashboardConfig() {
1017
1117
  aggregationType: MetricsAggregationType.Avg,
1018
1118
  },
1019
1119
  }),
1020
- createGaugeComponent({
1120
+ createValueComponent({
1021
1121
  title: "Memory Usage",
1022
1122
  top: 10,
1023
1123
  left: 3,
1024
1124
  width: 3,
1025
- height: 3,
1026
- minValue: 0,
1027
- maxValue: 100,
1028
- warningThreshold: 70,
1029
- criticalThreshold: 90,
1030
1125
  metricConfig: {
1031
1126
  metricName: MetricDashboardMetricType.SystemMemoryUsage,
1032
1127
  aggregationType: MetricsAggregationType.Avg,
1033
1128
  },
1129
+ trendDirection: DashboardValueTrendDirection.HigherIsWorse,
1034
1130
  }),
1035
1131
  createChartComponent({
1036
1132
  title: "CPU Usage Over Time",
@@ -1042,8 +1138,7 @@ function createMetricsDashboardConfig() {
1042
1138
  metricConfig: {
1043
1139
  metricName: MetricDashboardMetricType.SystemCpuUtilization,
1044
1140
  aggregationType: MetricsAggregationType.Avg,
1045
- legend: "CPU %",
1046
- legendUnit: "%",
1141
+ legend: "CPU",
1047
1142
  },
1048
1143
  }),
1049
1144
  // Row 13: Section header
@@ -1148,783 +1243,6 @@ function createMetricsDashboardConfig() {
1148
1243
  heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 24),
1149
1244
  };
1150
1245
  }
1151
- function createTraceDashboardConfig() {
1152
- const components = [
1153
- // Row 0: Title
1154
- createTextComponent({
1155
- text: "Trace Dashboard",
1156
- top: 0,
1157
- left: 0,
1158
- width: 12,
1159
- height: 1,
1160
- isBold: true,
1161
- }),
1162
- // Row 1: Key trace metrics
1163
- createValueComponent({
1164
- title: "Span Count",
1165
- top: 1,
1166
- left: 0,
1167
- width: 3,
1168
- metricConfig: {
1169
- metricName: SpanMetricType.SpanCount,
1170
- aggregationType: MetricsAggregationType.Sum,
1171
- },
1172
- }),
1173
- createValueComponent({
1174
- title: "Avg Duration",
1175
- top: 1,
1176
- left: 3,
1177
- width: 3,
1178
- metricConfig: {
1179
- metricName: SpanMetricType.SpanDuration,
1180
- aggregationType: MetricsAggregationType.Avg,
1181
- legendUnit: "ms",
1182
- },
1183
- }),
1184
- createValueComponent({
1185
- title: "Error Rate",
1186
- top: 1,
1187
- left: 6,
1188
- width: 3,
1189
- metricConfig: {
1190
- metricName: SpanMetricType.SpanErrorRate,
1191
- aggregationType: MetricsAggregationType.Avg,
1192
- legendUnit: "%",
1193
- },
1194
- }),
1195
- createValueComponent({
1196
- title: "Throughput",
1197
- top: 1,
1198
- left: 9,
1199
- width: 3,
1200
- metricConfig: {
1201
- metricName: SpanMetricType.SpanThroughput,
1202
- aggregationType: MetricsAggregationType.Sum,
1203
- legendUnit: "req/s",
1204
- },
1205
- }),
1206
- // Row 2-4: Throughput and duration charts
1207
- createChartComponent({
1208
- title: "Span Throughput Over Time",
1209
- chartType: DashboardChartType.Bar,
1210
- top: 2,
1211
- left: 0,
1212
- width: 6,
1213
- height: 3,
1214
- metricConfig: {
1215
- metricName: SpanMetricType.SpanCount,
1216
- aggregationType: MetricsAggregationType.Sum,
1217
- legend: "Spans",
1218
- },
1219
- }),
1220
- createChartComponent({
1221
- title: "Avg Span Duration Over Time",
1222
- chartType: DashboardChartType.Line,
1223
- top: 2,
1224
- left: 6,
1225
- width: 6,
1226
- height: 3,
1227
- metricConfig: {
1228
- metricName: SpanMetricType.SpanDuration,
1229
- aggregationType: MetricsAggregationType.Avg,
1230
- legend: "Avg Duration",
1231
- legendUnit: "ms",
1232
- },
1233
- }),
1234
- // Row 5: Section header
1235
- createTextComponent({
1236
- text: "Latency Percentiles",
1237
- top: 5,
1238
- left: 0,
1239
- width: 12,
1240
- height: 1,
1241
- isBold: true,
1242
- }),
1243
- // Row 6: Latency percentile values
1244
- createValueComponent({
1245
- title: "P50 Latency",
1246
- top: 6,
1247
- left: 0,
1248
- width: 3,
1249
- metricConfig: {
1250
- metricName: SpanMetricType.SpanP50Duration,
1251
- aggregationType: MetricsAggregationType.Avg,
1252
- legendUnit: "ms",
1253
- },
1254
- }),
1255
- createValueComponent({
1256
- title: "P90 Latency",
1257
- top: 6,
1258
- left: 3,
1259
- width: 3,
1260
- metricConfig: {
1261
- metricName: SpanMetricType.SpanP90Duration,
1262
- aggregationType: MetricsAggregationType.Avg,
1263
- legendUnit: "ms",
1264
- },
1265
- }),
1266
- createValueComponent({
1267
- title: "P95 Latency",
1268
- top: 6,
1269
- left: 6,
1270
- width: 3,
1271
- metricConfig: {
1272
- metricName: SpanMetricType.SpanP95Duration,
1273
- aggregationType: MetricsAggregationType.Avg,
1274
- legendUnit: "ms",
1275
- },
1276
- }),
1277
- createValueComponent({
1278
- title: "P99 Latency",
1279
- top: 6,
1280
- left: 9,
1281
- width: 3,
1282
- metricConfig: {
1283
- metricName: SpanMetricType.SpanP99Duration,
1284
- aggregationType: MetricsAggregationType.Avg,
1285
- legendUnit: "ms",
1286
- },
1287
- }),
1288
- // Row 7-9: Latency percentile charts
1289
- createChartComponent({
1290
- title: "Latency Percentiles Over Time",
1291
- chartType: DashboardChartType.Line,
1292
- top: 7,
1293
- left: 0,
1294
- width: 6,
1295
- height: 3,
1296
- metricConfig: {
1297
- metricName: SpanMetricType.SpanP95Duration,
1298
- aggregationType: MetricsAggregationType.Avg,
1299
- legend: "P95 Latency",
1300
- legendUnit: "ms",
1301
- },
1302
- }),
1303
- createChartComponent({
1304
- title: "Latency Distribution",
1305
- chartType: DashboardChartType.Histogram,
1306
- top: 7,
1307
- left: 6,
1308
- width: 6,
1309
- height: 3,
1310
- metricConfig: {
1311
- metricName: SpanMetricType.SpanDuration,
1312
- aggregationType: MetricsAggregationType.Count,
1313
- legend: "Latency Distribution",
1314
- legendUnit: "ms",
1315
- },
1316
- }),
1317
- // Row 10: Section header
1318
- createTextComponent({
1319
- text: "Error Analysis",
1320
- top: 10,
1321
- left: 0,
1322
- width: 12,
1323
- height: 1,
1324
- isBold: true,
1325
- }),
1326
- // Row 11-13: Error charts and status breakdown
1327
- createGaugeComponent({
1328
- title: "Error Rate",
1329
- top: 11,
1330
- left: 0,
1331
- width: 3,
1332
- height: 3,
1333
- minValue: 0,
1334
- maxValue: 100,
1335
- warningThreshold: 5,
1336
- criticalThreshold: 15,
1337
- metricConfig: {
1338
- metricName: SpanMetricType.SpanErrorRate,
1339
- aggregationType: MetricsAggregationType.Avg,
1340
- },
1341
- }),
1342
- createChartComponent({
1343
- title: "Errors Over Time",
1344
- chartType: DashboardChartType.Area,
1345
- top: 11,
1346
- left: 3,
1347
- width: 6,
1348
- height: 3,
1349
- metricConfig: {
1350
- metricName: SpanMetricType.SpanErrorCount,
1351
- aggregationType: MetricsAggregationType.Sum,
1352
- legend: "Errors",
1353
- },
1354
- }),
1355
- createChartComponent({
1356
- title: "Span Status Breakdown",
1357
- chartType: DashboardChartType.Pie,
1358
- top: 11,
1359
- left: 9,
1360
- width: 3,
1361
- height: 3,
1362
- metricConfig: {
1363
- metricName: SpanMetricType.SpanStatusOk,
1364
- aggregationType: MetricsAggregationType.Count,
1365
- legend: "Status",
1366
- },
1367
- }),
1368
- // Row 14: Section header
1369
- createTextComponent({
1370
- text: "Trace Details",
1371
- top: 14,
1372
- left: 0,
1373
- width: 12,
1374
- height: 1,
1375
- isBold: true,
1376
- }),
1377
- // Row 15-17: Table of slowest spans and request rate
1378
- createTableComponent({
1379
- title: "Slowest Spans",
1380
- top: 15,
1381
- left: 0,
1382
- width: 6,
1383
- height: 3,
1384
- metricConfig: {
1385
- metricName: SpanMetricType.SpanDuration,
1386
- aggregationType: MetricsAggregationType.Max,
1387
- },
1388
- }),
1389
- createChartComponent({
1390
- title: "Request Rate Over Time",
1391
- chartType: DashboardChartType.StackedArea,
1392
- top: 15,
1393
- left: 6,
1394
- width: 6,
1395
- height: 3,
1396
- metricConfig: {
1397
- metricName: SpanMetricType.SpanRequestRate,
1398
- aggregationType: MetricsAggregationType.Sum,
1399
- legend: "Request Rate",
1400
- legendUnit: "req/s",
1401
- },
1402
- }),
1403
- // Row 18-20: Recent traces and logs
1404
- createTraceListComponent({
1405
- title: "Recent Traces",
1406
- top: 18,
1407
- left: 0,
1408
- width: 6,
1409
- height: 3,
1410
- }),
1411
- createLogStreamComponent({
1412
- title: "Related Logs",
1413
- top: 18,
1414
- left: 6,
1415
- width: 6,
1416
- height: 3,
1417
- }),
1418
- ];
1419
- return {
1420
- _type: ObjectType.DashboardViewConfig,
1421
- components,
1422
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
1423
- };
1424
- }
1425
- function createExceptionDashboardConfig() {
1426
- const components = [
1427
- // Row 0: Title
1428
- createTextComponent({
1429
- text: "Exception Dashboard",
1430
- top: 0,
1431
- left: 0,
1432
- width: 12,
1433
- height: 1,
1434
- isBold: true,
1435
- }),
1436
- // Row 1: Key exception metrics
1437
- createValueComponent({
1438
- title: "Total Exceptions",
1439
- top: 1,
1440
- left: 0,
1441
- width: 3,
1442
- metricConfig: {
1443
- metricName: ExceptionMetricType.ExceptionCount,
1444
- aggregationType: MetricsAggregationType.Sum,
1445
- },
1446
- }),
1447
- createValueComponent({
1448
- title: "Exception Rate",
1449
- top: 1,
1450
- left: 3,
1451
- width: 3,
1452
- metricConfig: {
1453
- metricName: ExceptionMetricType.ExceptionRate,
1454
- aggregationType: MetricsAggregationType.Avg,
1455
- legendUnit: "/min",
1456
- },
1457
- }),
1458
- createValueComponent({
1459
- title: "Unresolved",
1460
- top: 1,
1461
- left: 6,
1462
- width: 3,
1463
- metricConfig: {
1464
- metricName: ExceptionMetricType.UnresolvedExceptionCount,
1465
- aggregationType: MetricsAggregationType.Sum,
1466
- },
1467
- }),
1468
- createValueComponent({
1469
- title: "Affected Services",
1470
- top: 1,
1471
- left: 9,
1472
- width: 3,
1473
- metricConfig: {
1474
- metricName: ExceptionMetricType.ExceptionAffectedServiceCount,
1475
- aggregationType: MetricsAggregationType.Sum,
1476
- },
1477
- }),
1478
- // Row 2-4: Exception trends
1479
- createChartComponent({
1480
- title: "Exceptions Over Time",
1481
- chartType: DashboardChartType.Bar,
1482
- top: 2,
1483
- left: 0,
1484
- width: 6,
1485
- height: 3,
1486
- metricConfig: {
1487
- metricName: ExceptionMetricType.ExceptionCount,
1488
- aggregationType: MetricsAggregationType.Sum,
1489
- legend: "Exceptions",
1490
- },
1491
- }),
1492
- createChartComponent({
1493
- title: "Exception Rate Over Time",
1494
- chartType: DashboardChartType.Line,
1495
- top: 2,
1496
- left: 6,
1497
- width: 6,
1498
- height: 3,
1499
- metricConfig: {
1500
- metricName: ExceptionMetricType.ExceptionRate,
1501
- aggregationType: MetricsAggregationType.Avg,
1502
- legend: "Exception Rate",
1503
- legendUnit: "/min",
1504
- },
1505
- }),
1506
- // Row 5: Section header
1507
- createTextComponent({
1508
- text: "Exception Breakdown",
1509
- top: 5,
1510
- left: 0,
1511
- width: 12,
1512
- height: 1,
1513
- isBold: true,
1514
- }),
1515
- // Row 6-8: Exception type and service breakdown
1516
- createChartComponent({
1517
- title: "Exceptions by Type",
1518
- chartType: DashboardChartType.Pie,
1519
- top: 6,
1520
- left: 0,
1521
- width: 6,
1522
- height: 3,
1523
- metricConfig: {
1524
- metricName: ExceptionMetricType.ExceptionCountByType,
1525
- aggregationType: MetricsAggregationType.Count,
1526
- legend: "Exception Type",
1527
- },
1528
- }),
1529
- createChartComponent({
1530
- title: "Exceptions by Service",
1531
- chartType: DashboardChartType.Bar,
1532
- top: 6,
1533
- left: 6,
1534
- width: 6,
1535
- height: 3,
1536
- metricConfig: {
1537
- metricName: ExceptionMetricType.ExceptionCountByService,
1538
- aggregationType: MetricsAggregationType.Count,
1539
- legend: "Service",
1540
- },
1541
- }),
1542
- // Row 9: Section header
1543
- createTextComponent({
1544
- text: "Resolution Status",
1545
- top: 9,
1546
- left: 0,
1547
- width: 12,
1548
- height: 1,
1549
- isBold: true,
1550
- }),
1551
- // Row 10-12: Resolution gauges and resolution trends
1552
- createGaugeComponent({
1553
- title: "Unresolved Exceptions",
1554
- top: 10,
1555
- left: 0,
1556
- width: 3,
1557
- height: 3,
1558
- minValue: 0,
1559
- maxValue: 100,
1560
- warningThreshold: 25,
1561
- criticalThreshold: 50,
1562
- metricConfig: {
1563
- metricName: ExceptionMetricType.UnresolvedExceptionCount,
1564
- aggregationType: MetricsAggregationType.Sum,
1565
- },
1566
- }),
1567
- createGaugeComponent({
1568
- title: "Muted Exceptions",
1569
- top: 10,
1570
- left: 3,
1571
- width: 3,
1572
- height: 3,
1573
- minValue: 0,
1574
- maxValue: 100,
1575
- metricConfig: {
1576
- metricName: ExceptionMetricType.MutedExceptionCount,
1577
- aggregationType: MetricsAggregationType.Sum,
1578
- },
1579
- }),
1580
- createChartComponent({
1581
- title: "Resolution Status Over Time",
1582
- chartType: DashboardChartType.StackedArea,
1583
- top: 10,
1584
- left: 6,
1585
- width: 6,
1586
- height: 3,
1587
- metricConfig: {
1588
- metricName: ExceptionMetricType.ResolvedExceptionCount,
1589
- aggregationType: MetricsAggregationType.Sum,
1590
- legend: "Resolved",
1591
- },
1592
- }),
1593
- // Row 13: Section header
1594
- createTextComponent({
1595
- text: "Exception Recurrence",
1596
- top: 13,
1597
- left: 0,
1598
- width: 12,
1599
- height: 1,
1600
- isBold: true,
1601
- }),
1602
- // Row 14-16: Occurrence trends and top exceptions table
1603
- createChartComponent({
1604
- title: "Exception Occurrences Over Time",
1605
- chartType: DashboardChartType.Heatmap,
1606
- top: 14,
1607
- left: 0,
1608
- width: 6,
1609
- height: 3,
1610
- metricConfig: {
1611
- metricName: ExceptionMetricType.ExceptionOccurrenceCount,
1612
- aggregationType: MetricsAggregationType.Sum,
1613
- legend: "Occurrences",
1614
- },
1615
- }),
1616
- createTableComponent({
1617
- title: "Top Exceptions by Occurrence",
1618
- top: 14,
1619
- left: 6,
1620
- width: 6,
1621
- height: 3,
1622
- metricConfig: {
1623
- metricName: ExceptionMetricType.ExceptionOccurrenceCount,
1624
- aggregationType: MetricsAggregationType.Max,
1625
- },
1626
- }),
1627
- // Row 17: Section header
1628
- createTextComponent({
1629
- text: "Exception Details",
1630
- top: 17,
1631
- left: 0,
1632
- width: 12,
1633
- height: 1,
1634
- isBold: true,
1635
- }),
1636
- // Row 18-20: Logs and traces
1637
- createLogStreamComponent({
1638
- title: "Exception Logs",
1639
- top: 18,
1640
- left: 0,
1641
- width: 6,
1642
- height: 3,
1643
- }),
1644
- createTraceListComponent({
1645
- title: "Related Traces",
1646
- top: 18,
1647
- left: 6,
1648
- width: 6,
1649
- height: 3,
1650
- }),
1651
- ];
1652
- return {
1653
- _type: ObjectType.DashboardViewConfig,
1654
- components,
1655
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 21),
1656
- };
1657
- }
1658
- function createProfilesDashboardConfig() {
1659
- const components = [
1660
- // Row 0: Title
1661
- createTextComponent({
1662
- text: "Profiles Dashboard",
1663
- top: 0,
1664
- left: 0,
1665
- width: 12,
1666
- height: 1,
1667
- isBold: true,
1668
- }),
1669
- // Row 1: Key profile metrics
1670
- createValueComponent({
1671
- title: "Profile Count",
1672
- top: 1,
1673
- left: 0,
1674
- width: 3,
1675
- metricConfig: {
1676
- metricName: ProfileMetricType.ProfileCount,
1677
- aggregationType: MetricsAggregationType.Sum,
1678
- },
1679
- }),
1680
- createValueComponent({
1681
- title: "CPU Profile Duration",
1682
- top: 1,
1683
- left: 3,
1684
- width: 3,
1685
- metricConfig: {
1686
- metricName: ProfileMetricType.CpuProfileDuration,
1687
- aggregationType: MetricsAggregationType.Sum,
1688
- legendUnit: "ms",
1689
- },
1690
- }),
1691
- createValueComponent({
1692
- title: "Memory Allocations",
1693
- top: 1,
1694
- left: 6,
1695
- width: 3,
1696
- metricConfig: {
1697
- metricName: ProfileMetricType.MemoryAllocationCount,
1698
- aggregationType: MetricsAggregationType.Sum,
1699
- },
1700
- }),
1701
- createValueComponent({
1702
- title: "Thread Count",
1703
- top: 1,
1704
- left: 9,
1705
- width: 3,
1706
- metricConfig: {
1707
- metricName: ProfileMetricType.ThreadCount,
1708
- aggregationType: MetricsAggregationType.Avg,
1709
- },
1710
- }),
1711
- // Row 2-4: CPU profile charts
1712
- createChartComponent({
1713
- title: "CPU Profile Duration Over Time",
1714
- chartType: DashboardChartType.Line,
1715
- top: 2,
1716
- left: 0,
1717
- width: 6,
1718
- height: 3,
1719
- metricConfig: {
1720
- metricName: ProfileMetricType.CpuProfileDuration,
1721
- aggregationType: MetricsAggregationType.Avg,
1722
- legend: "CPU Duration",
1723
- legendUnit: "ms",
1724
- },
1725
- }),
1726
- createChartComponent({
1727
- title: "CPU Sample Count Over Time",
1728
- chartType: DashboardChartType.Bar,
1729
- top: 2,
1730
- left: 6,
1731
- width: 6,
1732
- height: 3,
1733
- metricConfig: {
1734
- metricName: ProfileMetricType.CpuProfileSampleCount,
1735
- aggregationType: MetricsAggregationType.Sum,
1736
- legend: "CPU Samples",
1737
- },
1738
- }),
1739
- // Row 5: Section header
1740
- createTextComponent({
1741
- text: "Memory Profiling",
1742
- top: 5,
1743
- left: 0,
1744
- width: 12,
1745
- height: 1,
1746
- isBold: true,
1747
- }),
1748
- // Row 6-8: Memory gauges and allocation charts
1749
- createGaugeComponent({
1750
- title: "Heap Usage",
1751
- top: 6,
1752
- left: 0,
1753
- width: 3,
1754
- height: 3,
1755
- minValue: 0,
1756
- maxValue: 100,
1757
- warningThreshold: 70,
1758
- criticalThreshold: 90,
1759
- metricConfig: {
1760
- metricName: ProfileMetricType.HeapUsage,
1761
- aggregationType: MetricsAggregationType.Avg,
1762
- },
1763
- }),
1764
- createChartComponent({
1765
- title: "Memory Allocation Size Over Time",
1766
- chartType: DashboardChartType.Area,
1767
- top: 6,
1768
- left: 3,
1769
- width: 6,
1770
- height: 3,
1771
- metricConfig: {
1772
- metricName: ProfileMetricType.MemoryAllocationSize,
1773
- aggregationType: MetricsAggregationType.Sum,
1774
- legend: "Allocation Size",
1775
- legendUnit: "bytes",
1776
- },
1777
- }),
1778
- createGaugeComponent({
1779
- title: "Thread Count",
1780
- top: 6,
1781
- left: 9,
1782
- width: 3,
1783
- height: 3,
1784
- minValue: 0,
1785
- maxValue: 500,
1786
- warningThreshold: 200,
1787
- criticalThreshold: 400,
1788
- metricConfig: {
1789
- metricName: ProfileMetricType.ThreadCount,
1790
- aggregationType: MetricsAggregationType.Avg,
1791
- },
1792
- }),
1793
- // Row 9: Section header
1794
- createTextComponent({
1795
- text: "Allocation Trends",
1796
- top: 9,
1797
- left: 0,
1798
- width: 12,
1799
- height: 1,
1800
- isBold: true,
1801
- }),
1802
- // Row 10-12: Allocation count trends and heap trends
1803
- createChartComponent({
1804
- title: "Memory Allocation Count Over Time",
1805
- chartType: DashboardChartType.Bar,
1806
- top: 10,
1807
- left: 0,
1808
- width: 6,
1809
- height: 3,
1810
- metricConfig: {
1811
- metricName: ProfileMetricType.MemoryAllocationCount,
1812
- aggregationType: MetricsAggregationType.Sum,
1813
- legend: "Allocations",
1814
- },
1815
- }),
1816
- createChartComponent({
1817
- title: "Heap Usage Over Time",
1818
- chartType: DashboardChartType.Area,
1819
- top: 10,
1820
- left: 6,
1821
- width: 6,
1822
- height: 3,
1823
- metricConfig: {
1824
- metricName: ProfileMetricType.HeapUsage,
1825
- aggregationType: MetricsAggregationType.Avg,
1826
- legend: "Heap",
1827
- legendUnit: "bytes",
1828
- },
1829
- }),
1830
- // Row 13: Section header
1831
- createTextComponent({
1832
- text: "Runtime & Concurrency",
1833
- top: 13,
1834
- left: 0,
1835
- width: 12,
1836
- height: 1,
1837
- isBold: true,
1838
- }),
1839
- // Row 14-16: Wall clock, goroutines/threads, sample rate
1840
- createChartComponent({
1841
- title: "Wall Clock Duration Over Time",
1842
- chartType: DashboardChartType.Line,
1843
- top: 14,
1844
- left: 0,
1845
- width: 6,
1846
- height: 3,
1847
- metricConfig: {
1848
- metricName: ProfileMetricType.WallClockDuration,
1849
- aggregationType: MetricsAggregationType.Avg,
1850
- legend: "Wall Clock",
1851
- legendUnit: "ms",
1852
- },
1853
- }),
1854
- createChartComponent({
1855
- title: "Goroutine / Thread Count Over Time",
1856
- chartType: DashboardChartType.StackedArea,
1857
- top: 14,
1858
- left: 6,
1859
- width: 6,
1860
- height: 3,
1861
- metricConfig: {
1862
- metricName: ProfileMetricType.GoroutineCount,
1863
- aggregationType: MetricsAggregationType.Avg,
1864
- legend: "Goroutines / Threads",
1865
- },
1866
- }),
1867
- // Row 17: Section header
1868
- createTextComponent({
1869
- text: "Hot Functions",
1870
- top: 17,
1871
- left: 0,
1872
- width: 12,
1873
- height: 1,
1874
- isBold: true,
1875
- }),
1876
- // Row 18-20: Top functions tables
1877
- createTableComponent({
1878
- title: "Top Functions by CPU Time",
1879
- top: 18,
1880
- left: 0,
1881
- width: 6,
1882
- height: 3,
1883
- metricConfig: {
1884
- metricName: ProfileMetricType.TopFunctionCpuTime,
1885
- aggregationType: MetricsAggregationType.Max,
1886
- },
1887
- }),
1888
- createTableComponent({
1889
- title: "Top Functions by Allocations",
1890
- top: 18,
1891
- left: 6,
1892
- width: 6,
1893
- height: 3,
1894
- metricConfig: {
1895
- metricName: ProfileMetricType.TopFunctionAllocations,
1896
- aggregationType: MetricsAggregationType.Max,
1897
- },
1898
- }),
1899
- // Row 21-23: Profile sample rate and logs
1900
- createChartComponent({
1901
- title: "Profile Sample Rate Over Time",
1902
- chartType: DashboardChartType.Line,
1903
- top: 21,
1904
- left: 0,
1905
- width: 6,
1906
- height: 3,
1907
- metricConfig: {
1908
- metricName: ProfileMetricType.ProfileSampleRate,
1909
- aggregationType: MetricsAggregationType.Avg,
1910
- legend: "Sample Rate",
1911
- legendUnit: "samples/s",
1912
- },
1913
- }),
1914
- createLogStreamComponent({
1915
- title: "Related Logs",
1916
- top: 21,
1917
- left: 6,
1918
- width: 6,
1919
- height: 3,
1920
- }),
1921
- ];
1922
- return {
1923
- _type: ObjectType.DashboardViewConfig,
1924
- components,
1925
- heightInDashboardUnits: Math.max(DashboardSize.heightInDashboardUnits, 24),
1926
- };
1927
- }
1928
1246
  export function getTemplateConfig(type) {
1929
1247
  switch (type) {
1930
1248
  case DashboardTemplateType.Monitor:
@@ -1935,12 +1253,6 @@ export function getTemplateConfig(type) {
1935
1253
  return createKubernetesDashboardConfig();
1936
1254
  case DashboardTemplateType.Metrics:
1937
1255
  return createMetricsDashboardConfig();
1938
- case DashboardTemplateType.Trace:
1939
- return createTraceDashboardConfig();
1940
- case DashboardTemplateType.Exception:
1941
- return createExceptionDashboardConfig();
1942
- case DashboardTemplateType.Profiles:
1943
- return createProfilesDashboardConfig();
1944
1256
  case DashboardTemplateType.Blank:
1945
1257
  return null;
1946
1258
  }