@skyhook-io/radar-app 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@skyhook-io/radar-app",
3
- "version": "1.1.2",
3
+ "version": "1.2.0",
4
4
  "description": "Radar's full web UI as a reusable React component. Used by Radar's own binary and by external consumers like Radar Cloud.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -31,6 +31,7 @@
31
31
  "@fontsource/dm-mono": "^5.2.7",
32
32
  "@monaco-editor/react": "^4.7.0",
33
33
  "diff": "^9.0.0",
34
+ "monaco-editor": "^0.55.1",
34
35
  "react-markdown": "^10.1.0",
35
36
  "react-virtuoso": "^4.18.6",
36
37
  "remark-gfm": "^4.0.1",
package/src/App.tsx CHANGED
@@ -523,58 +523,97 @@ function AppInner() {
523
523
  // Query client for cache invalidation
524
524
  const queryClient = useQueryClient()
525
525
 
526
- // SSE-driven cache invalidation for resource lists, counts, and detail views.
527
- // Uses a 3-second throttle window: first event starts the timer, all events within the
528
- // window accumulate, then fire a single batch invalidation. This keeps max latency at 3s
529
- // while coalescing burst events (e.g., 100-pod rollout ~10 invalidations total).
530
- const pendingInvalidationRef = useRef<{
531
- kinds: Set<string>
532
- hasCountChange: boolean
526
+ // SSE-driven cache invalidation, split into two cadences so constant status
527
+ // churn on large clusters doesn't force the *expensive* queries (big resource
528
+ // lists + dashboard) to refetch every 3s. The core distinction: add/delete
529
+ // changes what rows/counts exist (membership — keep fast); update is mostly
530
+ // status/restart/health noise that can fire constantly on a 10k-pod cluster
531
+ // and shouldn't drag a giant list onto a 3s cadence.
532
+ //
533
+ // FAST (3s): detail drawer for any change (one cheap mounted object), and
534
+ // on add/delete: the list, counts, and dashboard. GitOps + cert keep
535
+ // their existing every-batch behavior — Phase 2 makes GitOps relevance-aware.
536
+ // SLOW (15s): list + dashboard for kinds with update churn. A kind that also
537
+ // had an add/delete in the window gets refreshed by both tiers (an extra
538
+ // refetch per 15s at most) — that's fine and avoids a stale-list bug:
539
+ // deduping by "was structural this window" would wrongly suppress an
540
+ // update that arrived *after* the fast structural flush already ran.
541
+ const fastInvalidationRef = useRef<{
542
+ changedKinds: Set<string> // every changed kind (any op) → detail drawer
543
+ structuralKinds: Set<string> // add/delete kinds → list membership + counts + dashboard
544
+ secretsChanged: boolean
533
545
  timer: number | null
534
- }>({ kinds: new Set(), hasCountChange: false, timer: null })
546
+ }>({ changedKinds: new Set(), structuralKinds: new Set(), secretsChanged: false, timer: null })
547
+ const slowInvalidationRef = useRef<{
548
+ updatedKinds: Set<string> // update-only churn → throttled list + dashboard
549
+ timer: number | null
550
+ }>({ updatedKinds: new Set(), timer: null })
535
551
 
536
552
  const handleK8sEvent = useCallback((event: K8sEvent) => {
537
553
  // Skip K8s Event kind — informational, not resource mutations
538
554
  if (event.kind === 'Event') return
539
555
 
540
- const pending = pendingInvalidationRef.current
541
- pending.kinds.add(kindToPlural(event.kind))
542
- if (event.operation === 'add' || event.operation === 'delete') {
543
- pending.hasCountChange = true
556
+ const kind = kindToPlural(event.kind)
557
+ const structural = event.operation === 'add' || event.operation === 'delete'
558
+
559
+ const fast = fastInvalidationRef.current
560
+ fast.changedKinds.add(kind)
561
+ if (structural) fast.structuralKinds.add(kind)
562
+ if (kind === 'secrets') fast.secretsChanged = true
563
+
564
+ const slow = slowInvalidationRef.current
565
+ if (!structural) slow.updatedKinds.add(kind)
566
+
567
+ // FAST tier — membership-sensitive + cheap, bounded 3s latency.
568
+ if (fast.timer === null) {
569
+ fast.timer = window.setTimeout(() => {
570
+ const f = fastInvalidationRef.current
571
+ for (const k of f.changedKinds) {
572
+ queryClient.invalidateQueries({ queryKey: ['resource', k] }) // open detail drawer stays live
573
+ }
574
+ for (const k of f.structuralKinds) {
575
+ queryClient.invalidateQueries({ queryKey: ['resources', k] }) // list membership changed
576
+ }
577
+ if (f.structuralKinds.size > 0) {
578
+ queryClient.invalidateQueries({ queryKey: ['resource-counts'] })
579
+ queryClient.invalidateQueries({ queryKey: ['dashboard'] })
580
+ }
581
+ if (f.secretsChanged) {
582
+ queryClient.invalidateQueries({ queryKey: ['secret-cert-expiry'] })
583
+ }
584
+ // GitOps behavior unchanged from before — refreshes every batch when a
585
+ // GitOps view is mounted (Phase 2 will make this relevance-aware).
586
+ queryClient.invalidateQueries({ queryKey: ['gitops-tree'] })
587
+ queryClient.invalidateQueries({ queryKey: ['gitops-insights'] })
588
+ fastInvalidationRef.current = { changedKinds: new Set(), structuralKinds: new Set(), secretsChanged: false, timer: null }
589
+ }, 3000)
544
590
  }
545
591
 
546
- // Start throttle window on first event (don't reset — bounded 3s latency)
547
- if (pending.timer !== null) return
548
- pending.timer = window.setTimeout(() => {
549
- for (const kind of pending.kinds) {
550
- // Invalidate list queries (['resources', kind, ...]) and detail queries (['resource', kind, ...])
551
- queryClient.invalidateQueries({ queryKey: ['resources', kind] })
552
- queryClient.invalidateQueries({ queryKey: ['resource', kind] })
553
- }
554
- if (pending.hasCountChange) {
555
- queryClient.invalidateQueries({ queryKey: ['resource-counts'] })
556
- }
557
- queryClient.invalidateQueries({ queryKey: ['dashboard'] })
558
- if (pending.kinds.has('secrets')) {
559
- queryClient.invalidateQueries({ queryKey: ['secret-cert-expiry'] })
560
- }
561
- // GitOps tree + insights are derived views over the same informer
562
- // cache that produced this SSE event — when *anything* changes, the
563
- // managed-resource tree and the insights pipeline can have stale
564
- // changes/events/drift. Invalidating broadly here is cheap (only the
565
- // currently-mounted GitOps view re-fetches; other views have no
566
- // matching keys) and is what makes the detail page actually live.
567
- // Without this the failure card + topology lag behind the title chips
568
- // until window focus or a manual refresh.
569
- queryClient.invalidateQueries({ queryKey: ['gitops-tree'] })
570
- queryClient.invalidateQueries({ queryKey: ['gitops-insights'] })
571
- // Reset accumulator
572
- pending.kinds = new Set()
573
- pending.hasCountChange = false
574
- pending.timer = null
575
- }, 3000)
592
+ // SLOW tier — throttle the expensive queries for status-only churn. Only
593
+ // updates schedule it; structural changes are fully handled by the fast tier.
594
+ if (!structural && slow.timer === null) {
595
+ slow.timer = window.setTimeout(() => {
596
+ const s = slowInvalidationRef.current
597
+ for (const k of s.updatedKinds) {
598
+ queryClient.invalidateQueries({ queryKey: ['resources', k] })
599
+ }
600
+ queryClient.invalidateQueries({ queryKey: ['dashboard'] }) // health reflects status updates
601
+ slowInvalidationRef.current = { updatedKinds: new Set(), timer: null }
602
+ }, 15000)
603
+ }
576
604
  }, [queryClient])
577
605
 
606
+ // Clear pending invalidation timers on unmount. Reset the refs (not just
607
+ // clearTimeout) so a same-instance remount doesn't inherit a non-null timer
608
+ // id — handleK8sEvent only schedules when timer === null, so a stale id would
609
+ // silently wedge all further SSE-driven invalidation.
610
+ useEffect(() => () => {
611
+ if (fastInvalidationRef.current.timer !== null) clearTimeout(fastInvalidationRef.current.timer)
612
+ if (slowInvalidationRef.current.timer !== null) clearTimeout(slowInvalidationRef.current.timer)
613
+ fastInvalidationRef.current = { changedKinds: new Set(), structuralKinds: new Set(), secretsChanged: false, timer: null }
614
+ slowInvalidationRef.current = { updatedKinds: new Set(), timer: null }
615
+ }, [])
616
+
578
617
  // SSE connection for real-time updates — no namespace filter for small/medium clusters (frontend filters).
579
618
  // forceNamespaceFilter is only set for large clusters that require server-side filtering.
580
619
  // Fleet mode uses 'resources' topology on the backend — filtering is client-side
@@ -590,10 +629,10 @@ function AppInner() {
590
629
  queryClient.invalidateQueries()
591
630
 
592
631
  // Cancel any pending SSE-driven invalidation — old cluster's events are irrelevant
593
- if (pendingInvalidationRef.current.timer !== null) {
594
- clearTimeout(pendingInvalidationRef.current.timer)
595
- pendingInvalidationRef.current = { kinds: new Set(), hasCountChange: false, timer: null }
596
- }
632
+ if (fastInvalidationRef.current.timer !== null) clearTimeout(fastInvalidationRef.current.timer)
633
+ if (slowInvalidationRef.current.timer !== null) clearTimeout(slowInvalidationRef.current.timer)
634
+ fastInvalidationRef.current = { changedKinds: new Set(), structuralKinds: new Set(), secretsChanged: false, timer: null }
635
+ slowInvalidationRef.current = { updatedKinds: new Set(), timer: null }
597
636
 
598
637
  // Close any open drawers/overlays — old cluster's resources don't exist on the new one
599
638
  setSelectedResource(null)
@@ -957,6 +996,7 @@ function AppInner() {
957
996
  })
958
997
 
959
998
  return {
999
+ ...displayedTopology,
960
1000
  nodes: filteredNodes,
961
1001
  edges: filteredEdges,
962
1002
  }
package/src/api/client.ts CHANGED
@@ -140,6 +140,9 @@ export interface WorkloadCount {
140
140
  export interface DashboardMetrics {
141
141
  cpu?: MetricSummary
142
142
  memory?: MetricSummary
143
+ // When false, only requests/capacity are meaningful — live usage (from
144
+ // metrics-server) is unavailable and usage fields are zero.
145
+ usageAvailable: boolean
143
146
  }
144
147
 
145
148
  export interface MetricSummary {
@@ -3026,6 +3029,9 @@ export interface DiagInformerSyncStatus {
3026
3029
  synced: boolean
3027
3030
  syncedAt?: string
3028
3031
  items: number
3032
+ lastError?: string
3033
+ lastErrorAt?: string
3034
+ forbiddenSeen?: boolean
3029
3035
  }
3030
3036
 
3031
3037
  export interface DiagCacheSyncStatus {
@@ -3042,6 +3048,31 @@ export interface DiagCacheSyncStatus {
3042
3048
  promotedKinds?: string[]
3043
3049
  }
3044
3050
 
3051
+ export interface DiagSampleWindow {
3052
+ count: number
3053
+ last: number
3054
+ min: number
3055
+ p50: number
3056
+ p95: number
3057
+ p99: number
3058
+ max: number
3059
+ }
3060
+
3061
+ export interface DiagPerfSnapshot {
3062
+ topology: {
3063
+ totalBuilds: number
3064
+ durationUs: DiagSampleWindow
3065
+ nodeCount: DiagSampleWindow
3066
+ edgeCount: DiagSampleWindow
3067
+ payloadBytes: DiagSampleWindow
3068
+ estimatedNodes: DiagSampleWindow
3069
+ }
3070
+ sse: {
3071
+ totalBroadcasts: number
3072
+ totalDrops: number
3073
+ }
3074
+ }
3075
+
3045
3076
  export interface DiagnosticsSnapshot {
3046
3077
  timestamp: string
3047
3078
  radarVersion: string
@@ -3136,6 +3167,7 @@ export interface DiagnosticsSnapshot {
3136
3167
  sse?: {
3137
3168
  connectedClients: number
3138
3169
  }
3170
+ perf?: DiagPerfSnapshot
3139
3171
  runtime?: {
3140
3172
  heapMB: number
3141
3173
  heapObjectsK: number
@@ -19,7 +19,7 @@ export function TerminalTab({ namespace, podName, containerName, containers, isA
19
19
  const response = await fetch(apiUrl(`/pods/${namespace}/${podName}/debug`), {
20
20
  method: 'POST',
21
21
  headers: { 'Content-Type': 'application/json' },
22
- body: JSON.stringify({ targetContainer, image: 'busybox:latest' }),
22
+ body: JSON.stringify({ targetContainer }),
23
23
  })
24
24
  if (!response.ok) {
25
25
  const err = await response.json().catch(() => ({ error: 'Unknown error' }))
@@ -389,12 +389,14 @@ export function ClusterHealthCard({
389
389
  <Cpu className="w-3.5 h-3.5 text-theme-text-tertiary" />
390
390
  CPU
391
391
  </div>
392
- <ResourceBar
393
- label="Used"
394
- used={formatCPUMillicores(metrics.cpu.usageMillis)}
395
- total={formatCPUMillicores(metrics.cpu.capacityMillis)}
396
- percent={metrics.cpu.usagePercent}
397
- />
392
+ {metricsServerAvailable && (
393
+ <ResourceBar
394
+ label="Used"
395
+ used={formatCPUMillicores(metrics.cpu.usageMillis)}
396
+ total={formatCPUMillicores(metrics.cpu.capacityMillis)}
397
+ percent={metrics.cpu.usagePercent}
398
+ />
399
+ )}
398
400
  <ResourceBar
399
401
  label="Requested"
400
402
  used={formatCPUMillicores(metrics.cpu.requestsMillis)}
@@ -409,12 +411,14 @@ export function ClusterHealthCard({
409
411
  <MemoryStick className="w-3.5 h-3.5 text-theme-text-tertiary" />
410
412
  Memory
411
413
  </div>
412
- <ResourceBar
413
- label="Used"
414
- used={formatMemoryMiB(metrics.memory.usageMillis)}
415
- total={formatMemoryMiB(metrics.memory.capacityMillis)}
416
- percent={metrics.memory.usagePercent}
417
- />
414
+ {metricsServerAvailable && (
415
+ <ResourceBar
416
+ label="Used"
417
+ used={formatMemoryMiB(metrics.memory.usageMillis)}
418
+ total={formatMemoryMiB(metrics.memory.capacityMillis)}
419
+ percent={metrics.memory.usagePercent}
420
+ />
421
+ )}
418
422
  <ResourceBar
419
423
  label="Requested"
420
424
  used={formatMemoryMiB(metrics.memory.requestsMillis)}
@@ -423,7 +427,7 @@ export function ClusterHealthCard({
423
427
  />
424
428
  </div>
425
429
  )}
426
- {!metrics?.cpu && !metrics?.memory && (
430
+ {!metricsServerAvailable && (
427
431
  <MetricsUnavailableHint platform={cluster.platform} metricsServerAvailable={metricsServerAvailable} />
428
432
  )}
429
433
  </div>
@@ -306,11 +306,13 @@ export function MCPSetupDialog({ open, onClose, mcpUrl }: MCPSetupDialogProps) {
306
306
  { name: 'kind', required: true, desc: 'resource kind, e.g. pods, deployments, services' },
307
307
  { name: 'namespace', required: false, desc: 'filter to a specific namespace' },
308
308
  ]},
309
- { name: 'get_resource', desc: 'Get detailed information about a single Kubernetes resource. Returns minified spec, status, and metadata. Optionally include related context (events, relationships, metrics, logs) to avoid extra tool calls.', params: [
309
+ { name: 'get_resource', desc: 'Get a single Kubernetes resource: minified spec/status/metadata plus default-on resourceContext (managedBy, exposes, selectedBy, uses, runsOn, issue/audit rollups). Optionally include heavier sidecars (events, metrics, logs).', params: [
310
310
  { name: 'kind', required: true, desc: 'resource kind, e.g. pod, deployment, service' },
311
- { name: 'namespace', required: true, desc: 'resource namespace' },
311
+ { name: 'namespace', required: false, desc: 'omit for cluster-scoped kinds (Node, ClusterRole, IngressClass, etc.)' },
312
312
  { name: 'name', required: true, desc: 'resource name' },
313
- { name: 'include', required: false, desc: 'events, relationships, metrics, logs' },
313
+ { name: 'group', required: false, desc: 'API group when the kind is ambiguous (e.g. serving.knative.dev for Knative Service vs core Service)' },
314
+ { name: 'include', required: false, desc: 'events, metrics, logs' },
315
+ { name: 'context', required: false, desc: 'resourceContext tier: basic (default) or none (bare minified)' },
314
316
  ]},
315
317
  { name: 'get_topology', desc: 'Get the topology graph showing relationships between Kubernetes resources. Returns nodes and edges representing Deployments, Services, Ingresses, Pods, etc. Use \'traffic\' view for network flow or \'resources\' view for ownership hierarchy. Use \'summary\' format for LLM-friendly text descriptions.', params: [
316
318
  { name: 'namespace', required: false, desc: 'filter to a specific namespace' },
@@ -1,4 +1,5 @@
1
1
  import { useState, useMemo } from 'react'
2
+ import { Virtuoso } from 'react-virtuoso'
2
3
  import type { TrafficFlow } from '../../types'
3
4
  import { clsx } from 'clsx'
4
5
  import { ChevronDown, ChevronUp, ShieldCheck } from 'lucide-react'
@@ -141,14 +142,18 @@ export function TrafficFlowList({ flows }: TrafficFlowListProps) {
141
142
  <span className="text-right">Verdict</span>
142
143
  </div>
143
144
 
144
- {/* Flow rows */}
145
- <div className="flex-1 overflow-y-auto">
146
- {sorted.length === 0 ? (
147
- <div className="flex items-center justify-center h-32 text-sm text-theme-text-tertiary">
148
- {search ? 'No flows match the search' : 'No flows to display'}
149
- </div>
150
- ) : (
151
- sorted.map((flow, i) => {
145
+ {/* Flow rows — virtualized so tens of thousands of Hubble/Cilium flows
146
+ don't all become DOM. Virtuoso measures variable row heights, so the
147
+ expand/collapse panel still works. */}
148
+ {sorted.length === 0 ? (
149
+ <div className="flex-1 flex items-center justify-center text-sm text-theme-text-tertiary">
150
+ {search ? 'No flows match the search' : 'No flows to display'}
151
+ </div>
152
+ ) : (
153
+ <Virtuoso
154
+ className="flex-1"
155
+ data={sorted}
156
+ itemContent={(i, flow) => {
152
157
  const isExpanded = expandedIdx === i
153
158
  const isHTTP = flow.l7Protocol === 'HTTP'
154
159
  const isDNS = flow.l7Protocol === 'DNS'
@@ -316,9 +321,9 @@ export function TrafficFlowList({ flows }: TrafficFlowListProps) {
316
321
  )}
317
322
  </div>
318
323
  )
319
- })
320
- )}
321
- </div>
324
+ }}
325
+ />
326
+ )}
322
327
 
323
328
  {/* Footer */}
324
329
  <div className="px-3 py-1.5 border-t border-theme-border text-[10px] text-theme-text-tertiary">
@@ -1264,9 +1264,13 @@ export function TrafficGraph({ flows, hotPathThreshold = 0, showNamespaceGroups
1264
1264
  try {
1265
1265
  const layoutResult = await elk.layout(elkGraph)
1266
1266
 
1267
+ // Index ELK's positioned children by id once — a .find() per node here is
1268
+ // O(nodes²) and bites on dense traffic graphs.
1269
+ const elkPositions = new Map((layoutResult.children ?? []).map(n => [n.id, n]))
1270
+
1267
1271
  // Apply positions from ELK to nodes
1268
1272
  let positionedNodes = rawNodes.map(node => {
1269
- const elkNode = layoutResult.children?.find(n => n.id === node.id)
1273
+ const elkNode = elkPositions.get(node.id)
1270
1274
  return {
1271
1275
  ...node,
1272
1276
  position: {
@@ -4,7 +4,8 @@ import { clsx } from 'clsx'
4
4
  import { TRANSITION_BACKDROP, TRANSITION_PANEL } from '../../utils/animation'
5
5
  import { openExternal } from '../../utils/navigation'
6
6
  import { useDiagnostics } from '../../api/client'
7
- import type { DiagnosticsSnapshot, DiagMetricsSourceHealth, DiagDropRecord, DiagErrorEntry, DiagCacheSyncStatus, DiagInformerSyncStatus, DiagSyncPhase } from '../../api/client'
7
+ import type { DiagnosticsSnapshot, DiagMetricsSourceHealth, DiagDropRecord, DiagErrorEntry, DiagCacheSyncStatus, DiagInformerSyncStatus, DiagSyncPhase, DiagSampleWindow } from '../../api/client'
8
+ import { getK8sUIPerfSnapshot, type K8sUIPerfSnapshot } from '@skyhook-io/k8s-ui'
8
9
 
9
10
  interface DiagnosticsOverlayProps {
10
11
  onClose: () => void
@@ -31,9 +32,10 @@ export function DiagnosticsOverlay({ onClose, isOpen = true }: DiagnosticsOverla
31
32
 
32
33
  const copyToClipboard = useCallback(async (type: 'json' | 'formatted') => {
33
34
  if (!data) return
35
+ const frontendPerf = getK8sUIPerfSnapshot()
34
36
  const text = type === 'json'
35
- ? JSON.stringify(data, null, 2)
36
- : formatForGitHub(data)
37
+ ? JSON.stringify({ ...data, frontendPerf }, null, 2)
38
+ : formatForGitHub(data, frontendPerf)
37
39
  try {
38
40
  await navigator.clipboard.writeText(text)
39
41
  setCopied(type)
@@ -46,7 +48,7 @@ export function DiagnosticsOverlay({ onClose, isOpen = true }: DiagnosticsOverla
46
48
 
47
49
  const openBugReport = useCallback(() => {
48
50
  if (!data) return
49
- const body = formatForBugReport(data)
51
+ const body = formatForBugReport(data, getK8sUIPerfSnapshot())
50
52
  const url = `https://github.com/skyhook-io/radar/issues/new?labels=bug&body=${encodeURIComponent(body)}`
51
53
  if (url.length > 8000) {
52
54
  // URL too long for GitHub — copy diagnostics to clipboard and open blank issue
@@ -116,6 +118,7 @@ export function DiagnosticsOverlay({ onClose, isOpen = true }: DiagnosticsOverla
116
118
  <TrafficSection data={data} />
117
119
  <PermissionsSection data={data} />
118
120
  <APIDiscoverySection data={data} />
121
+ <PerfSection data={data} />
119
122
  <RuntimeSection data={data} />
120
123
  <ConfigSection data={data} />
121
124
  {data.errors && data.errors.length > 0 && (
@@ -459,6 +462,73 @@ function APIDiscoverySection({ data }: { data: DiagnosticsSnapshot }) {
459
462
  )
460
463
  }
461
464
 
465
+ function PerfSection({ data }: { data: DiagnosticsSnapshot }) {
466
+ const backend = data.perf
467
+ const frontend = getK8sUIPerfSnapshot()
468
+ if (!backend && frontend.totalLayouts === 0 && frontend.totalStructureKeyComputes === 0) return null
469
+ // Warn when SSE has dropped frames, the topology payload window's p95 exceeds
470
+ // 5 MB, or the frontend ELK layout p95 exceeds 1s — these are the load-bearing
471
+ // thresholds for "the tab is going to feel bad."
472
+ const warn =
473
+ (backend?.sse.totalDrops ?? 0) > 0 ||
474
+ (backend?.topology.payloadBytes.p95 ?? 0) > 5 * 1024 * 1024 ||
475
+ frontend.layoutMs.p95 > 1000
476
+ return (
477
+ <Section title="Performance" warn={warn}>
478
+ {backend && (
479
+ <>
480
+ <Row label="Topology Builds" value={backend.topology.totalBuilds.toLocaleString()} />
481
+ <Row label=" Duration" value={formatSampleDuration(backend.topology.durationUs)} />
482
+ <Row label=" Node Count" value={formatSampleCount(backend.topology.nodeCount)} />
483
+ <Row label=" Edge Count" value={formatSampleCount(backend.topology.edgeCount)} />
484
+ <Row label=" Payload" value={formatSampleBytes(backend.topology.payloadBytes)} warn={backend.topology.payloadBytes.p95 > 5 * 1024 * 1024} />
485
+ <Row label=" Estimated Nodes" value={formatSampleCount(backend.topology.estimatedNodes)} />
486
+ <Row label="SSE Broadcasts" value={backend.sse.totalBroadcasts.toLocaleString()} />
487
+ <Row label="SSE Drops" value={backend.sse.totalDrops.toLocaleString()} warn={backend.sse.totalDrops > 0} />
488
+ </>
489
+ )}
490
+ {(frontend.totalLayouts > 0 || frontend.totalStructureKeyComputes > 0) && (
491
+ <>
492
+ <Row label="Frontend Layouts" value={`${frontend.totalLayouts.toLocaleString()} (skipped ${frontend.totalLayoutsSkipped.toLocaleString()})`} />
493
+ <Row label=" ELK Duration" value={formatFrontendMs(frontend.layoutMs)} warn={frontend.layoutMs.p95 > 1000} />
494
+ <Row label=" Last Rendered" value={`${frontend.lastLayoutNodeCount.toLocaleString()} nodes / ${frontend.lastLayoutEdgeCount.toLocaleString()} edges`} />
495
+ <Row label="Frontend structureKey" value={`${frontend.totalStructureKeyComputes.toLocaleString()} computes`} />
496
+ <Row label=" Duration" value={formatFrontendUs(frontend.structureKeyUs)} />
497
+ </>
498
+ )}
499
+ </Section>
500
+ )
501
+ }
502
+
503
+ function formatSampleDuration(w: DiagSampleWindow): string {
504
+ if (w.count === 0) return 'no samples'
505
+ const ms = (us: number) => (us / 1000).toFixed(us < 1000 ? 2 : 1)
506
+ return `last ${ms(w.last)}ms · p50 ${ms(w.p50)} · p95 ${ms(w.p95)} · max ${ms(w.max)}ms (n=${w.count})`
507
+ }
508
+
509
+ function formatSampleCount(w: DiagSampleWindow): string {
510
+ if (w.count === 0) return 'no samples'
511
+ return `last ${w.last.toLocaleString()} · p50 ${w.p50.toLocaleString()} · p95 ${w.p95.toLocaleString()} · max ${w.max.toLocaleString()}`
512
+ }
513
+
514
+ function formatSampleBytes(w: DiagSampleWindow): string {
515
+ if (w.count === 0) return 'no samples'
516
+ const kb = (b: number) => b < 1024 * 1024 ? `${(b / 1024).toFixed(1)}KB` : `${(b / 1024 / 1024).toFixed(2)}MB`
517
+ return `last ${kb(w.last)} · p50 ${kb(w.p50)} · p95 ${kb(w.p95)} · max ${kb(w.max)}`
518
+ }
519
+
520
+ function formatFrontendMs(w: { count: number; last: number; p50: number; p95: number; max: number }): string {
521
+ if (w.count === 0) return 'no samples'
522
+ const fmt = (v: number) => v < 100 ? v.toFixed(1) : Math.round(v).toString()
523
+ return `last ${fmt(w.last)}ms · p50 ${fmt(w.p50)} · p95 ${fmt(w.p95)} · max ${fmt(w.max)}ms (n=${w.count})`
524
+ }
525
+
526
+ function formatFrontendUs(w: { count: number; last: number; p50: number; p95: number; max: number }): string {
527
+ if (w.count === 0) return 'no samples'
528
+ const fmt = (v: number) => v < 1000 ? `${v.toFixed(0)}μs` : `${(v / 1000).toFixed(2)}ms`
529
+ return `last ${fmt(w.last)} · p50 ${fmt(w.p50)} · p95 ${fmt(w.p95)} · max ${fmt(w.max)} (n=${w.count})`
530
+ }
531
+
462
532
  function RuntimeSection({ data }: { data: DiagnosticsSnapshot }) {
463
533
  if (!data.runtime) return null
464
534
  const rt = data.runtime
@@ -510,7 +580,7 @@ function CopyButton({ label, onClick, copied }: { label: string; onClick: () =>
510
580
 
511
581
  // --- GitHub-friendly formatting ---
512
582
 
513
- function formatForGitHub(data: DiagnosticsSnapshot, includeRawJson = true): string {
583
+ function formatForGitHub(data: DiagnosticsSnapshot, frontendPerf?: K8sUIPerfSnapshot, includeRawJson = true): string {
514
584
  const lines: string[] = []
515
585
  lines.push(`## Radar Diagnostics`)
516
586
  lines.push(``)
@@ -600,9 +670,26 @@ function formatForGitHub(data: DiagnosticsSnapshot, includeRawJson = true): stri
600
670
  }
601
671
  const pending = getPendingInformers(sync)
602
672
  if (pending.length > 0) {
603
- const parts = pending.map((i) => `${i.kind}(${i.deferred ? 'deferred' : 'critical'},${i.items.toLocaleString()} items)`)
673
+ const parts = pending.map((i) => {
674
+ const flags = [i.deferred ? 'deferred' : 'critical', `${i.items.toLocaleString()} items`]
675
+ if (i.forbiddenSeen) flags.push('forbidden')
676
+ if (i.lastError) flags.push(`err: ${i.lastError}`)
677
+ return `${i.kind}(${flags.join(', ')})`
678
+ })
604
679
  lines.push(`- **Pending:** ${parts.join(', ')}`)
605
680
  }
681
+ // Synced informers that have since hit a watch error or 403 — a count of 0
682
+ // from one of these is a stale/forbidden lister, not an empty cluster.
683
+ const errored = sync.informers.filter((i) => !pending.includes(i) && (i.lastError || i.forbiddenSeen))
684
+ if (errored.length > 0) {
685
+ const parts = errored.map((i) => {
686
+ const flags: string[] = []
687
+ if (i.forbiddenSeen) flags.push('forbidden')
688
+ if (i.lastError) flags.push(`err: ${i.lastError}`)
689
+ return `${i.kind}(${flags.join(', ')})`
690
+ })
691
+ lines.push(`- **Informer errors:** ${parts.join(', ')}`)
692
+ }
606
693
  }
607
694
  if (inf.watchedCRDs && inf.watchedCRDs.length > 0) {
608
695
  lines.push(`- CRDs: ${inf.watchedCRDs.join(', ')}`)
@@ -640,6 +727,37 @@ function formatForGitHub(data: DiagnosticsSnapshot, includeRawJson = true): stri
640
727
  lines.push(``)
641
728
  }
642
729
 
730
+ if (data.perf || (frontendPerf && (frontendPerf.totalLayouts > 0 || frontendPerf.totalStructureKeyComputes > 0))) {
731
+ lines.push(`### Performance`)
732
+ if (data.perf) {
733
+ const p = data.perf
734
+ const fmtMs = (us: number) => (us / 1000).toFixed(us < 1000 ? 2 : 1)
735
+ const fmtKB = (b: number) => b < 1024 * 1024 ? `${(b / 1024).toFixed(1)}KB` : `${(b / 1024 / 1024).toFixed(2)}MB`
736
+ lines.push(`- Topology Builds: ${p.topology.totalBuilds.toLocaleString()}`)
737
+ if (p.topology.durationUs.count > 0) {
738
+ lines.push(` - Duration (ms): last ${fmtMs(p.topology.durationUs.last)} · p50 ${fmtMs(p.topology.durationUs.p50)} · p95 ${fmtMs(p.topology.durationUs.p95)} · max ${fmtMs(p.topology.durationUs.max)}`)
739
+ lines.push(` - Nodes: last ${p.topology.nodeCount.last} · p95 ${p.topology.nodeCount.p95} · max ${p.topology.nodeCount.max}`)
740
+ lines.push(` - Edges: last ${p.topology.edgeCount.last} · p95 ${p.topology.edgeCount.p95} · max ${p.topology.edgeCount.max}`)
741
+ lines.push(` - Payload: last ${fmtKB(p.topology.payloadBytes.last)} · p95 ${fmtKB(p.topology.payloadBytes.p95)} · max ${fmtKB(p.topology.payloadBytes.max)}`)
742
+ lines.push(` - Estimated Nodes: last ${p.topology.estimatedNodes.last} · p95 ${p.topology.estimatedNodes.p95}`)
743
+ }
744
+ lines.push(`- SSE: ${p.sse.totalBroadcasts.toLocaleString()} broadcasts, ${p.sse.totalDrops.toLocaleString()} drops`)
745
+ }
746
+ if (frontendPerf && (frontendPerf.totalLayouts > 0 || frontendPerf.totalStructureKeyComputes > 0)) {
747
+ const fmt = (v: number) => v < 100 ? v.toFixed(1) : Math.round(v).toString()
748
+ lines.push(`- Frontend Layouts: ${frontendPerf.totalLayouts.toLocaleString()} (${frontendPerf.totalLayoutsSkipped.toLocaleString()} skipped)`)
749
+ if (frontendPerf.layoutMs.count > 0) {
750
+ lines.push(` - ELK (ms): last ${fmt(frontendPerf.layoutMs.last)} · p50 ${fmt(frontendPerf.layoutMs.p50)} · p95 ${fmt(frontendPerf.layoutMs.p95)} · max ${fmt(frontendPerf.layoutMs.max)}`)
751
+ lines.push(` - Last rendered: ${frontendPerf.lastLayoutNodeCount.toLocaleString()} nodes / ${frontendPerf.lastLayoutEdgeCount.toLocaleString()} edges`)
752
+ }
753
+ if (frontendPerf.structureKeyUs.count > 0) {
754
+ const fmtUs = (v: number) => v < 1000 ? `${Math.round(v)}μs` : `${(v / 1000).toFixed(2)}ms`
755
+ lines.push(` - structureKey: ${frontendPerf.totalStructureKeyComputes.toLocaleString()} computes · p50 ${fmtUs(frontendPerf.structureKeyUs.p50)} · p95 ${fmtUs(frontendPerf.structureKeyUs.p95)} · max ${fmtUs(frontendPerf.structureKeyUs.max)}`)
756
+ }
757
+ }
758
+ lines.push(``)
759
+ }
760
+
643
761
  if (data.runtime) {
644
762
  const rt = data.runtime
645
763
  lines.push(`### Runtime`)
@@ -683,8 +801,8 @@ function formatForGitHub(data: DiagnosticsSnapshot, includeRawJson = true): stri
683
801
  return lines.join('\n')
684
802
  }
685
803
 
686
- function formatForBugReport(data: DiagnosticsSnapshot): string {
687
- const diagnostics = formatForGitHub(data, false)
804
+ function formatForBugReport(data: DiagnosticsSnapshot, frontendPerf?: K8sUIPerfSnapshot): string {
805
+ const diagnostics = formatForGitHub(data, frontendPerf, false)
688
806
 
689
807
  const lines: string[] = []
690
808
  lines.push(`## Describe the bug`)
package/src/main.tsx CHANGED
@@ -1,5 +1,6 @@
1
1
  import React from 'react'
2
2
  import ReactDOM from 'react-dom/client'
3
+ import './monaco-setup'
3
4
  import { RadarApp } from './RadarApp'
4
5
  import { openExternal } from './utils/navigation'
5
6
  import './index.css'
@@ -0,0 +1,8 @@
1
+ // monaco-editor's package `exports` map ("./*": "./*") doesn't surface type
2
+ // declarations for deep ESM subpaths, so TS can't resolve these imports even
3
+ // though the .js/.d.ts files exist on disk. Re-export the root types for the
4
+ // editor API and declare the YAML grammar as a side-effect-only module.
5
+ declare module 'monaco-editor/esm/vs/editor/editor.api' {
6
+ export * from 'monaco-editor'
7
+ }
8
+ declare module 'monaco-editor/esm/vs/basic-languages/yaml/yaml.contribution'
@@ -0,0 +1,26 @@
1
+ // Load the Monaco editor from the bundled npm package instead of the default
2
+ // jsdelivr CDN. Without this, @monaco-editor/react fetches the editor at runtime
3
+ // over the network, so the YAML editor never loads in airgapped / offline
4
+ // deployments. Bundling makes the binary fully self-contained.
5
+ //
6
+ // Imported for side effects from main.tsx (Radar's binary entry) only — library
7
+ // consumers (e.g. Radar Hub) keep the default CDN loader unless they opt in.
8
+ //
9
+ // Import the editor API + YAML grammar directly rather than the `monaco-editor`
10
+ // barrel: the barrel pulls in the JSON/CSS/HTML/TypeScript language services,
11
+ // each of which bundles a heavy web worker (the TS one alone is ~7MB) that Radar
12
+ // never uses — it only ever edits YAML.
13
+ import * as monaco from 'monaco-editor/esm/vs/editor/editor.api'
14
+ import 'monaco-editor/esm/vs/basic-languages/yaml/yaml.contribution'
15
+ import { loader } from '@monaco-editor/react'
16
+ import EditorWorker from 'monaco-editor/esm/vs/editor/editor.worker?worker'
17
+
18
+ // YAML has no dedicated Monaco language worker — the base editor worker covers
19
+ // everything we use, so route every label to it.
20
+ ;(self as typeof self & { MonacoEnvironment?: { getWorker(): Worker } }).MonacoEnvironment = {
21
+ getWorker() {
22
+ return new EditorWorker()
23
+ },
24
+ }
25
+
26
+ loader.config({ monaco })