@kitlangton/motel 0.1.1 → 0.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -2,8 +2,14 @@
2
2
 
3
3
  ## Commands
4
4
  - Install deps: `bun install`
5
- - Run the TUI: `bun run dev` or `bun run start`
6
- - Run the local server only: `bun run server`
5
+ - Run the TUI: `bun run dev` or `bun run start` (auto-ensures a managed
6
+ OTLP daemon is running in the background so traces ingest while the TUI
7
+ is up)
8
+ - Start the background daemon only: `bun run daemon` (same as `motel start`)
9
+ - Stop the managed daemon: `bun run stop`
10
+ - Daemon status JSON: `bun run status`
11
+ - Restart daemon + relaunch TUI: `bun run restart`
12
+ - Run the local server in the foreground (no daemon, no TUI): `bun run server`
7
13
  - Run tests: `bun run test`
8
14
  - Query services via CLI: `bun run cli services`
9
15
  - Query traces via CLI: `bun run cli traces <service> [limit]`
@@ -36,6 +42,7 @@
36
42
  - `/api/ai/calls` searches AI SDK calls (streamText, generateText, etc.) with first-class filters for `model`, `provider`, `sessionId`, `functionId`, `operation`, `status`, `text` (cross-field search), and returns compact summaries with previews and token usage.
37
43
  - `/api/ai/calls/<span-id>` returns the full detail of a single AI call including complete prompt messages, response text, tool calls, timing, and correlated logs.
38
44
  - `/api/ai/stats` aggregates AI call statistics by `provider`, `model`, `functionId`, `sessionId`, or `status` with aggregations: `count`, `avg_duration`, `p95_duration`, `total_input_tokens`, `total_output_tokens`.
45
+ - `/api/facets?type=traces&field=attribute_keys&service=<svc>` lists span-attribute keys for a service, ranked by discriminating power (keys with many distinct values first). Pair with `field=attribute_values&key=<key>` to list values for a specific key. Used by the TUI `f` attribute filter.
39
46
  - `/api/docs` lists available documentation; `/api/docs/debug` and `/api/docs/effect` return the full skill content.
40
47
 
41
48
  ## Architecture
@@ -48,11 +55,16 @@
48
55
  (pane widths, body lines, viewport rows, drill-in level).
49
56
  - `src/ui/app/TraceWorkspace.tsx` renders the drill-in state machine:
50
57
  L0 (trace list), L1 (waterfall), L2 (span detail), plus the service
51
- logs side mode.
52
- - `src/ui/app/TraceListPane.tsx` wraps `TraceList` in a scrollbox with
53
- the filter bar and list header.
54
- - `src/ui/TraceList.tsx` renders trace rows (trace id, duration, span
55
- count, relative age).
58
+ logs side mode. When drilled in, the list is hidden entirely and the
59
+ detail pane(s) expand to fill.
60
+ - `src/ui/app/TraceListPane.tsx` hosts the trace list: header + optional
61
+ filter bar + virtual-windowed body (no opentui scrollbox — that had a
62
+ race with Yoga layout timing).
63
+ - `src/ui/TraceList.tsx` exports `TraceListHeader` (the `TRACES 100 · ...`
64
+ strip) and `TraceListBody` (virtual-windowed rows with mouse-wheel
65
+ scrolling). The body owns its own scrollOffset state, preserves the
66
+ selected row's visual position across auto-refresh shifts, and snaps
67
+ the window to follow selection that moves off-screen.
56
68
  - `src/ui/Waterfall.tsx` renders the waterfall timeline with a
57
69
  virtualised scroll viewport; `src/ui/waterfallNav.ts` is the pure
58
70
  collapse/expand/walk resolver (unit-tested).
@@ -116,7 +128,7 @@
116
128
  - `MOTEL_OTEL_TRACE_LIMIT`: defaults to `100`
117
129
  - `MOTEL_OTEL_LOG_LIMIT`: defaults to `80`
118
130
  - `MOTEL_OTEL_RETENTION_HOURS`: defaults to `168` (7d)
119
- - `MOTEL_OTEL_MAX_DB_SIZE_MB`: defaults to `256` (size-based retention cap)
131
+ - `MOTEL_OTEL_MAX_DB_SIZE_MB`: defaults to `1024` (size-based retention cap)
120
132
 
121
133
  ## TUI Keys
122
134
  - `?`: toggle shortcut help
@@ -133,7 +145,9 @@
133
145
  - `tab`: toggle service logs view
134
146
  - `[` / `]`: switch services
135
147
  - `s`: cycle sort mode (recent → slowest → errors)
148
+ - `t`: cycle theme (motel-default → tokyo-night → catppuccin)
136
149
  - `/`: enter filter mode (type to match on root operation name; `:error` restricts to failing traces)
150
+ - `f`: open attribute filter picker (browse span-attribute keys → values for the current service; `backspace` walks back to keys; `esc` in the trace list clears the active filter)
137
151
  - `a`: pause or resume auto-refresh
138
152
  - `r`: refresh now
139
153
  - `c`: copy setup instructions for another Effect app
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kitlangton/motel",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "A local OpenTelemetry ingest + TUI viewer for development, backed by SQLite.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -50,6 +50,7 @@
50
50
  "daemon": "bun run src/motel.ts daemon",
51
51
  "status": "bun run src/motel.ts status",
52
52
  "stop": "bun run src/motel.ts stop",
53
+ "restart": "bun run src/motel.ts restart",
53
54
  "server": "bun run src/motel.ts server",
54
55
  "mcp": "bun run src/mcp.ts",
55
56
  "test": "bun test",
package/src/App.tsx CHANGED
@@ -1,16 +1,26 @@
1
- import { RGBA, TextAttributes, type ScrollBoxRenderable } from "@opentui/core"
1
+ import { RGBA, TextAttributes } from "@opentui/core"
2
2
  import { useAtom } from "@effect/atom-react"
3
3
  import { useTerminalDimensions } from "@opentui/react"
4
- import { useCallback, useEffect, useLayoutEffect, useMemo, useRef } from "react"
5
- import { formatTimestamp, traceRowId } from "./ui/format.ts"
4
+ import { useCallback, useEffect, useMemo, useRef } from "react"
5
+ import { formatTimestamp } from "./ui/format.ts"
6
6
  import { Divider, FooterHints, HelpModal, PlainLine, SplitDivider, TextLine } from "./ui/primitives.tsx"
7
7
  import { useAppLayout } from "./ui/app/useAppLayout.ts"
8
8
  import { useTraceScreenData } from "./ui/app/useTraceScreenData.ts"
9
9
  import { TraceWorkspace } from "./ui/app/TraceWorkspace.tsx"
10
- import { noticeAtom, persistSelectedTheme, selectedThemeAtom } from "./ui/state.ts"
10
+ import {
11
+ attrPickerIndexAtom,
12
+ attrPickerInputAtom,
13
+ attrPickerModeAtom,
14
+ attrFacetStateAtom,
15
+ noticeAtom,
16
+ persistSelectedTheme,
17
+ selectedThemeAtom,
18
+ } from "./ui/state.ts"
11
19
  import { applyTheme, colors, SEPARATOR, themeLabel } from "./ui/theme.ts"
12
20
  import { getVisibleSpans } from "./ui/Waterfall.tsx"
13
21
  import { useKeyboardNav } from "./ui/useKeyboardNav.ts"
22
+ import { AttrFilterModal } from "./ui/AttrFilterModal.tsx"
23
+ import { useAttrFilterPicker } from "./ui/useAttrFilterPicker.ts"
14
24
 
15
25
  export const App = () => {
16
26
  const { width, height } = useTerminalDimensions()
@@ -36,11 +46,18 @@ export const App = () => {
36
46
  autoRefresh,
37
47
  filterMode,
38
48
  filterText,
49
+ activeAttrKey,
50
+ activeAttrValue,
39
51
  traceSort,
40
52
  selectedTraceSummary,
41
53
  selectedTrace,
42
54
  filteredTraces,
43
55
  } = useTraceScreenData()
56
+ const [pickerMode] = useAtom(attrPickerModeAtom)
57
+ const [pickerInput] = useAtom(attrPickerInputAtom)
58
+ const [pickerIndex] = useAtom(attrPickerIndexAtom)
59
+ const [attrFacets] = useAtom(attrFacetStateAtom)
60
+ useAttrFilterPicker(activeAttrKey)
44
61
 
45
62
  const layout = useAppLayout({ width, height, notice, detailView, selectedSpanIndex })
46
63
  const {
@@ -61,7 +78,6 @@ export const App = () => {
61
78
  } = layout
62
79
 
63
80
  const noticeTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
64
- const traceListScrollRef = useRef<ScrollBoxRenderable | null>(null)
65
81
 
66
82
  const flashNotice = (message: string) => {
67
83
  if (noticeTimeoutRef.current !== null) {
@@ -83,27 +99,6 @@ export const App = () => {
83
99
  persistSelectedTheme(selectedTheme)
84
100
  }, [selectedTheme])
85
101
 
86
- useLayoutEffect(() => {
87
- const box = traceListScrollRef.current
88
- const traceId = selectedTraceSummary?.traceId
89
- if (!box || !traceId) return
90
- const indexInList = filteredTraces.findIndex((trace) => trace.traceId === traceId)
91
- if (indexInList < 0) return
92
- const currentTop = box.scrollTop
93
- const viewportRows = Math.max(1, traceViewportRows)
94
- let nextTop = currentTop
95
- if (indexInList < currentTop) {
96
- nextTop = indexInList
97
- } else if (indexInList >= currentTop + viewportRows) {
98
- nextTop = indexInList - viewportRows + 1
99
- }
100
- const maxTop = Math.max(0, filteredTraces.length - viewportRows)
101
- nextTop = Math.max(0, Math.min(nextTop, maxTop))
102
- if (nextTop !== currentTop) {
103
- box.scrollTop = nextTop
104
- }
105
- }, [filteredTraces, selectedTraceIndex, selectedTraceSummary?.traceId, traceSort, traceViewportRows])
106
-
107
102
  const { spanNavActive } = useKeyboardNav({
108
103
  selectedTrace,
109
104
  filteredTraces,
@@ -117,12 +112,15 @@ export const App = () => {
117
112
 
118
113
  const headerServiceLabel = selectedTraceService ?? "none"
119
114
  const autoLabel = autoRefresh ? "● live" : "○ paused"
115
+ const attrFilterLabel = activeAttrKey && activeAttrValue
116
+ ? ` [${activeAttrKey}=${activeAttrValue.length > 20 ? `${activeAttrValue.slice(0, 19)}…` : activeAttrValue}]`
117
+ : ""
120
118
  const headerRight = traceState.fetchedAt
121
119
  ? `${autoLabel} ${formatTimestamp(traceState.fetchedAt)}`
122
120
  : traceState.status === "loading"
123
121
  ? "loading traces..."
124
122
  : ""
125
- const headerLeftLen = "MOTEL".length + SEPARATOR.length + headerServiceLabel.length
123
+ const headerLeftLen = "MOTEL".length + SEPARATOR.length + headerServiceLabel.length + attrFilterLabel.length
126
124
  const headerGap = Math.max(2, headerFooterWidth - headerLeftLen - headerRight.length)
127
125
  const visibleFooterNotice = footerNotice
128
126
 
@@ -168,6 +166,7 @@ export const App = () => {
168
166
  <span fg={colors.muted} attributes={TextAttributes.BOLD}>MOTEL</span>
169
167
  <span fg={colors.separator}>{SEPARATOR}</span>
170
168
  <span fg={colors.muted}>{headerServiceLabel}</span>
169
+ {attrFilterLabel ? <span fg={colors.accent} attributes={TextAttributes.BOLD}>{attrFilterLabel}</span> : null}
171
170
  <span fg={colors.muted}>{" ".repeat(headerGap)}</span>
172
171
  <span fg={colors.muted} attributes={TextAttributes.BOLD}>{headerRight}</span>
173
172
  </TextLine>
@@ -181,7 +180,6 @@ export const App = () => {
181
180
  filterMode={filterMode}
182
181
  filterText={filterText}
183
182
  traceListProps={traceListProps}
184
- traceListScrollRef={traceListScrollRef}
185
183
  selectedTraceService={selectedTraceService}
186
184
  serviceLogState={serviceLogState}
187
185
  selectedServiceLogIndex={selectedServiceLogIndex}
@@ -212,6 +210,18 @@ export const App = () => {
212
210
  </>
213
211
  ) : null}
214
212
  {showHelp ? <HelpModal width={width ?? 100} height={height ?? 24} autoRefresh={autoRefresh} themeLabel={themeLabel(selectedTheme)} onClose={() => setShowHelp(false)} /> : null}
213
+ {pickerMode !== "off" ? (
214
+ <AttrFilterModal
215
+ width={width ?? 100}
216
+ height={height ?? 24}
217
+ mode={pickerMode}
218
+ input={pickerInput}
219
+ selectedIndex={pickerIndex}
220
+ selectedKey={activeAttrKey}
221
+ state={attrFacets}
222
+ onClose={() => { /* handled via keyboard */ }}
223
+ />
224
+ ) : null}
215
225
  </box>
216
226
  )
217
227
  }
package/src/config.ts CHANGED
@@ -34,6 +34,6 @@ export const config = {
34
34
  traceFetchLimit: parsePositiveInt(process.env.MOTEL_OTEL_TRACE_LIMIT, 100),
35
35
  logFetchLimit: parsePositiveInt(process.env.MOTEL_OTEL_LOG_LIMIT, 80),
36
36
  retentionHours: parsePositiveInt(process.env.MOTEL_OTEL_RETENTION_HOURS, 168),
37
- maxDbSizeMb: parsePositiveInt(process.env.MOTEL_OTEL_MAX_DB_SIZE_MB, 256),
37
+ maxDbSizeMb: parsePositiveInt(process.env.MOTEL_OTEL_MAX_DB_SIZE_MB, 1024),
38
38
  },
39
39
  } as const
package/src/httpApi.ts CHANGED
@@ -310,7 +310,10 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
310
310
  Schema.annotateKey({ description: "Data source to facet: 'traces' facets span columns, 'logs' facets log columns" }),
311
311
  ),
312
312
  field: Schema.String.pipe(
313
- Schema.annotateKey({ description: "Column to facet. Traces: service, operation, status. Logs: service, severity, scope" }),
313
+ Schema.annotateKey({ description: "Column to facet. Traces: service, operation, status, attribute_keys, attribute_values. Logs: service, severity, scope. For attribute_values, also pass key=<attribute-name>." }),
314
+ ),
315
+ key: Schema.optionalKey(Schema.String).pipe(
316
+ Schema.annotateKey({ description: "Attribute key to get values for (required when field=attribute_values)." }),
314
317
  ),
315
318
  service: ServiceParam,
316
319
  lookback: LookbackParam,
@@ -320,7 +323,7 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
320
323
  error: ErrorResponse,
321
324
  })
322
325
  .annotate(OpenApi.Summary, "Get facet value counts")
323
- .annotate(OpenApi.Description, "Returns distinct values and their counts for a given field, useful for discovering what data exists before querying. For example: ?type=logs&field=severity returns the distribution of log levels."),
326
+ .annotate(OpenApi.Description, "Returns distinct values and their counts for a given field, useful for discovering what data exists before querying. Examples: ?type=logs&field=severity returns log level distribution; ?type=traces&field=attribute_keys&service=opencode lists top span attribute keys; ?type=traces&field=attribute_values&key=ai.model.id lists values seen for that key."),
324
327
 
325
328
  // AI Call endpoints
326
329
  HttpApiEndpoint.get("aiCalls", "/api/ai/calls", {
@@ -504,6 +504,7 @@ const TelemetryGroupLive = HttpApiBuilder.group(
504
504
  type,
505
505
  field,
506
506
  serviceName: url.searchParams.get("service"),
507
+ key: url.searchParams.get("key"),
507
508
  lookbackMinutes: parseLookbackMinutes(url.searchParams.get("lookback"), config.otel.traceLookbackMinutes),
508
509
  limit: parseLimit(url.searchParams.get("limit"), 20),
509
510
  }),
package/src/motel.ts CHANGED
@@ -36,6 +36,17 @@ case "stop": {
36
36
  break
37
37
  }
38
38
 
39
+ case "restart": {
40
+ // Stop any running managed daemon, then start a fresh one + launch the
41
+ // TUI. Handy during local development when you've rebuilt the server
42
+ // and want the TUI to reconnect to the new binary in one command.
43
+ await run(stopManagedDaemon)
44
+ await run(applyManagedDaemonEnv)
45
+ await run(ensureManagedDaemon)
46
+ await import("./index.js")
47
+ break
48
+ }
49
+
39
50
  case "server": {
40
51
  await run(applyManagedDaemonEnv)
41
52
  await import("./server.js")
@@ -56,6 +67,7 @@ case "-h": {
56
67
  motel daemon
57
68
  motel status
58
69
  motel stop
70
+ motel restart
59
71
  motel server
60
72
  motel mcp
61
73
  motel services
@@ -94,6 +94,7 @@ interface FacetSearch {
94
94
  readonly type: "traces" | "logs"
95
95
  readonly field: string
96
96
  readonly serviceName?: string | null
97
+ readonly key?: string | null
97
98
  readonly lookbackMinutes?: number
98
99
  readonly limit?: number
99
100
  }
@@ -162,6 +163,15 @@ const parseSummaryRow = (row: TraceSummaryRow): TraceSummaryItem => ({
162
163
  warnings: [],
163
164
  })
164
165
 
166
+ // Skip attribute facet rows whose value blob is longer than this. Prevents
167
+ // multi-MB text attrs (ai.prompt, ai.prompt.messages, etc.) from dominating
168
+ // picker-open time — SQLite skips reading those pages from disk when the
169
+ // length predicate is evaluated against the page header, taking queries over
170
+ // a 2GB database from ~1.2s down to ~370ms. Keys whose values are ALL fat
171
+ // simply don't appear in the picker, which is the desired behaviour: you'd
172
+ // never want to filter traces by exact-match on a 1MB prompt blob anyway.
173
+ const FACET_VALUE_MAX_LEN = 512
174
+
165
175
  const TRACE_SUMMARY_SELECT_SQL = `
166
176
  SELECT
167
177
  trace_id,
@@ -436,13 +446,30 @@ export const TelemetryStoreLive = Layer.effect(
436
446
  mkdirSync(dirname(config.otel.databasePath), { recursive: true })
437
447
  const db = yield* Effect.acquireRelease(
438
448
  Effect.sync(() => new Database(config.otel.databasePath, { create: true })),
439
- (db) => Effect.sync(() => db.close()),
449
+ (db) => Effect.sync(() => {
450
+ // `PRAGMA optimize` at close persists any stats SQLite gathered
451
+ // during the session, so the next process start gets an accurate
452
+ // query planner on the first query instead of a 3-second cold
453
+ // run. Cheap: it skips work unless stats have drifted.
454
+ try { db.exec(`PRAGMA optimize;`) } catch { /* nothing */ }
455
+ db.close()
456
+ }),
440
457
  )
441
458
  db.exec(`
442
459
  PRAGMA journal_mode = WAL;
443
460
  PRAGMA synchronous = NORMAL;
444
461
  PRAGMA temp_store = MEMORY;
445
462
  PRAGMA busy_timeout = 5000;
463
+ -- Bump cache above the 2MB default. 64MB fits most hot index pages
464
+ -- (trace_summaries, spans, span_attributes indexes) in RAM even on
465
+ -- multi-GB databases, cutting cold-read latency meaningfully on
466
+ -- picker / search queries that sweep the index.
467
+ PRAGMA cache_size = -65536;
468
+ -- Let SQLite memory-map the first 256MB of the file. This is a
469
+ -- cheap way to avoid read() syscalls on hot pages and lets the OS
470
+ -- page cache serve index lookups directly. Safe on macOS and Linux;
471
+ -- SQLite silently caps at actual file size for smaller DBs.
472
+ PRAGMA mmap_size = 268435456;
446
473
 
447
474
  CREATE TABLE IF NOT EXISTS spans (
448
475
  trace_id TEXT NOT NULL,
@@ -550,6 +577,24 @@ export const TelemetryStoreLive = Layer.effect(
550
577
  // Existing databases may already have the column.
551
578
  }
552
579
 
580
+ // Prime the query planner. `PRAGMA optimize` is SQLite's modern,
581
+ // lightweight stats refresh: it only re-ANALYZEs indexes whose row
582
+ // counts have drifted significantly since the last run, capped at
583
+ // `analysis_limit` iterations per index so it finishes in a
584
+ // bounded time even on large databases. Without this, queries like
585
+ // the attribute picker facet run with guessed row estimates and
586
+ // pay 3-4s on cold open instead of 400ms.
587
+ try {
588
+ db.exec(`PRAGMA analysis_limit = 1000; PRAGMA optimize;`)
589
+ // First-time databases won't have sqlite_stat1 until we run a
590
+ // real ANALYZE. Force it once if stats haven't been collected.
591
+ const hasStats = db.query(`SELECT 1 FROM sqlite_master WHERE name = 'sqlite_stat1' LIMIT 1`).get() !== null
592
+ if (!hasStats) db.exec(`ANALYZE;`)
593
+ } catch {
594
+ // ANALYZE / optimize failures are never fatal — queries still work,
595
+ // they just run with default row estimates.
596
+ }
597
+
553
598
  const insertSpan = db.query(`
554
599
  INSERT INTO spans (
555
600
  trace_id, span_id, parent_span_id, service_name, scope_name, operation_name, kind,
@@ -612,41 +657,73 @@ export const TelemetryStoreLive = Layer.effect(
612
657
  const now = yield* Clock.currentTimeMillis
613
658
 
614
659
  yield* Effect.sync(() => {
615
- let deletedData = false
616
- // Time-based retention
617
660
  const cutoff = now - config.otel.retentionHours * 60 * 60 * 1000
618
- const deletedSpans = db.query(`DELETE FROM spans WHERE start_time_ms < ?`).run(cutoff) as { changes?: number }
619
- const deletedLogs = db.query(`DELETE FROM logs WHERE timestamp_ms < ?`).run(cutoff) as { changes?: number }
620
- deletedData = (deletedSpans.changes ?? 0) > 0 || (deletedLogs.changes ?? 0) > 0
621
661
 
622
- // Size-based retention: if actual data exceeds max, delete oldest 20% of rows.
623
- // Use (page_count - freelist_count) to ignore freed-but-not-vacuumed pages;
624
- // otherwise a large freelist triggers a deletion death spiral.
662
+ // Evict at TRACE granularity so we never leave a trace half-gutted
663
+ // (previous logic deleted oldest 20% of spans, which happily sliced
664
+ // across traces and corrupted the summary rebuild). Running traces
665
+ // are protected — only `active_span_count = 0` summaries are in
666
+ // scope for eviction.
667
+ const toEvict = new Set<string>()
668
+
669
+ // Time-based: completed traces whose last span ended before cutoff.
670
+ const timeExpired = db.query(
671
+ `SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ?`,
672
+ ).all(cutoff) as readonly { trace_id: string }[]
673
+ for (const row of timeExpired) toEvict.add(row.trace_id)
674
+
675
+ // Size-based: if actual data exceeds cap, drop oldest 20% of the
676
+ // remaining completed traces. `(page_count - freelist_count)`
677
+ // ignores freed-but-not-vacuumed pages so a large freelist doesn't
678
+ // trigger a deletion death spiral.
625
679
  const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
626
680
  const freePages = (db.query(`PRAGMA freelist_count`).get() as { freelist_count: number }).freelist_count
627
681
  const pageSize = (db.query(`PRAGMA page_size`).get() as { page_size: number }).page_size
628
682
  const dbSize = (pageCount - freePages) * pageSize
629
683
  if (dbSize > maxDbSizeBytes) {
630
- const spanCount = (db.query(`SELECT COUNT(*) AS c FROM spans`).get() as { c: number }).c
631
- const logCount = (db.query(`SELECT COUNT(*) AS c FROM logs`).get() as { c: number }).c
632
- const spanCutCount = Math.max(1, Math.floor(spanCount * 0.2))
633
- const logCutCount = Math.max(1, Math.floor(logCount * 0.2))
634
- db.query(`DELETE FROM spans WHERE rowid IN (SELECT rowid FROM spans ORDER BY start_time_ms ASC LIMIT ?)`).run(spanCutCount)
635
- db.query(`DELETE FROM logs WHERE rowid IN (SELECT rowid FROM logs ORDER BY timestamp_ms ASC LIMIT ?)`).run(logCutCount)
636
- deletedData = true
684
+ const completedCount = (db.query(
685
+ `SELECT COUNT(*) AS c FROM trace_summaries WHERE active_span_count = 0`,
686
+ ).get() as { c: number }).c
687
+ const traceCutCount = Math.max(1, Math.floor(completedCount * 0.2))
688
+ const oldest = db.query(
689
+ `SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 ORDER BY started_at_ms ASC LIMIT ?`,
690
+ ).all(traceCutCount) as readonly { trace_id: string }[]
691
+ // Set.add dedupes overlap with the time-expired batch above.
692
+ for (const row of oldest) toEvict.add(row.trace_id)
637
693
  }
638
694
 
639
- if (deletedData) {
640
- db.query(`DELETE FROM span_attributes WHERE NOT EXISTS (SELECT 1 FROM spans WHERE spans.trace_id = span_attributes.trace_id AND spans.span_id = span_attributes.span_id)`).run()
641
- db.query(`DELETE FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id)`).run()
695
+ // Always prune orphan logs (no trace_id) by timestamp — they're
696
+ // not covered by trace eviction.
697
+ db.query(`DELETE FROM logs WHERE trace_id IS NULL AND timestamp_ms < ?`).run(cutoff)
698
+
699
+ if (toEvict.size === 0) return
700
+
701
+ // Batch the trace-id list so the IN placeholders stay under
702
+ // SQLite's default limit (~999). Each batch wipes every row
703
+ // reachable from those trace_ids across the cascade tables.
704
+ const traceIds = Array.from(toEvict)
705
+ const BATCH_SIZE = 500
706
+ for (let offset = 0; offset < traceIds.length; offset += BATCH_SIZE) {
707
+ const batch = traceIds.slice(offset, offset + BATCH_SIZE)
708
+ const placeholders = batch.map(() => "?").join(",")
709
+ db.query(`DELETE FROM span_attributes WHERE trace_id IN (${placeholders})`).run(...batch)
642
710
  try {
643
- db.query(`DELETE FROM span_operation_fts WHERE NOT EXISTS (SELECT 1 FROM spans WHERE spans.trace_id = span_operation_fts.trace_id AND spans.span_id = span_operation_fts.span_id)`).run()
644
- db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
711
+ db.query(`DELETE FROM span_operation_fts WHERE trace_id IN (${placeholders})`).run(...batch)
645
712
  } catch {
646
- // FTS tables may not exist.
713
+ // FTS table may not exist on old DBs.
647
714
  }
648
- db.query(`DELETE FROM trace_summaries`).run()
649
- rebuildTraceSummaries.run()
715
+ db.query(`DELETE FROM spans WHERE trace_id IN (${placeholders})`).run(...batch)
716
+ db.query(`DELETE FROM logs WHERE trace_id IN (${placeholders})`).run(...batch)
717
+ db.query(`DELETE FROM trace_summaries WHERE trace_id IN (${placeholders})`).run(...batch)
718
+ }
719
+
720
+ // Log-side orphans (log_attributes + FTS) are keyed by log.id,
721
+ // so prune what no longer has a parent log row.
722
+ db.query(`DELETE FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id)`).run()
723
+ try {
724
+ db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
725
+ } catch {
726
+ // FTS table may not exist on old DBs.
650
727
  }
651
728
  })
652
729
  })
@@ -654,6 +731,16 @@ export const TelemetryStoreLive = Layer.effect(
654
731
  // Run cleanup every 60 seconds in the background, tied to the layer's scope
655
732
  yield* Effect.forkScoped(Effect.repeat(cleanupExpired(), Schedule.spaced("60 seconds")))
656
733
 
734
+ // Periodically refresh query planner stats. `PRAGMA optimize` is a
735
+ // no-op when nothing has changed, so this is essentially free on idle
736
+ // servers and keeps facet/search planner estimates accurate as data
737
+ // grows. 15 minutes is slower than ingestion rates we care about but
738
+ // frequent enough that the attribute picker stays snappy.
739
+ const refreshPlannerStats = Effect.sync(() => {
740
+ try { db.exec(`PRAGMA optimize;`) } catch { /* ignore */ }
741
+ })
742
+ yield* Effect.forkScoped(Effect.repeat(refreshPlannerStats, Schedule.spaced("15 minutes")))
743
+
657
744
  const ingestTraces = Effect.fn("motel/TelemetryStore.ingestTraces")(function* (payload: OtlpTraceExportRequest) {
658
745
  return yield* Effect.sync(() => {
659
746
  let insertedSpans = 0
@@ -1424,6 +1511,61 @@ export const TelemetryStoreLive = Layer.effect(
1424
1511
  `).all(...(input.serviceName ? [cutoff, input.serviceName, limit] : [cutoff, limit])) as Array<{ value: string; count: number }>
1425
1512
  return rows
1426
1513
  }
1514
+ if (input.field === "attribute_keys") {
1515
+ // Count distinct traces each attribute key appears on, optionally
1516
+ // scoped to a service. Keys with many distinct values (e.g. sessionId,
1517
+ // user id, model) rank higher than keys that are constant across every
1518
+ // trace (service.name, telemetry.sdk.*) — the latter can't discriminate
1519
+ // between traces so they're useless as filters.
1520
+ //
1521
+ // Performance note: we skip rows whose value blob is larger than
1522
+ // FACET_VALUE_MAX_LEN. For opencode this hides `ai.prompt`,
1523
+ // `ai.prompt.messages`, and `ai.prompt.tools` — which are 1-6MB text
1524
+ // blobs that you'd never want to filter by exact match anyway. The
1525
+ // WHERE clause lets SQLite skip reading those pages from disk, taking
1526
+ // the picker open time from ~1.2s to ~370ms on a 2GB database.
1527
+ const params: Array<string | number> = [FACET_VALUE_MAX_LEN, cutoff]
1528
+ if (input.serviceName) params.push(input.serviceName)
1529
+ params.push(limit)
1530
+ const rows = db.query(`
1531
+ SELECT sa.key AS value,
1532
+ COUNT(DISTINCT sa.trace_id) AS count,
1533
+ COUNT(DISTINCT sa.value) AS distinct_values
1534
+ FROM span_attributes sa
1535
+ JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
1536
+ WHERE LENGTH(sa.value) < ?
1537
+ AND s.start_time_ms >= ?
1538
+ ${input.serviceName ? "AND s.service_name = ?" : ""}
1539
+ GROUP BY sa.key
1540
+ ORDER BY (CASE WHEN distinct_values = 1 THEN 1 ELSE 0 END) ASC,
1541
+ distinct_values DESC,
1542
+ count DESC,
1543
+ value ASC
1544
+ LIMIT ?
1545
+ `).all(...params) as Array<{ value: string; count: number; distinct_values: number }>
1546
+ return rows.map((row) => ({ value: row.value, count: row.count }))
1547
+ }
1548
+ if (input.field === "attribute_values") {
1549
+ if (!input.key) return [] as FacetItem[]
1550
+ // Skip multi-KB values here too — they blow up GROUP BY on big text.
1551
+ // Matches the attribute_keys pre-filter so the picker stays responsive
1552
+ // if someone hand-crafts a URL that targets a fat key.
1553
+ const params: Array<string | number> = [input.key, FACET_VALUE_MAX_LEN, cutoff]
1554
+ if (input.serviceName) params.push(input.serviceName)
1555
+ params.push(limit)
1556
+ const rows = db.query(`
1557
+ SELECT sa.value AS value, COUNT(DISTINCT sa.trace_id) AS count
1558
+ FROM span_attributes sa
1559
+ JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
1560
+ WHERE sa.key = ? AND LENGTH(sa.value) < ?
1561
+ AND s.start_time_ms >= ?
1562
+ ${input.serviceName ? "AND s.service_name = ?" : ""}
1563
+ GROUP BY sa.value
1564
+ ORDER BY count DESC, value ASC
1565
+ LIMIT ?
1566
+ `).all(...params) as Array<{ value: string; count: number }>
1567
+ return rows
1568
+ }
1427
1569
  }
1428
1570
 
1429
1571
  return [] as FacetItem[]
@@ -8,6 +8,8 @@ export class TraceQueryService extends Context.Service<
8
8
  readonly listServices: Effect.Effect<readonly string[], Error>
9
9
  readonly listRecentTraces: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceItem[], Error>
10
10
  readonly listTraceSummaries: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceSummaryItem[], Error>
11
+ readonly searchTraceSummaries: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly TraceSummaryItem[], Error>
12
+ readonly listFacets: (input: { readonly type: "traces" | "logs"; readonly field: string; readonly serviceName?: string | null; readonly key?: string | null; readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly { readonly value: string; readonly count: number }[], Error>
11
13
  readonly searchTraces: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly TraceItem[], Error>
12
14
  readonly traceStats: (input: { readonly groupBy: string; readonly agg: "count" | "avg_duration" | "p95_duration" | "error_rate"; readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly { readonly group: string; readonly value: number; readonly count: number }[], Error>
13
15
  readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
@@ -60,6 +62,8 @@ export const TraceQueryServiceLive = Layer.effect(
60
62
  listServices,
61
63
  listRecentTraces,
62
64
  listTraceSummaries,
65
+ searchTraceSummaries: store.searchTraceSummaries,
66
+ listFacets: store.listFacets,
63
67
  searchTraces: store.searchTraces,
64
68
  traceStats: store.traceStats,
65
69
  getTrace,