@kitlangton/motel 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +22 -8
- package/package.json +2 -1
- package/src/App.tsx +38 -28
- package/src/config.ts +1 -1
- package/src/httpApi.ts +5 -2
- package/src/localServer.ts +1 -0
- package/src/motel.ts +12 -0
- package/src/services/TelemetryStore.ts +166 -24
- package/src/services/TraceQueryService.ts +4 -0
- package/src/ui/AttrFilterModal.tsx +120 -0
- package/src/ui/SpanDetailPane.tsx +1 -2
- package/src/ui/TraceDetailsPane.tsx +14 -22
- package/src/ui/TraceList.tsx +166 -40
- package/src/ui/Waterfall.tsx +104 -46
- package/src/ui/app/TraceListPane.tsx +19 -14
- package/src/ui/app/TraceWorkspace.tsx +60 -31
- package/src/ui/app/useAppLayout.ts +22 -3
- package/src/ui/app/useTraceScreenData.ts +13 -2
- package/src/ui/format.ts +14 -5
- package/src/ui/primitives.tsx +3 -1
- package/src/ui/state.ts +32 -0
- package/src/ui/theme.ts +24 -19
- package/src/ui/traceDetailsWidth.repro.test.ts +115 -0
- package/src/ui/useAttrFilterPicker.ts +47 -0
- package/src/ui/useKeyboardNav.ts +212 -20
- package/src/ui/waterfallNav.test.ts +22 -7
- package/web/dist/assets/{index-BEKIiisE.js → index-DKinj-OE.js} +1 -1
- package/web/dist/index.html +1 -1
package/AGENTS.md
CHANGED
|
@@ -2,8 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
## Commands
|
|
4
4
|
- Install deps: `bun install`
|
|
5
|
-
- Run the TUI: `bun run dev` or `bun run start`
|
|
6
|
-
|
|
5
|
+
- Run the TUI: `bun run dev` or `bun run start` (auto-ensures a managed
|
|
6
|
+
OTLP daemon is running in the background so traces ingest while the TUI
|
|
7
|
+
is up)
|
|
8
|
+
- Start the background daemon only: `bun run daemon` (same as `motel start`)
|
|
9
|
+
- Stop the managed daemon: `bun run stop`
|
|
10
|
+
- Daemon status JSON: `bun run status`
|
|
11
|
+
- Restart daemon + relaunch TUI: `bun run restart`
|
|
12
|
+
- Run the local server in the foreground (no daemon, no TUI): `bun run server`
|
|
7
13
|
- Run tests: `bun run test`
|
|
8
14
|
- Query services via CLI: `bun run cli services`
|
|
9
15
|
- Query traces via CLI: `bun run cli traces <service> [limit]`
|
|
@@ -36,6 +42,7 @@
|
|
|
36
42
|
- `/api/ai/calls` searches AI SDK calls (streamText, generateText, etc.) with first-class filters for `model`, `provider`, `sessionId`, `functionId`, `operation`, `status`, `text` (cross-field search), and returns compact summaries with previews and token usage.
|
|
37
43
|
- `/api/ai/calls/<span-id>` returns the full detail of a single AI call including complete prompt messages, response text, tool calls, timing, and correlated logs.
|
|
38
44
|
- `/api/ai/stats` aggregates AI call statistics by `provider`, `model`, `functionId`, `sessionId`, or `status` with aggregations: `count`, `avg_duration`, `p95_duration`, `total_input_tokens`, `total_output_tokens`.
|
|
45
|
+
- `/api/facets?type=traces&field=attribute_keys&service=<svc>` lists span-attribute keys for a service, ranked by discriminating power (keys with many distinct values first). Pair with `field=attribute_values&key=<key>` to list values for a specific key. Used by the TUI `f` attribute filter.
|
|
39
46
|
- `/api/docs` lists available documentation; `/api/docs/debug` and `/api/docs/effect` return the full skill content.
|
|
40
47
|
|
|
41
48
|
## Architecture
|
|
@@ -48,11 +55,16 @@
|
|
|
48
55
|
(pane widths, body lines, viewport rows, drill-in level).
|
|
49
56
|
- `src/ui/app/TraceWorkspace.tsx` renders the drill-in state machine:
|
|
50
57
|
L0 (trace list), L1 (waterfall), L2 (span detail), plus the service
|
|
51
|
-
logs side mode.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
58
|
+
logs side mode. When drilled in, the list is hidden entirely and the
|
|
59
|
+
detail pane(s) expand to fill.
|
|
60
|
+
- `src/ui/app/TraceListPane.tsx` hosts the trace list: header + optional
|
|
61
|
+
filter bar + virtual-windowed body (no opentui scrollbox — that had a
|
|
62
|
+
race with Yoga layout timing).
|
|
63
|
+
- `src/ui/TraceList.tsx` exports `TraceListHeader` (the `TRACES 100 · ...`
|
|
64
|
+
strip) and `TraceListBody` (virtual-windowed rows with mouse-wheel
|
|
65
|
+
scrolling). The body owns its own scrollOffset state, preserves the
|
|
66
|
+
selected row's visual position across auto-refresh shifts, and snaps
|
|
67
|
+
the window to follow selection that moves off-screen.
|
|
56
68
|
- `src/ui/Waterfall.tsx` renders the waterfall timeline with a
|
|
57
69
|
virtualised scroll viewport; `src/ui/waterfallNav.ts` is the pure
|
|
58
70
|
collapse/expand/walk resolver (unit-tested).
|
|
@@ -116,7 +128,7 @@
|
|
|
116
128
|
- `MOTEL_OTEL_TRACE_LIMIT`: defaults to `100`
|
|
117
129
|
- `MOTEL_OTEL_LOG_LIMIT`: defaults to `80`
|
|
118
130
|
- `MOTEL_OTEL_RETENTION_HOURS`: defaults to `168` (7d)
|
|
119
|
-
- `MOTEL_OTEL_MAX_DB_SIZE_MB`: defaults to `
|
|
131
|
+
- `MOTEL_OTEL_MAX_DB_SIZE_MB`: defaults to `1024` (size-based retention cap)
|
|
120
132
|
|
|
121
133
|
## TUI Keys
|
|
122
134
|
- `?`: toggle shortcut help
|
|
@@ -133,7 +145,9 @@
|
|
|
133
145
|
- `tab`: toggle service logs view
|
|
134
146
|
- `[` / `]`: switch services
|
|
135
147
|
- `s`: cycle sort mode (recent → slowest → errors)
|
|
148
|
+
- `t`: cycle theme (motel-default → tokyo-night → catppuccin)
|
|
136
149
|
- `/`: enter filter mode (type to match on root operation name; `:error` restricts to failing traces)
|
|
150
|
+
- `f`: open attribute filter picker (browse span-attribute keys → values for the current service; `backspace` walks back to keys; `esc` in the trace list clears the active filter)
|
|
137
151
|
- `a`: pause or resume auto-refresh
|
|
138
152
|
- `r`: refresh now
|
|
139
153
|
- `c`: copy setup instructions for another Effect app
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kitlangton/motel",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "A local OpenTelemetry ingest + TUI viewer for development, backed by SQLite.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
"daemon": "bun run src/motel.ts daemon",
|
|
51
51
|
"status": "bun run src/motel.ts status",
|
|
52
52
|
"stop": "bun run src/motel.ts stop",
|
|
53
|
+
"restart": "bun run src/motel.ts restart",
|
|
53
54
|
"server": "bun run src/motel.ts server",
|
|
54
55
|
"mcp": "bun run src/mcp.ts",
|
|
55
56
|
"test": "bun test",
|
package/src/App.tsx
CHANGED
|
@@ -1,16 +1,26 @@
|
|
|
1
|
-
import { RGBA, TextAttributes
|
|
1
|
+
import { RGBA, TextAttributes } from "@opentui/core"
|
|
2
2
|
import { useAtom } from "@effect/atom-react"
|
|
3
3
|
import { useTerminalDimensions } from "@opentui/react"
|
|
4
|
-
import { useCallback, useEffect,
|
|
5
|
-
import { formatTimestamp
|
|
4
|
+
import { useCallback, useEffect, useMemo, useRef } from "react"
|
|
5
|
+
import { formatTimestamp } from "./ui/format.ts"
|
|
6
6
|
import { Divider, FooterHints, HelpModal, PlainLine, SplitDivider, TextLine } from "./ui/primitives.tsx"
|
|
7
7
|
import { useAppLayout } from "./ui/app/useAppLayout.ts"
|
|
8
8
|
import { useTraceScreenData } from "./ui/app/useTraceScreenData.ts"
|
|
9
9
|
import { TraceWorkspace } from "./ui/app/TraceWorkspace.tsx"
|
|
10
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
attrPickerIndexAtom,
|
|
12
|
+
attrPickerInputAtom,
|
|
13
|
+
attrPickerModeAtom,
|
|
14
|
+
attrFacetStateAtom,
|
|
15
|
+
noticeAtom,
|
|
16
|
+
persistSelectedTheme,
|
|
17
|
+
selectedThemeAtom,
|
|
18
|
+
} from "./ui/state.ts"
|
|
11
19
|
import { applyTheme, colors, SEPARATOR, themeLabel } from "./ui/theme.ts"
|
|
12
20
|
import { getVisibleSpans } from "./ui/Waterfall.tsx"
|
|
13
21
|
import { useKeyboardNav } from "./ui/useKeyboardNav.ts"
|
|
22
|
+
import { AttrFilterModal } from "./ui/AttrFilterModal.tsx"
|
|
23
|
+
import { useAttrFilterPicker } from "./ui/useAttrFilterPicker.ts"
|
|
14
24
|
|
|
15
25
|
export const App = () => {
|
|
16
26
|
const { width, height } = useTerminalDimensions()
|
|
@@ -36,11 +46,18 @@ export const App = () => {
|
|
|
36
46
|
autoRefresh,
|
|
37
47
|
filterMode,
|
|
38
48
|
filterText,
|
|
49
|
+
activeAttrKey,
|
|
50
|
+
activeAttrValue,
|
|
39
51
|
traceSort,
|
|
40
52
|
selectedTraceSummary,
|
|
41
53
|
selectedTrace,
|
|
42
54
|
filteredTraces,
|
|
43
55
|
} = useTraceScreenData()
|
|
56
|
+
const [pickerMode] = useAtom(attrPickerModeAtom)
|
|
57
|
+
const [pickerInput] = useAtom(attrPickerInputAtom)
|
|
58
|
+
const [pickerIndex] = useAtom(attrPickerIndexAtom)
|
|
59
|
+
const [attrFacets] = useAtom(attrFacetStateAtom)
|
|
60
|
+
useAttrFilterPicker(activeAttrKey)
|
|
44
61
|
|
|
45
62
|
const layout = useAppLayout({ width, height, notice, detailView, selectedSpanIndex })
|
|
46
63
|
const {
|
|
@@ -61,7 +78,6 @@ export const App = () => {
|
|
|
61
78
|
} = layout
|
|
62
79
|
|
|
63
80
|
const noticeTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
|
|
64
|
-
const traceListScrollRef = useRef<ScrollBoxRenderable | null>(null)
|
|
65
81
|
|
|
66
82
|
const flashNotice = (message: string) => {
|
|
67
83
|
if (noticeTimeoutRef.current !== null) {
|
|
@@ -83,27 +99,6 @@ export const App = () => {
|
|
|
83
99
|
persistSelectedTheme(selectedTheme)
|
|
84
100
|
}, [selectedTheme])
|
|
85
101
|
|
|
86
|
-
useLayoutEffect(() => {
|
|
87
|
-
const box = traceListScrollRef.current
|
|
88
|
-
const traceId = selectedTraceSummary?.traceId
|
|
89
|
-
if (!box || !traceId) return
|
|
90
|
-
const indexInList = filteredTraces.findIndex((trace) => trace.traceId === traceId)
|
|
91
|
-
if (indexInList < 0) return
|
|
92
|
-
const currentTop = box.scrollTop
|
|
93
|
-
const viewportRows = Math.max(1, traceViewportRows)
|
|
94
|
-
let nextTop = currentTop
|
|
95
|
-
if (indexInList < currentTop) {
|
|
96
|
-
nextTop = indexInList
|
|
97
|
-
} else if (indexInList >= currentTop + viewportRows) {
|
|
98
|
-
nextTop = indexInList - viewportRows + 1
|
|
99
|
-
}
|
|
100
|
-
const maxTop = Math.max(0, filteredTraces.length - viewportRows)
|
|
101
|
-
nextTop = Math.max(0, Math.min(nextTop, maxTop))
|
|
102
|
-
if (nextTop !== currentTop) {
|
|
103
|
-
box.scrollTop = nextTop
|
|
104
|
-
}
|
|
105
|
-
}, [filteredTraces, selectedTraceIndex, selectedTraceSummary?.traceId, traceSort, traceViewportRows])
|
|
106
|
-
|
|
107
102
|
const { spanNavActive } = useKeyboardNav({
|
|
108
103
|
selectedTrace,
|
|
109
104
|
filteredTraces,
|
|
@@ -117,12 +112,15 @@ export const App = () => {
|
|
|
117
112
|
|
|
118
113
|
const headerServiceLabel = selectedTraceService ?? "none"
|
|
119
114
|
const autoLabel = autoRefresh ? "● live" : "○ paused"
|
|
115
|
+
const attrFilterLabel = activeAttrKey && activeAttrValue
|
|
116
|
+
? ` [${activeAttrKey}=${activeAttrValue.length > 20 ? `${activeAttrValue.slice(0, 19)}…` : activeAttrValue}]`
|
|
117
|
+
: ""
|
|
120
118
|
const headerRight = traceState.fetchedAt
|
|
121
119
|
? `${autoLabel} ${formatTimestamp(traceState.fetchedAt)}`
|
|
122
120
|
: traceState.status === "loading"
|
|
123
121
|
? "loading traces..."
|
|
124
122
|
: ""
|
|
125
|
-
const headerLeftLen = "MOTEL".length + SEPARATOR.length + headerServiceLabel.length
|
|
123
|
+
const headerLeftLen = "MOTEL".length + SEPARATOR.length + headerServiceLabel.length + attrFilterLabel.length
|
|
126
124
|
const headerGap = Math.max(2, headerFooterWidth - headerLeftLen - headerRight.length)
|
|
127
125
|
const visibleFooterNotice = footerNotice
|
|
128
126
|
|
|
@@ -168,6 +166,7 @@ export const App = () => {
|
|
|
168
166
|
<span fg={colors.muted} attributes={TextAttributes.BOLD}>MOTEL</span>
|
|
169
167
|
<span fg={colors.separator}>{SEPARATOR}</span>
|
|
170
168
|
<span fg={colors.muted}>{headerServiceLabel}</span>
|
|
169
|
+
{attrFilterLabel ? <span fg={colors.accent} attributes={TextAttributes.BOLD}>{attrFilterLabel}</span> : null}
|
|
171
170
|
<span fg={colors.muted}>{" ".repeat(headerGap)}</span>
|
|
172
171
|
<span fg={colors.muted} attributes={TextAttributes.BOLD}>{headerRight}</span>
|
|
173
172
|
</TextLine>
|
|
@@ -181,7 +180,6 @@ export const App = () => {
|
|
|
181
180
|
filterMode={filterMode}
|
|
182
181
|
filterText={filterText}
|
|
183
182
|
traceListProps={traceListProps}
|
|
184
|
-
traceListScrollRef={traceListScrollRef}
|
|
185
183
|
selectedTraceService={selectedTraceService}
|
|
186
184
|
serviceLogState={serviceLogState}
|
|
187
185
|
selectedServiceLogIndex={selectedServiceLogIndex}
|
|
@@ -212,6 +210,18 @@ export const App = () => {
|
|
|
212
210
|
</>
|
|
213
211
|
) : null}
|
|
214
212
|
{showHelp ? <HelpModal width={width ?? 100} height={height ?? 24} autoRefresh={autoRefresh} themeLabel={themeLabel(selectedTheme)} onClose={() => setShowHelp(false)} /> : null}
|
|
213
|
+
{pickerMode !== "off" ? (
|
|
214
|
+
<AttrFilterModal
|
|
215
|
+
width={width ?? 100}
|
|
216
|
+
height={height ?? 24}
|
|
217
|
+
mode={pickerMode}
|
|
218
|
+
input={pickerInput}
|
|
219
|
+
selectedIndex={pickerIndex}
|
|
220
|
+
selectedKey={activeAttrKey}
|
|
221
|
+
state={attrFacets}
|
|
222
|
+
onClose={() => { /* handled via keyboard */ }}
|
|
223
|
+
/>
|
|
224
|
+
) : null}
|
|
215
225
|
</box>
|
|
216
226
|
)
|
|
217
227
|
}
|
package/src/config.ts
CHANGED
|
@@ -34,6 +34,6 @@ export const config = {
|
|
|
34
34
|
traceFetchLimit: parsePositiveInt(process.env.MOTEL_OTEL_TRACE_LIMIT, 100),
|
|
35
35
|
logFetchLimit: parsePositiveInt(process.env.MOTEL_OTEL_LOG_LIMIT, 80),
|
|
36
36
|
retentionHours: parsePositiveInt(process.env.MOTEL_OTEL_RETENTION_HOURS, 168),
|
|
37
|
-
maxDbSizeMb: parsePositiveInt(process.env.MOTEL_OTEL_MAX_DB_SIZE_MB,
|
|
37
|
+
maxDbSizeMb: parsePositiveInt(process.env.MOTEL_OTEL_MAX_DB_SIZE_MB, 1024),
|
|
38
38
|
},
|
|
39
39
|
} as const
|
package/src/httpApi.ts
CHANGED
|
@@ -310,7 +310,10 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
|
|
|
310
310
|
Schema.annotateKey({ description: "Data source to facet: 'traces' facets span columns, 'logs' facets log columns" }),
|
|
311
311
|
),
|
|
312
312
|
field: Schema.String.pipe(
|
|
313
|
-
Schema.annotateKey({ description: "Column to facet. Traces: service, operation, status. Logs: service, severity, scope" }),
|
|
313
|
+
Schema.annotateKey({ description: "Column to facet. Traces: service, operation, status, attribute_keys, attribute_values. Logs: service, severity, scope. For attribute_values, also pass key=<attribute-name>." }),
|
|
314
|
+
),
|
|
315
|
+
key: Schema.optionalKey(Schema.String).pipe(
|
|
316
|
+
Schema.annotateKey({ description: "Attribute key to get values for (required when field=attribute_values)." }),
|
|
314
317
|
),
|
|
315
318
|
service: ServiceParam,
|
|
316
319
|
lookback: LookbackParam,
|
|
@@ -320,7 +323,7 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
|
|
|
320
323
|
error: ErrorResponse,
|
|
321
324
|
})
|
|
322
325
|
.annotate(OpenApi.Summary, "Get facet value counts")
|
|
323
|
-
.annotate(OpenApi.Description, "Returns distinct values and their counts for a given field, useful for discovering what data exists before querying.
|
|
326
|
+
.annotate(OpenApi.Description, "Returns distinct values and their counts for a given field, useful for discovering what data exists before querying. Examples: ?type=logs&field=severity returns log level distribution; ?type=traces&field=attribute_keys&service=opencode lists top span attribute keys; ?type=traces&field=attribute_values&key=ai.model.id lists values seen for that key."),
|
|
324
327
|
|
|
325
328
|
// AI Call endpoints
|
|
326
329
|
HttpApiEndpoint.get("aiCalls", "/api/ai/calls", {
|
package/src/localServer.ts
CHANGED
|
@@ -504,6 +504,7 @@ const TelemetryGroupLive = HttpApiBuilder.group(
|
|
|
504
504
|
type,
|
|
505
505
|
field,
|
|
506
506
|
serviceName: url.searchParams.get("service"),
|
|
507
|
+
key: url.searchParams.get("key"),
|
|
507
508
|
lookbackMinutes: parseLookbackMinutes(url.searchParams.get("lookback"), config.otel.traceLookbackMinutes),
|
|
508
509
|
limit: parseLimit(url.searchParams.get("limit"), 20),
|
|
509
510
|
}),
|
package/src/motel.ts
CHANGED
|
@@ -36,6 +36,17 @@ case "stop": {
|
|
|
36
36
|
break
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
+
case "restart": {
|
|
40
|
+
// Stop any running managed daemon, then start a fresh one + launch the
|
|
41
|
+
// TUI. Handy during local development when you've rebuilt the server
|
|
42
|
+
// and want the TUI to reconnect to the new binary in one command.
|
|
43
|
+
await run(stopManagedDaemon)
|
|
44
|
+
await run(applyManagedDaemonEnv)
|
|
45
|
+
await run(ensureManagedDaemon)
|
|
46
|
+
await import("./index.js")
|
|
47
|
+
break
|
|
48
|
+
}
|
|
49
|
+
|
|
39
50
|
case "server": {
|
|
40
51
|
await run(applyManagedDaemonEnv)
|
|
41
52
|
await import("./server.js")
|
|
@@ -56,6 +67,7 @@ case "-h": {
|
|
|
56
67
|
motel daemon
|
|
57
68
|
motel status
|
|
58
69
|
motel stop
|
|
70
|
+
motel restart
|
|
59
71
|
motel server
|
|
60
72
|
motel mcp
|
|
61
73
|
motel services
|
|
@@ -94,6 +94,7 @@ interface FacetSearch {
|
|
|
94
94
|
readonly type: "traces" | "logs"
|
|
95
95
|
readonly field: string
|
|
96
96
|
readonly serviceName?: string | null
|
|
97
|
+
readonly key?: string | null
|
|
97
98
|
readonly lookbackMinutes?: number
|
|
98
99
|
readonly limit?: number
|
|
99
100
|
}
|
|
@@ -162,6 +163,15 @@ const parseSummaryRow = (row: TraceSummaryRow): TraceSummaryItem => ({
|
|
|
162
163
|
warnings: [],
|
|
163
164
|
})
|
|
164
165
|
|
|
166
|
+
// Skip attribute facet rows whose value is this long or longer (the facet queries keep only `LENGTH(value) < ?`). Prevents
|
|
167
|
+
// multi-MB text attrs (ai.prompt, ai.prompt.messages, etc.) from dominating
|
|
168
|
+
// picker-open time — SQLite skips reading those pages from disk when the
|
|
169
|
+
// length predicate is evaluated against the page header, taking queries over
|
|
170
|
+
// a 2GB database from ~1.2s down to ~370ms. Keys whose values are ALL fat
|
|
171
|
+
// simply don't appear in the picker, which is the desired behaviour: you'd
|
|
172
|
+
// never want to filter traces by exact-match on a 1MB prompt blob anyway.
|
|
173
|
+
const FACET_VALUE_MAX_LEN = 512
|
|
174
|
+
|
|
165
175
|
const TRACE_SUMMARY_SELECT_SQL = `
|
|
166
176
|
SELECT
|
|
167
177
|
trace_id,
|
|
@@ -436,13 +446,30 @@ export const TelemetryStoreLive = Layer.effect(
|
|
|
436
446
|
mkdirSync(dirname(config.otel.databasePath), { recursive: true })
|
|
437
447
|
const db = yield* Effect.acquireRelease(
|
|
438
448
|
Effect.sync(() => new Database(config.otel.databasePath, { create: true })),
|
|
439
|
-
(db) => Effect.sync(() =>
|
|
449
|
+
(db) => Effect.sync(() => {
|
|
450
|
+
// `PRAGMA optimize` at close persists any stats SQLite gathered
|
|
451
|
+
// during the session, so the next process start gets an accurate
|
|
452
|
+
// query planner on the first query instead of a 3-second cold
|
|
453
|
+
// run. Cheap: it skips work unless stats have drifted.
|
|
454
|
+
try { db.exec(`PRAGMA optimize;`) } catch { /* nothing */ }
|
|
455
|
+
db.close()
|
|
456
|
+
}),
|
|
440
457
|
)
|
|
441
458
|
db.exec(`
|
|
442
459
|
PRAGMA journal_mode = WAL;
|
|
443
460
|
PRAGMA synchronous = NORMAL;
|
|
444
461
|
PRAGMA temp_store = MEMORY;
|
|
445
462
|
PRAGMA busy_timeout = 5000;
|
|
463
|
+
-- Bump cache above the 2MB default. 64MB fits most hot index pages
|
|
464
|
+
-- (trace_summaries, spans, span_attributes indexes) in RAM even on
|
|
465
|
+
-- multi-GB databases, cutting cold-read latency meaningfully on
|
|
466
|
+
-- picker / search queries that sweep the index.
|
|
467
|
+
PRAGMA cache_size = -65536;
|
|
468
|
+
-- Let SQLite memory-map the first 256MB of the file. This is a
|
|
469
|
+
-- cheap way to avoid read() syscalls on hot pages and lets the OS
|
|
470
|
+
-- page cache serve index lookups directly. Safe on macOS and Linux;
|
|
471
|
+
-- SQLite silently caps at actual file size for smaller DBs.
|
|
472
|
+
PRAGMA mmap_size = 268435456;
|
|
446
473
|
|
|
447
474
|
CREATE TABLE IF NOT EXISTS spans (
|
|
448
475
|
trace_id TEXT NOT NULL,
|
|
@@ -550,6 +577,24 @@ export const TelemetryStoreLive = Layer.effect(
|
|
|
550
577
|
// Existing databases may already have the column.
|
|
551
578
|
}
|
|
552
579
|
|
|
580
|
+
// Prime the query planner. `PRAGMA optimize` is SQLite's modern,
|
|
581
|
+
// lightweight stats refresh: it only re-ANALYZEs indexes whose row
|
|
582
|
+
// counts have drifted significantly since the last run, capped at
|
|
583
|
+
// roughly `analysis_limit` rows examined per index so it finishes in a
|
|
584
|
+
// bounded time even on large databases. Without this, queries like
|
|
585
|
+
// the attribute picker facet run with guessed row estimates and
|
|
586
|
+
// pay 3-4s on cold open instead of 400ms.
|
|
587
|
+
try {
|
|
588
|
+
db.exec(`PRAGMA analysis_limit = 1000; PRAGMA optimize;`)
|
|
589
|
+
// First-time databases won't have sqlite_stat1 until we run a
|
|
590
|
+
// real ANALYZE. Force it once if stats haven't been collected.
|
|
591
|
+
const hasStats = db.query(`SELECT 1 FROM sqlite_master WHERE name = 'sqlite_stat1' LIMIT 1`).get() !== null
|
|
592
|
+
if (!hasStats) db.exec(`ANALYZE;`)
|
|
593
|
+
} catch {
|
|
594
|
+
// ANALYZE / optimize failures are never fatal — queries still work,
|
|
595
|
+
// they just run with default row estimates.
|
|
596
|
+
}
|
|
597
|
+
|
|
553
598
|
const insertSpan = db.query(`
|
|
554
599
|
INSERT INTO spans (
|
|
555
600
|
trace_id, span_id, parent_span_id, service_name, scope_name, operation_name, kind,
|
|
@@ -612,41 +657,73 @@ export const TelemetryStoreLive = Layer.effect(
|
|
|
612
657
|
const now = yield* Clock.currentTimeMillis
|
|
613
658
|
|
|
614
659
|
yield* Effect.sync(() => {
|
|
615
|
-
let deletedData = false
|
|
616
|
-
// Time-based retention
|
|
617
660
|
const cutoff = now - config.otel.retentionHours * 60 * 60 * 1000
|
|
618
|
-
const deletedSpans = db.query(`DELETE FROM spans WHERE start_time_ms < ?`).run(cutoff) as { changes?: number }
|
|
619
|
-
const deletedLogs = db.query(`DELETE FROM logs WHERE timestamp_ms < ?`).run(cutoff) as { changes?: number }
|
|
620
|
-
deletedData = (deletedSpans.changes ?? 0) > 0 || (deletedLogs.changes ?? 0) > 0
|
|
621
661
|
|
|
622
|
-
//
|
|
623
|
-
//
|
|
624
|
-
//
|
|
662
|
+
// Evict at TRACE granularity so we never leave a trace half-gutted
|
|
663
|
+
// (previous logic deleted oldest 20% of spans, which happily sliced
|
|
664
|
+
// across traces and corrupted the summary rebuild). Running traces
|
|
665
|
+
// are protected — only `active_span_count = 0` summaries are in
|
|
666
|
+
// scope for eviction.
|
|
667
|
+
const toEvict = new Set<string>()
|
|
668
|
+
|
|
669
|
+
// Time-based: completed traces whose last span ended before cutoff.
|
|
670
|
+
const timeExpired = db.query(
|
|
671
|
+
`SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ?`,
|
|
672
|
+
).all(cutoff) as readonly { trace_id: string }[]
|
|
673
|
+
for (const row of timeExpired) toEvict.add(row.trace_id)
|
|
674
|
+
|
|
675
|
+
// Size-based: if actual data exceeds cap, drop oldest 20% of the
|
|
676
|
+
// remaining completed traces. `(page_count - freelist_count)`
|
|
677
|
+
// ignores freed-but-not-vacuumed pages so a large freelist doesn't
|
|
678
|
+
// trigger a deletion death spiral.
|
|
625
679
|
const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
|
|
626
680
|
const freePages = (db.query(`PRAGMA freelist_count`).get() as { freelist_count: number }).freelist_count
|
|
627
681
|
const pageSize = (db.query(`PRAGMA page_size`).get() as { page_size: number }).page_size
|
|
628
682
|
const dbSize = (pageCount - freePages) * pageSize
|
|
629
683
|
if (dbSize > maxDbSizeBytes) {
|
|
630
|
-
const
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
const
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
684
|
+
const completedCount = (db.query(
|
|
685
|
+
`SELECT COUNT(*) AS c FROM trace_summaries WHERE active_span_count = 0`,
|
|
686
|
+
).get() as { c: number }).c
|
|
687
|
+
const traceCutCount = Math.max(1, Math.floor(completedCount * 0.2))
|
|
688
|
+
const oldest = db.query(
|
|
689
|
+
`SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 ORDER BY started_at_ms ASC LIMIT ?`,
|
|
690
|
+
).all(traceCutCount) as readonly { trace_id: string }[]
|
|
691
|
+
// Set.add dedupes overlap with the time-expired batch above.
|
|
692
|
+
for (const row of oldest) toEvict.add(row.trace_id)
|
|
637
693
|
}
|
|
638
694
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
695
|
+
// Always prune orphan logs (no trace_id) by timestamp — they're
|
|
696
|
+
// not covered by trace eviction.
|
|
697
|
+
db.query(`DELETE FROM logs WHERE trace_id IS NULL AND timestamp_ms < ?`).run(cutoff)
|
|
698
|
+
|
|
699
|
+
if (toEvict.size === 0) return
|
|
700
|
+
|
|
701
|
+
// Batch the trace-id list so the IN placeholders stay under
|
|
702
|
+
// SQLite's default limit (~999). Each batch wipes every row
|
|
703
|
+
// reachable from those trace_ids across the cascade tables.
|
|
704
|
+
const traceIds = Array.from(toEvict)
|
|
705
|
+
const BATCH_SIZE = 500
|
|
706
|
+
for (let offset = 0; offset < traceIds.length; offset += BATCH_SIZE) {
|
|
707
|
+
const batch = traceIds.slice(offset, offset + BATCH_SIZE)
|
|
708
|
+
const placeholders = batch.map(() => "?").join(",")
|
|
709
|
+
db.query(`DELETE FROM span_attributes WHERE trace_id IN (${placeholders})`).run(...batch)
|
|
642
710
|
try {
|
|
643
|
-
db.query(`DELETE FROM span_operation_fts WHERE
|
|
644
|
-
db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
|
|
711
|
+
db.query(`DELETE FROM span_operation_fts WHERE trace_id IN (${placeholders})`).run(...batch)
|
|
645
712
|
} catch {
|
|
646
|
-
// FTS
|
|
713
|
+
// FTS table may not exist on old DBs.
|
|
647
714
|
}
|
|
648
|
-
db.query(`DELETE FROM
|
|
649
|
-
|
|
715
|
+
db.query(`DELETE FROM spans WHERE trace_id IN (${placeholders})`).run(...batch)
|
|
716
|
+
db.query(`DELETE FROM logs WHERE trace_id IN (${placeholders})`).run(...batch)
|
|
717
|
+
db.query(`DELETE FROM trace_summaries WHERE trace_id IN (${placeholders})`).run(...batch)
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
// Log-side orphans (log_attributes + FTS) are keyed by log.id,
|
|
721
|
+
// so prune what no longer has a parent log row.
|
|
722
|
+
db.query(`DELETE FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id)`).run()
|
|
723
|
+
try {
|
|
724
|
+
db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
|
|
725
|
+
} catch {
|
|
726
|
+
// FTS table may not exist on old DBs.
|
|
650
727
|
}
|
|
651
728
|
})
|
|
652
729
|
})
|
|
@@ -654,6 +731,16 @@ export const TelemetryStoreLive = Layer.effect(
|
|
|
654
731
|
// Run cleanup every 60 seconds in the background, tied to the layer's scope
|
|
655
732
|
yield* Effect.forkScoped(Effect.repeat(cleanupExpired(), Schedule.spaced("60 seconds")))
|
|
656
733
|
|
|
734
|
+
// Periodically refresh query planner stats. `PRAGMA optimize` is a
|
|
735
|
+
// no-op when nothing has changed, so this is essentially free on idle
|
|
736
|
+
// servers and keeps facet/search planner estimates accurate as data
|
|
737
|
+
// grows. 15 minutes is slower than ingestion rates we care about but
|
|
738
|
+
// frequent enough that the attribute picker stays snappy.
|
|
739
|
+
const refreshPlannerStats = Effect.sync(() => {
|
|
740
|
+
try { db.exec(`PRAGMA optimize;`) } catch { /* ignore */ }
|
|
741
|
+
})
|
|
742
|
+
yield* Effect.forkScoped(Effect.repeat(refreshPlannerStats, Schedule.spaced("15 minutes")))
|
|
743
|
+
|
|
657
744
|
const ingestTraces = Effect.fn("motel/TelemetryStore.ingestTraces")(function* (payload: OtlpTraceExportRequest) {
|
|
658
745
|
return yield* Effect.sync(() => {
|
|
659
746
|
let insertedSpans = 0
|
|
@@ -1424,6 +1511,61 @@ export const TelemetryStoreLive = Layer.effect(
|
|
|
1424
1511
|
`).all(...(input.serviceName ? [cutoff, input.serviceName, limit] : [cutoff, limit])) as Array<{ value: string; count: number }>
|
|
1425
1512
|
return rows
|
|
1426
1513
|
}
|
|
1514
|
+
if (input.field === "attribute_keys") {
|
|
1515
|
+
// Count distinct traces each attribute key appears on, optionally
|
|
1516
|
+
// scoped to a service. Keys with many distinct values (e.g. sessionId,
|
|
1517
|
+
// user id, model) rank higher than keys that are constant across every
|
|
1518
|
+
// trace (service.name, telemetry.sdk.*) — the latter can't discriminate
|
|
1519
|
+
// between traces so they're useless as filters.
|
|
1520
|
+
//
|
|
1521
|
+
// Performance note: we skip rows whose value blob is larger than
|
|
1522
|
+
// FACET_VALUE_MAX_LEN. For opencode this hides `ai.prompt`,
|
|
1523
|
+
// `ai.prompt.messages`, and `ai.prompt.tools` — which are 1-6MB text
|
|
1524
|
+
// blobs that you'd never want to filter by exact match anyway. The
|
|
1525
|
+
// WHERE clause lets SQLite skip reading those pages from disk, taking
|
|
1526
|
+
// the picker open time from ~1.2s to ~370ms on a 2GB database.
|
|
1527
|
+
const params: Array<string | number> = [FACET_VALUE_MAX_LEN, cutoff]
|
|
1528
|
+
if (input.serviceName) params.push(input.serviceName)
|
|
1529
|
+
params.push(limit)
|
|
1530
|
+
const rows = db.query(`
|
|
1531
|
+
SELECT sa.key AS value,
|
|
1532
|
+
COUNT(DISTINCT sa.trace_id) AS count,
|
|
1533
|
+
COUNT(DISTINCT sa.value) AS distinct_values
|
|
1534
|
+
FROM span_attributes sa
|
|
1535
|
+
JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
|
|
1536
|
+
WHERE LENGTH(sa.value) < ?
|
|
1537
|
+
AND s.start_time_ms >= ?
|
|
1538
|
+
${input.serviceName ? "AND s.service_name = ?" : ""}
|
|
1539
|
+
GROUP BY sa.key
|
|
1540
|
+
ORDER BY (CASE WHEN distinct_values = 1 THEN 1 ELSE 0 END) ASC,
|
|
1541
|
+
distinct_values DESC,
|
|
1542
|
+
count DESC,
|
|
1543
|
+
value ASC
|
|
1544
|
+
LIMIT ?
|
|
1545
|
+
`).all(...params) as Array<{ value: string; count: number; distinct_values: number }>
|
|
1546
|
+
return rows.map((row) => ({ value: row.value, count: row.count }))
|
|
1547
|
+
}
|
|
1548
|
+
if (input.field === "attribute_values") {
|
|
1549
|
+
if (!input.key) return [] as FacetItem[]
|
|
1550
|
+
// Skip over-long values (FACET_VALUE_MAX_LEN or more in length) here too — they blow up GROUP BY on big text.
|
|
1551
|
+
// Matches the attribute_keys pre-filter so the picker stays responsive
|
|
1552
|
+
// if someone hand-crafts a URL that targets a fat key.
|
|
1553
|
+
const params: Array<string | number> = [input.key, FACET_VALUE_MAX_LEN, cutoff]
|
|
1554
|
+
if (input.serviceName) params.push(input.serviceName)
|
|
1555
|
+
params.push(limit)
|
|
1556
|
+
const rows = db.query(`
|
|
1557
|
+
SELECT sa.value AS value, COUNT(DISTINCT sa.trace_id) AS count
|
|
1558
|
+
FROM span_attributes sa
|
|
1559
|
+
JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
|
|
1560
|
+
WHERE sa.key = ? AND LENGTH(sa.value) < ?
|
|
1561
|
+
AND s.start_time_ms >= ?
|
|
1562
|
+
${input.serviceName ? "AND s.service_name = ?" : ""}
|
|
1563
|
+
GROUP BY sa.value
|
|
1564
|
+
ORDER BY count DESC, value ASC
|
|
1565
|
+
LIMIT ?
|
|
1566
|
+
`).all(...params) as Array<{ value: string; count: number }>
|
|
1567
|
+
return rows
|
|
1568
|
+
}
|
|
1427
1569
|
}
|
|
1428
1570
|
|
|
1429
1571
|
return [] as FacetItem[]
|
|
@@ -8,6 +8,8 @@ export class TraceQueryService extends Context.Service<
|
|
|
8
8
|
readonly listServices: Effect.Effect<readonly string[], Error>
|
|
9
9
|
readonly listRecentTraces: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceItem[], Error>
|
|
10
10
|
readonly listTraceSummaries: (serviceName: string, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
11
|
+
readonly searchTraceSummaries: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly TraceSummaryItem[], Error>
|
|
12
|
+
readonly listFacets: (input: { readonly type: "traces" | "logs"; readonly field: string; readonly serviceName?: string | null; readonly key?: string | null; readonly lookbackMinutes?: number; readonly limit?: number }) => Effect.Effect<readonly { readonly value: string; readonly count: number }[], Error>
|
|
11
13
|
readonly searchTraces: (input: { readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly TraceItem[], Error>
|
|
12
14
|
readonly traceStats: (input: { readonly groupBy: string; readonly agg: "count" | "avg_duration" | "p95_duration" | "error_rate"; readonly serviceName?: string | null; readonly operation?: string | null; readonly status?: "ok" | "error" | null; readonly minDurationMs?: number | null; readonly lookbackMinutes?: number; readonly limit?: number; readonly attributeFilters?: Readonly<Record<string, string>> }) => Effect.Effect<readonly { readonly group: string; readonly value: number; readonly count: number }[], Error>
|
|
13
15
|
readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
|
|
@@ -60,6 +62,8 @@ export const TraceQueryServiceLive = Layer.effect(
|
|
|
60
62
|
listServices,
|
|
61
63
|
listRecentTraces,
|
|
62
64
|
listTraceSummaries,
|
|
65
|
+
searchTraceSummaries: store.searchTraceSummaries,
|
|
66
|
+
listFacets: store.listFacets,
|
|
63
67
|
searchTraces: store.searchTraces,
|
|
64
68
|
traceStats: store.traceStats,
|
|
65
69
|
getTrace,
|