@kitlangton/motel 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +11 -1
- package/package.json +5 -3
- package/src/App.tsx +239 -59
- package/src/daemon.test.ts +144 -7
- package/src/daemon.ts +113 -8
- package/src/domain.test.ts +62 -0
- package/src/domain.ts +62 -4
- package/src/httpApi.ts +4 -1
- package/src/localServer.ts +112 -121
- package/src/mcp.ts +172 -0
- package/src/motelClient.ts +166 -14
- package/src/registry.ts +26 -23
- package/src/runtime.ts +8 -2
- package/src/server.ts +10 -9
- package/src/services/AsyncIngest.ts +52 -0
- package/src/services/TelemetryStore.ts +285 -27
- package/src/services/TraceQueryService.ts +4 -2
- package/src/services/ingestRpc.ts +41 -0
- package/src/services/telemetryWorker.ts +62 -0
- package/src/storybook/aiChatStory.tsx +243 -0
- package/src/storybook/fixtures/errorState.ts +44 -0
- package/src/storybook/fixtures/imagePaste.ts +34 -0
- package/src/storybook/fixtures/index.ts +62 -0
- package/src/storybook/fixtures/kitchenSink.ts +148 -0
- package/src/storybook/fixtures/rawPrompt.ts +15 -0
- package/src/storybook/fixtures/short.ts +27 -0
- package/src/storybook/fixtures/toolHeavy.ts +65 -0
- package/src/telemetry.test.ts +61 -0
- package/src/ui/AiChatView.tsx +292 -0
- package/src/ui/SpanContentView.tsx +181 -0
- package/src/ui/SpanDetail.tsx +98 -17
- package/src/ui/TraceDetailsPane.tsx +35 -3
- package/src/ui/Waterfall.tsx +94 -167
- package/src/ui/aiChatModel.test.ts +347 -0
- package/src/ui/aiChatModel.ts +736 -0
- package/src/ui/aiState.ts +71 -0
- package/src/ui/app/TraceWorkspace.tsx +295 -120
- package/src/ui/app/useAppLayout.ts +14 -11
- package/src/ui/app/useTraceScreenData.ts +191 -35
- package/src/ui/atoms.ts +131 -0
- package/src/ui/filterParser.test.ts +56 -0
- package/src/ui/filterParser.ts +45 -0
- package/src/ui/loaders.ts +120 -0
- package/src/ui/persistence.ts +41 -0
- package/src/ui/primitives.tsx +47 -21
- package/src/ui/state.ts +4 -169
- package/src/ui/useAttrFilterPicker.ts +63 -23
- package/src/ui/useKeyboardNav.ts +576 -300
- package/src/ui/waterfallFilter.test.ts +84 -0
- package/src/ui/waterfallFilter.ts +59 -0
- package/src/ui/waterfallModel.ts +130 -0
- package/src/ui/waterfallNav.test.ts +17 -1
- package/src/ui/waterfallNav.ts +1 -1
- package/web/dist/assets/{index-DKinj-OE.js → index-DnyVo03x.js} +1 -1
- package/web/dist/index.html +1 -1
package/src/daemon.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
|
|
|
2
2
|
import { promises as fsp } from "node:fs"
|
|
3
3
|
import * as path from "node:path"
|
|
4
4
|
import { Effect } from "effect"
|
|
5
|
-
import { listAliveEntries, MOTEL_SERVICE_ID, type RegistryEntry
|
|
5
|
+
import { isAlive, listAliveEntries, MOTEL_SERVICE_ID, type RegistryEntry } from "./registry.js"
|
|
6
6
|
|
|
7
7
|
const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
|
|
8
8
|
const DEFAULT_RUNTIME_DIR = path.join(DEFAULT_REPO_ROOT, ".motel-data")
|
|
@@ -13,6 +13,18 @@ const START_TIMEOUT_MS = 15_000
|
|
|
13
13
|
const STOP_TIMEOUT_MS = 10_000
|
|
14
14
|
const LOCK_TIMEOUT_MS = 10_000
|
|
15
15
|
const POLL_INTERVAL_MS = 150
|
|
16
|
+
/** Fast probe used inside the waitForHealthy poll loop — we call it
|
|
17
|
+
* every POLL_INTERVAL_MS, so a generous budget would stall the loop. */
|
|
18
|
+
const HEALTH_FAST_TIMEOUT_MS = 750
|
|
19
|
+
/** Patient probe used on critical paths: the first getStatus() call
|
|
20
|
+
* in ensure(), and the final pre-throw check after a spawned child
|
|
21
|
+
* dies. A real daemon with a busy SQLite writer (FTS backfill, big
|
|
22
|
+
* DB) can easily take 1-2s to answer /api/health — if we declare
|
|
23
|
+
* the port empty at 750ms we'll spawn a duplicate and collide with
|
|
24
|
+
* EADDRINUSE. 3s is long enough to tolerate a slow healthy daemon
|
|
25
|
+
* and short enough that a truly-down daemon is still detected
|
|
26
|
+
* before START_TIMEOUT_MS fires. */
|
|
27
|
+
const HEALTH_PATIENT_TIMEOUT_MS = 3_000
|
|
16
28
|
|
|
17
29
|
type HealthShape = {
|
|
18
30
|
readonly ok: boolean
|
|
@@ -134,9 +146,9 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
134
146
|
const config = resolveConfig(options)
|
|
135
147
|
const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
|
|
136
148
|
|
|
137
|
-
const fetchHealth = async (): Promise<HealthShape | null> => {
|
|
149
|
+
const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
|
|
138
150
|
try {
|
|
139
|
-
const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(
|
|
151
|
+
const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
|
|
140
152
|
if (!response.ok) return null
|
|
141
153
|
return await response.json() as HealthShape
|
|
142
154
|
} catch {
|
|
@@ -157,6 +169,62 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
157
169
|
return null
|
|
158
170
|
}
|
|
159
171
|
|
|
172
|
+
/**
|
|
173
|
+
* Mismatch check against a registry entry — mirrors describeManagedMismatch
|
|
174
|
+
* but drives off the registry file instead of an HTTP health response.
|
|
175
|
+
* Used on the fast path in getStatus so warm-start doesn't need to wait
|
|
176
|
+
* on an HTTP round-trip that may queue behind heavy OTLP ingest.
|
|
177
|
+
*
|
|
178
|
+
* The service-id check is implicit: any entry living in the motel
|
|
179
|
+
* registry dir is by construction a motel daemon. databasePath is
|
|
180
|
+
* optional for back-compat with entries written by older builds;
|
|
181
|
+
* when absent we skip the DB check rather than refusing to adopt.
|
|
182
|
+
*/
|
|
183
|
+
const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
|
|
184
|
+
if (!cwdMatches(entry.workdir)) {
|
|
185
|
+
return `Port ${config.port} is serving motel for ${entry.workdir}, not ${process.cwd()}.`
|
|
186
|
+
}
|
|
187
|
+
if (entry.databasePath && entry.databasePath !== config.databasePath) {
|
|
188
|
+
return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
|
|
189
|
+
}
|
|
190
|
+
return null
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Build a DaemonStatus from a live registry entry. Returns null when
|
|
195
|
+
* there's no entry for our cwd, the registered pid isn't running, or
|
|
196
|
+
* the entry is for a differently-configured daemon (different port).
|
|
197
|
+
* This is the fast path: no HTTP, no event-loop round-trip, just a
|
|
198
|
+
* directory read and a process.kill(pid, 0) liveness probe.
|
|
199
|
+
*/
|
|
200
|
+
const getStatusFromRegistry = (): DaemonStatus | null => {
|
|
201
|
+
const entry = readRegistryEntry()
|
|
202
|
+
if (!entry) return null
|
|
203
|
+
// Port discriminator: a motel registry shared across several
|
|
204
|
+
// daemons (e.g., user running two instances on different
|
|
205
|
+
// ports from the same workdir, or a test harness on a random
|
|
206
|
+
// port) would otherwise have us adopt an unrelated daemon.
|
|
207
|
+
// URL match is a fast, unambiguous identity check.
|
|
208
|
+
if (entry.url !== config.baseUrl) return null
|
|
209
|
+
const mismatch = describeRegistryMismatch(entry)
|
|
210
|
+
return {
|
|
211
|
+
running: mismatch === null,
|
|
212
|
+
managed: mismatch === null,
|
|
213
|
+
service: MOTEL_SERVICE_ID,
|
|
214
|
+
pid: entry.pid,
|
|
215
|
+
url: entry.url,
|
|
216
|
+
databasePath: entry.databasePath ?? config.databasePath,
|
|
217
|
+
workdir: entry.workdir,
|
|
218
|
+
startedAt: entry.startedAt,
|
|
219
|
+
version: entry.version,
|
|
220
|
+
sameWorkdir: cwdMatches(entry.workdir),
|
|
221
|
+
reason: mismatch,
|
|
222
|
+
logPath: config.logPath,
|
|
223
|
+
lockPath: config.lockPath,
|
|
224
|
+
registryPid: entry.pid,
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
160
228
|
const readLock = async (): Promise<LockShape | null> => {
|
|
161
229
|
try {
|
|
162
230
|
const raw = await fsp.readFile(config.lockPath, "utf8")
|
|
@@ -218,6 +286,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
218
286
|
throw new Error(mismatch)
|
|
219
287
|
}
|
|
220
288
|
if (!isAlive(pid)) {
|
|
289
|
+
// The spawned child is gone. Before declaring failure,
|
|
290
|
+
// do one patient probe: the child may have died from
|
|
291
|
+
// EADDRINUSE because another healthy motel is alive on
|
|
292
|
+
// the port but was answering /api/health too slowly for
|
|
293
|
+
// our fast poll. If that's the case, adopt it.
|
|
294
|
+
const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
|
|
295
|
+
if (patient) {
|
|
296
|
+
const mismatch = describeManagedMismatch(patient)
|
|
297
|
+
if (!mismatch) return patient
|
|
298
|
+
throw new Error(mismatch)
|
|
299
|
+
}
|
|
221
300
|
throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
|
|
222
301
|
}
|
|
223
302
|
await sleep(POLL_INTERVAL_MS)
|
|
@@ -244,9 +323,24 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
244
323
|
throw new Error(`Timed out waiting for daemon ${pid} to stop.`)
|
|
245
324
|
}
|
|
246
325
|
|
|
247
|
-
const getStatus = async (): Promise<DaemonStatus> => {
|
|
326
|
+
const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
|
|
327
|
+
// Fast path: trust the local filesystem registry. When a motel
|
|
328
|
+
// daemon started on this machine it wrote an entry for its pid
|
|
329
|
+
// + cwd + databasePath; if that entry is still there and the pid
|
|
330
|
+
// is alive, the daemon is almost certainly the one we want to
|
|
331
|
+
// adopt. HTTP health is skipped because the daemon's health
|
|
332
|
+
// endpoint can queue behind heavy OTLP ingest traffic, making
|
|
333
|
+
// the probe unreliable exactly when the daemon is busy.
|
|
334
|
+
const registryStatus = getStatusFromRegistry()
|
|
335
|
+
if (registryStatus) return registryStatus
|
|
336
|
+
|
|
337
|
+
// No local evidence → fall back to HTTP. Covers the edge cases
|
|
338
|
+
// where: a motel daemon is running but was started before this
|
|
339
|
+
// registry-first path shipped; OR the port is held by something
|
|
340
|
+
// entirely unrelated (the mismatch check turns that into a
|
|
341
|
+
// human-readable reason).
|
|
248
342
|
const registry = readRegistryEntry()
|
|
249
|
-
const health = await fetchHealth()
|
|
343
|
+
const health = await fetchHealth(timeoutMs)
|
|
250
344
|
if (!health) {
|
|
251
345
|
return {
|
|
252
346
|
running: false,
|
|
@@ -286,7 +380,11 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
286
380
|
}
|
|
287
381
|
|
|
288
382
|
const ensure = async (): Promise<DaemonStatus> => {
|
|
289
|
-
|
|
383
|
+
// Use the patient timeout for the initial probe — this is the
|
|
384
|
+
// critical "is there already a daemon here?" check. A false
|
|
385
|
+
// negative here drops us into the spawn path and collides with
|
|
386
|
+
// any slow-but-healthy daemon sitting on the port.
|
|
387
|
+
const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
290
388
|
if (existing.managed && existing.running) return existing
|
|
291
389
|
if (existing.service !== null && existing.reason) {
|
|
292
390
|
throw new Error(existing.reason)
|
|
@@ -295,7 +393,11 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
295
393
|
const lock = await acquireStartupLock()
|
|
296
394
|
let spawnedPid: number | null = null
|
|
297
395
|
try {
|
|
298
|
-
|
|
396
|
+
// Same reasoning for the post-lock re-check: another ensure()
|
|
397
|
+
// may have spawned a daemon between our first probe and the
|
|
398
|
+
// lock grant, and its initial health response can be slow
|
|
399
|
+
// while the runtime warms up.
|
|
400
|
+
const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
299
401
|
if (rechecked.managed && rechecked.running) return rechecked
|
|
300
402
|
if (rechecked.service !== null && rechecked.reason) {
|
|
301
403
|
throw new Error(rechecked.reason)
|
|
@@ -371,7 +473,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
371
473
|
}),
|
|
372
474
|
getStatus: Effect.fn("DaemonManager.getStatus")(() =>
|
|
373
475
|
Effect.tryPromise({
|
|
374
|
-
|
|
476
|
+
// Wrapped so Effect.tryPromise only sees the no-arg call
|
|
477
|
+
// signature — the optional timeoutMs parameter is an
|
|
478
|
+
// internal detail used by ensure()'s critical probes.
|
|
479
|
+
try: () => getStatus(),
|
|
375
480
|
catch: mapError,
|
|
376
481
|
}),
|
|
377
482
|
)(),
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test"
|
|
2
|
+
import { AI_FTS_KEYS, isAiSpan } from "./domain.ts"
|
|
3
|
+
|
|
4
|
+
describe("isAiSpan", () => {
|
|
5
|
+
test("returns false for empty tags", () => {
|
|
6
|
+
expect(isAiSpan({})).toBe(false)
|
|
7
|
+
})
|
|
8
|
+
|
|
9
|
+
test("returns false when no AI key is present", () => {
|
|
10
|
+
expect(isAiSpan({
|
|
11
|
+
"service.name": "web",
|
|
12
|
+
"http.method": "GET",
|
|
13
|
+
"db.statement": "SELECT 1",
|
|
14
|
+
})).toBe(false)
|
|
15
|
+
})
|
|
16
|
+
|
|
17
|
+
test("detects Vercel AI SDK keys", () => {
|
|
18
|
+
expect(isAiSpan({ "ai.prompt.messages": "[]" })).toBe(true)
|
|
19
|
+
expect(isAiSpan({ "ai.response.text": "hi" })).toBe(true)
|
|
20
|
+
expect(isAiSpan({ "ai.toolCall.args": "{}" })).toBe(true)
|
|
21
|
+
})
|
|
22
|
+
|
|
23
|
+
test("detects OpenTelemetry gen_ai semconv keys", () => {
|
|
24
|
+
expect(isAiSpan({ "gen_ai.prompt": "foo" })).toBe(true)
|
|
25
|
+
expect(isAiSpan({ "gen_ai.input.messages": "[]" })).toBe(true)
|
|
26
|
+
expect(isAiSpan({ "gen_ai.tool.definitions": "[]" })).toBe(true)
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
test("detects OpenInference keys", () => {
|
|
30
|
+
expect(isAiSpan({ "input.value": "hi" })).toBe(true)
|
|
31
|
+
expect(isAiSpan({ "output.value": "hi" })).toBe(true)
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
test("detects a single AI key among many non-AI keys", () => {
|
|
35
|
+
expect(isAiSpan({
|
|
36
|
+
"service.name": "web",
|
|
37
|
+
"http.method": "POST",
|
|
38
|
+
"http.status_code": "200",
|
|
39
|
+
"ai.model.id": "ignored-not-in-fts-keys",
|
|
40
|
+
"ai.prompt": "tell me a joke",
|
|
41
|
+
})).toBe(true)
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
test("ignores AI-adjacent keys that are not in the FTS set", () => {
|
|
45
|
+
// `ai.model.provider`, `ai.settings.*`, `ai.telemetry.*` carry
|
|
46
|
+
// metadata, not content, so they intentionally aren't part of
|
|
47
|
+
// AI_FTS_KEYS. A span with ONLY those should not be flagged.
|
|
48
|
+
expect(isAiSpan({
|
|
49
|
+
"ai.model.provider": "openai",
|
|
50
|
+
"ai.model.id": "gpt-4",
|
|
51
|
+
"ai.settings.maxRetries": "2",
|
|
52
|
+
})).toBe(false)
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
test("every documented key triggers detection", () => {
|
|
56
|
+
// Guard against a future reshuffle of AI_FTS_KEYS that might
|
|
57
|
+
// drop a key silently — every declared key should round-trip.
|
|
58
|
+
for (const key of AI_FTS_KEYS) {
|
|
59
|
+
expect(isAiSpan({ [key]: "payload" })).toBe(true)
|
|
60
|
+
}
|
|
61
|
+
})
|
|
62
|
+
})
|
package/src/domain.ts
CHANGED
|
@@ -145,14 +145,72 @@ export const AI_ATTR_MAP = {
|
|
|
145
145
|
responseTimestamp: "ai.response.timestamp",
|
|
146
146
|
} as const
|
|
147
147
|
|
|
148
|
-
/**
|
|
149
|
-
|
|
150
|
-
|
|
148
|
+
/**
|
|
149
|
+
* Attribute keys that carry LLM prompt/response content and should be
|
|
150
|
+
* indexed in the span-attribute FTS table. These are the keys emitted by
|
|
151
|
+
* well-known LLM instrumentation conventions:
|
|
152
|
+
*
|
|
153
|
+
* - **Vercel AI SDK** (`ai.*`): rich, SDK-specific attributes captured by
|
|
154
|
+
* `experimental_telemetry` on `generateText` / `streamText` / `generateObject`.
|
|
155
|
+
* - **OpenTelemetry GenAI semantic conventions** (`gen_ai.*`): the
|
|
156
|
+
* cross-vendor standard. The singular `prompt`/`completion` attrs are
|
|
157
|
+
* deprecated in favor of event-based capture but are still emitted by
|
|
158
|
+
* most instrumentations, so we keep them.
|
|
159
|
+
* - **OpenInference** (`input.value` / `output.value`): Arize Phoenix /
|
|
160
|
+
* LangChain-style normalized input/output.
|
|
161
|
+
*
|
|
162
|
+
* Keys here trigger FTS indexing on insert via a trigger in TelemetryStore.
|
|
163
|
+
* Adding a key requires a one-time backfill; removing one leaves orphan
|
|
164
|
+
* FTS entries that get cleaned up on next retention pass.
|
|
165
|
+
*/
|
|
166
|
+
export const AI_FTS_KEYS = [
|
|
167
|
+
// Vercel AI SDK
|
|
151
168
|
"ai.prompt",
|
|
152
|
-
"ai.
|
|
169
|
+
"ai.prompt.messages",
|
|
153
170
|
"ai.prompt.tools",
|
|
171
|
+
"ai.prompt.toolChoice",
|
|
172
|
+
"ai.response.text",
|
|
173
|
+
"ai.response.toolCalls",
|
|
174
|
+
"ai.response.reasoning",
|
|
175
|
+
"ai.response.object",
|
|
176
|
+
"ai.toolCall.args",
|
|
177
|
+
"ai.toolCall.result",
|
|
178
|
+
// OpenTelemetry GenAI semantic conventions
|
|
179
|
+
"gen_ai.prompt",
|
|
180
|
+
"gen_ai.completion",
|
|
181
|
+
"gen_ai.input.messages",
|
|
182
|
+
"gen_ai.output.messages",
|
|
183
|
+
"gen_ai.system_instructions",
|
|
184
|
+
"gen_ai.tool.definitions",
|
|
185
|
+
"gen_ai.tool.message.content",
|
|
186
|
+
// OpenInference (Phoenix, LangChain, etc.)
|
|
187
|
+
"input.value",
|
|
188
|
+
"output.value",
|
|
154
189
|
] as const
|
|
155
190
|
|
|
191
|
+
/**
|
|
192
|
+
* Back-compat alias. The `text` filter on `/api/ai/calls` historically
|
|
193
|
+
* LIKE-searched these four keys; now FTS indexes the broader AI_FTS_KEYS
|
|
194
|
+
* set so the filter transparently covers more content.
|
|
195
|
+
*/
|
|
196
|
+
export const AI_TEXT_SEARCH_KEYS = AI_FTS_KEYS
|
|
197
|
+
|
|
198
|
+
/**
|
|
199
|
+
* True if a span's tags contain any of the AI content keys we track.
|
|
200
|
+
* Used as the single source of truth for "this span has LLM payloads
|
|
201
|
+
* worth a specialized view" — drives the ✦ marker in the waterfall row
|
|
202
|
+
* and picks the chat-flavored renderer when the user drills into the
|
|
203
|
+
* span's detail. Scanning happens once per row during render so this
|
|
204
|
+
* needs to stay O(AI_FTS_KEYS.length) with cheap `in` checks rather
|
|
205
|
+
* than an `Object.keys(...).some(...)` allocation.
|
|
206
|
+
*/
|
|
207
|
+
export const isAiSpan = (tags: Readonly<Record<string, string>>): boolean => {
|
|
208
|
+
for (const key of AI_FTS_KEYS) {
|
|
209
|
+
if (key in tags) return true
|
|
210
|
+
}
|
|
211
|
+
return false
|
|
212
|
+
}
|
|
213
|
+
|
|
156
214
|
const PREVIEW_LENGTH = 200
|
|
157
215
|
|
|
158
216
|
export const truncatePreview = (value: string | null | undefined): string | null => {
|
package/src/httpApi.ts
CHANGED
|
@@ -121,6 +121,9 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
|
|
|
121
121
|
minDurationMs: Schema.optionalKey(Schema.Number).pipe(
|
|
122
122
|
Schema.annotateKey({ description: "Only return traces slower than this threshold (milliseconds)" }),
|
|
123
123
|
),
|
|
124
|
+
aiText: Schema.optionalKey(Schema.String).pipe(
|
|
125
|
+
Schema.annotateKey({ description: "FTS match against AI prompt/response/tool content across all spans in the trace. Tokens are prefix-matched and implicitly AND'd." }),
|
|
126
|
+
),
|
|
124
127
|
lookback: LookbackParam,
|
|
125
128
|
limit: LimitParam,
|
|
126
129
|
cursor: CursorParam,
|
|
@@ -128,7 +131,7 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
|
|
|
128
131
|
success: TraceSummaryList,
|
|
129
132
|
})
|
|
130
133
|
.annotate(OpenApi.Summary, "Search traces with filters")
|
|
131
|
-
.annotate(OpenApi.Description, "Search compact trace summaries with filters. Use /api/traces/{traceId} for full details. Supports cursor pagination
|
|
134
|
+
.annotate(OpenApi.Description, "Search compact trace summaries with filters. Use /api/traces/{traceId} for full details. Supports cursor pagination, attr.<key> filters in the query string, and aiText for full-text search across LLM prompt/response content."),
|
|
132
135
|
|
|
133
136
|
HttpApiEndpoint.get("traceStats", "/api/traces/stats", {
|
|
134
137
|
query: {
|
package/src/localServer.ts
CHANGED
|
@@ -1,18 +1,27 @@
|
|
|
1
1
|
import { promises as fs } from "node:fs"
|
|
2
2
|
import path from "node:path"
|
|
3
|
-
import { Effect, Layer
|
|
4
|
-
import { config, parsePositiveInt
|
|
3
|
+
import { Effect, Layer } from "effect"
|
|
4
|
+
import { config, parsePositiveInt } from "./config.js"
|
|
5
5
|
import { HttpApiBuilder, HttpApiScalar } from "effect/unstable/httpapi"
|
|
6
|
+
import * as HttpMiddleware from "effect/unstable/http/HttpMiddleware"
|
|
6
7
|
import * as HttpRouter from "effect/unstable/http/HttpRouter"
|
|
7
|
-
import * as HttpServer from "effect/unstable/http/HttpServer"
|
|
8
8
|
import * as HttpServerResponse from "effect/unstable/http/HttpServerResponse"
|
|
9
|
+
import * as HttpStaticServer from "effect/unstable/http/HttpStaticServer"
|
|
10
|
+
import * as BunHttpServer from "@effect/platform-bun/BunHttpServer"
|
|
9
11
|
import { MotelHttpApi } from "./httpApi.js"
|
|
10
12
|
import { attributeFiltersFromEntries, attributeContainsFiltersFromEntries, ATTRIBUTE_FILTER_PREFIX, ATTRIBUTE_CONTAINS_PREFIX } from "./queryFilters.js"
|
|
11
|
-
import { MOTEL_SERVICE_ID, MOTEL_VERSION, writeRegistryEntry } from "./registry.js"
|
|
13
|
+
import { MOTEL_SERVICE_ID, MOTEL_VERSION, removeRegistryEntry, writeRegistryEntry } from "./registry.js"
|
|
14
|
+
import { AsyncIngest, AsyncIngestLive } from "./services/AsyncIngest.js"
|
|
12
15
|
import { TelemetryStore, TelemetryStoreLive } from "./services/TelemetryStore.js"
|
|
13
16
|
import type { LogItem, TraceItem, TraceSummaryItem } from "./domain.js"
|
|
14
17
|
import { lifecycleLabel } from "./ui/format.js"
|
|
15
18
|
|
|
19
|
+
// Set by the RegistryLayer acquisition once the Bun socket has bound.
|
|
20
|
+
// Both /api/health and the registry entry read from here so they agree
|
|
21
|
+
// on a single server-start timestamp, and the value reflects actual
|
|
22
|
+
// listen time rather than module-evaluation time.
|
|
23
|
+
let serverStartedAt: string = new Date(0).toISOString()
|
|
24
|
+
|
|
16
25
|
const TRACE_DEFAULT_LIMIT = 20
|
|
17
26
|
const TRACE_MAX_LIMIT = 100
|
|
18
27
|
const TRACE_DEFAULT_LOOKBACK = 60
|
|
@@ -24,28 +33,21 @@ const LOG_MAX_LIMIT = 500
|
|
|
24
33
|
const LOG_DEFAULT_LOOKBACK = 60
|
|
25
34
|
const LOG_MAX_LOOKBACK = 24 * 60
|
|
26
35
|
|
|
27
|
-
let server: ReturnType<typeof Bun.serve> | null = null
|
|
28
|
-
let disposeWebHandler: (() => Promise<void>) | null = null
|
|
29
|
-
let startedAt: string | null = null
|
|
30
|
-
|
|
31
|
-
const resolveBoundUrl = () => {
|
|
32
|
-
if (!server) return config.otel.queryUrl
|
|
33
|
-
const host = server.hostname === "0.0.0.0" || server.hostname === "::" ? "127.0.0.1" : server.hostname
|
|
34
|
-
return `http://${host}:${server.port}`
|
|
35
|
-
}
|
|
36
|
-
|
|
37
36
|
const jsonResponse = (value: unknown, status = 200) => HttpServerResponse.jsonUnsafe(value, { status })
|
|
38
37
|
const textResponse = (value: string) => HttpServerResponse.text(value)
|
|
39
38
|
const htmlResponse = (value: string) => HttpServerResponse.html(value)
|
|
40
39
|
const notFoundResponse = (message = "Not found") => jsonResponse({ error: message }, 404)
|
|
41
40
|
const requestUrl = (request: { readonly url: string }) => new URL(request.url, config.otel.baseUrl)
|
|
42
41
|
const withStore = <A>(f: (store: TelemetryStore["Service"]) => Effect.Effect<A, Error>) => Effect.flatMap(TelemetryStore.asEffect(), f)
|
|
43
|
-
|
|
42
|
+
// Response-building helpers are generic in R so a handler can depend
|
|
43
|
+
// on TelemetryStore (query path) or AsyncIngest (worker-RPC path)
|
|
44
|
+
// without forcing every handler onto the same service surface.
|
|
45
|
+
const respondJson = <A, R>(effect: Effect.Effect<A, unknown, R>) =>
|
|
44
46
|
Effect.match(effect, {
|
|
45
47
|
onFailure: (error) => jsonResponse({ error: error instanceof Error ? error.message : String(error) }, 500),
|
|
46
48
|
onSuccess: (value) => jsonResponse(value),
|
|
47
49
|
})
|
|
48
|
-
const respondRaw = (effect: Effect.Effect<ReturnType<typeof jsonResponse>, unknown,
|
|
50
|
+
const respondRaw = <R>(effect: Effect.Effect<ReturnType<typeof jsonResponse>, unknown, R>) =>
|
|
49
51
|
Effect.match(effect, {
|
|
50
52
|
onFailure: (error) => jsonResponse({ error: error instanceof Error ? error.message : String(error) }, 500),
|
|
51
53
|
onSuccess: (value) => value,
|
|
@@ -284,23 +286,33 @@ const TelemetryGroupLive = HttpApiBuilder.group(
|
|
|
284
286
|
service: MOTEL_SERVICE_ID,
|
|
285
287
|
databasePath: config.otel.databasePath,
|
|
286
288
|
pid: process.pid,
|
|
287
|
-
url:
|
|
289
|
+
url: config.otel.baseUrl,
|
|
288
290
|
workdir: process.cwd(),
|
|
289
|
-
startedAt:
|
|
291
|
+
startedAt: serverStartedAt,
|
|
290
292
|
version: MOTEL_VERSION,
|
|
291
293
|
}),
|
|
292
294
|
)
|
|
295
|
+
// OTLP ingest is routed to the worker thread via AsyncIngest
|
|
296
|
+
// so the main event loop stays free during heavy SQLite writes.
|
|
297
|
+
// Everything else still uses the direct TelemetryStore — reads
|
|
298
|
+
// are fast enough that IPC overhead isn't worth paying.
|
|
293
299
|
.handleRaw("ingestTraces", ({ request }) =>
|
|
294
300
|
respondRaw(
|
|
295
301
|
Effect.flatMap(request.json, (payload) =>
|
|
296
|
-
Effect.map(
|
|
302
|
+
Effect.map(
|
|
303
|
+
Effect.flatMap(AsyncIngest.asEffect(), (ingest) => ingest.ingestTraces({ payload })),
|
|
304
|
+
(result) => jsonResponse(result),
|
|
305
|
+
),
|
|
297
306
|
),
|
|
298
307
|
),
|
|
299
308
|
)
|
|
300
309
|
.handleRaw("ingestLogs", ({ request }) =>
|
|
301
310
|
respondRaw(
|
|
302
311
|
Effect.flatMap(request.json, (payload) =>
|
|
303
|
-
Effect.map(
|
|
312
|
+
Effect.map(
|
|
313
|
+
Effect.flatMap(AsyncIngest.asEffect(), (ingest) => ingest.ingestLogs({ payload })),
|
|
314
|
+
(result) => jsonResponse(result),
|
|
315
|
+
),
|
|
304
316
|
),
|
|
305
317
|
),
|
|
306
318
|
)
|
|
@@ -335,6 +347,7 @@ const TelemetryGroupLive = HttpApiBuilder.group(
|
|
|
335
347
|
status: (url.searchParams.get("status") as "ok" | "error" | null) ?? null,
|
|
336
348
|
minDurationMs: url.searchParams.get("minDurationMs") ? Number.parseFloat(url.searchParams.get("minDurationMs") ?? "") : null,
|
|
337
349
|
attributeFilters,
|
|
350
|
+
aiText: url.searchParams.get("aiText"),
|
|
338
351
|
limit: limit + 1,
|
|
339
352
|
lookbackMinutes,
|
|
340
353
|
cursorStartedAtMs: cursor?.kind === "trace" ? cursor.startedAt : undefined,
|
|
@@ -586,115 +599,93 @@ const TelemetryGroupLive = HttpApiBuilder.group(
|
|
|
586
599
|
),
|
|
587
600
|
)
|
|
588
601
|
|
|
589
|
-
const ApiLive = Layer.provideMerge(
|
|
590
|
-
HttpApiBuilder.layer(MotelHttpApi, { openapiPath: "/openapi.json" }).pipe(
|
|
591
|
-
Layer.provide(TelemetryGroupLive),
|
|
592
|
-
Layer.provide(HttpApiScalar.layer(MotelHttpApi, { scalar: { forceDarkModeState: "dark", showOperationId: true } })),
|
|
593
|
-
Layer.provide(HttpServer.layerServices),
|
|
594
|
-
),
|
|
595
|
-
TelemetryStoreLive,
|
|
596
|
-
)
|
|
597
|
-
|
|
598
602
|
// ---------------------------------------------------------------------------
|
|
599
|
-
//
|
|
603
|
+
// App layer: HTTP router + static SPA + telemetry store
|
|
600
604
|
// ---------------------------------------------------------------------------
|
|
601
605
|
|
|
602
|
-
|
|
603
|
-
//
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
try {
|
|
609
|
-
webUiAvailable = await Bun.file(path.join(WEB_DIST_DIR, "index.html")).exists()
|
|
610
|
-
} catch {
|
|
611
|
-
/* ignore */
|
|
612
|
-
}
|
|
613
|
-
return webUiAvailable
|
|
614
|
-
}
|
|
615
|
-
|
|
616
|
-
/** Routes that must always go through the Effect API handler */
|
|
617
|
-
const isStrictApiRoute = (pathname: string) =>
|
|
618
|
-
pathname.startsWith("/api/") ||
|
|
619
|
-
pathname.startsWith("/v1/") ||
|
|
620
|
-
pathname === "/openapi.json" ||
|
|
621
|
-
pathname === "/docs"
|
|
622
|
-
|
|
623
|
-
const serveWebUi = async (request: Request, apiHandler: (req: Request) => Promise<Response>): Promise<Response> => {
|
|
624
|
-
const url = new URL(request.url)
|
|
625
|
-
const pathname = url.pathname
|
|
626
|
-
|
|
627
|
-
// Strict API routes always go through the Effect handler
|
|
628
|
-
if (isStrictApiRoute(pathname)) return apiHandler(request)
|
|
629
|
-
|
|
630
|
-
// Only serve web UI if built
|
|
631
|
-
if (!(await isWebUiAvailable())) return apiHandler(request)
|
|
606
|
+
// API routes come from the Effect HttpApi definition. Everything under
|
|
607
|
+
// /api/*, /v1/*, /openapi.json, /docs is handled here.
|
|
608
|
+
const ApiLayer = HttpApiBuilder.layer(MotelHttpApi, { openapiPath: "/openapi.json" }).pipe(
|
|
609
|
+
Layer.provide(TelemetryGroupLive),
|
|
610
|
+
Layer.provide(HttpApiScalar.layer(MotelHttpApi, { scalar: { forceDarkModeState: "dark", showOperationId: true } })),
|
|
611
|
+
)
|
|
632
612
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
613
|
+
// Web UI: Vite-built SPA served from web/dist. HttpStaticServer.layer
|
|
614
|
+
// handles GET /*, filesystem lookup under `root`, and SPA fallback to
|
|
615
|
+
// index.html for unknown paths — replacing the hand-rolled serveWebUi
|
|
616
|
+
// wrapper that previously lived inline with Bun.serve. The API routes
|
|
617
|
+
// above take precedence because HttpApi registers specific paths that
|
|
618
|
+
// the router matches before falling through to the /* catch-all.
|
|
619
|
+
const WEB_DIST_DIR = path.resolve(import.meta.dir, "../web/dist")
|
|
620
|
+
const StaticLayer = HttpStaticServer.layer({
|
|
621
|
+
root: WEB_DIST_DIR,
|
|
622
|
+
spa: true,
|
|
623
|
+
})
|
|
640
624
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
625
|
+
// Registry-entry writer as a scoped acquisition. The entry is published
|
|
626
|
+
// after BunHttpServer.layer binds the socket (scope acquisition order)
|
|
627
|
+
// and removed on scope release, so a bind failure never leaves a zombie
|
|
628
|
+
// entry and a graceful shutdown cleans up alongside the server stop —
|
|
629
|
+
// both in the same finalizer chain managed by Layer.launch.
|
|
630
|
+
const RegistryLayer = Layer.effectDiscard(
|
|
631
|
+
Effect.acquireRelease(
|
|
632
|
+
Effect.sync(() => {
|
|
633
|
+
serverStartedAt = new Date().toISOString()
|
|
634
|
+
try {
|
|
635
|
+
writeRegistryEntry({
|
|
636
|
+
pid: process.pid,
|
|
637
|
+
url: config.otel.baseUrl,
|
|
638
|
+
workdir: process.cwd(),
|
|
639
|
+
startedAt: serverStartedAt,
|
|
640
|
+
version: MOTEL_VERSION,
|
|
641
|
+
databasePath: config.otel.databasePath,
|
|
642
|
+
})
|
|
643
|
+
} catch (err) {
|
|
644
|
+
console.warn(`motel: failed to write registry entry: ${(err as Error).message}`)
|
|
645
|
+
}
|
|
646
|
+
}),
|
|
647
|
+
() => Effect.sync(() => removeRegistryEntry(process.pid)),
|
|
648
|
+
),
|
|
649
|
+
)
|
|
646
650
|
|
|
647
651
|
// ---------------------------------------------------------------------------
|
|
648
652
|
// Server lifecycle
|
|
649
653
|
// ---------------------------------------------------------------------------
|
|
650
654
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
655
|
+
/**
|
|
656
|
+
* Launchable server layer. Composes the API + static UI + store + registry,
|
|
657
|
+
* wraps the whole stack in HttpMiddleware.tracer (per-request OTel spans
|
|
658
|
+
* with http.method / url / status / user-agent attributes), and binds the
|
|
659
|
+
* socket via @effect/platform-bun's BunHttpServer. Use from server.ts:
|
|
660
|
+
*
|
|
661
|
+
* await Effect.runPromise(Layer.launch(ServerLive))
|
|
662
|
+
*
|
|
663
|
+
* Socket lifecycle, graceful shutdown, and error propagation are managed
|
|
664
|
+
* by the BunHttpServer layer's Scope — no hand-rolled start/stop plumbing.
|
|
665
|
+
* `reusePort: true` is retained as defense-in-depth against TIME_WAIT
|
|
666
|
+
* rebind conflicts (the registry-based adoption path in daemon.ts is the
|
|
667
|
+
* primary protection, but this covers a raw `bun src/server.ts` restart).
|
|
668
|
+
*/
|
|
669
|
+
export const ServerLive = HttpRouter.serve(
|
|
670
|
+
Layer.mergeAll(ApiLayer, StaticLayer, RegistryLayer),
|
|
671
|
+
{ middleware: HttpMiddleware.tracer },
|
|
672
|
+
).pipe(
|
|
673
|
+
// OTLP ingest paths are NOT traced by the middleware, otherwise
|
|
674
|
+
// MOTEL_OTEL_ENABLED creates a feedback loop: every outbound span
|
|
675
|
+
// POSTs to /v1/traces, the tracer emits a span for that POST, which
|
|
676
|
+
// POSTs again on the next flush. This also shaves ~1 KB of header
|
|
677
|
+
// attributes off every ingest request that would have been written
|
|
678
|
+
// to the spans table as noise.
|
|
679
|
+
Layer.provide(HttpMiddleware.layerTracerDisabledForUrls(["/v1/traces", "/v1/logs"])),
|
|
680
|
+
// AsyncIngest spawns the telemetry worker — keeps the main-thread
|
|
681
|
+
// event loop free during heavy SQLite writes. Provided alongside
|
|
682
|
+
// the direct TelemetryStore so query handlers can still resolve
|
|
683
|
+
// their dependency directly.
|
|
684
|
+
Layer.provideMerge(AsyncIngestLive),
|
|
685
|
+
Layer.provideMerge(TelemetryStoreLive),
|
|
686
|
+
Layer.provideMerge(BunHttpServer.layer({
|
|
657
687
|
port: config.otel.port,
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
startedAt = new Date().toISOString()
|
|
663
|
-
try {
|
|
664
|
-
writeRegistryEntry({
|
|
665
|
-
pid: process.pid,
|
|
666
|
-
url: resolveBoundUrl(),
|
|
667
|
-
workdir: process.cwd(),
|
|
668
|
-
startedAt,
|
|
669
|
-
version: MOTEL_VERSION,
|
|
670
|
-
})
|
|
671
|
-
} catch (err) {
|
|
672
|
-
console.warn(`motel: failed to write registry entry: ${(err as Error).message}`)
|
|
673
|
-
}
|
|
674
|
-
return server
|
|
675
|
-
}
|
|
676
|
-
|
|
677
|
-
export const ensureLocalServer = async () => {
|
|
678
|
-
if (server) return server
|
|
679
|
-
try {
|
|
680
|
-
const response = await fetch(resolveOtelUrl("/api/health"), { signal: AbortSignal.timeout(250) })
|
|
681
|
-
if (response.ok) return null
|
|
682
|
-
} catch {
|
|
683
|
-
// Start local server below.
|
|
684
|
-
}
|
|
685
|
-
return await startLocalServer()
|
|
686
|
-
}
|
|
687
|
-
|
|
688
|
-
export const stopLocalServer = () => {
|
|
689
|
-
server?.stop(true)
|
|
690
|
-
server = null
|
|
691
|
-
startedAt = null
|
|
692
|
-
|
|
693
|
-
const dispose = disposeWebHandler
|
|
694
|
-
disposeWebHandler = null
|
|
695
|
-
if (dispose) {
|
|
696
|
-
void dispose().catch((err) => {
|
|
697
|
-
console.warn(`motel: failed to dispose web handler: ${(err as Error).message}`)
|
|
698
|
-
})
|
|
699
|
-
}
|
|
700
|
-
}
|
|
688
|
+
hostname: config.otel.host,
|
|
689
|
+
reusePort: true,
|
|
690
|
+
})),
|
|
691
|
+
)
|