@kitlangton/motel 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/AGENTS.md +11 -1
  2. package/package.json +5 -3
  3. package/src/App.tsx +239 -59
  4. package/src/daemon.test.ts +144 -7
  5. package/src/daemon.ts +113 -8
  6. package/src/domain.test.ts +62 -0
  7. package/src/domain.ts +62 -4
  8. package/src/httpApi.ts +4 -1
  9. package/src/localServer.ts +112 -121
  10. package/src/mcp.ts +172 -0
  11. package/src/motelClient.ts +166 -14
  12. package/src/registry.ts +26 -23
  13. package/src/runtime.ts +8 -2
  14. package/src/server.ts +10 -9
  15. package/src/services/AsyncIngest.ts +52 -0
  16. package/src/services/TelemetryStore.ts +285 -27
  17. package/src/services/TraceQueryService.ts +4 -2
  18. package/src/services/ingestRpc.ts +41 -0
  19. package/src/services/telemetryWorker.ts +62 -0
  20. package/src/storybook/aiChatStory.tsx +243 -0
  21. package/src/storybook/fixtures/errorState.ts +44 -0
  22. package/src/storybook/fixtures/imagePaste.ts +34 -0
  23. package/src/storybook/fixtures/index.ts +62 -0
  24. package/src/storybook/fixtures/kitchenSink.ts +148 -0
  25. package/src/storybook/fixtures/rawPrompt.ts +15 -0
  26. package/src/storybook/fixtures/short.ts +27 -0
  27. package/src/storybook/fixtures/toolHeavy.ts +65 -0
  28. package/src/telemetry.test.ts +61 -0
  29. package/src/ui/AiChatView.tsx +292 -0
  30. package/src/ui/SpanContentView.tsx +181 -0
  31. package/src/ui/SpanDetail.tsx +98 -17
  32. package/src/ui/TraceDetailsPane.tsx +35 -3
  33. package/src/ui/Waterfall.tsx +94 -167
  34. package/src/ui/aiChatModel.test.ts +347 -0
  35. package/src/ui/aiChatModel.ts +736 -0
  36. package/src/ui/aiState.ts +71 -0
  37. package/src/ui/app/TraceWorkspace.tsx +295 -120
  38. package/src/ui/app/useAppLayout.ts +14 -11
  39. package/src/ui/app/useTraceScreenData.ts +191 -35
  40. package/src/ui/atoms.ts +131 -0
  41. package/src/ui/filterParser.test.ts +56 -0
  42. package/src/ui/filterParser.ts +45 -0
  43. package/src/ui/loaders.ts +120 -0
  44. package/src/ui/persistence.ts +41 -0
  45. package/src/ui/primitives.tsx +47 -21
  46. package/src/ui/state.ts +4 -169
  47. package/src/ui/useAttrFilterPicker.ts +63 -23
  48. package/src/ui/useKeyboardNav.ts +576 -300
  49. package/src/ui/waterfallFilter.test.ts +84 -0
  50. package/src/ui/waterfallFilter.ts +59 -0
  51. package/src/ui/waterfallModel.ts +130 -0
  52. package/src/ui/waterfallNav.test.ts +17 -1
  53. package/src/ui/waterfallNav.ts +1 -1
  54. package/web/dist/assets/{index-DKinj-OE.js → index-DnyVo03x.js} +1 -1
  55. package/web/dist/index.html +1 -1
package/src/daemon.ts CHANGED
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
2
2
  import { promises as fsp } from "node:fs"
3
3
  import * as path from "node:path"
4
4
  import { Effect } from "effect"
5
- import { listAliveEntries, MOTEL_SERVICE_ID, type RegistryEntry, isAlive } from "./registry.js"
5
+ import { isAlive, listAliveEntries, MOTEL_SERVICE_ID, type RegistryEntry } from "./registry.js"
6
6
 
7
7
  const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
8
8
  const DEFAULT_RUNTIME_DIR = path.join(DEFAULT_REPO_ROOT, ".motel-data")
@@ -13,6 +13,18 @@ const START_TIMEOUT_MS = 15_000
13
13
  const STOP_TIMEOUT_MS = 10_000
14
14
  const LOCK_TIMEOUT_MS = 10_000
15
15
  const POLL_INTERVAL_MS = 150
16
+ /** Fast probe used inside the waitForHealthy poll loop — we call it
17
+ * every POLL_INTERVAL_MS, so a generous budget would stall the loop. */
18
+ const HEALTH_FAST_TIMEOUT_MS = 750
19
+ /** Patient probe used on critical paths: the first getStatus() call
20
+ * in ensure(), and the final pre-throw check after a spawned child
21
+ * dies. A real daemon with a busy SQLite writer (FTS backfill, big
22
+ * DB) can easily take 1-2s to answer /api/health — if we declare
23
+ * the port empty at 750ms we'll spawn a duplicate and collide with
24
+ * EADDRINUSE. 3s is long enough to tolerate a slow healthy daemon
25
+ * and short enough that a truly-down daemon is still detected
26
+ * before START_TIMEOUT_MS fires. */
27
+ const HEALTH_PATIENT_TIMEOUT_MS = 3_000
16
28
 
17
29
  type HealthShape = {
18
30
  readonly ok: boolean
@@ -134,9 +146,9 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
134
146
  const config = resolveConfig(options)
135
147
  const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
136
148
 
137
- const fetchHealth = async (): Promise<HealthShape | null> => {
149
+ const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
138
150
  try {
139
- const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(750) })
151
+ const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
140
152
  if (!response.ok) return null
141
153
  return await response.json() as HealthShape
142
154
  } catch {
@@ -157,6 +169,62 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
157
169
  return null
158
170
  }
159
171
 
172
+ /**
173
+ * Mismatch check against a registry entry — mirrors describeManagedMismatch
174
+ * but drives off the registry file instead of an HTTP health response.
175
+ * Used on the fast path in getStatus so warm-start doesn't need to wait
176
+ * on an HTTP round-trip that may queue behind heavy OTLP ingest.
177
+ *
178
+ * The service-id check is implicit: any entry living in the motel
179
+ * registry dir is by construction a motel daemon. databasePath is
180
+ * optional for back-compat with entries written by older builds;
181
+ * when absent we skip the DB check rather than refusing to adopt.
182
+ */
183
+ const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
184
+ if (!cwdMatches(entry.workdir)) {
185
+ return `Port ${config.port} is serving motel for ${entry.workdir}, not ${process.cwd()}.`
186
+ }
187
+ if (entry.databasePath && entry.databasePath !== config.databasePath) {
188
+ return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
189
+ }
190
+ return null
191
+ }
192
+
193
+ /**
194
+ * Build a DaemonStatus from a live registry entry. Returns null when
195
+ * there's no entry for our cwd, the registered pid isn't running, or
196
+ * the entry is for a differently-configured daemon (different port).
197
+ * This is the fast path: no HTTP, no event-loop round-trip, just a
198
+ * directory read and a process.kill(pid, 0) liveness probe.
199
+ */
200
+ const getStatusFromRegistry = (): DaemonStatus | null => {
201
+ const entry = readRegistryEntry()
202
+ if (!entry) return null
203
+ // Port discriminator: a motel registry shared across several
204
+ // daemons (e.g., user running two instances on different
205
+ // ports from the same workdir, or a test harness on a random
206
+ // port) would otherwise have us adopt an unrelated daemon.
207
+ // URL match is a fast, unambiguous identity check.
208
+ if (entry.url !== config.baseUrl) return null
209
+ const mismatch = describeRegistryMismatch(entry)
210
+ return {
211
+ running: mismatch === null,
212
+ managed: mismatch === null,
213
+ service: MOTEL_SERVICE_ID,
214
+ pid: entry.pid,
215
+ url: entry.url,
216
+ databasePath: entry.databasePath ?? config.databasePath,
217
+ workdir: entry.workdir,
218
+ startedAt: entry.startedAt,
219
+ version: entry.version,
220
+ sameWorkdir: cwdMatches(entry.workdir),
221
+ reason: mismatch,
222
+ logPath: config.logPath,
223
+ lockPath: config.lockPath,
224
+ registryPid: entry.pid,
225
+ }
226
+ }
227
+
160
228
  const readLock = async (): Promise<LockShape | null> => {
161
229
  try {
162
230
  const raw = await fsp.readFile(config.lockPath, "utf8")
@@ -218,6 +286,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
218
286
  throw new Error(mismatch)
219
287
  }
220
288
  if (!isAlive(pid)) {
289
+ // The spawned child is gone. Before declaring failure,
290
+ // do one patient probe: the child may have died from
291
+ // EADDRINUSE because another healthy motel is alive on
292
+ // the port but was answering /api/health too slowly for
293
+ // our fast poll. If that's the case, adopt it.
294
+ const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
295
+ if (patient) {
296
+ const mismatch = describeManagedMismatch(patient)
297
+ if (!mismatch) return patient
298
+ throw new Error(mismatch)
299
+ }
221
300
  throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
222
301
  }
223
302
  await sleep(POLL_INTERVAL_MS)
@@ -244,9 +323,24 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
244
323
  throw new Error(`Timed out waiting for daemon ${pid} to stop.`)
245
324
  }
246
325
 
247
- const getStatus = async (): Promise<DaemonStatus> => {
326
+ const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
327
+ // Fast path: trust the local filesystem registry. When a motel
328
+ // daemon started on this machine it wrote an entry for its pid
329
+ // + cwd + databasePath; if that entry is still there and the pid
330
+ // is alive, the daemon is almost certainly the one we want to
331
+ // adopt. HTTP health is skipped because the daemon's health
332
+ // endpoint can queue behind heavy OTLP ingest traffic, making
333
+ // the probe unreliable exactly when the daemon is busy.
334
+ const registryStatus = getStatusFromRegistry()
335
+ if (registryStatus) return registryStatus
336
+
337
+ // No local evidence → fall back to HTTP. Covers the edge cases
338
+ // where: a motel daemon is running but was started before this
339
+ // registry-first path shipped; OR the port is held by something
340
+ // entirely unrelated (the mismatch check turns that into a
341
+ // human-readable reason).
248
342
  const registry = readRegistryEntry()
249
- const health = await fetchHealth()
343
+ const health = await fetchHealth(timeoutMs)
250
344
  if (!health) {
251
345
  return {
252
346
  running: false,
@@ -286,7 +380,11 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
286
380
  }
287
381
 
288
382
  const ensure = async (): Promise<DaemonStatus> => {
289
- const existing = await getStatus()
383
+ // Use the patient timeout for the initial probe — this is the
384
+ // critical "is there already a daemon here?" check. A false
385
+ // negative here drops us into the spawn path and collides with
386
+ // any slow-but-healthy daemon sitting on the port.
387
+ const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
290
388
  if (existing.managed && existing.running) return existing
291
389
  if (existing.service !== null && existing.reason) {
292
390
  throw new Error(existing.reason)
@@ -295,7 +393,11 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
295
393
  const lock = await acquireStartupLock()
296
394
  let spawnedPid: number | null = null
297
395
  try {
298
- const rechecked = await getStatus()
396
+ // Same reasoning for the post-lock re-check: another ensure()
397
+ // may have spawned a daemon between our first probe and the
398
+ // lock grant, and its initial health response can be slow
399
+ // while the runtime warms up.
400
+ const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
299
401
  if (rechecked.managed && rechecked.running) return rechecked
300
402
  if (rechecked.service !== null && rechecked.reason) {
301
403
  throw new Error(rechecked.reason)
@@ -371,7 +473,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
371
473
  }),
372
474
  getStatus: Effect.fn("DaemonManager.getStatus")(() =>
373
475
  Effect.tryPromise({
374
- try: getStatus,
476
+ // Wrapped so Effect.tryPromise only sees the no-arg call
477
+ // signature — the optional timeoutMs parameter is an
478
+ // internal detail used by ensure()'s critical probes.
479
+ try: () => getStatus(),
375
480
  catch: mapError,
376
481
  }),
377
482
  )(),
@@ -0,0 +1,62 @@
1
+ import { describe, expect, test } from "bun:test"
2
+ import { AI_FTS_KEYS, isAiSpan } from "./domain.ts"
3
+
4
+ describe("isAiSpan", () => {
5
+ test("returns false for empty tags", () => {
6
+ expect(isAiSpan({})).toBe(false)
7
+ })
8
+
9
+ test("returns false when no AI key is present", () => {
10
+ expect(isAiSpan({
11
+ "service.name": "web",
12
+ "http.method": "GET",
13
+ "db.statement": "SELECT 1",
14
+ })).toBe(false)
15
+ })
16
+
17
+ test("detects Vercel AI SDK keys", () => {
18
+ expect(isAiSpan({ "ai.prompt.messages": "[]" })).toBe(true)
19
+ expect(isAiSpan({ "ai.response.text": "hi" })).toBe(true)
20
+ expect(isAiSpan({ "ai.toolCall.args": "{}" })).toBe(true)
21
+ })
22
+
23
+ test("detects OpenTelemetry gen_ai semconv keys", () => {
24
+ expect(isAiSpan({ "gen_ai.prompt": "foo" })).toBe(true)
25
+ expect(isAiSpan({ "gen_ai.input.messages": "[]" })).toBe(true)
26
+ expect(isAiSpan({ "gen_ai.tool.definitions": "[]" })).toBe(true)
27
+ })
28
+
29
+ test("detects OpenInference keys", () => {
30
+ expect(isAiSpan({ "input.value": "hi" })).toBe(true)
31
+ expect(isAiSpan({ "output.value": "hi" })).toBe(true)
32
+ })
33
+
34
+ test("detects a single AI key among many non-AI keys", () => {
35
+ expect(isAiSpan({
36
+ "service.name": "web",
37
+ "http.method": "POST",
38
+ "http.status_code": "200",
39
+ "ai.model.id": "ignored-not-in-fts-keys",
40
+ "ai.prompt": "tell me a joke",
41
+ })).toBe(true)
42
+ })
43
+
44
+ test("ignores AI-adjacent keys that are not in the FTS set", () => {
45
+ // `ai.model.*`, `ai.settings.*`, `ai.telemetry.*` carry
46
+ // metadata, not content, so they intentionally aren't part of
47
+ // AI_FTS_KEYS. A span with ONLY those should not be flagged.
48
+ expect(isAiSpan({
49
+ "ai.model.provider": "openai",
50
+ "ai.model.id": "gpt-4",
51
+ "ai.settings.maxRetries": "2",
52
+ })).toBe(false)
53
+ })
54
+
55
+ test("every documented key triggers detection", () => {
56
+ // Guard against a future reshuffle of AI_FTS_KEYS that might
57
+ // drop a key silently — every declared key should round-trip.
58
+ for (const key of AI_FTS_KEYS) {
59
+ expect(isAiSpan({ [key]: "payload" })).toBe(true)
60
+ }
61
+ })
62
+ })
package/src/domain.ts CHANGED
@@ -145,14 +145,72 @@ export const AI_ATTR_MAP = {
145
145
  responseTimestamp: "ai.response.timestamp",
146
146
  } as const
147
147
 
148
- /** Attribute keys to search across when using the `text` filter */
149
- export const AI_TEXT_SEARCH_KEYS = [
150
- "ai.prompt.messages",
148
+ /**
149
+ * Attribute keys that carry LLM prompt/response content and should be
150
+ * indexed in the span-attribute FTS table. These are the keys emitted by
151
+ * well-known LLM instrumentation conventions:
152
+ *
153
+ * - **Vercel AI SDK** (`ai.*`): rich, SDK-specific attributes captured by
154
+ * `experimental_telemetry` on `generateText` / `streamText` / `generateObject`.
155
+ * - **OpenTelemetry GenAI semantic conventions** (`gen_ai.*`): the
156
+ * cross-vendor standard. The singular `prompt`/`completion` attrs are
157
+ * deprecated in favor of event-based capture but are still emitted by
158
+ * most instrumentations, so we keep them.
159
+ * - **OpenInference** (`input.value` / `output.value`): Arize Phoenix /
160
+ * LangChain-style normalized input/output.
161
+ *
162
+ * Keys here trigger FTS indexing on insert via a trigger in TelemetryStore.
163
+ * Adding a key requires a one-time backfill; removing one leaves orphan
164
+ * FTS entries that get cleaned up on next retention pass.
165
+ */
166
+ export const AI_FTS_KEYS = [
167
+ // Vercel AI SDK
151
168
  "ai.prompt",
152
- "ai.response.text",
169
+ "ai.prompt.messages",
153
170
  "ai.prompt.tools",
171
+ "ai.prompt.toolChoice",
172
+ "ai.response.text",
173
+ "ai.response.toolCalls",
174
+ "ai.response.reasoning",
175
+ "ai.response.object",
176
+ "ai.toolCall.args",
177
+ "ai.toolCall.result",
178
+ // OpenTelemetry GenAI semantic conventions
179
+ "gen_ai.prompt",
180
+ "gen_ai.completion",
181
+ "gen_ai.input.messages",
182
+ "gen_ai.output.messages",
183
+ "gen_ai.system_instructions",
184
+ "gen_ai.tool.definitions",
185
+ "gen_ai.tool.message.content",
186
+ // OpenInference (Phoenix, LangChain, etc.)
187
+ "input.value",
188
+ "output.value",
154
189
  ] as const
155
190
 
191
+ /**
192
+ * Back-compat alias. The `text` filter on `/api/ai/calls` historically
193
+ * LIKE-searched these four keys; now FTS indexes the broader AI_FTS_KEYS
194
+ * set so the filter transparently covers more content.
195
+ */
196
+ export const AI_TEXT_SEARCH_KEYS = AI_FTS_KEYS
197
+
198
+ /**
199
+ * True if a span's tags contain any of the AI content keys we track.
200
+ * Used as the single source of truth for "this span has LLM payloads
201
+ * worth a specialized view" — drives the ✦ marker in the waterfall row
202
+ * and picks the chat-flavored renderer when the user drills into the
203
+ * span's detail. Scanning happens once per row during render so this
204
+ * needs to stay O(AI_FTS_KEYS.length) with cheap `in` checks rather
205
+ * than an `Object.keys(...).some(...)` allocation.
206
+ */
207
+ export const isAiSpan = (tags: Readonly<Record<string, string>>): boolean => {
208
+ for (const key of AI_FTS_KEYS) {
209
+ if (key in tags) return true
210
+ }
211
+ return false
212
+ }
213
+
156
214
  const PREVIEW_LENGTH = 200
157
215
 
158
216
  export const truncatePreview = (value: string | null | undefined): string | null => {
package/src/httpApi.ts CHANGED
@@ -121,6 +121,9 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
121
121
  minDurationMs: Schema.optionalKey(Schema.Number).pipe(
122
122
  Schema.annotateKey({ description: "Only return traces slower than this threshold (milliseconds)" }),
123
123
  ),
124
+ aiText: Schema.optionalKey(Schema.String).pipe(
125
+ Schema.annotateKey({ description: "FTS match against AI prompt/response/tool content across all spans in the trace. Tokens are prefix-matched and implicitly AND'd." }),
126
+ ),
124
127
  lookback: LookbackParam,
125
128
  limit: LimitParam,
126
129
  cursor: CursorParam,
@@ -128,7 +131,7 @@ export const MotelHttpApi = HttpApi.make("MotelTelemetry")
128
131
  success: TraceSummaryList,
129
132
  })
130
133
  .annotate(OpenApi.Summary, "Search traces with filters")
131
- .annotate(OpenApi.Description, "Search compact trace summaries with filters. Use /api/traces/{traceId} for full details. Supports cursor pagination and attr.<key> filters in the query string."),
134
+ .annotate(OpenApi.Description, "Search compact trace summaries with filters. Use /api/traces/{traceId} for full details. Supports cursor pagination, attr.<key> filters in the query string, and aiText for full-text search across LLM prompt/response content."),
132
135
 
133
136
  HttpApiEndpoint.get("traceStats", "/api/traces/stats", {
134
137
  query: {
@@ -1,18 +1,27 @@
1
1
  import { promises as fs } from "node:fs"
2
2
  import path from "node:path"
3
- import { Effect, Layer, Context } from "effect"
4
- import { config, parsePositiveInt, resolveOtelUrl } from "./config.js"
3
+ import { Effect, Layer } from "effect"
4
+ import { config, parsePositiveInt } from "./config.js"
5
5
  import { HttpApiBuilder, HttpApiScalar } from "effect/unstable/httpapi"
6
+ import * as HttpMiddleware from "effect/unstable/http/HttpMiddleware"
6
7
  import * as HttpRouter from "effect/unstable/http/HttpRouter"
7
- import * as HttpServer from "effect/unstable/http/HttpServer"
8
8
  import * as HttpServerResponse from "effect/unstable/http/HttpServerResponse"
9
+ import * as HttpStaticServer from "effect/unstable/http/HttpStaticServer"
10
+ import * as BunHttpServer from "@effect/platform-bun/BunHttpServer"
9
11
  import { MotelHttpApi } from "./httpApi.js"
10
12
  import { attributeFiltersFromEntries, attributeContainsFiltersFromEntries, ATTRIBUTE_FILTER_PREFIX, ATTRIBUTE_CONTAINS_PREFIX } from "./queryFilters.js"
11
- import { MOTEL_SERVICE_ID, MOTEL_VERSION, writeRegistryEntry } from "./registry.js"
13
+ import { MOTEL_SERVICE_ID, MOTEL_VERSION, removeRegistryEntry, writeRegistryEntry } from "./registry.js"
14
+ import { AsyncIngest, AsyncIngestLive } from "./services/AsyncIngest.js"
12
15
  import { TelemetryStore, TelemetryStoreLive } from "./services/TelemetryStore.js"
13
16
  import type { LogItem, TraceItem, TraceSummaryItem } from "./domain.js"
14
17
  import { lifecycleLabel } from "./ui/format.js"
15
18
 
19
+ // Set by the RegistryLayer acquisition once the Bun socket has bound.
20
+ // Both /api/health and the registry entry read from here so they agree
21
+ // on a single server-start timestamp, and the value reflects actual
22
+ // listen time rather than module-evaluation time.
23
+ let serverStartedAt: string = new Date(0).toISOString()
24
+
16
25
  const TRACE_DEFAULT_LIMIT = 20
17
26
  const TRACE_MAX_LIMIT = 100
18
27
  const TRACE_DEFAULT_LOOKBACK = 60
@@ -24,28 +33,21 @@ const LOG_MAX_LIMIT = 500
24
33
  const LOG_DEFAULT_LOOKBACK = 60
25
34
  const LOG_MAX_LOOKBACK = 24 * 60
26
35
 
27
- let server: ReturnType<typeof Bun.serve> | null = null
28
- let disposeWebHandler: (() => Promise<void>) | null = null
29
- let startedAt: string | null = null
30
-
31
- const resolveBoundUrl = () => {
32
- if (!server) return config.otel.queryUrl
33
- const host = server.hostname === "0.0.0.0" || server.hostname === "::" ? "127.0.0.1" : server.hostname
34
- return `http://${host}:${server.port}`
35
- }
36
-
37
36
  const jsonResponse = (value: unknown, status = 200) => HttpServerResponse.jsonUnsafe(value, { status })
38
37
  const textResponse = (value: string) => HttpServerResponse.text(value)
39
38
  const htmlResponse = (value: string) => HttpServerResponse.html(value)
40
39
  const notFoundResponse = (message = "Not found") => jsonResponse({ error: message }, 404)
41
40
  const requestUrl = (request: { readonly url: string }) => new URL(request.url, config.otel.baseUrl)
42
41
  const withStore = <A>(f: (store: TelemetryStore["Service"]) => Effect.Effect<A, Error>) => Effect.flatMap(TelemetryStore.asEffect(), f)
43
- const respondJson = <A>(effect: Effect.Effect<A, unknown, TelemetryStore>) =>
42
+ // Response-building helpers are generic in R so a handler can depend
43
+ // on TelemetryStore (query path) or AsyncIngest (worker-RPC path)
44
+ // without forcing every handler onto the same service surface.
45
+ const respondJson = <A, R>(effect: Effect.Effect<A, unknown, R>) =>
44
46
  Effect.match(effect, {
45
47
  onFailure: (error) => jsonResponse({ error: error instanceof Error ? error.message : String(error) }, 500),
46
48
  onSuccess: (value) => jsonResponse(value),
47
49
  })
48
- const respondRaw = (effect: Effect.Effect<ReturnType<typeof jsonResponse>, unknown, TelemetryStore>) =>
50
+ const respondRaw = <R>(effect: Effect.Effect<ReturnType<typeof jsonResponse>, unknown, R>) =>
49
51
  Effect.match(effect, {
50
52
  onFailure: (error) => jsonResponse({ error: error instanceof Error ? error.message : String(error) }, 500),
51
53
  onSuccess: (value) => value,
@@ -284,23 +286,33 @@ const TelemetryGroupLive = HttpApiBuilder.group(
284
286
  service: MOTEL_SERVICE_ID,
285
287
  databasePath: config.otel.databasePath,
286
288
  pid: process.pid,
287
- url: resolveBoundUrl(),
289
+ url: config.otel.baseUrl,
288
290
  workdir: process.cwd(),
289
- startedAt: startedAt ?? new Date(0).toISOString(),
291
+ startedAt: serverStartedAt,
290
292
  version: MOTEL_VERSION,
291
293
  }),
292
294
  )
295
+ // OTLP ingest is routed to the worker thread via AsyncIngest
296
+ // so the main event loop stays free during heavy SQLite writes.
297
+ // Everything else still uses the direct TelemetryStore — reads
298
+ // are fast enough that IPC overhead isn't worth paying.
293
299
  .handleRaw("ingestTraces", ({ request }) =>
294
300
  respondRaw(
295
301
  Effect.flatMap(request.json, (payload) =>
296
- Effect.map(withStore((store) => store.ingestTraces(payload as any)), (result) => jsonResponse(result)),
302
+ Effect.map(
303
+ Effect.flatMap(AsyncIngest.asEffect(), (ingest) => ingest.ingestTraces({ payload })),
304
+ (result) => jsonResponse(result),
305
+ ),
297
306
  ),
298
307
  ),
299
308
  )
300
309
  .handleRaw("ingestLogs", ({ request }) =>
301
310
  respondRaw(
302
311
  Effect.flatMap(request.json, (payload) =>
303
- Effect.map(withStore((store) => store.ingestLogs(payload as any)), (result) => jsonResponse(result)),
312
+ Effect.map(
313
+ Effect.flatMap(AsyncIngest.asEffect(), (ingest) => ingest.ingestLogs({ payload })),
314
+ (result) => jsonResponse(result),
315
+ ),
304
316
  ),
305
317
  ),
306
318
  )
@@ -335,6 +347,7 @@ const TelemetryGroupLive = HttpApiBuilder.group(
335
347
  status: (url.searchParams.get("status") as "ok" | "error" | null) ?? null,
336
348
  minDurationMs: url.searchParams.get("minDurationMs") ? Number.parseFloat(url.searchParams.get("minDurationMs") ?? "") : null,
337
349
  attributeFilters,
350
+ aiText: url.searchParams.get("aiText"),
338
351
  limit: limit + 1,
339
352
  lookbackMinutes,
340
353
  cursorStartedAtMs: cursor?.kind === "trace" ? cursor.startedAt : undefined,
@@ -586,115 +599,93 @@ const TelemetryGroupLive = HttpApiBuilder.group(
586
599
  ),
587
600
  )
588
601
 
589
- const ApiLive = Layer.provideMerge(
590
- HttpApiBuilder.layer(MotelHttpApi, { openapiPath: "/openapi.json" }).pipe(
591
- Layer.provide(TelemetryGroupLive),
592
- Layer.provide(HttpApiScalar.layer(MotelHttpApi, { scalar: { forceDarkModeState: "dark", showOperationId: true } })),
593
- Layer.provide(HttpServer.layerServices),
594
- ),
595
- TelemetryStoreLive,
596
- )
597
-
598
602
  // ---------------------------------------------------------------------------
599
- // Static file serving for the web UI
603
+ // App layer: HTTP router + static SPA + telemetry store
600
604
  // ---------------------------------------------------------------------------
601
605
 
602
- const WEB_DIST_DIR = path.resolve(import.meta.dir, "../web/dist")
603
- // Only cache `true`; a `false` result is rechecked so a later `web:build` is picked up
604
- let webUiAvailable = false
605
-
606
- const isWebUiAvailable = async (): Promise<boolean> => {
607
- if (webUiAvailable) return true
608
- try {
609
- webUiAvailable = await Bun.file(path.join(WEB_DIST_DIR, "index.html")).exists()
610
- } catch {
611
- /* ignore */
612
- }
613
- return webUiAvailable
614
- }
615
-
616
- /** Routes that must always go through the Effect API handler */
617
- const isStrictApiRoute = (pathname: string) =>
618
- pathname.startsWith("/api/") ||
619
- pathname.startsWith("/v1/") ||
620
- pathname === "/openapi.json" ||
621
- pathname === "/docs"
622
-
623
- const serveWebUi = async (request: Request, apiHandler: (req: Request) => Promise<Response>): Promise<Response> => {
624
- const url = new URL(request.url)
625
- const pathname = url.pathname
626
-
627
- // Strict API routes always go through the Effect handler
628
- if (isStrictApiRoute(pathname)) return apiHandler(request)
629
-
630
- // Only serve web UI if built
631
- if (!(await isWebUiAvailable())) return apiHandler(request)
606
+ // API routes come from the Effect HttpApi definition. Everything under
607
+ // /api/*, /v1/*, /openapi.json, /docs is handled here.
608
+ const ApiLayer = HttpApiBuilder.layer(MotelHttpApi, { openapiPath: "/openapi.json" }).pipe(
609
+ Layer.provide(TelemetryGroupLive),
610
+ Layer.provide(HttpApiScalar.layer(MotelHttpApi, { scalar: { forceDarkModeState: "dark", showOperationId: true } })),
611
+ )
632
612
 
633
- // Try to serve a static file from web/dist/ (hashed assets, favicon, etc.)
634
- if (pathname.startsWith("/assets/") || (pathname !== "/" && pathname.includes("."))) {
635
- const resolved = path.resolve(WEB_DIST_DIR, pathname.slice(1))
636
- if (resolved.startsWith(WEB_DIST_DIR) && await Bun.file(resolved).exists()) {
637
- return new Response(Bun.file(resolved))
638
- }
639
- }
613
+ // Web UI: Vite-built SPA served from web/dist. HttpStaticServer.layer
614
+ // handles GET /*, filesystem lookup under `root`, and SPA fallback to
615
+ // index.html for unknown paths — replacing the hand-rolled serveWebUi
616
+ // wrapper that previously lived inline with Bun.serve. The API routes
617
+ // above take precedence because HttpApi registers specific paths that
618
+ // the router matches before falling through to the /* catch-all.
619
+ const WEB_DIST_DIR = path.resolve(import.meta.dir, "../web/dist")
620
+ const StaticLayer = HttpStaticServer.layer({
621
+ root: WEB_DIST_DIR,
622
+ spa: true,
623
+ })
640
624
 
641
- // SPA fallback: serve index.html for / and all client routes
642
- return new Response(Bun.file(path.join(WEB_DIST_DIR, "index.html")), {
643
- headers: { "content-type": "text/html; charset=utf-8" },
644
- })
645
- }
625
+ // Registry-entry writer as a scoped acquisition. The entry is published
626
+ // after BunHttpServer.layer binds the socket (scope acquisition order)
627
+ // and removed on scope release, so a bind failure never leaves a zombie
628
+ // entry and a graceful shutdown cleans up alongside the server stop —
629
+ // both in the same finalizer chain managed by Layer.launch.
630
+ const RegistryLayer = Layer.effectDiscard(
631
+ Effect.acquireRelease(
632
+ Effect.sync(() => {
633
+ serverStartedAt = new Date().toISOString()
634
+ try {
635
+ writeRegistryEntry({
636
+ pid: process.pid,
637
+ url: config.otel.baseUrl,
638
+ workdir: process.cwd(),
639
+ startedAt: serverStartedAt,
640
+ version: MOTEL_VERSION,
641
+ databasePath: config.otel.databasePath,
642
+ })
643
+ } catch (err) {
644
+ console.warn(`motel: failed to write registry entry: ${(err as Error).message}`)
645
+ }
646
+ }),
647
+ () => Effect.sync(() => removeRegistryEntry(process.pid)),
648
+ ),
649
+ )
646
650
 
647
651
  // ---------------------------------------------------------------------------
648
652
  // Server lifecycle
649
653
  // ---------------------------------------------------------------------------
650
654
 
651
- export const startLocalServer = async () => {
652
- if (server) return server
653
- const { handler, dispose } = HttpRouter.toWebHandler(ApiLive, { disableLogger: true })
654
- disposeWebHandler = dispose
655
- server = Bun.serve({
656
- hostname: config.otel.host,
655
+ /**
656
+ * Launchable server layer. Composes the API + static UI + store + registry,
657
+ * wraps the whole stack in HttpMiddleware.tracer (per-request OTel spans
658
+ * with http.method / url / status / user-agent attributes), and binds the
659
+ * socket via @effect/platform-bun's BunHttpServer. Use from server.ts:
660
+ *
661
+ * await Effect.runPromise(Layer.launch(ServerLive))
662
+ *
663
+ * Socket lifecycle, graceful shutdown, and error propagation are managed
664
+ * by the BunHttpServer layer's Scope — no hand-rolled start/stop plumbing.
665
+ * `reusePort: true` is retained as defense-in-depth against TIME_WAIT
666
+ * rebind conflicts (the registry-based adoption path in daemon.ts is the
667
+ * primary protection, but this covers a raw `bun src/server.ts` restart).
668
+ */
669
+ export const ServerLive = HttpRouter.serve(
670
+ Layer.mergeAll(ApiLayer, StaticLayer, RegistryLayer),
671
+ { middleware: HttpMiddleware.tracer },
672
+ ).pipe(
673
+ // OTLP ingest paths are NOT traced by the middleware, otherwise
674
+ // MOTEL_OTEL_ENABLED creates a feedback loop: every outbound span
675
+ // POSTs to /v1/traces, the tracer emits a span for that POST, which
676
+ // POSTs again on the next flush. This also shaves ~1 KB of header
677
+ // attributes off every ingest request that would have been written
678
+ // to the spans table as noise.
679
+ Layer.provide(HttpMiddleware.layerTracerDisabledForUrls(["/v1/traces", "/v1/logs"])),
680
+ // AsyncIngest spawns the telemetry worker — keeps the main-thread
681
+ // event loop free during heavy SQLite writes. Provided alongside
682
+ // the direct TelemetryStore so query handlers can still resolve
683
+ // their dependency directly.
684
+ Layer.provideMerge(AsyncIngestLive),
685
+ Layer.provideMerge(TelemetryStoreLive),
686
+ Layer.provideMerge(BunHttpServer.layer({
657
687
  port: config.otel.port,
658
- fetch(request) {
659
- return serveWebUi(request, handler)
660
- },
661
- })
662
- startedAt = new Date().toISOString()
663
- try {
664
- writeRegistryEntry({
665
- pid: process.pid,
666
- url: resolveBoundUrl(),
667
- workdir: process.cwd(),
668
- startedAt,
669
- version: MOTEL_VERSION,
670
- })
671
- } catch (err) {
672
- console.warn(`motel: failed to write registry entry: ${(err as Error).message}`)
673
- }
674
- return server
675
- }
676
-
677
- export const ensureLocalServer = async () => {
678
- if (server) return server
679
- try {
680
- const response = await fetch(resolveOtelUrl("/api/health"), { signal: AbortSignal.timeout(250) })
681
- if (response.ok) return null
682
- } catch {
683
- // Start local server below.
684
- }
685
- return await startLocalServer()
686
- }
687
-
688
- export const stopLocalServer = () => {
689
- server?.stop(true)
690
- server = null
691
- startedAt = null
692
-
693
- const dispose = disposeWebHandler
694
- disposeWebHandler = null
695
- if (dispose) {
696
- void dispose().catch((err) => {
697
- console.warn(`motel: failed to dispose web handler: ${(err as Error).message}`)
698
- })
699
- }
700
- }
688
+ hostname: config.otel.host,
689
+ reusePort: true,
690
+ })),
691
+ )