@kitlangton/motel 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/AGENTS.md +23 -8
  2. package/README.md +13 -2
  3. package/package.json +35 -19
  4. package/skills/motel-debug/SKILL.md +203 -0
  5. package/skills/motel-debug/references/effect.md +38 -0
  6. package/src/App.tsx +12 -5
  7. package/src/StartupGate.tsx +289 -0
  8. package/src/cli.ts +15 -16
  9. package/src/config.ts +7 -1
  10. package/src/daemon.test.ts +332 -51
  11. package/src/daemon.ts +105 -153
  12. package/src/httpApi.ts +1 -0
  13. package/src/httpListPolicy.test.ts +76 -0
  14. package/src/httpListPolicy.ts +129 -0
  15. package/src/index.tsx +9 -2
  16. package/src/localServer.ts +194 -313
  17. package/src/mcp.ts +2 -1
  18. package/src/motel.ts +0 -2
  19. package/src/opentui-jsx.d.ts +11 -0
  20. package/src/otlp.test.ts +65 -0
  21. package/src/otlp.ts +20 -0
  22. package/src/otlpProtobuf.ts +35 -0
  23. package/src/registry.ts +37 -11
  24. package/src/runtime.ts +2 -6
  25. package/src/services/AsyncIngest.ts +22 -8
  26. package/src/services/LogQueryService.ts +13 -27
  27. package/src/services/TelemetryQuery.ts +62 -0
  28. package/src/services/TelemetryStore.ts +546 -231
  29. package/src/services/TraceQueryService.ts +22 -56
  30. package/src/services/ingestRpc.ts +2 -4
  31. package/src/services/queryRpc.ts +15 -0
  32. package/src/services/telemetryQueryWorker.ts +32 -0
  33. package/src/services/telemetryWorker.ts +5 -8
  34. package/src/startupBench.ts +19 -0
  35. package/src/storybook/aiChatStory.tsx +1 -1
  36. package/src/telemetry.test.ts +307 -41
  37. package/src/ui/AiChatView.tsx +1 -1
  38. package/src/ui/AttrFilterModal.tsx +1 -1
  39. package/src/ui/ServiceLogs.tsx +10 -7
  40. package/src/ui/SpanContentView.tsx +24 -21
  41. package/src/ui/TraceDetailsPane.tsx +1 -1
  42. package/src/ui/TraceList.tsx +1 -1
  43. package/src/ui/aiState.ts +10 -22
  44. package/src/ui/app/TraceWorkspace.tsx +2 -1
  45. package/src/ui/app/useAppLayout.ts +1 -1
  46. package/src/ui/app/useTraceScreenData.ts +35 -23
  47. package/src/ui/atoms.ts +1 -1
  48. package/src/ui/cachedLoader.test.ts +23 -0
  49. package/src/ui/cachedLoader.ts +60 -0
  50. package/src/ui/loaders.ts +34 -53
  51. package/src/ui/persistence.ts +3 -3
  52. package/src/ui/primitives.tsx +1 -1
  53. package/src/ui/state.ts +2 -0
  54. package/src/ui/theme.ts +7 -5
  55. package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
  56. package/src/ui/traceSortNav.repro.seed.ts +1 -1
  57. package/src/ui/traceSortNav.repro.test.ts +12 -2
  58. package/src/ui/useAttrFilterPicker.ts +10 -8
  59. package/src/ui/useKeyboardNav.ts +28 -5
  60. package/src/ui/waterfallNav.repro.seed.ts +1 -1
  61. package/src/ui/waterfallNav.repro.test.ts +16 -8
  62. package/web/dist/assets/index-B01z9BaO.css +2 -0
  63. package/web/dist/assets/index-M86tcih5.js +22 -0
  64. package/web/dist/index.html +2 -2
  65. package/web/dist/assets/index-DnyVo03x.js +0 -27
  66. package/web/dist/assets/index-DzuHNBGV.css +0 -2
package/src/daemon.ts CHANGED
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
2
2
  import { promises as fsp } from "node:fs"
3
3
  import * as path from "node:path"
4
4
  import { Effect } from "effect"
5
- import { isAlive, listAliveEntries, MOTEL_SERVICE_ID, MOTEL_VERSION, type RegistryEntry } from "./registry.js"
5
+ import { isAlive, isManagedDaemonProcess, listAliveEntries, motelStateDir, MOTEL_SERVICE_ID, processIdentity, removeRegistryEntry, type RegistryEntry } from "./registry.js"
6
6
 
7
7
  const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
8
8
  const DEFAULT_HOST = "127.0.0.1"
@@ -10,6 +10,7 @@ const DEFAULT_PORT = 27686
10
10
  const START_TIMEOUT_MS = 30_000
11
11
  const STOP_TIMEOUT_MS = 10_000
12
12
  const LOCK_TIMEOUT_MS = 10_000
13
+ const START_POLL_INTERVAL_MS = 25
13
14
  const POLL_INTERVAL_MS = 150
14
15
  /** Fast probe used inside the waitForHealthy poll loop — we call it
15
16
  * every POLL_INTERVAL_MS, so a generous budget would stall the loop. */
@@ -23,6 +24,7 @@ const HEALTH_FAST_TIMEOUT_MS = 750
23
24
  * and short enough that a truly-down daemon is still detected
24
25
  * before START_TIMEOUT_MS fires. */
25
26
  const HEALTH_PATIENT_TIMEOUT_MS = 3_000
27
+ const INGEST_PROBE_TIMEOUT_MS = 3_000
26
28
 
27
29
  type HealthShape = {
28
30
  readonly ok: boolean
@@ -33,11 +35,13 @@ type HealthShape = {
33
35
  readonly workdir: string
34
36
  readonly startedAt: string
35
37
  readonly version: string
38
+ readonly instanceId?: string
36
39
  }
37
40
 
38
41
  type LockShape = {
39
42
  readonly pid: number
40
43
  readonly createdAt: string
44
+ readonly processIdentity?: string
41
45
  }
42
46
 
43
47
  type DaemonConfig = {
@@ -84,6 +88,9 @@ type DaemonOptions = {
84
88
  readonly databasePath?: string
85
89
  readonly host?: string
86
90
  readonly port?: number
91
+ readonly startTimeoutMs?: number
92
+ readonly gracefulStopTimeoutMs?: number
93
+ readonly forceStopTimeoutMs?: number
87
94
  }
88
95
 
89
96
  export class DaemonError extends Error {
@@ -96,12 +103,14 @@ export class DaemonError extends Error {
96
103
  const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
97
104
 
98
105
  const resolveConfig = (options: DaemonOptions = {}): DaemonConfig => {
106
+ const envBaseUrl = new URL(process.env.MOTEL_OTEL_BASE_URL?.trim() || process.env.MOTEL_OTEL_QUERY_URL?.trim() || `http://${DEFAULT_HOST}:${DEFAULT_PORT}`)
99
107
  const repoRoot = path.resolve(options.repoRoot ?? DEFAULT_REPO_ROOT)
100
108
  const workdir = path.resolve(options.workdir ?? process.cwd())
101
- const runtimeDir = path.resolve(options.runtimeDir ?? path.join(workdir, ".motel-data"))
102
- const databasePath = path.resolve(options.databasePath ?? path.join(runtimeDir, "telemetry.sqlite"))
103
- const host = options.host ?? DEFAULT_HOST
104
- const port = options.port ?? DEFAULT_PORT
109
+ const runtimeDir = path.resolve(options.runtimeDir ?? motelStateDir())
110
+ const databasePath = path.resolve(options.databasePath ?? process.env.MOTEL_OTEL_DB_PATH?.trim() ?? path.join(runtimeDir, "telemetry.sqlite"))
111
+ const host = options.host ?? process.env.MOTEL_OTEL_HOST?.trim() ?? envBaseUrl.hostname
112
+ const envPort = Number.parseInt(process.env.MOTEL_OTEL_PORT?.trim() || envBaseUrl.port, 10)
113
+ const port = options.port ?? (Number.isFinite(envPort) && envPort > 0 ? envPort : DEFAULT_PORT)
105
114
  return {
106
115
  repoRoot,
107
116
  serverEntry: path.join(repoRoot, "src/server.ts"),
@@ -122,17 +131,17 @@ const workdirMatches = (targetWorkdir: string, daemonWorkdir: string) => {
122
131
  return normalizedTarget === normalizedDaemon || normalizedTarget.startsWith(normalizedDaemon)
123
132
  }
124
133
 
125
- const pickByWorkdir = (entries: readonly RegistryEntry[], targetWorkdir: string) => {
126
- const withSep = targetWorkdir.endsWith(path.sep) ? targetWorkdir : `${targetWorkdir}${path.sep}`
134
+ const pickByUrl = (entries: readonly RegistryEntry[], baseUrl: string, databasePath: string) => {
127
135
  return entries
128
136
  .filter((entry) => {
129
- const workdir = entry.workdir.endsWith(path.sep) ? entry.workdir : `${entry.workdir}${path.sep}`
130
- return withSep === workdir || withSep.startsWith(workdir)
137
+ return entry.url === baseUrl && (entry.databasePath === undefined || entry.databasePath === databasePath)
131
138
  })
132
- .sort((a, b) => b.workdir.length - a.workdir.length)[0] ?? null
139
+ .sort((a, b) => Number(b.databasePath === databasePath) - Number(a.databasePath === databasePath))[0] ?? null
133
140
  }
134
141
 
135
- const expectedEnv = (config: DaemonConfig) => ({
142
+ const expectedEnv = (config: DaemonConfig, instanceId?: string) => ({
143
+ MOTEL_RUNTIME_DIR: config.runtimeDir,
144
+ ...(instanceId ? { MOTEL_DAEMON_INSTANCE_ID: instanceId } : {}),
136
145
  MOTEL_OTEL_BASE_URL: config.baseUrl,
137
146
  MOTEL_OTEL_QUERY_URL: config.baseUrl,
138
147
  MOTEL_OTEL_HOST: config.host,
@@ -144,43 +153,35 @@ const expectedEnv = (config: DaemonConfig) => ({
144
153
 
145
154
  export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager => {
146
155
  const config = resolveConfig(options)
156
+ const startTimeoutMs = options.startTimeoutMs ?? START_TIMEOUT_MS
157
+ const gracefulStopTimeoutMs = options.gracefulStopTimeoutMs ?? STOP_TIMEOUT_MS
158
+ const forceStopTimeoutMs = options.forceStopTimeoutMs ?? 2_000
147
159
  const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
148
- const readRegistryEntry = () => pickByWorkdir(listAliveEntries(), config.workdir)
160
+ const readRegistryEntry = () => pickByUrl(listAliveEntries(config.runtimeDir), config.baseUrl, config.databasePath)
149
161
 
150
162
  const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
151
163
  try {
152
164
  const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
153
165
  if (!response.ok) return null
154
- return await response.json() as HealthShape
166
+ const health = await response.json() as HealthShape
167
+ return health.ok ? health : null
155
168
  } catch {
156
169
  return null
157
170
  }
158
171
  }
159
172
 
160
- const startupMarkers = [`Listening on ${config.baseUrl}`, `motel local telemetry server listening on ${config.baseUrl}`]
161
-
162
- const readLogSince = async (offset: number) => {
173
+ const fetchIngestProbe = async () => {
163
174
  try {
164
- const raw = await fsp.readFile(config.logPath, "utf8")
165
- return raw.slice(offset)
175
+ const postEmpty = (path: string) => fetch(`${config.baseUrl}${path}`, {
176
+ method: "POST",
177
+ headers: { "content-type": "application/json" },
178
+ body: "{}",
179
+ signal: AbortSignal.timeout(INGEST_PROBE_TIMEOUT_MS),
180
+ })
181
+ const [traces, logs] = await Promise.all([postEmpty("/v1/traces"), postEmpty("/v1/logs")])
182
+ return traces.ok && logs.ok
166
183
  } catch {
167
- return ""
168
- }
169
- }
170
-
171
- const detectStartedFromLog = async (pid: number, offset: number): Promise<HealthShape | null> => {
172
- if (!isAlive(pid)) return null
173
- const tail = await readLogSince(offset)
174
- if (!startupMarkers.some((marker) => tail.includes(marker))) return null
175
- return {
176
- ok: true,
177
- service: MOTEL_SERVICE_ID,
178
- databasePath: config.databasePath,
179
- pid,
180
- url: config.baseUrl,
181
- workdir: config.workdir,
182
- startedAt: new Date().toISOString(),
183
- version: MOTEL_VERSION,
184
+ return false
184
185
  }
185
186
  }
186
187
 
@@ -188,71 +189,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
188
189
  if (health.service !== MOTEL_SERVICE_ID) {
189
190
  return `Port ${config.port} is in use by ${health.service}, not ${MOTEL_SERVICE_ID}.`
190
191
  }
191
- if (!workdirMatches(config.workdir, health.workdir)) {
192
- return `Port ${config.port} is serving motel for ${health.workdir}, not ${config.workdir}.`
193
- }
194
192
  if (health.databasePath !== config.databasePath) {
195
193
  return `Port ${config.port} is serving motel with ${health.databasePath}, expected ${config.databasePath}.`
196
194
  }
197
195
  return null
198
196
  }
199
197
 
200
- /**
201
- * Mismatch check against a registry entry — mirrors describeManagedMismatch
202
- * but drives off the registry file instead of an HTTP health response.
203
- * Used on the fast path in getStatus so warm-start doesn't need to wait
204
- * on an HTTP round-trip that may queue behind heavy OTLP ingest.
205
- *
206
- * The service-id check is implicit: any entry living in the motel
207
- * registry dir is by construction a motel daemon. databasePath is
208
- * optional for back-compat with entries written by older builds;
209
- * when absent we skip the DB check rather than refusing to adopt.
210
- */
211
- const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
212
- if (!workdirMatches(config.workdir, entry.workdir)) {
213
- return `Port ${config.port} is serving motel for ${entry.workdir}, not ${config.workdir}.`
214
- }
215
- if (entry.databasePath && entry.databasePath !== config.databasePath) {
216
- return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
217
- }
218
- return null
219
- }
220
-
221
- /**
222
- * Build a DaemonStatus from a live registry entry. Returns null when
223
- * there's no entry for our cwd, the registered pid isn't running, or
224
- * the entry is for a differently-configured daemon (different port).
225
- * This is the fast path: no HTTP, no event-loop round-trip, just a
226
- * directory read and a process.kill(pid, 0) liveness probe.
227
- */
228
- const getStatusFromRegistry = (): DaemonStatus | null => {
229
- const entry = readRegistryEntry()
230
- if (!entry) return null
231
- // Port discriminator: a motel registry shared across several
232
- // daemons (e.g., user running two instances on different
233
- // ports from the same workdir, or a test harness on a random
234
- // port) would otherwise have us adopt an unrelated daemon.
235
- // URL match is a fast, unambiguous identity check.
236
- if (entry.url !== config.baseUrl) return null
237
- const mismatch = describeRegistryMismatch(entry)
238
- return {
239
- running: mismatch === null,
240
- managed: mismatch === null,
241
- service: MOTEL_SERVICE_ID,
242
- pid: entry.pid,
243
- url: entry.url,
244
- databasePath: entry.databasePath ?? config.databasePath,
245
- workdir: entry.workdir,
246
- startedAt: entry.startedAt,
247
- version: entry.version,
248
- sameWorkdir: workdirMatches(config.workdir, entry.workdir),
249
- reason: mismatch,
250
- logPath: config.logPath,
251
- lockPath: config.lockPath,
252
- registryPid: entry.pid,
253
- }
254
- }
255
-
256
198
  const readLock = async (): Promise<LockShape | null> => {
257
199
  try {
258
200
  const raw = await fsp.readFile(config.lockPath, "utf8")
@@ -268,7 +210,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
268
210
  await fsp.rm(config.lockPath, { force: true })
269
211
  return true
270
212
  }
271
- if (isAlive(current.pid)) return false
213
+ if (current.processIdentity ? processIdentity(current.pid) === current.processIdentity : isAlive(current.pid)) return false
272
214
  await fsp.rm(config.lockPath, { force: true })
273
215
  return true
274
216
  }
@@ -280,7 +222,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
280
222
  while (Date.now() < deadline) {
281
223
  try {
282
224
  const handle = await fsp.open(config.lockPath, "wx")
283
- const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() } satisfies LockShape)
225
+ const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), processIdentity: processIdentity(process.pid) ?? undefined } satisfies LockShape)
284
226
  await handle.writeFile(contents, "utf8")
285
227
  return {
286
228
  release: async () => {
@@ -304,73 +246,59 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
304
246
  return fs.openSync(config.logPath, "a")
305
247
  }
306
248
 
307
- const waitForHealthy = async (pid: number, logOffset: number) => {
308
- const deadline = Date.now() + START_TIMEOUT_MS
249
+ const waitForHealthy = async (pid: number, instanceId: string) => {
250
+ const deadline = Date.now() + startTimeoutMs
309
251
  while (Date.now() < deadline) {
310
252
  const health = await fetchHealth()
311
253
  if (health) {
312
254
  const mismatch = describeManagedMismatch(health)
313
- if (!mismatch) return health
314
- throw new Error(mismatch)
255
+ const registry = readRegistryEntry()
256
+ if (!mismatch && health.pid === pid && health.instanceId === instanceId && registry?.pid === pid && registry.instanceId === instanceId && isManagedDaemonProcess(registry) && await fetchIngestProbe()) return health
257
+ if (mismatch) throw new Error(mismatch)
315
258
  }
316
- const started = await detectStartedFromLog(pid, logOffset)
317
- if (started) return started
318
259
  if (!isAlive(pid)) {
319
- // The spawned child is gone. Before declaring failure,
320
- // do one patient probe: the child may have died from
321
- // EADDRINUSE because another healthy motel is alive on
322
- // the port but was answering /api/health too slowly for
323
- // our fast poll. If that's the case, adopt it.
324
- const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
325
- if (patient) {
326
- const mismatch = describeManagedMismatch(patient)
327
- if (!mismatch) return patient
328
- throw new Error(mismatch)
329
- }
330
260
  throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
331
261
  }
332
- await sleep(POLL_INTERVAL_MS)
262
+ await sleep(START_POLL_INTERVAL_MS)
333
263
  }
334
264
  throw new Error(`Timed out waiting for daemon health at ${config.baseUrl}/api/health. See ${config.logPath}.`)
335
265
  }
336
266
 
337
- const stopPid = async (pid: number) => {
267
+ const waitUntilNotOwned = async (entry: RegistryEntry, timeoutMs: number) => {
268
+ const deadline = Date.now() + timeoutMs
269
+ while (Date.now() < deadline) {
270
+ if (!isManagedDaemonProcess(entry)) return true
271
+ await sleep(POLL_INTERVAL_MS)
272
+ }
273
+ return !isManagedDaemonProcess(entry)
274
+ }
275
+
276
+ const stopPid = async (entry: RegistryEntry) => {
277
+ if (!isManagedDaemonProcess(entry)) {
278
+ throw new Error(`Refusing to stop pid ${entry.pid}: registry identity does not match the running managed daemon.`)
279
+ }
338
280
  try {
339
- process.kill(pid, "SIGTERM")
281
+ process.kill(entry.pid, "SIGTERM")
340
282
  } catch (error) {
341
283
  const errno = error as NodeJS.ErrnoException
342
284
  if (errno.code !== "ESRCH") throw error
343
285
  }
344
286
 
345
- const deadline = Date.now() + STOP_TIMEOUT_MS
346
- while (Date.now() < deadline) {
347
- if (!isAlive(pid)) return
348
- const health = await fetchHealth()
349
- if (health && health.pid !== pid) return
350
- const registry = readRegistryEntry()
351
- if (!health && (!registry || registry.pid !== pid)) return
352
- await sleep(POLL_INTERVAL_MS)
287
+ if (!await waitUntilNotOwned(entry, gracefulStopTimeoutMs)) {
288
+ try {
289
+ process.kill(entry.pid, "SIGKILL")
290
+ } catch (error) {
291
+ if ((error as NodeJS.ErrnoException).code !== "ESRCH") throw error
292
+ }
293
+ if (!await waitUntilNotOwned(entry, forceStopTimeoutMs)) {
294
+ throw new Error(`Timed out force-killing daemon ${entry.pid}.`)
295
+ }
353
296
  }
354
-
355
- throw new Error(`Timed out waiting for daemon ${pid} to stop.`)
297
+ const current = readRegistryEntry()
298
+ if (current?.pid === entry.pid && current.instanceId === entry.instanceId) removeRegistryEntry(entry.pid, config.runtimeDir)
356
299
  }
357
300
 
358
301
  const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
359
- // Fast path: trust the local filesystem registry. When a motel
360
- // daemon started on this machine it wrote an entry for its pid
361
- // + cwd + databasePath; if that entry is still there and the pid
362
- // is alive, the daemon is almost certainly the one we want to
363
- // adopt. HTTP health is skipped because the daemon's health
364
- // endpoint can queue behind heavy OTLP ingest traffic, making
365
- // the probe unreliable exactly when the daemon is busy.
366
- const registryStatus = getStatusFromRegistry()
367
- if (registryStatus) return registryStatus
368
-
369
- // No local evidence → fall back to HTTP. Covers the edge cases
370
- // where: a motel daemon is running but was started before this
371
- // registry-first path shipped; OR the port is held by something
372
- // entirely unrelated (the mismatch check turns that into a
373
- // human-readable reason).
374
302
  const registry = readRegistryEntry()
375
303
  const health = await fetchHealth(timeoutMs)
376
304
  if (!health) {
@@ -393,9 +321,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
393
321
  }
394
322
 
395
323
  const mismatch = describeManagedMismatch(health)
324
+ const managed = mismatch === null && registry?.pid === health.pid && registry.instanceId === health.instanceId && isManagedDaemonProcess(registry)
396
325
  return {
397
326
  running: mismatch === null,
398
- managed: mismatch === null,
327
+ managed,
399
328
  service: health.service,
400
329
  pid: health.pid,
401
330
  url: health.url,
@@ -404,7 +333,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
404
333
  startedAt: health.startedAt,
405
334
  version: health.version,
406
335
  sameWorkdir: workdirMatches(config.workdir, health.workdir),
407
- reason: mismatch,
336
+ reason: mismatch ?? (managed ? null : "Responsive motel server is not an identity-verified managed daemon."),
408
337
  logPath: config.logPath,
409
338
  lockPath: config.lockPath,
410
339
  registryPid: registry?.pid ?? null,
@@ -417,26 +346,39 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
417
346
  // negative here drops us into the spawn path and collides with
418
347
  // any slow-but-healthy daemon sitting on the port.
419
348
  const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
420
- if (existing.managed && existing.running) return existing
349
+ const existingEntry = readRegistryEntry()
350
+ if (existing.managed && existing.running) {
351
+ // /api/health can stay healthy after the lazy ingest worker/RPC path
352
+ // has been poisoned by an interrupted request. Empty OTLP posts are
353
+ // side-effect free and exercise the same path real exporters need.
354
+ if (existing.pid === process.pid || await fetchIngestProbe()) return existing
355
+ if (existingEntry) await stopPid(existingEntry)
356
+ }
357
+ if (!existing.running && existingEntry && isManagedDaemonProcess(existingEntry)) await stopPid(existingEntry)
421
358
  if (existing.service !== null && existing.reason) {
422
359
  throw new Error(existing.reason)
423
360
  }
424
361
 
425
362
  const lock = await acquireStartupLock()
426
363
  let spawnedPid: number | null = null
364
+ let spawnedIdentity: string | null = null
427
365
  try {
428
366
  // Same reasoning for the post-lock re-check: another ensure()
429
367
  // may have spawned a daemon between our first probe and the
430
368
  // lock grant, and its initial health response can be slow
431
369
  // while the runtime warms up.
432
370
  const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
433
- if (rechecked.managed && rechecked.running) return rechecked
371
+ if (rechecked.managed && rechecked.running) {
372
+ if (rechecked.pid === process.pid || await fetchIngestProbe()) return rechecked
373
+ const recheckedEntry = readRegistryEntry()
374
+ if (recheckedEntry) await stopPid(recheckedEntry)
375
+ }
434
376
  if (rechecked.service !== null && rechecked.reason) {
435
377
  throw new Error(rechecked.reason)
436
378
  }
437
379
 
438
380
  const logFd = await openLogFile()
439
- const logOffset = fs.fstatSync(logFd).size
381
+ const instanceId = crypto.randomUUID()
440
382
  try {
441
383
  const proc = Bun.spawn({
442
384
  cmd: [process.execPath, "run", config.serverEntry],
@@ -444,11 +386,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
444
386
  detached: true,
445
387
  env: {
446
388
  ...process.env,
447
- ...expectedEnv(config),
389
+ ...expectedEnv(config, instanceId),
448
390
  },
449
391
  stdio: ["ignore", logFd, logFd],
450
392
  })
451
393
  spawnedPid = proc.pid
394
+ spawnedIdentity = processIdentity(proc.pid)
452
395
  proc.unref()
453
396
  } finally {
454
397
  fs.closeSync(logFd)
@@ -458,7 +401,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
458
401
  throw new Error("Daemon failed to spawn.")
459
402
  }
460
403
 
461
- const health = await waitForHealthy(spawnedPid, logOffset)
404
+ const health = await waitForHealthy(spawnedPid, instanceId)
462
405
  return {
463
406
  running: true,
464
407
  managed: true,
@@ -477,7 +420,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
477
420
  }
478
421
  } catch (error) {
479
422
  if (spawnedPid !== null) {
480
- await stopPid(spawnedPid).catch(() => undefined)
423
+ const entry = readRegistryEntry()
424
+ if (entry?.pid === spawnedPid) {
425
+ await stopPid(entry).catch(() => undefined)
426
+ } else if (spawnedIdentity && processIdentity(spawnedPid) === spawnedIdentity) {
427
+ try { process.kill(spawnedPid, "SIGTERM") } catch { /* already exited */ }
428
+ const deadline = Date.now() + gracefulStopTimeoutMs
429
+ while (Date.now() < deadline && processIdentity(spawnedPid) === spawnedIdentity) await sleep(POLL_INTERVAL_MS)
430
+ if (processIdentity(spawnedPid) === spawnedIdentity) {
431
+ try { process.kill(spawnedPid, "SIGKILL") } catch { /* already exited */ }
432
+ }
433
+ }
481
434
  }
482
435
  throw error
483
436
  } finally {
@@ -488,13 +441,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
488
441
  const stop = async (): Promise<DaemonStatus> => {
489
442
  const status = await getStatus()
490
443
  if (status.pid === null) return status
491
- if (!status.sameWorkdir) {
492
- throw new Error(`Refusing to stop motel owned by ${status.workdir}.`)
493
- }
494
444
  if (status.service !== null && status.service !== MOTEL_SERVICE_ID) {
495
445
  throw new Error(`Refusing to stop non-motel service ${status.service} on ${status.url}.`)
496
446
  }
497
- await stopPid(status.pid)
447
+ const entry = readRegistryEntry()
448
+ if (!entry || entry.pid !== status.pid) throw new Error(`Refusing to stop pid ${status.pid}: no matching managed registry entry.`)
449
+ await stopPid(entry)
498
450
  return await getStatus()
499
451
  }
500
452
 
package/src/httpApi.ts CHANGED
@@ -25,6 +25,7 @@ const Health = Schema.Struct({
25
25
  workdir: Schema.String.pipe(Schema.annotateKey({ description: "Working directory at the time the server started. Used by MCP discovery to match the current project via longest-prefix." })),
26
26
  startedAt: Schema.String.pipe(Schema.annotateKey({ description: "ISO 8601 timestamp of when the server bound its port." })),
27
27
  version: Schema.String.pipe(Schema.annotateKey({ description: "Motel version string." })),
28
+ instanceId: Schema.optionalKey(Schema.String).pipe(Schema.annotateKey({ description: "Managed-daemon instance nonce used for readiness and safe shutdown identity checks." })),
28
29
  })
29
30
  const IngestTraceResponse = Schema.Struct({ insertedSpans: Schema.Number })
30
31
  const IngestLogResponse = Schema.Struct({ insertedLogs: Schema.Number })
@@ -0,0 +1,76 @@
1
+ import { describe, expect, it } from "bun:test"
2
+ import type { LogItem, TraceSummaryItem } from "./domain.js"
3
+ import { LOG_LIST, LOG_STATS, parseListParams, paginateLogs, paginateSummaries, traceCursorArgs } from "./httpListPolicy.js"
4
+
5
+ const BASE_URL = "http://127.0.0.1:27686"
6
+
7
+ describe("HTTP list policy", () => {
8
+ it("bounds list parameters and extracts attribute filters", () => {
9
+ const params = parseListParams({
10
+ url: "/api/logs?limit=9999&lookback=9d&attr.session.id=abc&attrContains.message=failed",
11
+ }, LOG_LIST, BASE_URL)
12
+
13
+ expect(params.limit).toBe(500)
14
+ expect(params.lookbackMinutes).toBe(24 * 60)
15
+ expect(params.attributeFilters).toEqual({ "session.id": "abc" })
16
+ expect(params.attributeContainsFilters).toEqual({ message: "failed" })
17
+ })
18
+
19
+ it("round-trips a trace cursor through page metadata", () => {
20
+ const traces: readonly TraceSummaryItem[] = [
21
+ {
22
+ traceId: "trace-1",
23
+ serviceName: "api",
24
+ rootOperationName: "GET /first",
25
+ startedAt: new Date(1000),
26
+ isRunning: false,
27
+ durationMs: 2,
28
+ spanCount: 1,
29
+ errorCount: 0,
30
+ warnings: [],
31
+ },
32
+ {
33
+ traceId: "trace-2",
34
+ serviceName: "api",
35
+ rootOperationName: "GET /second",
36
+ startedAt: new Date(900),
37
+ isRunning: false,
38
+ durationMs: 1,
39
+ spanCount: 1,
40
+ errorCount: 0,
41
+ warnings: [],
42
+ },
43
+ ]
44
+
45
+ const page = paginateSummaries(traces, { limit: 1, lookbackMinutes: 60 })
46
+ const parsed = parseListParams({ url: `/api/traces?cursor=${page.meta.nextCursor}` }, LOG_LIST, BASE_URL)
47
+
48
+ expect(page.meta.truncated).toBe(true)
49
+ expect(traceCursorArgs(parsed.cursor)).toEqual({ cursorStartedAtMs: 1000, cursorTraceId: "trace-1" })
50
+ })
51
+
52
+ it("formats log page metadata and emits a cursor", () => {
53
+ const logs: readonly LogItem[] = [{
54
+ id: "12",
55
+ timestamp: new Date(1200),
56
+ serviceName: "api",
57
+ severityText: "INFO",
58
+ body: "ready",
59
+ traceId: null,
60
+ spanId: null,
61
+ scopeName: null,
62
+ attributes: {},
63
+ }]
64
+
65
+ const page = paginateLogs(logs, { limit: 10, lookbackMinutes: 120 })
66
+
67
+ expect(page.meta).toMatchObject({ limit: 10, lookback: "2h", returned: 1, truncated: false })
68
+ expect(page.meta.nextCursor).not.toBeNull()
69
+ })
70
+
71
+ it("keeps aggregate log queries bounded to twenty groups by default", () => {
72
+ const params = parseListParams({ url: "/api/logs/stats?groupBy=service&agg=count" }, LOG_STATS, BASE_URL)
73
+
74
+ expect(params.limit).toBe(20)
75
+ })
76
+ })
@@ -0,0 +1,129 @@
1
+ import type { LogItem, TraceSummaryItem } from "./domain.js"
2
+ import { attributeContainsFiltersFromEntries, attributeFiltersFromEntries } from "./queryFilters.js"
3
+
4
+ type CursorShape =
5
+ | { readonly kind: "trace"; readonly startedAt: number; readonly id: string }
6
+ | { readonly kind: "log"; readonly timestamp: number; readonly id: string }
7
+
8
+ export interface ListBounds {
9
+ readonly defaultLimit: number
10
+ readonly maxLimit: number
11
+ readonly defaultLookback: number
12
+ readonly maxLookback: number
13
+ }
14
+
15
+ export interface ListParams {
16
+ readonly url: URL
17
+ readonly limit: number
18
+ readonly lookbackMinutes: number
19
+ readonly cursor: CursorShape | null
20
+ readonly attributeFilters: Readonly<Record<string, string>>
21
+ readonly attributeContainsFilters: Readonly<Record<string, string>>
22
+ }
23
+
24
+ export const TRACE_LIST: ListBounds = { defaultLimit: 20, maxLimit: 100, defaultLookback: 60, maxLookback: 24 * 60 }
25
+ export const SPAN_LIST: ListBounds = { defaultLimit: 100, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
26
+ export const LOG_LIST: ListBounds = { defaultLimit: 100, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
27
+ export const AI_LIST: ListBounds = { defaultLimit: 20, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
28
+ export const TRACE_STATS: ListBounds = { defaultLimit: 20, maxLimit: 100, defaultLookback: 60, maxLookback: 24 * 60 }
29
+ export const LOG_STATS: ListBounds = { defaultLimit: 20, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
30
+
31
+ export const requestUrl = (request: { readonly url: string }, baseUrl: string) => new URL(request.url, baseUrl)
32
+
33
+ const parsePositiveInt = (value: string | undefined, defaultValue: number) => {
34
+ const parsed = Number.parseInt(value ?? "", 10)
35
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : defaultValue
36
+ }
37
+
38
+ export const parseLimit = (value: string | null, fallback: number) => parsePositiveInt(value ?? undefined, fallback)
39
+
40
+ export const parseLookbackMinutes = (value: string | null, fallback: number) => {
41
+ if (!value) return fallback
42
+ const match = value.trim().match(/^(\d+)([mhd])$/i)
43
+ if (!match) return fallback
44
+ const amount = Number.parseInt(match[1] ?? "", 10)
45
+ if (!Number.isFinite(amount) || amount <= 0) return fallback
46
+ const unit = (match[2] ?? "m").toLowerCase()
47
+ if (unit === "d") return amount * 1440
48
+ if (unit === "h") return amount * 60
49
+ return amount
50
+ }
51
+
52
+ const clamp = (value: number, min: number, max: number) => Math.max(min, Math.min(value, max))
53
+
54
+ const decodeCursor = (value: string | null): CursorShape | null => {
55
+ if (!value) return null
56
+ try {
57
+ return JSON.parse(Buffer.from(value, "base64url").toString("utf8")) as CursorShape
58
+ } catch {
59
+ return null
60
+ }
61
+ }
62
+
63
+ const encodeCursor = (cursor: CursorShape) => Buffer.from(JSON.stringify(cursor), "utf8").toString("base64url")
64
+
65
+ export const traceCursorArgs = (cursor: CursorShape | null) =>
66
+ cursor?.kind === "trace"
67
+ ? { cursorStartedAtMs: cursor.startedAt, cursorTraceId: cursor.id }
68
+ : {}
69
+
70
+ export const logCursorArgs = (cursor: CursorShape | null) =>
71
+ cursor?.kind === "log"
72
+ ? { cursorTimestampMs: cursor.timestamp, cursorId: cursor.id }
73
+ : {}
74
+
75
+ export const parseListParams = (request: { readonly url: string }, bounds: ListBounds, baseUrl: string): ListParams => {
76
+ const url = requestUrl(request, baseUrl)
77
+ return {
78
+ url,
79
+ limit: clamp(parseLimit(url.searchParams.get("limit"), bounds.defaultLimit), 1, bounds.maxLimit),
80
+ lookbackMinutes: clamp(parseLookbackMinutes(url.searchParams.get("lookback"), bounds.defaultLookback), 1, bounds.maxLookback),
81
+ cursor: decodeCursor(url.searchParams.get("cursor")),
82
+ attributeFilters: attributeFiltersFromEntries(url.searchParams.entries()),
83
+ attributeContainsFilters: attributeContainsFiltersFromEntries(url.searchParams.entries()),
84
+ }
85
+ }
86
+
87
+ const formatLookback = (minutes: number) => {
88
+ if (minutes % 1440 === 0) return `${minutes / 1440}d`
89
+ if (minutes % 60 === 0) return `${minutes / 60}h`
90
+ return `${minutes}m`
91
+ }
92
+
93
+ export const listMeta = (input: { readonly limit: number; readonly lookbackMinutes: number; readonly returned: number; readonly truncated: boolean; readonly nextCursor: string | null }) => ({
94
+ limit: input.limit,
95
+ lookback: formatLookback(input.lookbackMinutes),
96
+ returned: input.returned,
97
+ truncated: input.truncated,
98
+ nextCursor: input.nextCursor,
99
+ })
100
+
101
+ export const paginateSummaries = (summaries: readonly TraceSummaryItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
102
+ const page = summaries.slice(0, options.limit)
103
+ const last = page.at(-1)
104
+ return {
105
+ data: page,
106
+ meta: listMeta({
107
+ limit: options.limit,
108
+ lookbackMinutes: options.lookbackMinutes,
109
+ returned: page.length,
110
+ truncated: summaries.length > page.length,
111
+ nextCursor: last ? encodeCursor({ kind: "trace", startedAt: last.startedAt.getTime(), id: last.traceId }) : null,
112
+ }),
113
+ }
114
+ }
115
+
116
+ export const paginateLogs = (logs: readonly LogItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
117
+ const page = logs.slice(0, options.limit)
118
+ const last = page.at(-1)
119
+ return {
120
+ data: page,
121
+ meta: listMeta({
122
+ limit: options.limit,
123
+ lookbackMinutes: options.lookbackMinutes,
124
+ returned: page.length,
125
+ truncated: logs.length > page.length,
126
+ nextCursor: last ? encodeCursor({ kind: "log", timestamp: last.timestamp.getTime(), id: last.id }) : null,
127
+ }),
128
+ }
129
+ }