@kitlangton/motel 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/AGENTS.md +11 -8
  2. package/README.md +13 -2
  3. package/package.json +31 -19
  4. package/skills/motel-debug/SKILL.md +203 -0
  5. package/skills/motel-debug/references/effect.md +38 -0
  6. package/src/App.tsx +3 -5
  7. package/src/StartupGate.tsx +8 -10
  8. package/src/cli.ts +15 -16
  9. package/src/config.ts +7 -1
  10. package/src/daemon.test.ts +332 -51
  11. package/src/daemon.ts +103 -152
  12. package/src/httpApi.ts +1 -0
  13. package/src/httpListPolicy.test.ts +76 -0
  14. package/src/httpListPolicy.ts +129 -0
  15. package/src/localServer.ts +194 -323
  16. package/src/mcp.ts +2 -1
  17. package/src/opentui-jsx.d.ts +11 -0
  18. package/src/otlp.test.ts +65 -0
  19. package/src/otlp.ts +20 -0
  20. package/src/otlpProtobuf.ts +35 -0
  21. package/src/registry.ts +37 -11
  22. package/src/runtime.ts +2 -6
  23. package/src/services/AsyncIngest.ts +20 -8
  24. package/src/services/LogQueryService.ts +11 -25
  25. package/src/services/TelemetryQuery.ts +62 -0
  26. package/src/services/TelemetryStore.ts +433 -249
  27. package/src/services/TraceQueryService.ts +18 -52
  28. package/src/services/ingestRpc.ts +2 -4
  29. package/src/services/queryRpc.ts +15 -0
  30. package/src/services/telemetryQueryWorker.ts +32 -0
  31. package/src/services/telemetryWorker.ts +5 -8
  32. package/src/storybook/aiChatStory.tsx +1 -1
  33. package/src/telemetry.test.ts +307 -41
  34. package/src/ui/AiChatView.tsx +1 -1
  35. package/src/ui/AttrFilterModal.tsx +1 -1
  36. package/src/ui/ServiceLogs.tsx +10 -7
  37. package/src/ui/SpanContentView.tsx +24 -21
  38. package/src/ui/TraceDetailsPane.tsx +1 -1
  39. package/src/ui/TraceList.tsx +1 -1
  40. package/src/ui/aiState.ts +10 -22
  41. package/src/ui/app/TraceWorkspace.tsx +2 -1
  42. package/src/ui/app/useAppLayout.ts +1 -1
  43. package/src/ui/app/useTraceScreenData.ts +22 -18
  44. package/src/ui/cachedLoader.test.ts +23 -0
  45. package/src/ui/cachedLoader.ts +60 -0
  46. package/src/ui/loaders.ts +34 -53
  47. package/src/ui/primitives.tsx +1 -1
  48. package/src/ui/state.ts +2 -0
  49. package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
  50. package/src/ui/traceSortNav.repro.seed.ts +1 -1
  51. package/src/ui/traceSortNav.repro.test.ts +12 -2
  52. package/src/ui/useAttrFilterPicker.ts +10 -8
  53. package/src/ui/useKeyboardNav.ts +3 -6
  54. package/src/ui/waterfallNav.repro.seed.ts +1 -1
  55. package/src/ui/waterfallNav.repro.test.ts +16 -8
  56. package/web/dist/assets/index-B01z9BaO.css +2 -0
  57. package/web/dist/assets/index-M86tcih5.js +22 -0
  58. package/web/dist/index.html +2 -2
  59. package/web/dist/assets/index-DnyVo03x.js +0 -27
  60. package/web/dist/assets/index-DzuHNBGV.css +0 -2
package/src/daemon.ts CHANGED
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
2
2
  import { promises as fsp } from "node:fs"
3
3
  import * as path from "node:path"
4
4
  import { Effect } from "effect"
5
- import { isAlive, listAliveEntries, MOTEL_SERVICE_ID, MOTEL_VERSION, type RegistryEntry } from "./registry.js"
5
+ import { isAlive, isManagedDaemonProcess, listAliveEntries, motelStateDir, MOTEL_SERVICE_ID, processIdentity, removeRegistryEntry, type RegistryEntry } from "./registry.js"
6
6
 
7
7
  const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
8
8
  const DEFAULT_HOST = "127.0.0.1"
@@ -24,6 +24,7 @@ const HEALTH_FAST_TIMEOUT_MS = 750
24
24
  * and short enough that a truly-down daemon is still detected
25
25
  * before START_TIMEOUT_MS fires. */
26
26
  const HEALTH_PATIENT_TIMEOUT_MS = 3_000
27
+ const INGEST_PROBE_TIMEOUT_MS = 3_000
27
28
 
28
29
  type HealthShape = {
29
30
  readonly ok: boolean
@@ -34,11 +35,13 @@ type HealthShape = {
34
35
  readonly workdir: string
35
36
  readonly startedAt: string
36
37
  readonly version: string
38
+ readonly instanceId?: string
37
39
  }
38
40
 
39
41
  type LockShape = {
40
42
  readonly pid: number
41
43
  readonly createdAt: string
44
+ readonly processIdentity?: string
42
45
  }
43
46
 
44
47
  type DaemonConfig = {
@@ -85,6 +88,9 @@ type DaemonOptions = {
85
88
  readonly databasePath?: string
86
89
  readonly host?: string
87
90
  readonly port?: number
91
+ readonly startTimeoutMs?: number
92
+ readonly gracefulStopTimeoutMs?: number
93
+ readonly forceStopTimeoutMs?: number
88
94
  }
89
95
 
90
96
  export class DaemonError extends Error {
@@ -97,12 +103,14 @@ export class DaemonError extends Error {
97
103
  const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
98
104
 
99
105
  const resolveConfig = (options: DaemonOptions = {}): DaemonConfig => {
106
+ const envBaseUrl = new URL(process.env.MOTEL_OTEL_BASE_URL?.trim() || process.env.MOTEL_OTEL_QUERY_URL?.trim() || `http://${DEFAULT_HOST}:${DEFAULT_PORT}`)
100
107
  const repoRoot = path.resolve(options.repoRoot ?? DEFAULT_REPO_ROOT)
101
108
  const workdir = path.resolve(options.workdir ?? process.cwd())
102
- const runtimeDir = path.resolve(options.runtimeDir ?? path.join(workdir, ".motel-data"))
103
- const databasePath = path.resolve(options.databasePath ?? path.join(runtimeDir, "telemetry.sqlite"))
104
- const host = options.host ?? DEFAULT_HOST
105
- const port = options.port ?? DEFAULT_PORT
109
+ const runtimeDir = path.resolve(options.runtimeDir ?? motelStateDir())
110
+ const databasePath = path.resolve(options.databasePath ?? process.env.MOTEL_OTEL_DB_PATH?.trim() ?? path.join(runtimeDir, "telemetry.sqlite"))
111
+ const host = options.host ?? process.env.MOTEL_OTEL_HOST?.trim() ?? envBaseUrl.hostname
112
+ const envPort = Number.parseInt(process.env.MOTEL_OTEL_PORT?.trim() || envBaseUrl.port, 10)
113
+ const port = options.port ?? (Number.isFinite(envPort) && envPort > 0 ? envPort : DEFAULT_PORT)
106
114
  return {
107
115
  repoRoot,
108
116
  serverEntry: path.join(repoRoot, "src/server.ts"),
@@ -123,17 +131,17 @@ const workdirMatches = (targetWorkdir: string, daemonWorkdir: string) => {
123
131
  return normalizedTarget === normalizedDaemon || normalizedTarget.startsWith(normalizedDaemon)
124
132
  }
125
133
 
126
- const pickByWorkdir = (entries: readonly RegistryEntry[], targetWorkdir: string) => {
127
- const withSep = targetWorkdir.endsWith(path.sep) ? targetWorkdir : `${targetWorkdir}${path.sep}`
134
+ const pickByUrl = (entries: readonly RegistryEntry[], baseUrl: string, databasePath: string) => {
128
135
  return entries
129
136
  .filter((entry) => {
130
- const workdir = entry.workdir.endsWith(path.sep) ? entry.workdir : `${entry.workdir}${path.sep}`
131
- return withSep === workdir || withSep.startsWith(workdir)
137
+ return entry.url === baseUrl && (entry.databasePath === undefined || entry.databasePath === databasePath)
132
138
  })
133
- .sort((a, b) => b.workdir.length - a.workdir.length)[0] ?? null
139
+ .sort((a, b) => Number(b.databasePath === databasePath) - Number(a.databasePath === databasePath))[0] ?? null
134
140
  }
135
141
 
136
- const expectedEnv = (config: DaemonConfig) => ({
142
+ const expectedEnv = (config: DaemonConfig, instanceId?: string) => ({
143
+ MOTEL_RUNTIME_DIR: config.runtimeDir,
144
+ ...(instanceId ? { MOTEL_DAEMON_INSTANCE_ID: instanceId } : {}),
137
145
  MOTEL_OTEL_BASE_URL: config.baseUrl,
138
146
  MOTEL_OTEL_QUERY_URL: config.baseUrl,
139
147
  MOTEL_OTEL_HOST: config.host,
@@ -145,43 +153,35 @@ const expectedEnv = (config: DaemonConfig) => ({
145
153
 
146
154
  export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager => {
147
155
  const config = resolveConfig(options)
156
+ const startTimeoutMs = options.startTimeoutMs ?? START_TIMEOUT_MS
157
+ const gracefulStopTimeoutMs = options.gracefulStopTimeoutMs ?? STOP_TIMEOUT_MS
158
+ const forceStopTimeoutMs = options.forceStopTimeoutMs ?? 2_000
148
159
  const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
149
- const readRegistryEntry = () => pickByWorkdir(listAliveEntries(), config.workdir)
160
+ const readRegistryEntry = () => pickByUrl(listAliveEntries(config.runtimeDir), config.baseUrl, config.databasePath)
150
161
 
151
162
  const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
152
163
  try {
153
164
  const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
154
165
  if (!response.ok) return null
155
- return await response.json() as HealthShape
166
+ const health = await response.json() as HealthShape
167
+ return health.ok ? health : null
156
168
  } catch {
157
169
  return null
158
170
  }
159
171
  }
160
172
 
161
- const startupMarkers = [`Listening on ${config.baseUrl}`, `motel local telemetry server listening on ${config.baseUrl}`]
162
-
163
- const readLogSince = async (offset: number) => {
173
+ const fetchIngestProbe = async () => {
164
174
  try {
165
- const raw = await fsp.readFile(config.logPath, "utf8")
166
- return raw.slice(offset)
175
+ const postEmpty = (path: string) => fetch(`${config.baseUrl}${path}`, {
176
+ method: "POST",
177
+ headers: { "content-type": "application/json" },
178
+ body: "{}",
179
+ signal: AbortSignal.timeout(INGEST_PROBE_TIMEOUT_MS),
180
+ })
181
+ const [traces, logs] = await Promise.all([postEmpty("/v1/traces"), postEmpty("/v1/logs")])
182
+ return traces.ok && logs.ok
167
183
  } catch {
168
- return ""
169
- }
170
- }
171
-
172
- const detectStartedFromLog = async (pid: number, offset: number): Promise<HealthShape | null> => {
173
- if (!isAlive(pid)) return null
174
- const tail = await readLogSince(offset)
175
- if (!startupMarkers.some((marker) => tail.includes(marker))) return null
176
- return {
177
- ok: true,
178
- service: MOTEL_SERVICE_ID,
179
- databasePath: config.databasePath,
180
- pid,
181
- url: config.baseUrl,
182
- workdir: config.workdir,
183
- startedAt: new Date().toISOString(),
184
- version: MOTEL_VERSION,
184
+ return false
185
185
  }
186
186
  }
187
187
 
@@ -189,71 +189,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
189
189
  if (health.service !== MOTEL_SERVICE_ID) {
190
190
  return `Port ${config.port} is in use by ${health.service}, not ${MOTEL_SERVICE_ID}.`
191
191
  }
192
- if (!workdirMatches(config.workdir, health.workdir)) {
193
- return `Port ${config.port} is serving motel for ${health.workdir}, not ${config.workdir}.`
194
- }
195
192
  if (health.databasePath !== config.databasePath) {
196
193
  return `Port ${config.port} is serving motel with ${health.databasePath}, expected ${config.databasePath}.`
197
194
  }
198
195
  return null
199
196
  }
200
197
 
201
- /**
202
- * Mismatch check against a registry entry — mirrors describeManagedMismatch
203
- * but drives off the registry file instead of an HTTP health response.
204
- * Used on the fast path in getStatus so warm-start doesn't need to wait
205
- * on an HTTP round-trip that may queue behind heavy OTLP ingest.
206
- *
207
- * The service-id check is implicit: any entry living in the motel
208
- * registry dir is by construction a motel daemon. databasePath is
209
- * optional for back-compat with entries written by older builds;
210
- * when absent we skip the DB check rather than refusing to adopt.
211
- */
212
- const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
213
- if (!workdirMatches(config.workdir, entry.workdir)) {
214
- return `Port ${config.port} is serving motel for ${entry.workdir}, not ${config.workdir}.`
215
- }
216
- if (entry.databasePath && entry.databasePath !== config.databasePath) {
217
- return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
218
- }
219
- return null
220
- }
221
-
222
- /**
223
- * Build a DaemonStatus from a live registry entry. Returns null when
224
- * there's no entry for our cwd, the registered pid isn't running, or
225
- * the entry is for a differently-configured daemon (different port).
226
- * This is the fast path: no HTTP, no event-loop round-trip, just a
227
- * directory read and a process.kill(pid, 0) liveness probe.
228
- */
229
- const getStatusFromRegistry = (): DaemonStatus | null => {
230
- const entry = readRegistryEntry()
231
- if (!entry) return null
232
- // Port discriminator: a motel registry shared across several
233
- // daemons (e.g., user running two instances on different
234
- // ports from the same workdir, or a test harness on a random
235
- // port) would otherwise have us adopt an unrelated daemon.
236
- // URL match is a fast, unambiguous identity check.
237
- if (entry.url !== config.baseUrl) return null
238
- const mismatch = describeRegistryMismatch(entry)
239
- return {
240
- running: mismatch === null,
241
- managed: mismatch === null,
242
- service: MOTEL_SERVICE_ID,
243
- pid: entry.pid,
244
- url: entry.url,
245
- databasePath: entry.databasePath ?? config.databasePath,
246
- workdir: entry.workdir,
247
- startedAt: entry.startedAt,
248
- version: entry.version,
249
- sameWorkdir: workdirMatches(config.workdir, entry.workdir),
250
- reason: mismatch,
251
- logPath: config.logPath,
252
- lockPath: config.lockPath,
253
- registryPid: entry.pid,
254
- }
255
- }
256
-
257
198
  const readLock = async (): Promise<LockShape | null> => {
258
199
  try {
259
200
  const raw = await fsp.readFile(config.lockPath, "utf8")
@@ -269,7 +210,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
269
210
  await fsp.rm(config.lockPath, { force: true })
270
211
  return true
271
212
  }
272
- if (isAlive(current.pid)) return false
213
+ if (current.processIdentity ? processIdentity(current.pid) === current.processIdentity : isAlive(current.pid)) return false
273
214
  await fsp.rm(config.lockPath, { force: true })
274
215
  return true
275
216
  }
@@ -281,7 +222,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
281
222
  while (Date.now() < deadline) {
282
223
  try {
283
224
  const handle = await fsp.open(config.lockPath, "wx")
284
- const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() } satisfies LockShape)
225
+ const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), processIdentity: processIdentity(process.pid) ?? undefined } satisfies LockShape)
285
226
  await handle.writeFile(contents, "utf8")
286
227
  return {
287
228
  release: async () => {
@@ -305,29 +246,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
305
246
  return fs.openSync(config.logPath, "a")
306
247
  }
307
248
 
308
- const waitForHealthy = async (pid: number, logOffset: number) => {
309
- const deadline = Date.now() + START_TIMEOUT_MS
249
+ const waitForHealthy = async (pid: number, instanceId: string) => {
250
+ const deadline = Date.now() + startTimeoutMs
310
251
  while (Date.now() < deadline) {
311
252
  const health = await fetchHealth()
312
253
  if (health) {
313
254
  const mismatch = describeManagedMismatch(health)
314
- if (!mismatch) return health
315
- throw new Error(mismatch)
255
+ const registry = readRegistryEntry()
256
+ if (!mismatch && health.pid === pid && health.instanceId === instanceId && registry?.pid === pid && registry.instanceId === instanceId && isManagedDaemonProcess(registry) && await fetchIngestProbe()) return health
257
+ if (mismatch) throw new Error(mismatch)
316
258
  }
317
- const started = await detectStartedFromLog(pid, logOffset)
318
- if (started) return started
319
259
  if (!isAlive(pid)) {
320
- // The spawned child is gone. Before declaring failure,
321
- // do one patient probe: the child may have died from
322
- // EADDRINUSE because another healthy motel is alive on
323
- // the port but was answering /api/health too slowly for
324
- // our fast poll. If that's the case, adopt it.
325
- const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
326
- if (patient) {
327
- const mismatch = describeManagedMismatch(patient)
328
- if (!mismatch) return patient
329
- throw new Error(mismatch)
330
- }
331
260
  throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
332
261
  }
333
262
  await sleep(START_POLL_INTERVAL_MS)
@@ -335,43 +264,41 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
335
264
  throw new Error(`Timed out waiting for daemon health at ${config.baseUrl}/api/health. See ${config.logPath}.`)
336
265
  }
337
266
 
338
- const stopPid = async (pid: number) => {
267
+ const waitUntilNotOwned = async (entry: RegistryEntry, timeoutMs: number) => {
268
+ const deadline = Date.now() + timeoutMs
269
+ while (Date.now() < deadline) {
270
+ if (!isManagedDaemonProcess(entry)) return true
271
+ await sleep(POLL_INTERVAL_MS)
272
+ }
273
+ return !isManagedDaemonProcess(entry)
274
+ }
275
+
276
+ const stopPid = async (entry: RegistryEntry) => {
277
+ if (!isManagedDaemonProcess(entry)) {
278
+ throw new Error(`Refusing to stop pid ${entry.pid}: registry identity does not match the running managed daemon.`)
279
+ }
339
280
  try {
340
- process.kill(pid, "SIGTERM")
281
+ process.kill(entry.pid, "SIGTERM")
341
282
  } catch (error) {
342
283
  const errno = error as NodeJS.ErrnoException
343
284
  if (errno.code !== "ESRCH") throw error
344
285
  }
345
286
 
346
- const deadline = Date.now() + STOP_TIMEOUT_MS
347
- while (Date.now() < deadline) {
348
- if (!isAlive(pid)) return
349
- const health = await fetchHealth()
350
- if (health && health.pid !== pid) return
351
- const registry = readRegistryEntry()
352
- if (!health && (!registry || registry.pid !== pid)) return
353
- await sleep(POLL_INTERVAL_MS)
287
+ if (!await waitUntilNotOwned(entry, gracefulStopTimeoutMs)) {
288
+ try {
289
+ process.kill(entry.pid, "SIGKILL")
290
+ } catch (error) {
291
+ if ((error as NodeJS.ErrnoException).code !== "ESRCH") throw error
292
+ }
293
+ if (!await waitUntilNotOwned(entry, forceStopTimeoutMs)) {
294
+ throw new Error(`Timed out force-killing daemon ${entry.pid}.`)
295
+ }
354
296
  }
355
-
356
- throw new Error(`Timed out waiting for daemon ${pid} to stop.`)
297
+ const current = readRegistryEntry()
298
+ if (current?.pid === entry.pid && current.instanceId === entry.instanceId) removeRegistryEntry(entry.pid, config.runtimeDir)
357
299
  }
358
300
 
359
301
  const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
360
- // Fast path: trust the local filesystem registry. When a motel
361
- // daemon started on this machine it wrote an entry for its pid
362
- // + cwd + databasePath; if that entry is still there and the pid
363
- // is alive, the daemon is almost certainly the one we want to
364
- // adopt. HTTP health is skipped because the daemon's health
365
- // endpoint can queue behind heavy OTLP ingest traffic, making
366
- // the probe unreliable exactly when the daemon is busy.
367
- const registryStatus = getStatusFromRegistry()
368
- if (registryStatus) return registryStatus
369
-
370
- // No local evidence → fall back to HTTP. Covers the edge cases
371
- // where: a motel daemon is running but was started before this
372
- // registry-first path shipped; OR the port is held by something
373
- // entirely unrelated (the mismatch check turns that into a
374
- // human-readable reason).
375
302
  const registry = readRegistryEntry()
376
303
  const health = await fetchHealth(timeoutMs)
377
304
  if (!health) {
@@ -394,9 +321,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
394
321
  }
395
322
 
396
323
  const mismatch = describeManagedMismatch(health)
324
+ const managed = mismatch === null && registry?.pid === health.pid && registry.instanceId === health.instanceId && isManagedDaemonProcess(registry)
397
325
  return {
398
326
  running: mismatch === null,
399
- managed: mismatch === null,
327
+ managed,
400
328
  service: health.service,
401
329
  pid: health.pid,
402
330
  url: health.url,
@@ -405,7 +333,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
405
333
  startedAt: health.startedAt,
406
334
  version: health.version,
407
335
  sameWorkdir: workdirMatches(config.workdir, health.workdir),
408
- reason: mismatch,
336
+ reason: mismatch ?? (managed ? null : "Responsive motel server is not an identity-verified managed daemon."),
409
337
  logPath: config.logPath,
410
338
  lockPath: config.lockPath,
411
339
  registryPid: registry?.pid ?? null,
@@ -418,26 +346,39 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
418
346
  // negative here drops us into the spawn path and collides with
419
347
  // any slow-but-healthy daemon sitting on the port.
420
348
  const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
421
- if (existing.managed && existing.running) return existing
349
+ const existingEntry = readRegistryEntry()
350
+ if (existing.managed && existing.running) {
351
+ // /api/health can stay healthy after the lazy ingest worker/RPC path
352
+ // has been poisoned by an interrupted request. Empty OTLP posts are
353
+ // side-effect free and exercise the same path real exporters need.
354
+ if (existing.pid === process.pid || await fetchIngestProbe()) return existing
355
+ if (existingEntry) await stopPid(existingEntry)
356
+ }
357
+ if (!existing.running && existingEntry && isManagedDaemonProcess(existingEntry)) await stopPid(existingEntry)
422
358
  if (existing.service !== null && existing.reason) {
423
359
  throw new Error(existing.reason)
424
360
  }
425
361
 
426
362
  const lock = await acquireStartupLock()
427
363
  let spawnedPid: number | null = null
364
+ let spawnedIdentity: string | null = null
428
365
  try {
429
366
  // Same reasoning for the post-lock re-check: another ensure()
430
367
  // may have spawned a daemon between our first probe and the
431
368
  // lock grant, and its initial health response can be slow
432
369
  // while the runtime warms up.
433
370
  const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
434
- if (rechecked.managed && rechecked.running) return rechecked
371
+ if (rechecked.managed && rechecked.running) {
372
+ if (rechecked.pid === process.pid || await fetchIngestProbe()) return rechecked
373
+ const recheckedEntry = readRegistryEntry()
374
+ if (recheckedEntry) await stopPid(recheckedEntry)
375
+ }
435
376
  if (rechecked.service !== null && rechecked.reason) {
436
377
  throw new Error(rechecked.reason)
437
378
  }
438
379
 
439
380
  const logFd = await openLogFile()
440
- const logOffset = fs.fstatSync(logFd).size
381
+ const instanceId = crypto.randomUUID()
441
382
  try {
442
383
  const proc = Bun.spawn({
443
384
  cmd: [process.execPath, "run", config.serverEntry],
@@ -445,11 +386,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
445
386
  detached: true,
446
387
  env: {
447
388
  ...process.env,
448
- ...expectedEnv(config),
389
+ ...expectedEnv(config, instanceId),
449
390
  },
450
391
  stdio: ["ignore", logFd, logFd],
451
392
  })
452
393
  spawnedPid = proc.pid
394
+ spawnedIdentity = processIdentity(proc.pid)
453
395
  proc.unref()
454
396
  } finally {
455
397
  fs.closeSync(logFd)
@@ -459,7 +401,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
459
401
  throw new Error("Daemon failed to spawn.")
460
402
  }
461
403
 
462
- const health = await waitForHealthy(spawnedPid, logOffset)
404
+ const health = await waitForHealthy(spawnedPid, instanceId)
463
405
  return {
464
406
  running: true,
465
407
  managed: true,
@@ -478,7 +420,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
478
420
  }
479
421
  } catch (error) {
480
422
  if (spawnedPid !== null) {
481
- await stopPid(spawnedPid).catch(() => undefined)
423
+ const entry = readRegistryEntry()
424
+ if (entry?.pid === spawnedPid) {
425
+ await stopPid(entry).catch(() => undefined)
426
+ } else if (spawnedIdentity && processIdentity(spawnedPid) === spawnedIdentity) {
427
+ try { process.kill(spawnedPid, "SIGTERM") } catch { /* already exited */ }
428
+ const deadline = Date.now() + gracefulStopTimeoutMs
429
+ while (Date.now() < deadline && processIdentity(spawnedPid) === spawnedIdentity) await sleep(POLL_INTERVAL_MS)
430
+ if (processIdentity(spawnedPid) === spawnedIdentity) {
431
+ try { process.kill(spawnedPid, "SIGKILL") } catch { /* already exited */ }
432
+ }
433
+ }
482
434
  }
483
435
  throw error
484
436
  } finally {
@@ -489,13 +441,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
489
441
  const stop = async (): Promise<DaemonStatus> => {
490
442
  const status = await getStatus()
491
443
  if (status.pid === null) return status
492
- if (!status.sameWorkdir) {
493
- throw new Error(`Refusing to stop motel owned by ${status.workdir}.`)
494
- }
495
444
  if (status.service !== null && status.service !== MOTEL_SERVICE_ID) {
496
445
  throw new Error(`Refusing to stop non-motel service ${status.service} on ${status.url}.`)
497
446
  }
498
- await stopPid(status.pid)
447
+ const entry = readRegistryEntry()
448
+ if (!entry || entry.pid !== status.pid) throw new Error(`Refusing to stop pid ${status.pid}: no matching managed registry entry.`)
449
+ await stopPid(entry)
499
450
  return await getStatus()
500
451
  }
501
452
 
package/src/httpApi.ts CHANGED
@@ -25,6 +25,7 @@ const Health = Schema.Struct({
25
25
  workdir: Schema.String.pipe(Schema.annotateKey({ description: "Working directory at the time the server started. Used by MCP discovery to match the current project via longest-prefix." })),
26
26
  startedAt: Schema.String.pipe(Schema.annotateKey({ description: "ISO 8601 timestamp of when the server bound its port." })),
27
27
  version: Schema.String.pipe(Schema.annotateKey({ description: "Motel version string." })),
28
+ instanceId: Schema.optionalKey(Schema.String).pipe(Schema.annotateKey({ description: "Managed-daemon instance nonce used for readiness and safe shutdown identity checks." })),
28
29
  })
29
30
  const IngestTraceResponse = Schema.Struct({ insertedSpans: Schema.Number })
30
31
  const IngestLogResponse = Schema.Struct({ insertedLogs: Schema.Number })
package/src/httpListPolicy.test.ts ADDED
@@ -0,0 +1,76 @@
1
+ import { describe, expect, it } from "bun:test"
2
+ import type { LogItem, TraceSummaryItem } from "./domain.js"
3
+ import { LOG_LIST, LOG_STATS, parseListParams, paginateLogs, paginateSummaries, traceCursorArgs } from "./httpListPolicy.js"
4
+
5
+ const BASE_URL = "http://127.0.0.1:27686"
6
+
7
+ describe("HTTP list policy", () => {
8
+ it("bounds list parameters and extracts attribute filters", () => {
9
+ const params = parseListParams({
10
+ url: "/api/logs?limit=9999&lookback=9d&attr.session.id=abc&attrContains.message=failed",
11
+ }, LOG_LIST, BASE_URL)
12
+
13
+ expect(params.limit).toBe(500)
14
+ expect(params.lookbackMinutes).toBe(24 * 60)
15
+ expect(params.attributeFilters).toEqual({ "session.id": "abc" })
16
+ expect(params.attributeContainsFilters).toEqual({ message: "failed" })
17
+ })
18
+
19
+ it("round-trips a trace cursor through page metadata", () => {
20
+ const traces: readonly TraceSummaryItem[] = [
21
+ {
22
+ traceId: "trace-1",
23
+ serviceName: "api",
24
+ rootOperationName: "GET /first",
25
+ startedAt: new Date(1000),
26
+ isRunning: false,
27
+ durationMs: 2,
28
+ spanCount: 1,
29
+ errorCount: 0,
30
+ warnings: [],
31
+ },
32
+ {
33
+ traceId: "trace-2",
34
+ serviceName: "api",
35
+ rootOperationName: "GET /second",
36
+ startedAt: new Date(900),
37
+ isRunning: false,
38
+ durationMs: 1,
39
+ spanCount: 1,
40
+ errorCount: 0,
41
+ warnings: [],
42
+ },
43
+ ]
44
+
45
+ const page = paginateSummaries(traces, { limit: 1, lookbackMinutes: 60 })
46
+ const parsed = parseListParams({ url: `/api/traces?cursor=${page.meta.nextCursor}` }, LOG_LIST, BASE_URL)
47
+
48
+ expect(page.meta.truncated).toBe(true)
49
+ expect(traceCursorArgs(parsed.cursor)).toEqual({ cursorStartedAtMs: 1000, cursorTraceId: "trace-1" })
50
+ })
51
+
52
+ it("formats log page metadata and emits a cursor", () => {
53
+ const logs: readonly LogItem[] = [{
54
+ id: "12",
55
+ timestamp: new Date(1200),
56
+ serviceName: "api",
57
+ severityText: "INFO",
58
+ body: "ready",
59
+ traceId: null,
60
+ spanId: null,
61
+ scopeName: null,
62
+ attributes: {},
63
+ }]
64
+
65
+ const page = paginateLogs(logs, { limit: 10, lookbackMinutes: 120 })
66
+
67
+ expect(page.meta).toMatchObject({ limit: 10, lookback: "2h", returned: 1, truncated: false })
68
+ expect(page.meta.nextCursor).not.toBeNull()
69
+ })
70
+
71
+ it("keeps aggregate log queries bounded to twenty groups by default", () => {
72
+ const params = parseListParams({ url: "/api/logs/stats?groupBy=service&agg=count" }, LOG_STATS, BASE_URL)
73
+
74
+ expect(params.limit).toBe(20)
75
+ })
76
+ })
package/src/httpListPolicy.ts ADDED
@@ -0,0 +1,129 @@
1
+ import type { LogItem, TraceSummaryItem } from "./domain.js"
2
+ import { attributeContainsFiltersFromEntries, attributeFiltersFromEntries } from "./queryFilters.js"
3
+
4
+ type CursorShape =
5
+ | { readonly kind: "trace"; readonly startedAt: number; readonly id: string }
6
+ | { readonly kind: "log"; readonly timestamp: number; readonly id: string }
7
+
8
+ export interface ListBounds {
9
+ readonly defaultLimit: number
10
+ readonly maxLimit: number
11
+ readonly defaultLookback: number
12
+ readonly maxLookback: number
13
+ }
14
+
15
+ export interface ListParams {
16
+ readonly url: URL
17
+ readonly limit: number
18
+ readonly lookbackMinutes: number
19
+ readonly cursor: CursorShape | null
20
+ readonly attributeFilters: Readonly<Record<string, string>>
21
+ readonly attributeContainsFilters: Readonly<Record<string, string>>
22
+ }
23
+
24
+ export const TRACE_LIST: ListBounds = { defaultLimit: 20, maxLimit: 100, defaultLookback: 60, maxLookback: 24 * 60 }
25
+ export const SPAN_LIST: ListBounds = { defaultLimit: 100, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
26
+ export const LOG_LIST: ListBounds = { defaultLimit: 100, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
27
+ export const AI_LIST: ListBounds = { defaultLimit: 20, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
28
+ export const TRACE_STATS: ListBounds = { defaultLimit: 20, maxLimit: 100, defaultLookback: 60, maxLookback: 24 * 60 }
29
+ export const LOG_STATS: ListBounds = { defaultLimit: 20, maxLimit: 500, defaultLookback: 60, maxLookback: 24 * 60 }
30
+
31
+ export const requestUrl = (request: { readonly url: string }, baseUrl: string) => new URL(request.url, baseUrl)
32
+
33
+ const parsePositiveInt = (value: string | undefined, defaultValue: number) => {
34
+ const parsed = Number.parseInt(value ?? "", 10)
35
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : defaultValue
36
+ }
37
+
38
+ export const parseLimit = (value: string | null, fallback: number) => parsePositiveInt(value ?? undefined, fallback)
39
+
40
+ export const parseLookbackMinutes = (value: string | null, fallback: number) => {
41
+ if (!value) return fallback
42
+ const match = value.trim().match(/^(\d+)([mhd])$/i)
43
+ if (!match) return fallback
44
+ const amount = Number.parseInt(match[1] ?? "", 10)
45
+ if (!Number.isFinite(amount) || amount <= 0) return fallback
46
+ const unit = (match[2] ?? "m").toLowerCase()
47
+ if (unit === "d") return amount * 1440
48
+ if (unit === "h") return amount * 60
49
+ return amount
50
+ }
51
+
52
+ const clamp = (value: number, min: number, max: number) => Math.max(min, Math.min(value, max))
53
+
54
+ const decodeCursor = (value: string | null): CursorShape | null => {
55
+ if (!value) return null
56
+ try {
57
+ return JSON.parse(Buffer.from(value, "base64url").toString("utf8")) as CursorShape
58
+ } catch {
59
+ return null
60
+ }
61
+ }
62
+
63
+ const encodeCursor = (cursor: CursorShape) => Buffer.from(JSON.stringify(cursor), "utf8").toString("base64url")
64
+
65
+ export const traceCursorArgs = (cursor: CursorShape | null) =>
66
+ cursor?.kind === "trace"
67
+ ? { cursorStartedAtMs: cursor.startedAt, cursorTraceId: cursor.id }
68
+ : {}
69
+
70
+ export const logCursorArgs = (cursor: CursorShape | null) =>
71
+ cursor?.kind === "log"
72
+ ? { cursorTimestampMs: cursor.timestamp, cursorId: cursor.id }
73
+ : {}
74
+
75
+ export const parseListParams = (request: { readonly url: string }, bounds: ListBounds, baseUrl: string): ListParams => {
76
+ const url = requestUrl(request, baseUrl)
77
+ return {
78
+ url,
79
+ limit: clamp(parseLimit(url.searchParams.get("limit"), bounds.defaultLimit), 1, bounds.maxLimit),
80
+ lookbackMinutes: clamp(parseLookbackMinutes(url.searchParams.get("lookback"), bounds.defaultLookback), 1, bounds.maxLookback),
81
+ cursor: decodeCursor(url.searchParams.get("cursor")),
82
+ attributeFilters: attributeFiltersFromEntries(url.searchParams.entries()),
83
+ attributeContainsFilters: attributeContainsFiltersFromEntries(url.searchParams.entries()),
84
+ }
85
+ }
86
+
87
+ const formatLookback = (minutes: number) => {
88
+ if (minutes % 1440 === 0) return `${minutes / 1440}d`
89
+ if (minutes % 60 === 0) return `${minutes / 60}h`
90
+ return `${minutes}m`
91
+ }
92
+
93
+ export const listMeta = (input: { readonly limit: number; readonly lookbackMinutes: number; readonly returned: number; readonly truncated: boolean; readonly nextCursor: string | null }) => ({
94
+ limit: input.limit,
95
+ lookback: formatLookback(input.lookbackMinutes),
96
+ returned: input.returned,
97
+ truncated: input.truncated,
98
+ nextCursor: input.nextCursor,
99
+ })
100
+
101
+ export const paginateSummaries = (summaries: readonly TraceSummaryItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
102
+ const page = summaries.slice(0, options.limit)
103
+ const last = page.at(-1)
104
+ return {
105
+ data: page,
106
+ meta: listMeta({
107
+ limit: options.limit,
108
+ lookbackMinutes: options.lookbackMinutes,
109
+ returned: page.length,
110
+ truncated: summaries.length > page.length,
111
+ nextCursor: last ? encodeCursor({ kind: "trace", startedAt: last.startedAt.getTime(), id: last.traceId }) : null,
112
+ }),
113
+ }
114
+ }
115
+
116
+ export const paginateLogs = (logs: readonly LogItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
117
+ const page = logs.slice(0, options.limit)
118
+ const last = page.at(-1)
119
+ return {
120
+ data: page,
121
+ meta: listMeta({
122
+ limit: options.limit,
123
+ lookbackMinutes: options.lookbackMinutes,
124
+ returned: page.length,
125
+ truncated: logs.length > page.length,
126
+ nextCursor: last ? encodeCursor({ kind: "log", timestamp: last.timestamp.getTime(), id: last.id }) : null,
127
+ }),
128
+ }
129
+ }