@kitlangton/motel 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +11 -8
- package/README.md +13 -2
- package/package.json +31 -19
- package/skills/motel-debug/SKILL.md +203 -0
- package/skills/motel-debug/references/effect.md +38 -0
- package/src/App.tsx +3 -5
- package/src/StartupGate.tsx +8 -10
- package/src/cli.ts +15 -16
- package/src/config.ts +7 -1
- package/src/daemon.test.ts +332 -51
- package/src/daemon.ts +103 -152
- package/src/httpApi.ts +1 -0
- package/src/httpListPolicy.test.ts +76 -0
- package/src/httpListPolicy.ts +129 -0
- package/src/localServer.ts +194 -323
- package/src/mcp.ts +2 -1
- package/src/opentui-jsx.d.ts +11 -0
- package/src/otlp.test.ts +65 -0
- package/src/otlp.ts +20 -0
- package/src/otlpProtobuf.ts +35 -0
- package/src/registry.ts +37 -11
- package/src/runtime.ts +2 -6
- package/src/services/AsyncIngest.ts +20 -8
- package/src/services/LogQueryService.ts +11 -25
- package/src/services/TelemetryQuery.ts +62 -0
- package/src/services/TelemetryStore.ts +433 -249
- package/src/services/TraceQueryService.ts +18 -52
- package/src/services/ingestRpc.ts +2 -4
- package/src/services/queryRpc.ts +15 -0
- package/src/services/telemetryQueryWorker.ts +32 -0
- package/src/services/telemetryWorker.ts +5 -8
- package/src/storybook/aiChatStory.tsx +1 -1
- package/src/telemetry.test.ts +307 -41
- package/src/ui/AiChatView.tsx +1 -1
- package/src/ui/AttrFilterModal.tsx +1 -1
- package/src/ui/ServiceLogs.tsx +10 -7
- package/src/ui/SpanContentView.tsx +24 -21
- package/src/ui/TraceDetailsPane.tsx +1 -1
- package/src/ui/TraceList.tsx +1 -1
- package/src/ui/aiState.ts +10 -22
- package/src/ui/app/TraceWorkspace.tsx +2 -1
- package/src/ui/app/useAppLayout.ts +1 -1
- package/src/ui/app/useTraceScreenData.ts +22 -18
- package/src/ui/cachedLoader.test.ts +23 -0
- package/src/ui/cachedLoader.ts +60 -0
- package/src/ui/loaders.ts +34 -53
- package/src/ui/primitives.tsx +1 -1
- package/src/ui/state.ts +2 -0
- package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
- package/src/ui/traceSortNav.repro.seed.ts +1 -1
- package/src/ui/traceSortNav.repro.test.ts +12 -2
- package/src/ui/useAttrFilterPicker.ts +10 -8
- package/src/ui/useKeyboardNav.ts +3 -6
- package/src/ui/waterfallNav.repro.seed.ts +1 -1
- package/src/ui/waterfallNav.repro.test.ts +16 -8
- package/web/dist/assets/index-B01z9BaO.css +2 -0
- package/web/dist/assets/index-M86tcih5.js +22 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-DnyVo03x.js +0 -27
- package/web/dist/assets/index-DzuHNBGV.css +0 -2
package/src/daemon.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
|
|
|
2
2
|
import { promises as fsp } from "node:fs"
|
|
3
3
|
import * as path from "node:path"
|
|
4
4
|
import { Effect } from "effect"
|
|
5
|
-
import { isAlive, listAliveEntries, MOTEL_SERVICE_ID,
|
|
5
|
+
import { isAlive, isManagedDaemonProcess, listAliveEntries, motelStateDir, MOTEL_SERVICE_ID, processIdentity, removeRegistryEntry, type RegistryEntry } from "./registry.js"
|
|
6
6
|
|
|
7
7
|
const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
|
|
8
8
|
const DEFAULT_HOST = "127.0.0.1"
|
|
@@ -24,6 +24,7 @@ const HEALTH_FAST_TIMEOUT_MS = 750
|
|
|
24
24
|
* and short enough that a truly-down daemon is still detected
|
|
25
25
|
* before START_TIMEOUT_MS fires. */
|
|
26
26
|
const HEALTH_PATIENT_TIMEOUT_MS = 3_000
|
|
27
|
+
const INGEST_PROBE_TIMEOUT_MS = 3_000
|
|
27
28
|
|
|
28
29
|
type HealthShape = {
|
|
29
30
|
readonly ok: boolean
|
|
@@ -34,11 +35,13 @@ type HealthShape = {
|
|
|
34
35
|
readonly workdir: string
|
|
35
36
|
readonly startedAt: string
|
|
36
37
|
readonly version: string
|
|
38
|
+
readonly instanceId?: string
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
type LockShape = {
|
|
40
42
|
readonly pid: number
|
|
41
43
|
readonly createdAt: string
|
|
44
|
+
readonly processIdentity?: string
|
|
42
45
|
}
|
|
43
46
|
|
|
44
47
|
type DaemonConfig = {
|
|
@@ -85,6 +88,9 @@ type DaemonOptions = {
|
|
|
85
88
|
readonly databasePath?: string
|
|
86
89
|
readonly host?: string
|
|
87
90
|
readonly port?: number
|
|
91
|
+
readonly startTimeoutMs?: number
|
|
92
|
+
readonly gracefulStopTimeoutMs?: number
|
|
93
|
+
readonly forceStopTimeoutMs?: number
|
|
88
94
|
}
|
|
89
95
|
|
|
90
96
|
export class DaemonError extends Error {
|
|
@@ -97,12 +103,14 @@ export class DaemonError extends Error {
|
|
|
97
103
|
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
|
|
98
104
|
|
|
99
105
|
const resolveConfig = (options: DaemonOptions = {}): DaemonConfig => {
|
|
106
|
+
const envBaseUrl = new URL(process.env.MOTEL_OTEL_BASE_URL?.trim() || process.env.MOTEL_OTEL_QUERY_URL?.trim() || `http://${DEFAULT_HOST}:${DEFAULT_PORT}`)
|
|
100
107
|
const repoRoot = path.resolve(options.repoRoot ?? DEFAULT_REPO_ROOT)
|
|
101
108
|
const workdir = path.resolve(options.workdir ?? process.cwd())
|
|
102
|
-
const runtimeDir = path.resolve(options.runtimeDir ??
|
|
103
|
-
const databasePath = path.resolve(options.databasePath ?? path.join(runtimeDir, "telemetry.sqlite"))
|
|
104
|
-
const host = options.host ??
|
|
105
|
-
const
|
|
109
|
+
const runtimeDir = path.resolve(options.runtimeDir ?? motelStateDir())
|
|
110
|
+
const databasePath = path.resolve(options.databasePath ?? process.env.MOTEL_OTEL_DB_PATH?.trim() ?? path.join(runtimeDir, "telemetry.sqlite"))
|
|
111
|
+
const host = options.host ?? process.env.MOTEL_OTEL_HOST?.trim() ?? envBaseUrl.hostname
|
|
112
|
+
const envPort = Number.parseInt(process.env.MOTEL_OTEL_PORT?.trim() || envBaseUrl.port, 10)
|
|
113
|
+
const port = options.port ?? (Number.isFinite(envPort) && envPort > 0 ? envPort : DEFAULT_PORT)
|
|
106
114
|
return {
|
|
107
115
|
repoRoot,
|
|
108
116
|
serverEntry: path.join(repoRoot, "src/server.ts"),
|
|
@@ -123,17 +131,17 @@ const workdirMatches = (targetWorkdir: string, daemonWorkdir: string) => {
|
|
|
123
131
|
return normalizedTarget === normalizedDaemon || normalizedTarget.startsWith(normalizedDaemon)
|
|
124
132
|
}
|
|
125
133
|
|
|
126
|
-
const
|
|
127
|
-
const withSep = targetWorkdir.endsWith(path.sep) ? targetWorkdir : `${targetWorkdir}${path.sep}`
|
|
134
|
+
const pickByUrl = (entries: readonly RegistryEntry[], baseUrl: string, databasePath: string) => {
|
|
128
135
|
return entries
|
|
129
136
|
.filter((entry) => {
|
|
130
|
-
|
|
131
|
-
return withSep === workdir || withSep.startsWith(workdir)
|
|
137
|
+
return entry.url === baseUrl && (entry.databasePath === undefined || entry.databasePath === databasePath)
|
|
132
138
|
})
|
|
133
|
-
.sort((a, b) => b.
|
|
139
|
+
.sort((a, b) => Number(b.databasePath === databasePath) - Number(a.databasePath === databasePath))[0] ?? null
|
|
134
140
|
}
|
|
135
141
|
|
|
136
|
-
const expectedEnv = (config: DaemonConfig) => ({
|
|
142
|
+
const expectedEnv = (config: DaemonConfig, instanceId?: string) => ({
|
|
143
|
+
MOTEL_RUNTIME_DIR: config.runtimeDir,
|
|
144
|
+
...(instanceId ? { MOTEL_DAEMON_INSTANCE_ID: instanceId } : {}),
|
|
137
145
|
MOTEL_OTEL_BASE_URL: config.baseUrl,
|
|
138
146
|
MOTEL_OTEL_QUERY_URL: config.baseUrl,
|
|
139
147
|
MOTEL_OTEL_HOST: config.host,
|
|
@@ -145,43 +153,35 @@ const expectedEnv = (config: DaemonConfig) => ({
|
|
|
145
153
|
|
|
146
154
|
export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager => {
|
|
147
155
|
const config = resolveConfig(options)
|
|
156
|
+
const startTimeoutMs = options.startTimeoutMs ?? START_TIMEOUT_MS
|
|
157
|
+
const gracefulStopTimeoutMs = options.gracefulStopTimeoutMs ?? STOP_TIMEOUT_MS
|
|
158
|
+
const forceStopTimeoutMs = options.forceStopTimeoutMs ?? 2_000
|
|
148
159
|
const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
|
|
149
|
-
const readRegistryEntry = () =>
|
|
160
|
+
const readRegistryEntry = () => pickByUrl(listAliveEntries(config.runtimeDir), config.baseUrl, config.databasePath)
|
|
150
161
|
|
|
151
162
|
const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
|
|
152
163
|
try {
|
|
153
164
|
const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
|
|
154
165
|
if (!response.ok) return null
|
|
155
|
-
|
|
166
|
+
const health = await response.json() as HealthShape
|
|
167
|
+
return health.ok ? health : null
|
|
156
168
|
} catch {
|
|
157
169
|
return null
|
|
158
170
|
}
|
|
159
171
|
}
|
|
160
172
|
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
const readLogSince = async (offset: number) => {
|
|
173
|
+
const fetchIngestProbe = async () => {
|
|
164
174
|
try {
|
|
165
|
-
const
|
|
166
|
-
|
|
175
|
+
const postEmpty = (path: string) => fetch(`${config.baseUrl}${path}`, {
|
|
176
|
+
method: "POST",
|
|
177
|
+
headers: { "content-type": "application/json" },
|
|
178
|
+
body: "{}",
|
|
179
|
+
signal: AbortSignal.timeout(INGEST_PROBE_TIMEOUT_MS),
|
|
180
|
+
})
|
|
181
|
+
const [traces, logs] = await Promise.all([postEmpty("/v1/traces"), postEmpty("/v1/logs")])
|
|
182
|
+
return traces.ok && logs.ok
|
|
167
183
|
} catch {
|
|
168
|
-
return
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
const detectStartedFromLog = async (pid: number, offset: number): Promise<HealthShape | null> => {
|
|
173
|
-
if (!isAlive(pid)) return null
|
|
174
|
-
const tail = await readLogSince(offset)
|
|
175
|
-
if (!startupMarkers.some((marker) => tail.includes(marker))) return null
|
|
176
|
-
return {
|
|
177
|
-
ok: true,
|
|
178
|
-
service: MOTEL_SERVICE_ID,
|
|
179
|
-
databasePath: config.databasePath,
|
|
180
|
-
pid,
|
|
181
|
-
url: config.baseUrl,
|
|
182
|
-
workdir: config.workdir,
|
|
183
|
-
startedAt: new Date().toISOString(),
|
|
184
|
-
version: MOTEL_VERSION,
|
|
184
|
+
return false
|
|
185
185
|
}
|
|
186
186
|
}
|
|
187
187
|
|
|
@@ -189,71 +189,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
189
189
|
if (health.service !== MOTEL_SERVICE_ID) {
|
|
190
190
|
return `Port ${config.port} is in use by ${health.service}, not ${MOTEL_SERVICE_ID}.`
|
|
191
191
|
}
|
|
192
|
-
if (!workdirMatches(config.workdir, health.workdir)) {
|
|
193
|
-
return `Port ${config.port} is serving motel for ${health.workdir}, not ${config.workdir}.`
|
|
194
|
-
}
|
|
195
192
|
if (health.databasePath !== config.databasePath) {
|
|
196
193
|
return `Port ${config.port} is serving motel with ${health.databasePath}, expected ${config.databasePath}.`
|
|
197
194
|
}
|
|
198
195
|
return null
|
|
199
196
|
}
|
|
200
197
|
|
|
201
|
-
/**
|
|
202
|
-
* Mismatch check against a registry entry — mirrors describeManagedMismatch
|
|
203
|
-
* but drives off the registry file instead of an HTTP health response.
|
|
204
|
-
* Used on the fast path in getStatus so warm-start doesn't need to wait
|
|
205
|
-
* on an HTTP round-trip that may queue behind heavy OTLP ingest.
|
|
206
|
-
*
|
|
207
|
-
* The service-id check is implicit: any entry living in the motel
|
|
208
|
-
* registry dir is by construction a motel daemon. databasePath is
|
|
209
|
-
* optional for back-compat with entries written by older builds;
|
|
210
|
-
* when absent we skip the DB check rather than refusing to adopt.
|
|
211
|
-
*/
|
|
212
|
-
const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
|
|
213
|
-
if (!workdirMatches(config.workdir, entry.workdir)) {
|
|
214
|
-
return `Port ${config.port} is serving motel for ${entry.workdir}, not ${config.workdir}.`
|
|
215
|
-
}
|
|
216
|
-
if (entry.databasePath && entry.databasePath !== config.databasePath) {
|
|
217
|
-
return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
|
|
218
|
-
}
|
|
219
|
-
return null
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
/**
|
|
223
|
-
* Build a DaemonStatus from a live registry entry. Returns null when
|
|
224
|
-
* there's no entry for our cwd, the registered pid isn't running, or
|
|
225
|
-
* the entry is for a differently-configured daemon (different port).
|
|
226
|
-
* This is the fast path: no HTTP, no event-loop round-trip, just a
|
|
227
|
-
* directory read and a process.kill(pid, 0) liveness probe.
|
|
228
|
-
*/
|
|
229
|
-
const getStatusFromRegistry = (): DaemonStatus | null => {
|
|
230
|
-
const entry = readRegistryEntry()
|
|
231
|
-
if (!entry) return null
|
|
232
|
-
// Port discriminator: a motel registry shared across several
|
|
233
|
-
// daemons (e.g., user running two instances on different
|
|
234
|
-
// ports from the same workdir, or a test harness on a random
|
|
235
|
-
// port) would otherwise have us adopt an unrelated daemon.
|
|
236
|
-
// URL match is a fast, unambiguous identity check.
|
|
237
|
-
if (entry.url !== config.baseUrl) return null
|
|
238
|
-
const mismatch = describeRegistryMismatch(entry)
|
|
239
|
-
return {
|
|
240
|
-
running: mismatch === null,
|
|
241
|
-
managed: mismatch === null,
|
|
242
|
-
service: MOTEL_SERVICE_ID,
|
|
243
|
-
pid: entry.pid,
|
|
244
|
-
url: entry.url,
|
|
245
|
-
databasePath: entry.databasePath ?? config.databasePath,
|
|
246
|
-
workdir: entry.workdir,
|
|
247
|
-
startedAt: entry.startedAt,
|
|
248
|
-
version: entry.version,
|
|
249
|
-
sameWorkdir: workdirMatches(config.workdir, entry.workdir),
|
|
250
|
-
reason: mismatch,
|
|
251
|
-
logPath: config.logPath,
|
|
252
|
-
lockPath: config.lockPath,
|
|
253
|
-
registryPid: entry.pid,
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
|
|
257
198
|
const readLock = async (): Promise<LockShape | null> => {
|
|
258
199
|
try {
|
|
259
200
|
const raw = await fsp.readFile(config.lockPath, "utf8")
|
|
@@ -269,7 +210,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
269
210
|
await fsp.rm(config.lockPath, { force: true })
|
|
270
211
|
return true
|
|
271
212
|
}
|
|
272
|
-
if (isAlive(current.pid)) return false
|
|
213
|
+
if (current.processIdentity ? processIdentity(current.pid) === current.processIdentity : isAlive(current.pid)) return false
|
|
273
214
|
await fsp.rm(config.lockPath, { force: true })
|
|
274
215
|
return true
|
|
275
216
|
}
|
|
@@ -281,7 +222,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
281
222
|
while (Date.now() < deadline) {
|
|
282
223
|
try {
|
|
283
224
|
const handle = await fsp.open(config.lockPath, "wx")
|
|
284
|
-
const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() } satisfies LockShape)
|
|
225
|
+
const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), processIdentity: processIdentity(process.pid) ?? undefined } satisfies LockShape)
|
|
285
226
|
await handle.writeFile(contents, "utf8")
|
|
286
227
|
return {
|
|
287
228
|
release: async () => {
|
|
@@ -305,29 +246,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
305
246
|
return fs.openSync(config.logPath, "a")
|
|
306
247
|
}
|
|
307
248
|
|
|
308
|
-
const waitForHealthy = async (pid: number,
|
|
309
|
-
const deadline = Date.now() +
|
|
249
|
+
const waitForHealthy = async (pid: number, instanceId: string) => {
|
|
250
|
+
const deadline = Date.now() + startTimeoutMs
|
|
310
251
|
while (Date.now() < deadline) {
|
|
311
252
|
const health = await fetchHealth()
|
|
312
253
|
if (health) {
|
|
313
254
|
const mismatch = describeManagedMismatch(health)
|
|
314
|
-
|
|
315
|
-
|
|
255
|
+
const registry = readRegistryEntry()
|
|
256
|
+
if (!mismatch && health.pid === pid && health.instanceId === instanceId && registry?.pid === pid && registry.instanceId === instanceId && isManagedDaemonProcess(registry) && await fetchIngestProbe()) return health
|
|
257
|
+
if (mismatch) throw new Error(mismatch)
|
|
316
258
|
}
|
|
317
|
-
const started = await detectStartedFromLog(pid, logOffset)
|
|
318
|
-
if (started) return started
|
|
319
259
|
if (!isAlive(pid)) {
|
|
320
|
-
// The spawned child is gone. Before declaring failure,
|
|
321
|
-
// do one patient probe: the child may have died from
|
|
322
|
-
// EADDRINUSE because another healthy motel is alive on
|
|
323
|
-
// the port but was answering /api/health too slowly for
|
|
324
|
-
// our fast poll. If that's the case, adopt it.
|
|
325
|
-
const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
|
|
326
|
-
if (patient) {
|
|
327
|
-
const mismatch = describeManagedMismatch(patient)
|
|
328
|
-
if (!mismatch) return patient
|
|
329
|
-
throw new Error(mismatch)
|
|
330
|
-
}
|
|
331
260
|
throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
|
|
332
261
|
}
|
|
333
262
|
await sleep(START_POLL_INTERVAL_MS)
|
|
@@ -335,43 +264,41 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
335
264
|
throw new Error(`Timed out waiting for daemon health at ${config.baseUrl}/api/health. See ${config.logPath}.`)
|
|
336
265
|
}
|
|
337
266
|
|
|
338
|
-
const
|
|
267
|
+
const waitUntilNotOwned = async (entry: RegistryEntry, timeoutMs: number) => {
|
|
268
|
+
const deadline = Date.now() + timeoutMs
|
|
269
|
+
while (Date.now() < deadline) {
|
|
270
|
+
if (!isManagedDaemonProcess(entry)) return true
|
|
271
|
+
await sleep(POLL_INTERVAL_MS)
|
|
272
|
+
}
|
|
273
|
+
return !isManagedDaemonProcess(entry)
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
const stopPid = async (entry: RegistryEntry) => {
|
|
277
|
+
if (!isManagedDaemonProcess(entry)) {
|
|
278
|
+
throw new Error(`Refusing to stop pid ${entry.pid}: registry identity does not match the running managed daemon.`)
|
|
279
|
+
}
|
|
339
280
|
try {
|
|
340
|
-
process.kill(pid, "SIGTERM")
|
|
281
|
+
process.kill(entry.pid, "SIGTERM")
|
|
341
282
|
} catch (error) {
|
|
342
283
|
const errno = error as NodeJS.ErrnoException
|
|
343
284
|
if (errno.code !== "ESRCH") throw error
|
|
344
285
|
}
|
|
345
286
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
if (!
|
|
353
|
-
|
|
287
|
+
if (!await waitUntilNotOwned(entry, gracefulStopTimeoutMs)) {
|
|
288
|
+
try {
|
|
289
|
+
process.kill(entry.pid, "SIGKILL")
|
|
290
|
+
} catch (error) {
|
|
291
|
+
if ((error as NodeJS.ErrnoException).code !== "ESRCH") throw error
|
|
292
|
+
}
|
|
293
|
+
if (!await waitUntilNotOwned(entry, forceStopTimeoutMs)) {
|
|
294
|
+
throw new Error(`Timed out force-killing daemon ${entry.pid}.`)
|
|
295
|
+
}
|
|
354
296
|
}
|
|
355
|
-
|
|
356
|
-
|
|
297
|
+
const current = readRegistryEntry()
|
|
298
|
+
if (current?.pid === entry.pid && current.instanceId === entry.instanceId) removeRegistryEntry(entry.pid, config.runtimeDir)
|
|
357
299
|
}
|
|
358
300
|
|
|
359
301
|
const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
|
|
360
|
-
// Fast path: trust the local filesystem registry. When a motel
|
|
361
|
-
// daemon started on this machine it wrote an entry for its pid
|
|
362
|
-
// + cwd + databasePath; if that entry is still there and the pid
|
|
363
|
-
// is alive, the daemon is almost certainly the one we want to
|
|
364
|
-
// adopt. HTTP health is skipped because the daemon's health
|
|
365
|
-
// endpoint can queue behind heavy OTLP ingest traffic, making
|
|
366
|
-
// the probe unreliable exactly when the daemon is busy.
|
|
367
|
-
const registryStatus = getStatusFromRegistry()
|
|
368
|
-
if (registryStatus) return registryStatus
|
|
369
|
-
|
|
370
|
-
// No local evidence → fall back to HTTP. Covers the edge cases
|
|
371
|
-
// where: a motel daemon is running but was started before this
|
|
372
|
-
// registry-first path shipped; OR the port is held by something
|
|
373
|
-
// entirely unrelated (the mismatch check turns that into a
|
|
374
|
-
// human-readable reason).
|
|
375
302
|
const registry = readRegistryEntry()
|
|
376
303
|
const health = await fetchHealth(timeoutMs)
|
|
377
304
|
if (!health) {
|
|
@@ -394,9 +321,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
394
321
|
}
|
|
395
322
|
|
|
396
323
|
const mismatch = describeManagedMismatch(health)
|
|
324
|
+
const managed = mismatch === null && registry?.pid === health.pid && registry.instanceId === health.instanceId && isManagedDaemonProcess(registry)
|
|
397
325
|
return {
|
|
398
326
|
running: mismatch === null,
|
|
399
|
-
managed
|
|
327
|
+
managed,
|
|
400
328
|
service: health.service,
|
|
401
329
|
pid: health.pid,
|
|
402
330
|
url: health.url,
|
|
@@ -405,7 +333,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
405
333
|
startedAt: health.startedAt,
|
|
406
334
|
version: health.version,
|
|
407
335
|
sameWorkdir: workdirMatches(config.workdir, health.workdir),
|
|
408
|
-
reason: mismatch,
|
|
336
|
+
reason: mismatch ?? (managed ? null : "Responsive motel server is not an identity-verified managed daemon."),
|
|
409
337
|
logPath: config.logPath,
|
|
410
338
|
lockPath: config.lockPath,
|
|
411
339
|
registryPid: registry?.pid ?? null,
|
|
@@ -418,26 +346,39 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
418
346
|
// negative here drops us into the spawn path and collides with
|
|
419
347
|
// any slow-but-healthy daemon sitting on the port.
|
|
420
348
|
const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
421
|
-
|
|
349
|
+
const existingEntry = readRegistryEntry()
|
|
350
|
+
if (existing.managed && existing.running) {
|
|
351
|
+
// /api/health can stay healthy after the lazy ingest worker/RPC path
|
|
352
|
+
// has been poisoned by an interrupted request. Empty OTLP posts are
|
|
353
|
+
// side-effect free and exercise the same path real exporters need.
|
|
354
|
+
if (existing.pid === process.pid || await fetchIngestProbe()) return existing
|
|
355
|
+
if (existingEntry) await stopPid(existingEntry)
|
|
356
|
+
}
|
|
357
|
+
if (!existing.running && existingEntry && isManagedDaemonProcess(existingEntry)) await stopPid(existingEntry)
|
|
422
358
|
if (existing.service !== null && existing.reason) {
|
|
423
359
|
throw new Error(existing.reason)
|
|
424
360
|
}
|
|
425
361
|
|
|
426
362
|
const lock = await acquireStartupLock()
|
|
427
363
|
let spawnedPid: number | null = null
|
|
364
|
+
let spawnedIdentity: string | null = null
|
|
428
365
|
try {
|
|
429
366
|
// Same reasoning for the post-lock re-check: another ensure()
|
|
430
367
|
// may have spawned a daemon between our first probe and the
|
|
431
368
|
// lock grant, and its initial health response can be slow
|
|
432
369
|
// while the runtime warms up.
|
|
433
370
|
const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
434
|
-
if (rechecked.managed && rechecked.running)
|
|
371
|
+
if (rechecked.managed && rechecked.running) {
|
|
372
|
+
if (rechecked.pid === process.pid || await fetchIngestProbe()) return rechecked
|
|
373
|
+
const recheckedEntry = readRegistryEntry()
|
|
374
|
+
if (recheckedEntry) await stopPid(recheckedEntry)
|
|
375
|
+
}
|
|
435
376
|
if (rechecked.service !== null && rechecked.reason) {
|
|
436
377
|
throw new Error(rechecked.reason)
|
|
437
378
|
}
|
|
438
379
|
|
|
439
380
|
const logFd = await openLogFile()
|
|
440
|
-
const
|
|
381
|
+
const instanceId = crypto.randomUUID()
|
|
441
382
|
try {
|
|
442
383
|
const proc = Bun.spawn({
|
|
443
384
|
cmd: [process.execPath, "run", config.serverEntry],
|
|
@@ -445,11 +386,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
445
386
|
detached: true,
|
|
446
387
|
env: {
|
|
447
388
|
...process.env,
|
|
448
|
-
...expectedEnv(config),
|
|
389
|
+
...expectedEnv(config, instanceId),
|
|
449
390
|
},
|
|
450
391
|
stdio: ["ignore", logFd, logFd],
|
|
451
392
|
})
|
|
452
393
|
spawnedPid = proc.pid
|
|
394
|
+
spawnedIdentity = processIdentity(proc.pid)
|
|
453
395
|
proc.unref()
|
|
454
396
|
} finally {
|
|
455
397
|
fs.closeSync(logFd)
|
|
@@ -459,7 +401,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
459
401
|
throw new Error("Daemon failed to spawn.")
|
|
460
402
|
}
|
|
461
403
|
|
|
462
|
-
const health = await waitForHealthy(spawnedPid,
|
|
404
|
+
const health = await waitForHealthy(spawnedPid, instanceId)
|
|
463
405
|
return {
|
|
464
406
|
running: true,
|
|
465
407
|
managed: true,
|
|
@@ -478,7 +420,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
478
420
|
}
|
|
479
421
|
} catch (error) {
|
|
480
422
|
if (spawnedPid !== null) {
|
|
481
|
-
|
|
423
|
+
const entry = readRegistryEntry()
|
|
424
|
+
if (entry?.pid === spawnedPid) {
|
|
425
|
+
await stopPid(entry).catch(() => undefined)
|
|
426
|
+
} else if (spawnedIdentity && processIdentity(spawnedPid) === spawnedIdentity) {
|
|
427
|
+
try { process.kill(spawnedPid, "SIGTERM") } catch { /* already exited */ }
|
|
428
|
+
const deadline = Date.now() + gracefulStopTimeoutMs
|
|
429
|
+
while (Date.now() < deadline && processIdentity(spawnedPid) === spawnedIdentity) await sleep(POLL_INTERVAL_MS)
|
|
430
|
+
if (processIdentity(spawnedPid) === spawnedIdentity) {
|
|
431
|
+
try { process.kill(spawnedPid, "SIGKILL") } catch { /* already exited */ }
|
|
432
|
+
}
|
|
433
|
+
}
|
|
482
434
|
}
|
|
483
435
|
throw error
|
|
484
436
|
} finally {
|
|
@@ -489,13 +441,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
489
441
|
const stop = async (): Promise<DaemonStatus> => {
|
|
490
442
|
const status = await getStatus()
|
|
491
443
|
if (status.pid === null) return status
|
|
492
|
-
if (!status.sameWorkdir) {
|
|
493
|
-
throw new Error(`Refusing to stop motel owned by ${status.workdir}.`)
|
|
494
|
-
}
|
|
495
444
|
if (status.service !== null && status.service !== MOTEL_SERVICE_ID) {
|
|
496
445
|
throw new Error(`Refusing to stop non-motel service ${status.service} on ${status.url}.`)
|
|
497
446
|
}
|
|
498
|
-
|
|
447
|
+
const entry = readRegistryEntry()
|
|
448
|
+
if (!entry || entry.pid !== status.pid) throw new Error(`Refusing to stop pid ${status.pid}: no matching managed registry entry.`)
|
|
449
|
+
await stopPid(entry)
|
|
499
450
|
return await getStatus()
|
|
500
451
|
}
|
|
501
452
|
|
package/src/httpApi.ts
CHANGED
|
@@ -25,6 +25,7 @@ const Health = Schema.Struct({
|
|
|
25
25
|
workdir: Schema.String.pipe(Schema.annotateKey({ description: "Working directory at the time the server started. Used by MCP discovery to match the current project via longest-prefix." })),
|
|
26
26
|
startedAt: Schema.String.pipe(Schema.annotateKey({ description: "ISO 8601 timestamp of when the server bound its port." })),
|
|
27
27
|
version: Schema.String.pipe(Schema.annotateKey({ description: "Motel version string." })),
|
|
28
|
+
instanceId: Schema.optionalKey(Schema.String).pipe(Schema.annotateKey({ description: "Managed-daemon instance nonce used for readiness and safe shutdown identity checks." })),
|
|
28
29
|
})
|
|
29
30
|
const IngestTraceResponse = Schema.Struct({ insertedSpans: Schema.Number })
|
|
30
31
|
const IngestLogResponse = Schema.Struct({ insertedLogs: Schema.Number })
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test"
|
|
2
|
+
import type { LogItem, TraceSummaryItem } from "./domain.js"
|
|
3
|
+
import { LOG_LIST, LOG_STATS, parseListParams, paginateLogs, paginateSummaries, traceCursorArgs } from "./httpListPolicy.js"
|
|
4
|
+
|
|
5
|
+
const BASE_URL = "http://127.0.0.1:27686"
|
|
6
|
+
|
|
7
|
+
describe("HTTP list policy", () => {
|
|
8
|
+
it("bounds list parameters and extracts attribute filters", () => {
|
|
9
|
+
const params = parseListParams({
|
|
10
|
+
url: "/api/logs?limit=9999&lookback=9d&attr.session.id=abc&attrContains.message=failed",
|
|
11
|
+
}, LOG_LIST, BASE_URL)
|
|
12
|
+
|
|
13
|
+
expect(params.limit).toBe(500)
|
|
14
|
+
expect(params.lookbackMinutes).toBe(24 * 60)
|
|
15
|
+
expect(params.attributeFilters).toEqual({ "session.id": "abc" })
|
|
16
|
+
expect(params.attributeContainsFilters).toEqual({ message: "failed" })
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
it("round-trips a trace cursor through page metadata", () => {
|
|
20
|
+
const traces: readonly TraceSummaryItem[] = [
|
|
21
|
+
{
|
|
22
|
+
traceId: "trace-1",
|
|
23
|
+
serviceName: "api",
|
|
24
|
+
rootOperationName: "GET /first",
|
|
25
|
+
startedAt: new Date(1000),
|
|
26
|
+
isRunning: false,
|
|
27
|
+
durationMs: 2,
|
|
28
|
+
spanCount: 1,
|
|
29
|
+
errorCount: 0,
|
|
30
|
+
warnings: [],
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
traceId: "trace-2",
|
|
34
|
+
serviceName: "api",
|
|
35
|
+
rootOperationName: "GET /second",
|
|
36
|
+
startedAt: new Date(900),
|
|
37
|
+
isRunning: false,
|
|
38
|
+
durationMs: 1,
|
|
39
|
+
spanCount: 1,
|
|
40
|
+
errorCount: 0,
|
|
41
|
+
warnings: [],
|
|
42
|
+
},
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
const page = paginateSummaries(traces, { limit: 1, lookbackMinutes: 60 })
|
|
46
|
+
const parsed = parseListParams({ url: `/api/traces?cursor=${page.meta.nextCursor}` }, LOG_LIST, BASE_URL)
|
|
47
|
+
|
|
48
|
+
expect(page.meta.truncated).toBe(true)
|
|
49
|
+
expect(traceCursorArgs(parsed.cursor)).toEqual({ cursorStartedAtMs: 1000, cursorTraceId: "trace-1" })
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
it("formats log page metadata and emits a cursor", () => {
|
|
53
|
+
const logs: readonly LogItem[] = [{
|
|
54
|
+
id: "12",
|
|
55
|
+
timestamp: new Date(1200),
|
|
56
|
+
serviceName: "api",
|
|
57
|
+
severityText: "INFO",
|
|
58
|
+
body: "ready",
|
|
59
|
+
traceId: null,
|
|
60
|
+
spanId: null,
|
|
61
|
+
scopeName: null,
|
|
62
|
+
attributes: {},
|
|
63
|
+
}]
|
|
64
|
+
|
|
65
|
+
const page = paginateLogs(logs, { limit: 10, lookbackMinutes: 120 })
|
|
66
|
+
|
|
67
|
+
expect(page.meta).toMatchObject({ limit: 10, lookback: "2h", returned: 1, truncated: false })
|
|
68
|
+
expect(page.meta.nextCursor).not.toBeNull()
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it("keeps aggregate log queries bounded to twenty groups by default", () => {
|
|
72
|
+
const params = parseListParams({ url: "/api/logs/stats?groupBy=service&agg=count" }, LOG_STATS, BASE_URL)
|
|
73
|
+
|
|
74
|
+
expect(params.limit).toBe(20)
|
|
75
|
+
})
|
|
76
|
+
})
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import type { LogItem, TraceSummaryItem } from "./domain.js"
|
|
2
|
+
import { attributeContainsFiltersFromEntries, attributeFiltersFromEntries } from "./queryFilters.js"
|
|
3
|
+
|
|
4
|
+
type CursorShape =
|
|
5
|
+
| { readonly kind: "trace"; readonly startedAt: number; readonly id: string }
|
|
6
|
+
| { readonly kind: "log"; readonly timestamp: number; readonly id: string }
|
|
7
|
+
|
|
8
|
+
export interface ListBounds {
|
|
9
|
+
readonly defaultLimit: number
|
|
10
|
+
readonly maxLimit: number
|
|
11
|
+
readonly defaultLookback: number
|
|
12
|
+
readonly maxLookback: number
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export interface ListParams {
|
|
16
|
+
readonly url: URL
|
|
17
|
+
readonly limit: number
|
|
18
|
+
readonly lookbackMinutes: number
|
|
19
|
+
readonly cursor: CursorShape | null
|
|
20
|
+
readonly attributeFilters: Readonly<Record<string, string>>
|
|
21
|
+
readonly attributeContainsFilters: Readonly<Record<string, string>>
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Limit / lookback bounds, one preset per kind of list or stats query.
// Lookback values are minutes: 60 is one hour, 24 * 60 is one day.

export const TRACE_LIST: ListBounds = {
  defaultLimit: 20,
  maxLimit: 100,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}

export const SPAN_LIST: ListBounds = {
  defaultLimit: 100,
  maxLimit: 500,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}

export const LOG_LIST: ListBounds = {
  defaultLimit: 100,
  maxLimit: 500,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}

export const AI_LIST: ListBounds = {
  defaultLimit: 20,
  maxLimit: 500,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}

export const TRACE_STATS: ListBounds = {
  defaultLimit: 20,
  maxLimit: 100,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}

export const LOG_STATS: ListBounds = {
  defaultLimit: 20,
  maxLimit: 500,
  defaultLookback: 60,
  maxLookback: 24 * 60,
}
|
|
30
|
+
|
|
31
|
+
/**
 * Resolves a request's (possibly relative) URL string against `baseUrl`.
 *
 * @param request - Anything exposing the raw request `url` string.
 * @param baseUrl - Base used when `request.url` is relative.
 * @returns The parsed `URL`.
 */
export const requestUrl = (request: { readonly url: string }, baseUrl: string) => {
  const { url: rawUrl } = request
  return new URL(rawUrl, baseUrl)
}
|
|
32
|
+
|
|
33
|
+
/**
 * Parses a base-10 integer and accepts it only when it is finite and greater than zero.
 *
 * @param value - Raw text; `undefined` is treated like an empty string.
 * @param defaultValue - Returned when parsing fails or the number is not positive.
 * @returns The parsed integer, or `defaultValue`.
 */
const parsePositiveInt = (value: string | undefined, defaultValue: number) => {
  const text = value ?? ""
  const candidate = Number.parseInt(text, 10)
  // NaN (unparseable) and Infinity (absurdly long digit strings) are both rejected here.
  if (!Number.isFinite(candidate) || candidate <= 0) return defaultValue
  return candidate
}
|
|
37
|
+
|
|
38
|
+
/**
 * Parses the `limit` query value as a positive integer.
 *
 * @param value - Raw query value; `null` means the parameter was absent.
 * @param fallback - Returned when the value is missing or not a positive integer.
 */
export const parseLimit = (value: string | null, fallback: number) => {
  const text = value === null ? undefined : value
  return parsePositiveInt(text, fallback)
}
|
|
39
|
+
|
|
40
|
+
/**
 * Converts a lookback expression such as `"15m"`, `"2h"` or `"1d"` into minutes.
 *
 * The expression is a positive integer followed by a unit letter
 * (`m`, `h` or `d`, case-insensitive); surrounding whitespace is ignored.
 *
 * @param value - Raw query value; `null` or empty means the parameter was absent.
 * @param fallback - Returned when the value is missing, malformed or not positive.
 * @returns The lookback window in minutes.
 */
export const parseLookbackMinutes = (value: string | null, fallback: number) => {
  if (!value) return fallback

  const parts = /^(\d+)([mhd])$/i.exec(value.trim())
  if (parts === null) return fallback

  const amount = Number.parseInt(parts[1] ?? "", 10)
  if (!(Number.isFinite(amount) && amount > 0)) return fallback

  switch ((parts[2] ?? "m").toLowerCase()) {
    case "d":
      return amount * 1440
    case "h":
      return amount * 60
    default:
      return amount
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Restricts `value` to the range `[min, max]`.
 *
 * The upper bound is applied first and the lower bound second, so `min` wins
 * if the bounds are ever inverted.
 */
const clamp = (value: number, min: number, max: number) => {
  const capped = Math.min(value, max)
  return Math.max(min, capped)
}
|
|
53
|
+
|
|
54
|
+
/**
 * Decodes the opaque `cursor` query value (base64url-encoded JSON) into a cursor.
 *
 * The value comes straight from the request, so the decoded JSON is validated
 * instead of being trusted: it must be an object with a string `id`, a `kind`
 * of `"trace"` or `"log"`, and a finite numeric `startedAt` / `timestamp`
 * respectively. Anything else is treated as "no cursor".
 *
 * @param value - Raw query value; `null` or empty means the parameter was absent.
 * @returns A well-formed cursor, or `null` when the value is absent, is not
 *          valid JSON, or does not have the expected shape.
 */
const decodeCursor = (value: string | null): CursorShape | null => {
  if (!value) return null

  let parsed: unknown
  try {
    parsed = JSON.parse(Buffer.from(value, "base64url").toString("utf8"))
  } catch {
    // Malformed JSON: a bad cursor is ignored rather than failing the request.
    return null
  }

  if (typeof parsed !== "object" || parsed === null) return null
  const { kind, id, startedAt, timestamp } = parsed as Record<string, unknown>
  if (typeof id !== "string") return null

  // Rebuild the cursor from validated fields only, dropping any extra properties.
  // Number.isFinite also rejects Infinity, which JSON.parse yields for e.g. `1e999`.
  if (kind === "trace" && typeof startedAt === "number" && Number.isFinite(startedAt)) {
    return { kind: "trace", startedAt, id }
  }
  if (kind === "log" && typeof timestamp === "number" && Number.isFinite(timestamp)) {
    return { kind: "log", timestamp, id }
  }
  return null
}
|
|
62
|
+
|
|
63
|
+
/**
 * Serialises a cursor to the opaque token handed back to clients:
 * the cursor's JSON text, base64url-encoded.
 */
const encodeCursor = (cursor: CursorShape) => {
  const json = JSON.stringify(cursor)
  return Buffer.from(json, "utf8").toString("base64url")
}
|
|
64
|
+
|
|
65
|
+
/**
 * Maps a decoded cursor to the `cursorStartedAtMs` / `cursorTraceId` argument pair.
 *
 * @param cursor - Decoded cursor, or `null` when the request carried none.
 * @returns The argument pair for a `"trace"` cursor; an empty object for
 *          `null` or any other cursor kind.
 */
export const traceCursorArgs = (cursor: CursorShape | null) => {
  if (cursor?.kind !== "trace") return {}
  return { cursorStartedAtMs: cursor.startedAt, cursorTraceId: cursor.id }
}
|
|
69
|
+
|
|
70
|
+
/**
 * Maps a decoded cursor to the `cursorTimestampMs` / `cursorId` argument pair.
 *
 * @param cursor - Decoded cursor, or `null` when the request carried none.
 * @returns The argument pair for a `"log"` cursor; an empty object for
 *          `null` or any other cursor kind.
 */
export const logCursorArgs = (cursor: CursorShape | null) => {
  if (cursor?.kind !== "log") return {}
  return { cursorTimestampMs: cursor.timestamp, cursorId: cursor.id }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Parses and normalises the query parameters shared by the list endpoints.
 *
 * `limit` and `lookback` fall back to the endpoint defaults when missing or
 * invalid, and are then clamped to `[1, maxLimit]` and `[1, maxLookback]`.
 *
 * @param request - Anything exposing the raw request `url` string.
 * @param bounds - Defaults and maxima for the endpoint being served.
 * @param baseUrl - Base used to resolve a relative `request.url`.
 * @returns The resolved URL together with the normalised parameters.
 */
export const parseListParams = (request: { readonly url: string }, bounds: ListBounds, baseUrl: string): ListParams => {
  const url = requestUrl(request, baseUrl)
  const query = url.searchParams

  const requestedLimit = parseLimit(query.get("limit"), bounds.defaultLimit)
  const requestedLookback = parseLookbackMinutes(query.get("lookback"), bounds.defaultLookback)

  const limit = clamp(requestedLimit, 1, bounds.maxLimit)
  const lookbackMinutes = clamp(requestedLookback, 1, bounds.maxLookback)
  const cursor = decodeCursor(query.get("cursor"))

  // Each helper gets its own iterator over the query entries.
  const attributeFilters = attributeFiltersFromEntries(query.entries())
  const attributeContainsFilters = attributeContainsFiltersFromEntries(query.entries())

  return { url, limit, lookbackMinutes, cursor, attributeFilters, attributeContainsFilters }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Renders a lookback window using the largest unit that divides it evenly:
 * whole days as `"<n>d"`, otherwise whole hours as `"<n>h"`, otherwise `"<n>m"`.
 *
 * @param minutes - Lookback window in minutes.
 */
const formatLookback = (minutes: number) => {
  // Ordered from largest to smallest so the coarsest exact unit wins.
  const units = [
    [1440, "d"],
    [60, "h"],
  ] as const
  for (const [size, suffix] of units) {
    if (minutes % size === 0) return `${minutes / size}${suffix}`
  }
  return `${minutes}m`
}
|
|
92
|
+
|
|
93
|
+
/**
 * Builds the `meta` object attached to a list response.
 *
 * Every field is passed through unchanged except the lookback, which is
 * reported as a formatted string under `lookback` rather than as minutes.
 *
 * @param input - Effective limit and lookback, number of items returned,
 *                whether more items were available, and the next-page cursor.
 */
export const listMeta = (input: { readonly limit: number; readonly lookbackMinutes: number; readonly returned: number; readonly truncated: boolean; readonly nextCursor: string | null }) => {
  const { limit, lookbackMinutes, returned, truncated, nextCursor } = input
  return {
    limit,
    lookback: formatLookback(lookbackMinutes),
    returned,
    truncated,
    nextCursor,
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Cuts a list of trace summaries down to one page and describes it.
 *
 * @param summaries - Candidate summaries; only the first `options.limit` are kept.
 * @param options - Effective page size and lookback window (minutes).
 * @returns `data` (the page) and `meta`. `truncated` is true when the input
 *          held more items than the page. `nextCursor` is built from the last
 *          item on the page whenever the page is non-empty, truncated or not.
 */
export const paginateSummaries = (summaries: readonly TraceSummaryItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
  const { limit, lookbackMinutes } = options
  const data = summaries.slice(0, limit)
  const tail = data.length > 0 ? data[data.length - 1] : undefined

  const nextCursor = tail
    ? encodeCursor({ kind: "trace", startedAt: tail.startedAt.getTime(), id: tail.traceId })
    : null

  const meta = listMeta({
    limit,
    lookbackMinutes,
    returned: data.length,
    truncated: summaries.length > data.length,
    nextCursor,
  })

  return { data, meta }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Cuts a list of log items down to one page and describes it.
 *
 * @param logs - Candidate log items; only the first `options.limit` are kept.
 * @param options - Effective page size and lookback window (minutes).
 * @returns `data` (the page) and `meta`. `truncated` is true when the input
 *          held more items than the page. `nextCursor` is built from the last
 *          item on the page whenever the page is non-empty, truncated or not.
 */
export const paginateLogs = (logs: readonly LogItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
  const { limit, lookbackMinutes } = options
  const data = logs.slice(0, limit)
  const tail = data.length > 0 ? data[data.length - 1] : undefined

  const nextCursor = tail
    ? encodeCursor({ kind: "log", timestamp: tail.timestamp.getTime(), id: tail.id })
    : null

  const meta = listMeta({
    limit,
    lookbackMinutes,
    returned: data.length,
    truncated: logs.length > data.length,
    nextCursor,
  })

  return { data, meta }
}
|