@kitlangton/motel 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +23 -8
- package/README.md +13 -2
- package/package.json +35 -19
- package/skills/motel-debug/SKILL.md +203 -0
- package/skills/motel-debug/references/effect.md +38 -0
- package/src/App.tsx +12 -5
- package/src/StartupGate.tsx +289 -0
- package/src/cli.ts +15 -16
- package/src/config.ts +7 -1
- package/src/daemon.test.ts +332 -51
- package/src/daemon.ts +105 -153
- package/src/httpApi.ts +1 -0
- package/src/httpListPolicy.test.ts +76 -0
- package/src/httpListPolicy.ts +129 -0
- package/src/index.tsx +9 -2
- package/src/localServer.ts +194 -313
- package/src/mcp.ts +2 -1
- package/src/motel.ts +0 -2
- package/src/opentui-jsx.d.ts +11 -0
- package/src/otlp.test.ts +65 -0
- package/src/otlp.ts +20 -0
- package/src/otlpProtobuf.ts +35 -0
- package/src/registry.ts +37 -11
- package/src/runtime.ts +2 -6
- package/src/services/AsyncIngest.ts +22 -8
- package/src/services/LogQueryService.ts +13 -27
- package/src/services/TelemetryQuery.ts +62 -0
- package/src/services/TelemetryStore.ts +546 -231
- package/src/services/TraceQueryService.ts +22 -56
- package/src/services/ingestRpc.ts +2 -4
- package/src/services/queryRpc.ts +15 -0
- package/src/services/telemetryQueryWorker.ts +32 -0
- package/src/services/telemetryWorker.ts +5 -8
- package/src/startupBench.ts +19 -0
- package/src/storybook/aiChatStory.tsx +1 -1
- package/src/telemetry.test.ts +307 -41
- package/src/ui/AiChatView.tsx +1 -1
- package/src/ui/AttrFilterModal.tsx +1 -1
- package/src/ui/ServiceLogs.tsx +10 -7
- package/src/ui/SpanContentView.tsx +24 -21
- package/src/ui/TraceDetailsPane.tsx +1 -1
- package/src/ui/TraceList.tsx +1 -1
- package/src/ui/aiState.ts +10 -22
- package/src/ui/app/TraceWorkspace.tsx +2 -1
- package/src/ui/app/useAppLayout.ts +1 -1
- package/src/ui/app/useTraceScreenData.ts +35 -23
- package/src/ui/atoms.ts +1 -1
- package/src/ui/cachedLoader.test.ts +23 -0
- package/src/ui/cachedLoader.ts +60 -0
- package/src/ui/loaders.ts +34 -53
- package/src/ui/persistence.ts +3 -3
- package/src/ui/primitives.tsx +1 -1
- package/src/ui/state.ts +2 -0
- package/src/ui/theme.ts +7 -5
- package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
- package/src/ui/traceSortNav.repro.seed.ts +1 -1
- package/src/ui/traceSortNav.repro.test.ts +12 -2
- package/src/ui/useAttrFilterPicker.ts +10 -8
- package/src/ui/useKeyboardNav.ts +28 -5
- package/src/ui/waterfallNav.repro.seed.ts +1 -1
- package/src/ui/waterfallNav.repro.test.ts +16 -8
- package/web/dist/assets/index-B01z9BaO.css +2 -0
- package/web/dist/assets/index-M86tcih5.js +22 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-DnyVo03x.js +0 -27
- package/web/dist/assets/index-DzuHNBGV.css +0 -2
package/src/daemon.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as fs from "node:fs"
|
|
|
2
2
|
import { promises as fsp } from "node:fs"
|
|
3
3
|
import * as path from "node:path"
|
|
4
4
|
import { Effect } from "effect"
|
|
5
|
-
import { isAlive, listAliveEntries, MOTEL_SERVICE_ID,
|
|
5
|
+
import { isAlive, isManagedDaemonProcess, listAliveEntries, motelStateDir, MOTEL_SERVICE_ID, processIdentity, removeRegistryEntry, type RegistryEntry } from "./registry.js"
|
|
6
6
|
|
|
7
7
|
const DEFAULT_REPO_ROOT = path.resolve(import.meta.dir, "..")
|
|
8
8
|
const DEFAULT_HOST = "127.0.0.1"
|
|
@@ -10,6 +10,7 @@ const DEFAULT_PORT = 27686
|
|
|
10
10
|
const START_TIMEOUT_MS = 30_000
|
|
11
11
|
const STOP_TIMEOUT_MS = 10_000
|
|
12
12
|
const LOCK_TIMEOUT_MS = 10_000
|
|
13
|
+
const START_POLL_INTERVAL_MS = 25
|
|
13
14
|
const POLL_INTERVAL_MS = 150
|
|
14
15
|
/** Fast probe used inside the waitForHealthy poll loop — we call it
|
|
15
16
|
* every POLL_INTERVAL_MS, so a generous budget would stall the loop. */
|
|
@@ -23,6 +24,7 @@ const HEALTH_FAST_TIMEOUT_MS = 750
|
|
|
23
24
|
* and short enough that a truly-down daemon is still detected
|
|
24
25
|
* before START_TIMEOUT_MS fires. */
|
|
25
26
|
const HEALTH_PATIENT_TIMEOUT_MS = 3_000
|
|
27
|
+
const INGEST_PROBE_TIMEOUT_MS = 3_000
|
|
26
28
|
|
|
27
29
|
type HealthShape = {
|
|
28
30
|
readonly ok: boolean
|
|
@@ -33,11 +35,13 @@ type HealthShape = {
|
|
|
33
35
|
readonly workdir: string
|
|
34
36
|
readonly startedAt: string
|
|
35
37
|
readonly version: string
|
|
38
|
+
readonly instanceId?: string
|
|
36
39
|
}
|
|
37
40
|
|
|
38
41
|
type LockShape = {
|
|
39
42
|
readonly pid: number
|
|
40
43
|
readonly createdAt: string
|
|
44
|
+
readonly processIdentity?: string
|
|
41
45
|
}
|
|
42
46
|
|
|
43
47
|
type DaemonConfig = {
|
|
@@ -84,6 +88,9 @@ type DaemonOptions = {
|
|
|
84
88
|
readonly databasePath?: string
|
|
85
89
|
readonly host?: string
|
|
86
90
|
readonly port?: number
|
|
91
|
+
readonly startTimeoutMs?: number
|
|
92
|
+
readonly gracefulStopTimeoutMs?: number
|
|
93
|
+
readonly forceStopTimeoutMs?: number
|
|
87
94
|
}
|
|
88
95
|
|
|
89
96
|
export class DaemonError extends Error {
|
|
@@ -96,12 +103,14 @@ export class DaemonError extends Error {
|
|
|
96
103
|
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
|
|
97
104
|
|
|
98
105
|
const resolveConfig = (options: DaemonOptions = {}): DaemonConfig => {
|
|
106
|
+
const envBaseUrl = new URL(process.env.MOTEL_OTEL_BASE_URL?.trim() || process.env.MOTEL_OTEL_QUERY_URL?.trim() || `http://${DEFAULT_HOST}:${DEFAULT_PORT}`)
|
|
99
107
|
const repoRoot = path.resolve(options.repoRoot ?? DEFAULT_REPO_ROOT)
|
|
100
108
|
const workdir = path.resolve(options.workdir ?? process.cwd())
|
|
101
|
-
const runtimeDir = path.resolve(options.runtimeDir ??
|
|
102
|
-
const databasePath = path.resolve(options.databasePath ?? path.join(runtimeDir, "telemetry.sqlite"))
|
|
103
|
-
const host = options.host ??
|
|
104
|
-
const
|
|
109
|
+
const runtimeDir = path.resolve(options.runtimeDir ?? motelStateDir())
|
|
110
|
+
const databasePath = path.resolve(options.databasePath ?? process.env.MOTEL_OTEL_DB_PATH?.trim() ?? path.join(runtimeDir, "telemetry.sqlite"))
|
|
111
|
+
const host = options.host ?? process.env.MOTEL_OTEL_HOST?.trim() ?? envBaseUrl.hostname
|
|
112
|
+
const envPort = Number.parseInt(process.env.MOTEL_OTEL_PORT?.trim() || envBaseUrl.port, 10)
|
|
113
|
+
const port = options.port ?? (Number.isFinite(envPort) && envPort > 0 ? envPort : DEFAULT_PORT)
|
|
105
114
|
return {
|
|
106
115
|
repoRoot,
|
|
107
116
|
serverEntry: path.join(repoRoot, "src/server.ts"),
|
|
@@ -122,17 +131,17 @@ const workdirMatches = (targetWorkdir: string, daemonWorkdir: string) => {
|
|
|
122
131
|
return normalizedTarget === normalizedDaemon || normalizedTarget.startsWith(normalizedDaemon)
|
|
123
132
|
}
|
|
124
133
|
|
|
125
|
-
const
|
|
126
|
-
const withSep = targetWorkdir.endsWith(path.sep) ? targetWorkdir : `${targetWorkdir}${path.sep}`
|
|
134
|
+
const pickByUrl = (entries: readonly RegistryEntry[], baseUrl: string, databasePath: string) => {
|
|
127
135
|
return entries
|
|
128
136
|
.filter((entry) => {
|
|
129
|
-
|
|
130
|
-
return withSep === workdir || withSep.startsWith(workdir)
|
|
137
|
+
return entry.url === baseUrl && (entry.databasePath === undefined || entry.databasePath === databasePath)
|
|
131
138
|
})
|
|
132
|
-
.sort((a, b) => b.
|
|
139
|
+
.sort((a, b) => Number(b.databasePath === databasePath) - Number(a.databasePath === databasePath))[0] ?? null
|
|
133
140
|
}
|
|
134
141
|
|
|
135
|
-
const expectedEnv = (config: DaemonConfig) => ({
|
|
142
|
+
const expectedEnv = (config: DaemonConfig, instanceId?: string) => ({
|
|
143
|
+
MOTEL_RUNTIME_DIR: config.runtimeDir,
|
|
144
|
+
...(instanceId ? { MOTEL_DAEMON_INSTANCE_ID: instanceId } : {}),
|
|
136
145
|
MOTEL_OTEL_BASE_URL: config.baseUrl,
|
|
137
146
|
MOTEL_OTEL_QUERY_URL: config.baseUrl,
|
|
138
147
|
MOTEL_OTEL_HOST: config.host,
|
|
@@ -144,43 +153,35 @@ const expectedEnv = (config: DaemonConfig) => ({
|
|
|
144
153
|
|
|
145
154
|
export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager => {
|
|
146
155
|
const config = resolveConfig(options)
|
|
156
|
+
const startTimeoutMs = options.startTimeoutMs ?? START_TIMEOUT_MS
|
|
157
|
+
const gracefulStopTimeoutMs = options.gracefulStopTimeoutMs ?? STOP_TIMEOUT_MS
|
|
158
|
+
const forceStopTimeoutMs = options.forceStopTimeoutMs ?? 2_000
|
|
147
159
|
const mapError = (error: unknown) => new DaemonError(error instanceof Error ? error.message : String(error))
|
|
148
|
-
const readRegistryEntry = () =>
|
|
160
|
+
const readRegistryEntry = () => pickByUrl(listAliveEntries(config.runtimeDir), config.baseUrl, config.databasePath)
|
|
149
161
|
|
|
150
162
|
const fetchHealth = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<HealthShape | null> => {
|
|
151
163
|
try {
|
|
152
164
|
const response = await fetch(`${config.baseUrl}/api/health`, { signal: AbortSignal.timeout(timeoutMs) })
|
|
153
165
|
if (!response.ok) return null
|
|
154
|
-
|
|
166
|
+
const health = await response.json() as HealthShape
|
|
167
|
+
return health.ok ? health : null
|
|
155
168
|
} catch {
|
|
156
169
|
return null
|
|
157
170
|
}
|
|
158
171
|
}
|
|
159
172
|
|
|
160
|
-
const
|
|
161
|
-
|
|
162
|
-
const readLogSince = async (offset: number) => {
|
|
173
|
+
const fetchIngestProbe = async () => {
|
|
163
174
|
try {
|
|
164
|
-
const
|
|
165
|
-
|
|
175
|
+
const postEmpty = (path: string) => fetch(`${config.baseUrl}${path}`, {
|
|
176
|
+
method: "POST",
|
|
177
|
+
headers: { "content-type": "application/json" },
|
|
178
|
+
body: "{}",
|
|
179
|
+
signal: AbortSignal.timeout(INGEST_PROBE_TIMEOUT_MS),
|
|
180
|
+
})
|
|
181
|
+
const [traces, logs] = await Promise.all([postEmpty("/v1/traces"), postEmpty("/v1/logs")])
|
|
182
|
+
return traces.ok && logs.ok
|
|
166
183
|
} catch {
|
|
167
|
-
return
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
const detectStartedFromLog = async (pid: number, offset: number): Promise<HealthShape | null> => {
|
|
172
|
-
if (!isAlive(pid)) return null
|
|
173
|
-
const tail = await readLogSince(offset)
|
|
174
|
-
if (!startupMarkers.some((marker) => tail.includes(marker))) return null
|
|
175
|
-
return {
|
|
176
|
-
ok: true,
|
|
177
|
-
service: MOTEL_SERVICE_ID,
|
|
178
|
-
databasePath: config.databasePath,
|
|
179
|
-
pid,
|
|
180
|
-
url: config.baseUrl,
|
|
181
|
-
workdir: config.workdir,
|
|
182
|
-
startedAt: new Date().toISOString(),
|
|
183
|
-
version: MOTEL_VERSION,
|
|
184
|
+
return false
|
|
184
185
|
}
|
|
185
186
|
}
|
|
186
187
|
|
|
@@ -188,71 +189,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
188
189
|
if (health.service !== MOTEL_SERVICE_ID) {
|
|
189
190
|
return `Port ${config.port} is in use by ${health.service}, not ${MOTEL_SERVICE_ID}.`
|
|
190
191
|
}
|
|
191
|
-
if (!workdirMatches(config.workdir, health.workdir)) {
|
|
192
|
-
return `Port ${config.port} is serving motel for ${health.workdir}, not ${config.workdir}.`
|
|
193
|
-
}
|
|
194
192
|
if (health.databasePath !== config.databasePath) {
|
|
195
193
|
return `Port ${config.port} is serving motel with ${health.databasePath}, expected ${config.databasePath}.`
|
|
196
194
|
}
|
|
197
195
|
return null
|
|
198
196
|
}
|
|
199
197
|
|
|
200
|
-
/**
|
|
201
|
-
* Mismatch check against a registry entry — mirrors describeManagedMismatch
|
|
202
|
-
* but drives off the registry file instead of an HTTP health response.
|
|
203
|
-
* Used on the fast path in getStatus so warm-start doesn't need to wait
|
|
204
|
-
* on an HTTP round-trip that may queue behind heavy OTLP ingest.
|
|
205
|
-
*
|
|
206
|
-
* The service-id check is implicit: any entry living in the motel
|
|
207
|
-
* registry dir is by construction a motel daemon. databasePath is
|
|
208
|
-
* optional for back-compat with entries written by older builds;
|
|
209
|
-
* when absent we skip the DB check rather than refusing to adopt.
|
|
210
|
-
*/
|
|
211
|
-
const describeRegistryMismatch = (entry: RegistryEntry): string | null => {
|
|
212
|
-
if (!workdirMatches(config.workdir, entry.workdir)) {
|
|
213
|
-
return `Port ${config.port} is serving motel for ${entry.workdir}, not ${config.workdir}.`
|
|
214
|
-
}
|
|
215
|
-
if (entry.databasePath && entry.databasePath !== config.databasePath) {
|
|
216
|
-
return `Port ${config.port} is serving motel with ${entry.databasePath}, expected ${config.databasePath}.`
|
|
217
|
-
}
|
|
218
|
-
return null
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
/**
|
|
222
|
-
* Build a DaemonStatus from a live registry entry. Returns null when
|
|
223
|
-
* there's no entry for our cwd, the registered pid isn't running, or
|
|
224
|
-
* the entry is for a differently-configured daemon (different port).
|
|
225
|
-
* This is the fast path: no HTTP, no event-loop round-trip, just a
|
|
226
|
-
* directory read and a process.kill(pid, 0) liveness probe.
|
|
227
|
-
*/
|
|
228
|
-
const getStatusFromRegistry = (): DaemonStatus | null => {
|
|
229
|
-
const entry = readRegistryEntry()
|
|
230
|
-
if (!entry) return null
|
|
231
|
-
// Port discriminator: a motel registry shared across several
|
|
232
|
-
// daemons (e.g., user running two instances on different
|
|
233
|
-
// ports from the same workdir, or a test harness on a random
|
|
234
|
-
// port) would otherwise have us adopt an unrelated daemon.
|
|
235
|
-
// URL match is a fast, unambiguous identity check.
|
|
236
|
-
if (entry.url !== config.baseUrl) return null
|
|
237
|
-
const mismatch = describeRegistryMismatch(entry)
|
|
238
|
-
return {
|
|
239
|
-
running: mismatch === null,
|
|
240
|
-
managed: mismatch === null,
|
|
241
|
-
service: MOTEL_SERVICE_ID,
|
|
242
|
-
pid: entry.pid,
|
|
243
|
-
url: entry.url,
|
|
244
|
-
databasePath: entry.databasePath ?? config.databasePath,
|
|
245
|
-
workdir: entry.workdir,
|
|
246
|
-
startedAt: entry.startedAt,
|
|
247
|
-
version: entry.version,
|
|
248
|
-
sameWorkdir: workdirMatches(config.workdir, entry.workdir),
|
|
249
|
-
reason: mismatch,
|
|
250
|
-
logPath: config.logPath,
|
|
251
|
-
lockPath: config.lockPath,
|
|
252
|
-
registryPid: entry.pid,
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
|
|
256
198
|
const readLock = async (): Promise<LockShape | null> => {
|
|
257
199
|
try {
|
|
258
200
|
const raw = await fsp.readFile(config.lockPath, "utf8")
|
|
@@ -268,7 +210,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
268
210
|
await fsp.rm(config.lockPath, { force: true })
|
|
269
211
|
return true
|
|
270
212
|
}
|
|
271
|
-
if (isAlive(current.pid)) return false
|
|
213
|
+
if (current.processIdentity ? processIdentity(current.pid) === current.processIdentity : isAlive(current.pid)) return false
|
|
272
214
|
await fsp.rm(config.lockPath, { force: true })
|
|
273
215
|
return true
|
|
274
216
|
}
|
|
@@ -280,7 +222,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
280
222
|
while (Date.now() < deadline) {
|
|
281
223
|
try {
|
|
282
224
|
const handle = await fsp.open(config.lockPath, "wx")
|
|
283
|
-
const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() } satisfies LockShape)
|
|
225
|
+
const contents = JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString(), processIdentity: processIdentity(process.pid) ?? undefined } satisfies LockShape)
|
|
284
226
|
await handle.writeFile(contents, "utf8")
|
|
285
227
|
return {
|
|
286
228
|
release: async () => {
|
|
@@ -304,73 +246,59 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
304
246
|
return fs.openSync(config.logPath, "a")
|
|
305
247
|
}
|
|
306
248
|
|
|
307
|
-
const waitForHealthy = async (pid: number,
|
|
308
|
-
const deadline = Date.now() +
|
|
249
|
+
const waitForHealthy = async (pid: number, instanceId: string) => {
|
|
250
|
+
const deadline = Date.now() + startTimeoutMs
|
|
309
251
|
while (Date.now() < deadline) {
|
|
310
252
|
const health = await fetchHealth()
|
|
311
253
|
if (health) {
|
|
312
254
|
const mismatch = describeManagedMismatch(health)
|
|
313
|
-
|
|
314
|
-
|
|
255
|
+
const registry = readRegistryEntry()
|
|
256
|
+
if (!mismatch && health.pid === pid && health.instanceId === instanceId && registry?.pid === pid && registry.instanceId === instanceId && isManagedDaemonProcess(registry) && await fetchIngestProbe()) return health
|
|
257
|
+
if (mismatch) throw new Error(mismatch)
|
|
315
258
|
}
|
|
316
|
-
const started = await detectStartedFromLog(pid, logOffset)
|
|
317
|
-
if (started) return started
|
|
318
259
|
if (!isAlive(pid)) {
|
|
319
|
-
// The spawned child is gone. Before declaring failure,
|
|
320
|
-
// do one patient probe: the child may have died from
|
|
321
|
-
// EADDRINUSE because another healthy motel is alive on
|
|
322
|
-
// the port but was answering /api/health too slowly for
|
|
323
|
-
// our fast poll. If that's the case, adopt it.
|
|
324
|
-
const patient = await fetchHealth(HEALTH_PATIENT_TIMEOUT_MS)
|
|
325
|
-
if (patient) {
|
|
326
|
-
const mismatch = describeManagedMismatch(patient)
|
|
327
|
-
if (!mismatch) return patient
|
|
328
|
-
throw new Error(mismatch)
|
|
329
|
-
}
|
|
330
260
|
throw new Error(`Daemon process ${pid} exited before becoming healthy. See ${config.logPath}.`)
|
|
331
261
|
}
|
|
332
|
-
await sleep(
|
|
262
|
+
await sleep(START_POLL_INTERVAL_MS)
|
|
333
263
|
}
|
|
334
264
|
throw new Error(`Timed out waiting for daemon health at ${config.baseUrl}/api/health. See ${config.logPath}.`)
|
|
335
265
|
}
|
|
336
266
|
|
|
337
|
-
const
|
|
267
|
+
const waitUntilNotOwned = async (entry: RegistryEntry, timeoutMs: number) => {
|
|
268
|
+
const deadline = Date.now() + timeoutMs
|
|
269
|
+
while (Date.now() < deadline) {
|
|
270
|
+
if (!isManagedDaemonProcess(entry)) return true
|
|
271
|
+
await sleep(POLL_INTERVAL_MS)
|
|
272
|
+
}
|
|
273
|
+
return !isManagedDaemonProcess(entry)
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
const stopPid = async (entry: RegistryEntry) => {
|
|
277
|
+
if (!isManagedDaemonProcess(entry)) {
|
|
278
|
+
throw new Error(`Refusing to stop pid ${entry.pid}: registry identity does not match the running managed daemon.`)
|
|
279
|
+
}
|
|
338
280
|
try {
|
|
339
|
-
process.kill(pid, "SIGTERM")
|
|
281
|
+
process.kill(entry.pid, "SIGTERM")
|
|
340
282
|
} catch (error) {
|
|
341
283
|
const errno = error as NodeJS.ErrnoException
|
|
342
284
|
if (errno.code !== "ESRCH") throw error
|
|
343
285
|
}
|
|
344
286
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
if (!
|
|
352
|
-
|
|
287
|
+
if (!await waitUntilNotOwned(entry, gracefulStopTimeoutMs)) {
|
|
288
|
+
try {
|
|
289
|
+
process.kill(entry.pid, "SIGKILL")
|
|
290
|
+
} catch (error) {
|
|
291
|
+
if ((error as NodeJS.ErrnoException).code !== "ESRCH") throw error
|
|
292
|
+
}
|
|
293
|
+
if (!await waitUntilNotOwned(entry, forceStopTimeoutMs)) {
|
|
294
|
+
throw new Error(`Timed out force-killing daemon ${entry.pid}.`)
|
|
295
|
+
}
|
|
353
296
|
}
|
|
354
|
-
|
|
355
|
-
|
|
297
|
+
const current = readRegistryEntry()
|
|
298
|
+
if (current?.pid === entry.pid && current.instanceId === entry.instanceId) removeRegistryEntry(entry.pid, config.runtimeDir)
|
|
356
299
|
}
|
|
357
300
|
|
|
358
301
|
const getStatus = async (timeoutMs: number = HEALTH_FAST_TIMEOUT_MS): Promise<DaemonStatus> => {
|
|
359
|
-
// Fast path: trust the local filesystem registry. When a motel
|
|
360
|
-
// daemon started on this machine it wrote an entry for its pid
|
|
361
|
-
// + cwd + databasePath; if that entry is still there and the pid
|
|
362
|
-
// is alive, the daemon is almost certainly the one we want to
|
|
363
|
-
// adopt. HTTP health is skipped because the daemon's health
|
|
364
|
-
// endpoint can queue behind heavy OTLP ingest traffic, making
|
|
365
|
-
// the probe unreliable exactly when the daemon is busy.
|
|
366
|
-
const registryStatus = getStatusFromRegistry()
|
|
367
|
-
if (registryStatus) return registryStatus
|
|
368
|
-
|
|
369
|
-
// No local evidence → fall back to HTTP. Covers the edge cases
|
|
370
|
-
// where: a motel daemon is running but was started before this
|
|
371
|
-
// registry-first path shipped; OR the port is held by something
|
|
372
|
-
// entirely unrelated (the mismatch check turns that into a
|
|
373
|
-
// human-readable reason).
|
|
374
302
|
const registry = readRegistryEntry()
|
|
375
303
|
const health = await fetchHealth(timeoutMs)
|
|
376
304
|
if (!health) {
|
|
@@ -393,9 +321,10 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
393
321
|
}
|
|
394
322
|
|
|
395
323
|
const mismatch = describeManagedMismatch(health)
|
|
324
|
+
const managed = mismatch === null && registry?.pid === health.pid && registry.instanceId === health.instanceId && isManagedDaemonProcess(registry)
|
|
396
325
|
return {
|
|
397
326
|
running: mismatch === null,
|
|
398
|
-
managed
|
|
327
|
+
managed,
|
|
399
328
|
service: health.service,
|
|
400
329
|
pid: health.pid,
|
|
401
330
|
url: health.url,
|
|
@@ -404,7 +333,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
404
333
|
startedAt: health.startedAt,
|
|
405
334
|
version: health.version,
|
|
406
335
|
sameWorkdir: workdirMatches(config.workdir, health.workdir),
|
|
407
|
-
reason: mismatch,
|
|
336
|
+
reason: mismatch ?? (managed ? null : "Responsive motel server is not an identity-verified managed daemon."),
|
|
408
337
|
logPath: config.logPath,
|
|
409
338
|
lockPath: config.lockPath,
|
|
410
339
|
registryPid: registry?.pid ?? null,
|
|
@@ -417,26 +346,39 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
417
346
|
// negative here drops us into the spawn path and collides with
|
|
418
347
|
// any slow-but-healthy daemon sitting on the port.
|
|
419
348
|
const existing = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
420
|
-
|
|
349
|
+
const existingEntry = readRegistryEntry()
|
|
350
|
+
if (existing.managed && existing.running) {
|
|
351
|
+
// /api/health can stay healthy after the lazy ingest worker/RPC path
|
|
352
|
+
// has been poisoned by an interrupted request. Empty OTLP posts are
|
|
353
|
+
// side-effect free and exercise the same path real exporters need.
|
|
354
|
+
if (existing.pid === process.pid || await fetchIngestProbe()) return existing
|
|
355
|
+
if (existingEntry) await stopPid(existingEntry)
|
|
356
|
+
}
|
|
357
|
+
if (!existing.running && existingEntry && isManagedDaemonProcess(existingEntry)) await stopPid(existingEntry)
|
|
421
358
|
if (existing.service !== null && existing.reason) {
|
|
422
359
|
throw new Error(existing.reason)
|
|
423
360
|
}
|
|
424
361
|
|
|
425
362
|
const lock = await acquireStartupLock()
|
|
426
363
|
let spawnedPid: number | null = null
|
|
364
|
+
let spawnedIdentity: string | null = null
|
|
427
365
|
try {
|
|
428
366
|
// Same reasoning for the post-lock re-check: another ensure()
|
|
429
367
|
// may have spawned a daemon between our first probe and the
|
|
430
368
|
// lock grant, and its initial health response can be slow
|
|
431
369
|
// while the runtime warms up.
|
|
432
370
|
const rechecked = await getStatus(HEALTH_PATIENT_TIMEOUT_MS)
|
|
433
|
-
if (rechecked.managed && rechecked.running)
|
|
371
|
+
if (rechecked.managed && rechecked.running) {
|
|
372
|
+
if (rechecked.pid === process.pid || await fetchIngestProbe()) return rechecked
|
|
373
|
+
const recheckedEntry = readRegistryEntry()
|
|
374
|
+
if (recheckedEntry) await stopPid(recheckedEntry)
|
|
375
|
+
}
|
|
434
376
|
if (rechecked.service !== null && rechecked.reason) {
|
|
435
377
|
throw new Error(rechecked.reason)
|
|
436
378
|
}
|
|
437
379
|
|
|
438
380
|
const logFd = await openLogFile()
|
|
439
|
-
const
|
|
381
|
+
const instanceId = crypto.randomUUID()
|
|
440
382
|
try {
|
|
441
383
|
const proc = Bun.spawn({
|
|
442
384
|
cmd: [process.execPath, "run", config.serverEntry],
|
|
@@ -444,11 +386,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
444
386
|
detached: true,
|
|
445
387
|
env: {
|
|
446
388
|
...process.env,
|
|
447
|
-
...expectedEnv(config),
|
|
389
|
+
...expectedEnv(config, instanceId),
|
|
448
390
|
},
|
|
449
391
|
stdio: ["ignore", logFd, logFd],
|
|
450
392
|
})
|
|
451
393
|
spawnedPid = proc.pid
|
|
394
|
+
spawnedIdentity = processIdentity(proc.pid)
|
|
452
395
|
proc.unref()
|
|
453
396
|
} finally {
|
|
454
397
|
fs.closeSync(logFd)
|
|
@@ -458,7 +401,7 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
458
401
|
throw new Error("Daemon failed to spawn.")
|
|
459
402
|
}
|
|
460
403
|
|
|
461
|
-
const health = await waitForHealthy(spawnedPid,
|
|
404
|
+
const health = await waitForHealthy(spawnedPid, instanceId)
|
|
462
405
|
return {
|
|
463
406
|
running: true,
|
|
464
407
|
managed: true,
|
|
@@ -477,7 +420,17 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
477
420
|
}
|
|
478
421
|
} catch (error) {
|
|
479
422
|
if (spawnedPid !== null) {
|
|
480
|
-
|
|
423
|
+
const entry = readRegistryEntry()
|
|
424
|
+
if (entry?.pid === spawnedPid) {
|
|
425
|
+
await stopPid(entry).catch(() => undefined)
|
|
426
|
+
} else if (spawnedIdentity && processIdentity(spawnedPid) === spawnedIdentity) {
|
|
427
|
+
try { process.kill(spawnedPid, "SIGTERM") } catch { /* already exited */ }
|
|
428
|
+
const deadline = Date.now() + gracefulStopTimeoutMs
|
|
429
|
+
while (Date.now() < deadline && processIdentity(spawnedPid) === spawnedIdentity) await sleep(POLL_INTERVAL_MS)
|
|
430
|
+
if (processIdentity(spawnedPid) === spawnedIdentity) {
|
|
431
|
+
try { process.kill(spawnedPid, "SIGKILL") } catch { /* already exited */ }
|
|
432
|
+
}
|
|
433
|
+
}
|
|
481
434
|
}
|
|
482
435
|
throw error
|
|
483
436
|
} finally {
|
|
@@ -488,13 +441,12 @@ export const createDaemonManager = (options: DaemonOptions = {}): DaemonManager
|
|
|
488
441
|
const stop = async (): Promise<DaemonStatus> => {
|
|
489
442
|
const status = await getStatus()
|
|
490
443
|
if (status.pid === null) return status
|
|
491
|
-
if (!status.sameWorkdir) {
|
|
492
|
-
throw new Error(`Refusing to stop motel owned by ${status.workdir}.`)
|
|
493
|
-
}
|
|
494
444
|
if (status.service !== null && status.service !== MOTEL_SERVICE_ID) {
|
|
495
445
|
throw new Error(`Refusing to stop non-motel service ${status.service} on ${status.url}.`)
|
|
496
446
|
}
|
|
497
|
-
|
|
447
|
+
const entry = readRegistryEntry()
|
|
448
|
+
if (!entry || entry.pid !== status.pid) throw new Error(`Refusing to stop pid ${status.pid}: no matching managed registry entry.`)
|
|
449
|
+
await stopPid(entry)
|
|
498
450
|
return await getStatus()
|
|
499
451
|
}
|
|
500
452
|
|
package/src/httpApi.ts
CHANGED
|
@@ -25,6 +25,7 @@ const Health = Schema.Struct({
|
|
|
25
25
|
workdir: Schema.String.pipe(Schema.annotateKey({ description: "Working directory at the time the server started. Used by MCP discovery to match the current project via longest-prefix." })),
|
|
26
26
|
startedAt: Schema.String.pipe(Schema.annotateKey({ description: "ISO 8601 timestamp of when the server bound its port." })),
|
|
27
27
|
version: Schema.String.pipe(Schema.annotateKey({ description: "Motel version string." })),
|
|
28
|
+
instanceId: Schema.optionalKey(Schema.String).pipe(Schema.annotateKey({ description: "Managed-daemon instance nonce used for readiness and safe shutdown identity checks." })),
|
|
28
29
|
})
|
|
29
30
|
const IngestTraceResponse = Schema.Struct({ insertedSpans: Schema.Number })
|
|
30
31
|
const IngestLogResponse = Schema.Struct({ insertedLogs: Schema.Number })
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { describe, expect, it } from "bun:test"
|
|
2
|
+
import type { LogItem, TraceSummaryItem } from "./domain.js"
|
|
3
|
+
import { LOG_LIST, LOG_STATS, parseListParams, paginateLogs, paginateSummaries, traceCursorArgs } from "./httpListPolicy.js"
|
|
4
|
+
|
|
5
|
+
const BASE_URL = "http://127.0.0.1:27686"
|
|
6
|
+
|
|
7
|
+
describe("HTTP list policy", () => {
|
|
8
|
+
it("bounds list parameters and extracts attribute filters", () => {
|
|
9
|
+
const params = parseListParams({
|
|
10
|
+
url: "/api/logs?limit=9999&lookback=9d&attr.session.id=abc&attrContains.message=failed",
|
|
11
|
+
}, LOG_LIST, BASE_URL)
|
|
12
|
+
|
|
13
|
+
expect(params.limit).toBe(500)
|
|
14
|
+
expect(params.lookbackMinutes).toBe(24 * 60)
|
|
15
|
+
expect(params.attributeFilters).toEqual({ "session.id": "abc" })
|
|
16
|
+
expect(params.attributeContainsFilters).toEqual({ message: "failed" })
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
it("round-trips a trace cursor through page metadata", () => {
|
|
20
|
+
const traces: readonly TraceSummaryItem[] = [
|
|
21
|
+
{
|
|
22
|
+
traceId: "trace-1",
|
|
23
|
+
serviceName: "api",
|
|
24
|
+
rootOperationName: "GET /first",
|
|
25
|
+
startedAt: new Date(1000),
|
|
26
|
+
isRunning: false,
|
|
27
|
+
durationMs: 2,
|
|
28
|
+
spanCount: 1,
|
|
29
|
+
errorCount: 0,
|
|
30
|
+
warnings: [],
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
traceId: "trace-2",
|
|
34
|
+
serviceName: "api",
|
|
35
|
+
rootOperationName: "GET /second",
|
|
36
|
+
startedAt: new Date(900),
|
|
37
|
+
isRunning: false,
|
|
38
|
+
durationMs: 1,
|
|
39
|
+
spanCount: 1,
|
|
40
|
+
errorCount: 0,
|
|
41
|
+
warnings: [],
|
|
42
|
+
},
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
const page = paginateSummaries(traces, { limit: 1, lookbackMinutes: 60 })
|
|
46
|
+
const parsed = parseListParams({ url: `/api/traces?cursor=${page.meta.nextCursor}` }, LOG_LIST, BASE_URL)
|
|
47
|
+
|
|
48
|
+
expect(page.meta.truncated).toBe(true)
|
|
49
|
+
expect(traceCursorArgs(parsed.cursor)).toEqual({ cursorStartedAtMs: 1000, cursorTraceId: "trace-1" })
|
|
50
|
+
})
|
|
51
|
+
|
|
52
|
+
it("formats log page metadata and emits a cursor", () => {
|
|
53
|
+
const logs: readonly LogItem[] = [{
|
|
54
|
+
id: "12",
|
|
55
|
+
timestamp: new Date(1200),
|
|
56
|
+
serviceName: "api",
|
|
57
|
+
severityText: "INFO",
|
|
58
|
+
body: "ready",
|
|
59
|
+
traceId: null,
|
|
60
|
+
spanId: null,
|
|
61
|
+
scopeName: null,
|
|
62
|
+
attributes: {},
|
|
63
|
+
}]
|
|
64
|
+
|
|
65
|
+
const page = paginateLogs(logs, { limit: 10, lookbackMinutes: 120 })
|
|
66
|
+
|
|
67
|
+
expect(page.meta).toMatchObject({ limit: 10, lookback: "2h", returned: 1, truncated: false })
|
|
68
|
+
expect(page.meta.nextCursor).not.toBeNull()
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it("keeps aggregate log queries bounded to twenty groups by default", () => {
|
|
72
|
+
const params = parseListParams({ url: "/api/logs/stats?groupBy=service&agg=count" }, LOG_STATS, BASE_URL)
|
|
73
|
+
|
|
74
|
+
expect(params.limit).toBe(20)
|
|
75
|
+
})
|
|
76
|
+
})
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import type { LogItem, TraceSummaryItem } from "./domain.js"
|
|
2
|
+
import { attributeContainsFiltersFromEntries, attributeFiltersFromEntries } from "./queryFilters.js"
|
|
3
|
+
|
|
4
|
+
type CursorShape =
|
|
5
|
+
| { readonly kind: "trace"; readonly startedAt: number; readonly id: string }
|
|
6
|
+
| { readonly kind: "log"; readonly timestamp: number; readonly id: string }
|
|
7
|
+
|
|
8
|
+
export interface ListBounds {
|
|
9
|
+
readonly defaultLimit: number
|
|
10
|
+
readonly maxLimit: number
|
|
11
|
+
readonly defaultLookback: number
|
|
12
|
+
readonly maxLookback: number
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export interface ListParams {
|
|
16
|
+
readonly url: URL
|
|
17
|
+
readonly limit: number
|
|
18
|
+
readonly lookbackMinutes: number
|
|
19
|
+
readonly cursor: CursorShape | null
|
|
20
|
+
readonly attributeFilters: Readonly<Record<string, string>>
|
|
21
|
+
readonly attributeContainsFilters: Readonly<Record<string, string>>
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/** Default lookback shared by every bounds preset: one hour, in minutes. */
const DEFAULT_LOOKBACK_MINUTES = 60

/** Maximum lookback shared by every bounds preset: 24 hours, in minutes. */
const MAX_LOOKBACK_MINUTES = 24 * 60

/** Builds a bounds preset that differs from the others only in its row limits. */
const boundsWithLimits = (defaultLimit: number, maxLimit: number): ListBounds => ({
  defaultLimit,
  maxLimit,
  defaultLookback: DEFAULT_LOOKBACK_MINUTES,
  maxLookback: MAX_LOOKBACK_MINUTES,
})

export const TRACE_LIST: ListBounds = boundsWithLimits(20, 100)
export const SPAN_LIST: ListBounds = boundsWithLimits(100, 500)
export const LOG_LIST: ListBounds = boundsWithLimits(100, 500)
export const AI_LIST: ListBounds = boundsWithLimits(20, 500)
export const TRACE_STATS: ListBounds = boundsWithLimits(20, 100)
export const LOG_STATS: ListBounds = boundsWithLimits(20, 500)
|
|
30
|
+
|
|
31
|
+
/**
 * Resolves a request's (possibly relative) URL string against `baseUrl`.
 *
 * @param request - Object carrying the raw request URL.
 * @param baseUrl - Base used when `request.url` is relative.
 * @returns The absolute, parsed URL.
 */
export const requestUrl = (request: { readonly url: string }, baseUrl: string) => {
  const { url: rawUrl } = request
  return new URL(rawUrl, baseUrl)
}
|
|
32
|
+
|
|
33
|
+
/**
 * Parses a base-10 integer and accepts it only when it is finite and
 * strictly positive; every other input yields `defaultValue`.
 *
 * @param value - Raw text, or `undefined` when absent.
 * @param defaultValue - Value returned when `value` is not a positive integer.
 */
const parsePositiveInt = (value: string | undefined, defaultValue: number) => {
  // An absent value parses like the empty string, i.e. to NaN.
  const candidate = Number.parseInt(value === undefined ? "" : value, 10)
  if (!Number.isFinite(candidate)) return defaultValue
  if (candidate <= 0) return defaultValue
  return candidate
}
|
|
37
|
+
|
|
38
|
+
/**
 * Parses a `limit` query value, substituting `fallback` when the value is
 * missing or is not a positive integer.
 *
 * @param value - Raw query value, `null` when the parameter is absent.
 * @param fallback - Limit to use when `value` is unusable.
 */
export const parseLimit = (value: string | null, fallback: number) => {
  const raw = value === null ? undefined : value
  return parsePositiveInt(raw, fallback)
}
|
|
39
|
+
|
|
40
|
+
/**
 * Converts a lookback expression such as `"15m"`, `"2h"` or `"1d"` into
 * minutes. The unit letter is case-insensitive and surrounding whitespace is
 * ignored.
 *
 * @param value - Raw query value, `null` when the parameter is absent.
 * @param fallback - Minutes returned when `value` is empty, malformed, or
 *   does not denote a positive amount.
 * @returns The lookback window in minutes.
 */
export const parseLookbackMinutes = (value: string | null, fallback: number) => {
  if (!value) return fallback

  const parts = /^(\d+)([mhd])$/i.exec(value.trim())
  if (parts === null) return fallback

  const [, digits = "", suffix = "m"] = parts
  const count = Number.parseInt(digits, 10)
  // Very long digit runs parse to Infinity; treat those as unusable too.
  if (!(Number.isFinite(count) && count > 0)) return fallback

  switch (suffix.toLowerCase()) {
    case "d":
      return count * 1440
    case "h":
      return count * 60
    default:
      return count
  }
}
|
|
51
|
+
|
|
52
|
+
/** Restricts `value` to the inclusive range `[min, max]`. */
const clamp = (value: number, min: number, max: number) => {
  const capped = Math.min(value, max)
  return Math.max(min, capped)
}
|
|
53
|
+
|
|
54
|
+
/**
 * Decodes a pagination cursor taken from the `cursor` query parameter.
 *
 * The cursor is base64url-encoded JSON. It comes from the client, so the
 * decoded payload is validated field by field rather than cast. Anything that
 * is not exactly a trace or log cursor is rejected, so malformed values never
 * reach the query layer.
 *
 * @param value - Raw query value, `null` when the parameter is absent.
 * @returns The decoded cursor, or `null` when `value` is empty, is not valid
 *   base64url JSON, or does not have the expected shape.
 */
const decodeCursor = (value: string | null): CursorShape | null => {
  if (!value) return null

  let payload: unknown
  try {
    payload = JSON.parse(Buffer.from(value, "base64url").toString("utf8"))
  } catch {
    // Not JSON after decoding: treat as "no cursor".
    return null
  }

  // Only plain JSON objects can be cursors; primitives and arrays are rejected.
  if (typeof payload !== "object" || payload === null || Array.isArray(payload)) return null

  const { kind, id, startedAt, timestamp } = payload as Record<string, unknown>
  if (typeof id !== "string") return null

  // JSON.parse can produce Infinity (e.g. "1e999"), hence the finiteness checks.
  if (kind === "trace" && typeof startedAt === "number" && Number.isFinite(startedAt)) {
    return { kind: "trace", startedAt, id }
  }
  if (kind === "log" && typeof timestamp === "number" && Number.isFinite(timestamp)) {
    return { kind: "log", timestamp, id }
  }
  return null
}
|
|
62
|
+
|
|
63
|
+
/** Serialises a cursor as JSON and encodes it as a base64url token. */
const encodeCursor = (cursor: CursorShape) => {
  const json = JSON.stringify(cursor)
  return Buffer.from(json, "utf8").toString("base64url")
}
|
|
64
|
+
|
|
65
|
+
/**
 * Translates a decoded cursor into the trace-query cursor arguments.
 *
 * @param cursor - Decoded cursor, or `null` when the request carried none.
 * @returns `cursorStartedAtMs` and `cursorTraceId` for a trace cursor;
 *   an empty object for `null` or for a cursor of another kind.
 */
export const traceCursorArgs = (cursor: CursorShape | null) =>
  cursor == null || cursor.kind !== "trace"
    ? {}
    : { cursorStartedAtMs: cursor.startedAt, cursorTraceId: cursor.id }
|
|
69
|
+
|
|
70
|
+
/**
 * Translates a decoded cursor into the log-query cursor arguments.
 *
 * @param cursor - Decoded cursor, or `null` when the request carried none.
 * @returns `cursorTimestampMs` and `cursorId` for a log cursor; an empty
 *   object for `null` or for a cursor of another kind.
 */
export const logCursorArgs = (cursor: CursorShape | null) =>
  cursor == null || cursor.kind !== "log"
    ? {}
    : { cursorTimestampMs: cursor.timestamp, cursorId: cursor.id }
|
|
74
|
+
|
|
75
|
+
/**
 * Extracts and normalises the common list parameters from a request.
 *
 * `limit` and `lookback` fall back to the defaults in `bounds` when missing
 * or unusable, and are then clamped to `[1, bounds.maxLimit]` and
 * `[1, bounds.maxLookback]` respectively.
 *
 * @param request - Object carrying the raw request URL.
 * @param bounds - Default and maximum values for the endpoint being served.
 * @param baseUrl - Base used to resolve a relative request URL.
 * @returns The resolved URL together with limit, lookback, cursor and
 *   attribute filters.
 */
export const parseListParams = (request: { readonly url: string }, bounds: ListBounds, baseUrl: string): ListParams => {
  const url = requestUrl(request, baseUrl)
  const query = url.searchParams

  const requestedLimit = parseLimit(query.get("limit"), bounds.defaultLimit)
  const requestedLookback = parseLookbackMinutes(query.get("lookback"), bounds.defaultLookback)
  const cursor = decodeCursor(query.get("cursor"))

  // Each filter builder gets its own iterator: an entries() iterator is single-use.
  const attributeFilters = attributeFiltersFromEntries(query.entries())
  const attributeContainsFilters = attributeContainsFiltersFromEntries(query.entries())

  return {
    url,
    limit: clamp(requestedLimit, 1, bounds.maxLimit),
    lookbackMinutes: clamp(requestedLookback, 1, bounds.maxLookback),
    cursor,
    attributeFilters,
    attributeContainsFilters,
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Renders a lookback window in the largest unit that divides it evenly:
 * whole days as `"<n>d"`, otherwise whole hours as `"<n>h"`, otherwise
 * minutes as `"<n>m"`.
 */
const formatLookback = (minutes: number) => {
  // Ordered largest unit first so whole days win over whole hours.
  const units = [
    [1440, "d"],
    [60, "h"],
  ] as const
  for (const [size, suffix] of units) {
    if (minutes % size === 0) return `${minutes / size}${suffix}`
  }
  return `${minutes}m`
}
|
|
92
|
+
|
|
93
|
+
/**
 * Builds the `meta` object of a list response.
 *
 * @param input - Effective limit, lookback in minutes, number of rows
 *   returned, whether more rows were available, and the cursor token.
 * @returns The same values, with the lookback rendered as a string under
 *   `lookback`.
 */
export const listMeta = (input: { readonly limit: number; readonly lookbackMinutes: number; readonly returned: number; readonly truncated: boolean; readonly nextCursor: string | null }) => {
  const { limit, lookbackMinutes, returned, truncated, nextCursor } = input
  return {
    limit,
    lookback: formatLookback(lookbackMinutes),
    returned,
    truncated,
    nextCursor,
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Cuts a list of trace summaries down to one page and describes it.
 *
 * @param summaries - Candidate rows in the order they should be returned.
 * @param options - Page size and the lookback window to report.
 * @returns `data` holding at most `options.limit` rows and `meta` describing
 *   the page. `truncated` is true when rows were left out, and `nextCursor`
 *   points at the last returned row (`null` only for an empty page).
 */
export const paginateSummaries = (summaries: readonly TraceSummaryItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
  const { limit, lookbackMinutes } = options
  const data = summaries.slice(0, limit)
  const tail = data.at(-1)
  const truncated = summaries.length > data.length
  const nextCursor = tail
    ? encodeCursor({ kind: "trace", startedAt: tail.startedAt.getTime(), id: tail.traceId })
    : null

  return {
    data,
    meta: listMeta({ limit, lookbackMinutes, returned: data.length, truncated, nextCursor }),
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Cuts a list of log entries down to one page and describes it.
 *
 * @param logs - Candidate rows in the order they should be returned.
 * @param options - Page size and the lookback window to report.
 * @returns `data` holding at most `options.limit` rows and `meta` describing
 *   the page. `truncated` is true when rows were left out, and `nextCursor`
 *   points at the last returned row (`null` only for an empty page).
 */
export const paginateLogs = (logs: readonly LogItem[], options: { readonly limit: number; readonly lookbackMinutes: number }) => {
  const { limit, lookbackMinutes } = options
  const data = logs.slice(0, limit)
  const tail = data.at(-1)
  const truncated = logs.length > data.length
  const nextCursor = tail
    ? encodeCursor({ kind: "log", timestamp: tail.timestamp.getTime(), id: tail.id })
    : null

  return {
    data,
    meta: listMeta({ limit, lookbackMinutes, returned: data.length, truncated, nextCursor }),
  }
}
|