@kitlangton/motel 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/AGENTS.md +23 -8
  2. package/README.md +13 -2
  3. package/package.json +35 -19
  4. package/skills/motel-debug/SKILL.md +203 -0
  5. package/skills/motel-debug/references/effect.md +38 -0
  6. package/src/App.tsx +12 -5
  7. package/src/StartupGate.tsx +289 -0
  8. package/src/cli.ts +15 -16
  9. package/src/config.ts +7 -1
  10. package/src/daemon.test.ts +332 -51
  11. package/src/daemon.ts +105 -153
  12. package/src/httpApi.ts +1 -0
  13. package/src/httpListPolicy.test.ts +76 -0
  14. package/src/httpListPolicy.ts +129 -0
  15. package/src/index.tsx +9 -2
  16. package/src/localServer.ts +194 -313
  17. package/src/mcp.ts +2 -1
  18. package/src/motel.ts +0 -2
  19. package/src/opentui-jsx.d.ts +11 -0
  20. package/src/otlp.test.ts +65 -0
  21. package/src/otlp.ts +20 -0
  22. package/src/otlpProtobuf.ts +35 -0
  23. package/src/registry.ts +37 -11
  24. package/src/runtime.ts +2 -6
  25. package/src/services/AsyncIngest.ts +22 -8
  26. package/src/services/LogQueryService.ts +13 -27
  27. package/src/services/TelemetryQuery.ts +62 -0
  28. package/src/services/TelemetryStore.ts +546 -231
  29. package/src/services/TraceQueryService.ts +22 -56
  30. package/src/services/ingestRpc.ts +2 -4
  31. package/src/services/queryRpc.ts +15 -0
  32. package/src/services/telemetryQueryWorker.ts +32 -0
  33. package/src/services/telemetryWorker.ts +5 -8
  34. package/src/startupBench.ts +19 -0
  35. package/src/storybook/aiChatStory.tsx +1 -1
  36. package/src/telemetry.test.ts +307 -41
  37. package/src/ui/AiChatView.tsx +1 -1
  38. package/src/ui/AttrFilterModal.tsx +1 -1
  39. package/src/ui/ServiceLogs.tsx +10 -7
  40. package/src/ui/SpanContentView.tsx +24 -21
  41. package/src/ui/TraceDetailsPane.tsx +1 -1
  42. package/src/ui/TraceList.tsx +1 -1
  43. package/src/ui/aiState.ts +10 -22
  44. package/src/ui/app/TraceWorkspace.tsx +2 -1
  45. package/src/ui/app/useAppLayout.ts +1 -1
  46. package/src/ui/app/useTraceScreenData.ts +35 -23
  47. package/src/ui/atoms.ts +1 -1
  48. package/src/ui/cachedLoader.test.ts +23 -0
  49. package/src/ui/cachedLoader.ts +60 -0
  50. package/src/ui/loaders.ts +34 -53
  51. package/src/ui/persistence.ts +3 -3
  52. package/src/ui/primitives.tsx +1 -1
  53. package/src/ui/state.ts +2 -0
  54. package/src/ui/theme.ts +7 -5
  55. package/src/ui/traceDetailsWidth.repro.test.ts +12 -1
  56. package/src/ui/traceSortNav.repro.seed.ts +1 -1
  57. package/src/ui/traceSortNav.repro.test.ts +12 -2
  58. package/src/ui/useAttrFilterPicker.ts +10 -8
  59. package/src/ui/useKeyboardNav.ts +28 -5
  60. package/src/ui/waterfallNav.repro.seed.ts +1 -1
  61. package/src/ui/waterfallNav.repro.test.ts +16 -8
  62. package/web/dist/assets/index-B01z9BaO.css +2 -0
  63. package/web/dist/assets/index-M86tcih5.js +22 -0
  64. package/web/dist/index.html +2 -2
  65. package/web/dist/assets/index-DnyVo03x.js +0 -27
  66. package/web/dist/assets/index-DzuHNBGV.css +0 -2
@@ -1,11 +1,11 @@
1
1
  import { Database } from "bun:sqlite"
2
- import { mkdirSync } from "node:fs"
2
+ import * as BunFileSystem from "@effect/platform-bun/BunFileSystem"
3
3
  import { dirname } from "node:path"
4
- import { Clock, Effect, Layer, Schedule, Context } from "effect"
4
+ import { Cause, Clock, Effect, FileSystem, Layer, Schedule, Context } from "effect"
5
5
  import { config } from "../config.js"
6
6
  import type { AiCallDetail, AiCallSummary, FacetItem, LogItem, SpanItem, StatsItem, TraceItem, TraceSummaryItem, TraceSpanEvent, TraceSpanItem } from "../domain.js"
7
7
  import { AI_ATTR_MAP, AI_FTS_KEYS, AI_TEXT_SEARCH_KEYS, truncatePreview } from "../domain.js"
8
- import { attributeMap, nanosToMilliseconds, parseAnyValue, spanKindLabel, spanStatusLabel, stringifyValue, type OtlpLogExportRequest, type OtlpTraceExportRequest } from "../otlp.js"
8
+ import { attributeMap, nanosToMilliseconds, normalizeOtlpBinaryId, parseAnyValue, spanKindLabel, spanStatusLabel, stringifyValue, type OtlpLogExportRequest, type OtlpTraceExportRequest } from "../otlp.js"
9
9
 
10
10
  const isSqliteLockError = (error: unknown) =>
11
11
  error instanceof Error && /(database is locked|database table is locked|SQLITE_BUSY)/i.test(error.message)
@@ -196,18 +196,40 @@ const TRACE_SUMMARY_SELECT_SQL = `
196
196
  FROM spans
197
197
  `
198
198
 
199
+ // Memoize small repeated JSON records. Resource attributes are the primary
200
+ // beneficiary because many spans share the same serialized value; compact
201
+ // repeated span attributes also benefit while large unique payloads bypass
202
+ // the cache to keep memory bounded for long-running daemons.
203
+ const RECORD_PARSE_CACHE_MAX_VALUE_LEN = 1024
204
+ const RECORD_PARSE_CACHE_LIMIT = 256
205
+ const recordParseCache = new Map<string, Record<string, string>>()
206
+ const EMPTY_RECORD: Record<string, string> = {}
207
+
199
208
  const parseRecord = (value: string): Record<string, string> => {
209
+ if (value === "" || value === "{}") return EMPTY_RECORD
210
+ const cacheable = value.length <= RECORD_PARSE_CACHE_MAX_VALUE_LEN
211
+ if (cacheable) {
212
+ const cached = recordParseCache.get(value)
213
+ if (cached !== undefined) return cached
214
+ }
215
+ let parsed: Record<string, string>
200
216
  try {
201
- const parsed = JSON.parse(value) as Record<string, unknown>
202
- return Object.fromEntries(Object.entries(parsed).map(([key, entry]) => [key, stringifyValue(entry)]))
217
+ const json = JSON.parse(value) as Record<string, unknown>
218
+ parsed = Object.fromEntries(Object.entries(json).map(([key, entry]) => [key, stringifyValue(entry)]))
203
219
  } catch {
204
- return {}
220
+ parsed = EMPTY_RECORD
205
221
  }
222
+ if (cacheable && recordParseCache.size < RECORD_PARSE_CACHE_LIMIT) {
223
+ recordParseCache.set(value, parsed)
224
+ }
225
+ return parsed
206
226
  }
207
227
 
208
228
  const parseEvents = (value: string): readonly TraceSpanEvent[] => {
229
+ if (value === "" || value === "[]") return []
209
230
  try {
210
231
  const parsed = JSON.parse(value) as Array<{ name: string; timestamp: number; attributes: Record<string, string> }>
232
+ if (parsed.length === 0) return []
211
233
  return parsed.map((event) => ({
212
234
  name: event.name,
213
235
  timestamp: new Date(event.timestamp),
@@ -423,29 +445,41 @@ const buildContainsAttributeMatchSubquery = (
423
445
  }
424
446
  }
425
447
 
448
+ // Read-only surface of the telemetry store. Pulled out so a readonly
449
+ // SQLite connection (TUI / HTTP query handlers) can be expressed as a
450
+ // distinct service identifier from the writer, without re-declaring
451
+ // every query in a wrapper layer. The writer's value still satisfies
452
+ // this shape — TelemetryStoreLive can provide both identifiers from
453
+ // one underlying object if needed.
454
+ export interface TelemetryStoreReader {
455
+ readonly listServices: Effect.Effect<readonly string[], Error>
456
+ readonly listRecentTraces: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceItem[], Error>
457
+ readonly listTraceSummaries: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
458
+ readonly searchTraces: (input: TraceSearch) => Effect.Effect<readonly TraceItem[], Error>
459
+ readonly searchTraceSummaries: (input: TraceSearch) => Effect.Effect<readonly TraceSummaryItem[], Error>
460
+ readonly traceStats: (input: TraceStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
461
+ readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
462
+ readonly getSpan: (spanId: string) => Effect.Effect<SpanItem | null, Error>
463
+ readonly listTraceSpans: (traceId: string) => Effect.Effect<readonly SpanItem[], Error>
464
+ readonly searchSpans: (input: SpanSearch) => Effect.Effect<readonly SpanItem[], Error>
465
+ readonly searchLogs: (input: LogSearch) => Effect.Effect<readonly LogItem[], Error>
466
+ readonly logStats: (input: LogStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
467
+ readonly listFacets: (input: FacetSearch) => Effect.Effect<readonly FacetItem[], Error>
468
+ readonly listRecentLogs: (serviceName: string) => Effect.Effect<readonly LogItem[], Error>
469
+ readonly listTraceLogs: (traceId: string) => Effect.Effect<readonly LogItem[], Error>
470
+ readonly searchAiCalls: (input: AiCallSearch) => Effect.Effect<readonly AiCallSummary[], Error>
471
+ readonly getAiCall: (spanId: string) => Effect.Effect<AiCallDetail | null, Error>
472
+ readonly aiCallStats: (input: AiCallStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
473
+ }
474
+
475
+ export class TelemetryStoreReadonly extends Context.Service<TelemetryStoreReadonly, TelemetryStoreReader>()("motel/TelemetryStoreReadonly") {}
476
+
426
477
  export class TelemetryStore extends Context.Service<
427
478
  TelemetryStore,
428
- {
479
+ TelemetryStoreReader & {
429
480
  readonly ingestTraces: (payload: OtlpTraceExportRequest) => Effect.Effect<{ readonly insertedSpans: number }, Error>
430
481
  readonly ingestLogs: (payload: OtlpLogExportRequest) => Effect.Effect<{ readonly insertedLogs: number }, Error>
431
- readonly listServices: Effect.Effect<readonly string[], Error>
432
- readonly listRecentTraces: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceItem[], Error>
433
- readonly listTraceSummaries: (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) => Effect.Effect<readonly TraceSummaryItem[], Error>
434
- readonly searchTraces: (input: TraceSearch) => Effect.Effect<readonly TraceItem[], Error>
435
- readonly searchTraceSummaries: (input: TraceSearch) => Effect.Effect<readonly TraceSummaryItem[], Error>
436
- readonly traceStats: (input: TraceStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
437
- readonly getTrace: (traceId: string) => Effect.Effect<TraceItem | null, Error>
438
- readonly getSpan: (spanId: string) => Effect.Effect<SpanItem | null, Error>
439
- readonly listTraceSpans: (traceId: string) => Effect.Effect<readonly SpanItem[], Error>
440
- readonly searchSpans: (input: SpanSearch) => Effect.Effect<readonly SpanItem[], Error>
441
- readonly searchLogs: (input: LogSearch) => Effect.Effect<readonly LogItem[], Error>
442
- readonly logStats: (input: LogStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
443
- readonly listFacets: (input: FacetSearch) => Effect.Effect<readonly FacetItem[], Error>
444
- readonly listRecentLogs: (serviceName: string) => Effect.Effect<readonly LogItem[], Error>
445
- readonly listTraceLogs: (traceId: string) => Effect.Effect<readonly LogItem[], Error>
446
- readonly searchAiCalls: (input: AiCallSearch) => Effect.Effect<readonly AiCallSummary[], Error>
447
- readonly getAiCall: (spanId: string) => Effect.Effect<AiCallDetail | null, Error>
448
- readonly aiCallStats: (input: AiCallStatsSearch) => Effect.Effect<readonly StatsItem[], Error>
482
+ readonly runRetentionNow: Effect.Effect<void, Error>
449
483
  }
450
484
  >()("motel/TelemetryStore") {}
451
485
 
@@ -462,18 +496,17 @@ export class TelemetryStore extends Context.Service<
462
496
  *
463
497
  * - `runRetention` — fork the background cleanup loop (age + size cap
464
498
  * eviction, WAL checkpoint). Only one process should own this at a
465
- * time. Currently the main daemon (localServer) does; the ingest
466
- * worker and the TUI skip it.
499
+ * time. The ingest worker owns it; the HTTP thread and TUI skip it.
467
500
  */
468
501
  export interface TelemetryStoreOptions {
469
502
  readonly readonly: boolean
470
503
  readonly runRetention: boolean
471
504
  }
472
505
 
473
- export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.effect(
474
- TelemetryStore,
506
+ const makeTelemetryStoreEffect = (opts: TelemetryStoreOptions) =>
475
507
  Effect.gen(function* () {
476
- mkdirSync(dirname(config.otel.databasePath), { recursive: true })
508
+ const fileSystem = yield* FileSystem.FileSystem
509
+ yield* fileSystem.makeDirectory(dirname(config.otel.databasePath), { recursive: true })
477
510
  const db = yield* Effect.acquireRelease(
478
511
  Effect.sync(() => new Database(config.otel.databasePath, {
479
512
  create: !opts.readonly,
@@ -517,6 +550,13 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
517
550
  -- SQLite silently caps at actual file size for smaller DBs.
518
551
  PRAGMA mmap_size = 268435456;
519
552
  `)
553
+ // auto_vacuum is a header-level setting: it only takes effect on
554
+ // an empty DB, or on the next VACUUM after a change. Setting it
555
+ // here, BEFORE the first CREATE TABLE, is the only path that
556
+ // makes incremental_vacuum work without a full VACUUM. For
557
+ // existing DBs that predate this setting keep their current mode;
558
+ // Motel never performs a surprise full-file VACUUM at startup.
559
+ try { db.exec(`PRAGMA auto_vacuum = INCREMENTAL;`) } catch { /* ignore */ }
520
560
  try {
521
561
  db.exec(`
522
562
  PRAGMA journal_mode = WAL;
@@ -526,6 +566,13 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
526
566
  -- this the WAL happily runs into the hundreds of MB and queries
527
567
  -- start paying the cost of walking the WAL on every read.
528
568
  PRAGMA wal_autocheckpoint = 4000;
569
+ -- Hard cap for the WAL file. Auto-checkpoint controls *when*
570
+ -- pages move out of the WAL; size_limit controls how much the
571
+ -- WAL file is allowed to grow on disk. 128MB is generous enough
572
+ -- to absorb a long write burst without blocking on truncation,
573
+ -- tight enough that a wedged retention loop can't hide a 20GB
574
+ -- WAL the way a default no-limit configuration can.
575
+ PRAGMA journal_size_limit = 134217728;
529
576
 
530
577
  CREATE TABLE IF NOT EXISTS spans (
531
578
  trace_id TEXT NOT NULL,
@@ -604,6 +651,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
604
651
 
605
652
  CREATE INDEX IF NOT EXISTS idx_log_attributes_key_value ON log_attributes(key, value, log_id);
606
653
  CREATE INDEX IF NOT EXISTS idx_log_attributes_log_id ON log_attributes(log_id);
654
+
655
+ CREATE TABLE IF NOT EXISTS motel_maintenance (
656
+ key TEXT PRIMARY KEY,
657
+ value TEXT NOT NULL
658
+ );
607
659
  `)
608
660
  } catch (err) {
609
661
  if (!isSqliteLockError(err)) throw err
@@ -624,7 +676,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
624
676
  } catch { hasFts = false }
625
677
  try {
626
678
  const row = db.query(`SELECT name FROM sqlite_master WHERE type='table' AND name='span_attr_fts'`).get()
627
- hasAttrFts = row !== null
679
+ const backfill = db.query(`SELECT value FROM motel_maintenance WHERE key = 'span_attr_fts_v1'`).get() as { value: string } | null
680
+ hasAttrFts = row !== null && backfill?.value === "complete"
628
681
  } catch { hasAttrFts = false }
629
682
  }
630
683
 
@@ -723,10 +776,6 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
723
776
  // pay 3-4s on cold open instead of 400ms.
724
777
  try {
725
778
  db.exec(`PRAGMA analysis_limit = 1000; PRAGMA optimize;`)
726
- // First-time databases won't have sqlite_stat1 until we run a
727
- // real ANALYZE. Force it once if stats haven't been collected.
728
- const hasStats = db.query(`SELECT 1 FROM sqlite_master WHERE name = 'sqlite_stat1' LIMIT 1`).get() !== null
729
- if (!hasStats) db.exec(`ANALYZE;`)
730
779
  } catch {
731
780
  // ANALYZE / optimize failures are never fatal — queries still work,
732
781
  // they just run with default row estimates.
@@ -777,33 +826,157 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
777
826
  )
778
827
  `)
779
828
 
780
- const rebuildTraceSummaries = db.query(`
781
- INSERT INTO trace_summaries (
782
- trace_id, service_name, root_operation_name, started_at_ms, ended_at_ms, active_span_count, duration_ms, span_count, error_count
783
- )
784
- ${TRACE_SUMMARY_SELECT_SQL}
785
- GROUP BY trace_id
786
- `)
787
-
788
829
  const reconcileTraceSummaries = Effect.sync(() => {
789
- try {
790
- db.query(`DELETE FROM trace_summaries`).run()
791
- rebuildTraceSummaries.run()
792
- } catch (err) {
793
- if (!isSqliteLockError(err)) throw err
794
- console.warn(`motel: trace summary rebuild skipped during startup: ${(err as Error).message}`)
830
+ const marker = db.query(`SELECT value FROM motel_maintenance WHERE key = 'trace_summary_cursor'`).get() as { value: string } | null
831
+ const cursor = Number(marker?.value ?? 0)
832
+ const rows = db.query(`SELECT rowid, trace_id FROM spans WHERE rowid > ? ORDER BY rowid ASC LIMIT ?`).all(cursor, config.otel.retentionTraceBatch) as Array<{ rowid: number; trace_id: string }>
833
+ if (rows.length === 0) {
834
+ db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('trace_summary_cursor', '0')`).run()
835
+ return
795
836
  }
837
+ const transaction = db.transaction(() => {
838
+ for (const traceId of new Set(rows.map((row) => row.trace_id))) upsertTraceSummary.run(traceId)
839
+ db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('trace_summary_cursor', ?)`).run(String(rows.at(-1)!.rowid))
840
+ })
841
+ transaction()
796
842
  })
797
843
 
798
844
  const deleteSpanAttributes = db.query(`DELETE FROM span_attributes WHERE trace_id = ? AND span_id = ?`)
799
845
  const insertSpanAttribute = db.query(`INSERT INTO span_attributes (trace_id, span_id, key, value) VALUES (?, ?, ?, ?)`)
846
+ const spanAttributeInsertManyByCount = new Map<number, ReturnType<Database["query"]>>()
847
+ const insertSpanAttributesMany = (traceId: string, spanId: string, attributes: Readonly<Record<string, string>>) => {
848
+ const entries = Object.entries(attributes)
849
+ if (entries.length === 0) return
850
+ if (entries.length === 1) {
851
+ const [key, value] = entries[0]!
852
+ insertSpanAttribute.run(traceId, spanId, key, value)
853
+ return
854
+ }
855
+ let query = spanAttributeInsertManyByCount.get(entries.length)
856
+ if (!query) {
857
+ query = db.query(`INSERT INTO span_attributes (trace_id, span_id, key, value) VALUES ${entries.map(() => "(?, ?, ?, ?)").join(", ")}`)
858
+ spanAttributeInsertManyByCount.set(entries.length, query)
859
+ }
860
+ query.run(...entries.flatMap(([key, value]) => [traceId, spanId, key, value]))
861
+ }
800
862
  const deleteSpanOperationSearch = db.query(`DELETE FROM span_operation_fts WHERE trace_id = ? AND span_id = ?`)
801
863
  const insertSpanOperationSearch = db.query(`INSERT INTO span_operation_fts (trace_id, span_id, operation_name) VALUES (?, ?, ?)`)
864
+ const deleteSpanOperationSearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
865
+ const insertSpanOperationSearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
866
+ const updateSpanOperationSearchMany = (operations: ReadonlyArray<readonly [string, string, string]>) => {
867
+ if (operations.length === 0) return
868
+ if (operations.length === 1) {
869
+ const [traceId, spanId, operationName] = operations[0]!
870
+ deleteSpanOperationSearch.run(traceId, spanId)
871
+ insertSpanOperationSearch.run(traceId, spanId, operationName)
872
+ return
873
+ }
874
+
875
+ let deleteQuery = deleteSpanOperationSearchManyByCount.get(operations.length)
876
+ if (!deleteQuery) {
877
+ deleteQuery = db.query(`DELETE FROM span_operation_fts WHERE ${operations.map(() => "(trace_id = ? AND span_id = ?)").join(" OR ")}`)
878
+ deleteSpanOperationSearchManyByCount.set(operations.length, deleteQuery)
879
+ }
880
+ deleteQuery.run(...operations.flatMap(([traceId, spanId]) => [traceId, spanId]))
881
+
882
+ let insertQuery = insertSpanOperationSearchManyByCount.get(operations.length)
883
+ if (!insertQuery) {
884
+ insertQuery = db.query(`INSERT INTO span_operation_fts (trace_id, span_id, operation_name) VALUES ${operations.map(() => "(?, ?, ?)").join(", ")}`)
885
+ insertSpanOperationSearchManyByCount.set(operations.length, insertQuery)
886
+ }
887
+ insertQuery.run(...operations.flatMap(([traceId, spanId, operationName]) => [traceId, spanId, operationName]))
888
+ }
802
889
  const insertLogAttribute = db.query(`INSERT INTO log_attributes (log_id, key, value) VALUES (?, ?, ?)`)
890
+ const logAttributeInsertManyByCount = new Map<number, ReturnType<Database["query"]>>()
891
+ const insertLogAttributesMany = (logId: number, attributes: Readonly<Record<string, string>>) => {
892
+ const entries = Object.entries(attributes)
893
+ if (entries.length === 0) return
894
+ if (entries.length === 1) {
895
+ const [key, value] = entries[0]!
896
+ insertLogAttribute.run(logId, key, value)
897
+ return
898
+ }
899
+ let query = logAttributeInsertManyByCount.get(entries.length)
900
+ if (!query) {
901
+ query = db.query(`INSERT INTO log_attributes (log_id, key, value) VALUES ${entries.map(() => "(?, ?, ?)").join(", ")}`)
902
+ logAttributeInsertManyByCount.set(entries.length, query)
903
+ }
904
+ query.run(...entries.flatMap(([key, value]) => [logId, key, value]))
905
+ }
803
906
  const insertLogBodySearch = db.query(`INSERT INTO log_body_fts (log_id, body) VALUES (?, ?)`)
907
+ const insertLogBodySearchManyByCount = new Map<number, ReturnType<Database["query"]>>()
908
+ const insertLogBodySearchMany = (entries: ReadonlyArray<readonly [string, string]>) => {
909
+ if (entries.length === 0) return
910
+ if (entries.length === 1) {
911
+ const [logId, body] = entries[0]!
912
+ insertLogBodySearch.run(logId, body)
913
+ return
914
+ }
915
+ let query = insertLogBodySearchManyByCount.get(entries.length)
916
+ if (!query) {
917
+ query = db.query(`INSERT INTO log_body_fts (log_id, body) VALUES ${entries.map(() => "(?, ?)").join(", ")}`)
918
+ insertLogBodySearchManyByCount.set(entries.length, query)
919
+ }
920
+ query.run(...entries.flatMap(([logId, body]) => [logId, body]))
921
+ }
804
922
 
805
923
  const maxDbSizeBytes = config.otel.maxDbSizeMb * 1024 * 1024
806
924
 
925
+ // Freelist-ratio thresholds for the adaptive reclaim loop. Below the
926
+ // LOW threshold there's nothing worth doing; above HIGH we are in the
927
+ // 17GB-DB-with-10GB-freelist failure mode and need to reclaim aggressively
928
+ // even if it costs writer-lock time.
929
+ const FREELIST_LOW_RATIO = 0.05
930
+ const FREELIST_MID_RATIO = 0.20
931
+ const FREELIST_HIGH_RATIO = 0.50
932
+ const VACUUM_PAGES_NORMAL = 2000 // ~8MB/pass
933
+ const VACUUM_PAGES_BUSY = 20000 // ~80MB/pass — used when freelist > 20%
934
+ const VACUUM_PAGES_PANIC = 50000 // ~200MB/pass — only when ratio > 50%
935
+
936
+ const ftsTableNames = ["span_attr_fts", "log_body_fts", "span_operation_fts"] as const
937
+
938
+ const incrementalFtsMerge = (pages: number) => {
939
+ // FTS5 segment merges drop tombstone rows that DELETE leaves behind.
940
+ // Without periodic merges, deleted FTS rows stay on disk indefinitely
941
+ // — a major source of freelist pages on a heavy-deletion workload.
942
+ // `merge=N` is a bounded, online operation: it merges at most N
943
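+ // pages of work and returns. The INSERT throws when a table is missing;
944
+ // we swallow that because not every DB has every FTS table. NOTE(review): FTS5 documents this command as INSERT INTO t(t, rank) VALUES('merge', N); 'merge=N' is the FTS3/4 form — confirm it is not a swallowed no-op here.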
945
+ for (const name of ftsTableNames) {
946
+ try { db.query(`INSERT INTO ${name}(${name}) VALUES (?)`).run(`merge=${pages}`) } catch { /* table absent or older schema */ }
947
+ }
948
+ }
949
+
950
+ const reclaimSpace = Effect.fn("motel/TelemetryStore.reclaimSpace")(function* () {
951
+ yield* Effect.sync(() => {
952
+ const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
953
+ const freePages = (db.query(`PRAGMA freelist_count`).get() as { freelist_count: number }).freelist_count
954
+ if (pageCount === 0) return
955
+ const ratio = freePages / pageCount
956
+ if (ratio < FREELIST_LOW_RATIO) return
957
+
958
+ // Adaptive vacuum sizing — fixed 2000 pages/min could not keep
959
+ // up with sustained deletions, leaking 10GB of freelist over
960
+ // time. Scale the per-pass work to the size of the backlog so
961
+ // we stay roughly proportional to the deficit.
962
+ const pages =
963
+ ratio >= FREELIST_HIGH_RATIO ? VACUUM_PAGES_PANIC :
964
+ ratio >= FREELIST_MID_RATIO ? VACUUM_PAGES_BUSY :
965
+ VACUUM_PAGES_NORMAL
966
+
967
+ try { db.exec(`PRAGMA incremental_vacuum(${pages});`) } catch { /* ignore */ }
968
+
969
+ // In WAL mode incremental_vacuum only moves pages — the file
970
+ // shrinks on the next checkpoint. PASSIVE silently skips when
971
+ // readers are active, which risks checkpoint starvation.
973
+ // Use RESTART normally and
973
+ // TRUNCATE in panic mode to physically shrink the WAL when it
974
+ // has grown.
975
+ const mode = ratio >= FREELIST_HIGH_RATIO ? "TRUNCATE" : "RESTART"
976
+ try { db.exec(`PRAGMA wal_checkpoint(${mode});`) } catch { /* ignore */ }
977
+ })
978
+ })
979
+
807
980
  const cleanupExpired = Effect.fn("motel/TelemetryStore.cleanupExpired")(function* () {
808
981
  const now = yield* Clock.currentTimeMillis
809
982
 
@@ -819,12 +992,12 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
819
992
 
820
993
  // Time-based: completed traces whose last span ended before cutoff.
821
994
  const timeExpired = db.query(
822
- `SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ?`,
823
- ).all(cutoff) as readonly { trace_id: string }[]
995
+ `SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 AND ended_at_ms > 0 AND ended_at_ms < ? ORDER BY ended_at_ms ASC LIMIT ?`,
996
+ ).all(cutoff, config.otel.retentionTraceBatch) as readonly { trace_id: string }[]
824
997
  for (const row of timeExpired) toEvict.add(row.trace_id)
825
998
 
826
- // Size-based: if actual data exceeds cap, drop oldest 20% of the
827
- // remaining completed traces. `(page_count - freelist_count)`
999
+ // Size-based: if actual data exceeds the target, drop one bounded
1000
+ // batch of the oldest completed traces. `(page_count - freelist_count)`
828
1001
  // ignores freed-but-not-vacuumed pages so a large freelist doesn't
829
1002
  // trigger a deletion death spiral.
830
1003
  const pageCount = (db.query(`PRAGMA page_count`).get() as { page_count: number }).page_count
@@ -832,22 +1005,22 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
832
1005
  const pageSize = (db.query(`PRAGMA page_size`).get() as { page_size: number }).page_size
833
1006
  const dbSize = (pageCount - freePages) * pageSize
834
1007
  if (dbSize > maxDbSizeBytes) {
835
- const completedCount = (db.query(
836
- `SELECT COUNT(*) AS c FROM trace_summaries WHERE active_span_count = 0`,
837
- ).get() as { c: number }).c
838
- const traceCutCount = Math.max(1, Math.floor(completedCount * 0.2))
839
1008
  const oldest = db.query(
840
1009
  `SELECT trace_id FROM trace_summaries WHERE active_span_count = 0 ORDER BY started_at_ms ASC LIMIT ?`,
841
- ).all(traceCutCount) as readonly { trace_id: string }[]
1010
+ ).all(config.otel.retentionTraceBatch) as readonly { trace_id: string }[]
842
1011
  // Set.add dedupes overlap with the time-expired batch above.
843
1012
  for (const row of oldest) toEvict.add(row.trace_id)
844
1013
  }
845
1014
 
846
- // Always prune orphan logs (no trace_id) by timestamp they're
847
- // not covered by trace eviction.
848
- db.query(`DELETE FROM logs WHERE trace_id IS NULL AND timestamp_ms < ?`).run(cutoff)
849
-
850
- if (toEvict.size === 0) return
1015
+ // Logs have their own retention boundary. A correlated log may refer
1016
+ // to a trace that was sampled elsewhere or never reached Motel, so
1017
+ // tying log eviction to trace_summaries lets those rows grow forever.
1018
+ const expiredLogs = db.query(`DELETE FROM logs WHERE id IN (SELECT id FROM logs WHERE timestamp_ms < ? ORDER BY timestamp_ms ASC LIMIT ?)`).run(cutoff, config.otel.retentionLogBatch)
1019
+ let deletedLogs = Number(expiredLogs.changes) > 0
1020
+ if (dbSize > maxDbSizeBytes) {
1021
+ const oversizedLogs = db.query(`DELETE FROM logs WHERE id IN (SELECT id FROM logs ORDER BY timestamp_ms ASC LIMIT ?)`).run(config.otel.retentionLogBatch)
1022
+ deletedLogs = deletedLogs || Number(oversizedLogs.changes) > 0
1023
+ }
851
1024
 
852
1025
  // Batch the trace-id list so the IN placeholders stay under
853
1026
  // SQLite's default limit (~999). Each batch wipes every row
@@ -870,48 +1043,54 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
870
1043
 
871
1044
  // Log-side orphans (log_attributes + FTS) are keyed by log.id,
872
1045
  // so prune what no longer has a parent log row.
873
- db.query(`DELETE FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id)`).run()
1046
+ const orphanAttributes = db.query(`DELETE FROM log_attributes WHERE rowid IN (SELECT log_attributes.rowid FROM log_attributes WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = log_attributes.log_id) LIMIT ?)`).run(config.otel.retentionLogBatch)
1047
+ let deletedOrphans = Number(orphanAttributes.changes) > 0
874
1048
  try {
875
- db.query(`DELETE FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER))`).run()
1049
+ const orphanFts = db.query(`DELETE FROM log_body_fts WHERE rowid IN (SELECT rowid FROM log_body_fts WHERE NOT EXISTS (SELECT 1 FROM logs WHERE logs.id = CAST(log_body_fts.log_id AS INTEGER)) LIMIT ?)`).run(config.otel.retentionLogBatch)
1050
+ deletedOrphans = deletedOrphans || Number(orphanFts.changes) > 0
876
1051
  } catch {
877
1052
  // FTS table may not exist on old DBs.
878
1053
  }
879
1054
 
880
- // Truncate the WAL after a big delete pass. Without this the
881
- // WAL keeps growing (observed: 640MB) because wal_autocheckpoint
882
- // only triggers when WAL pages exceed the threshold during
883
- // writes — a retention pass that evicts millions of rows can
884
- // blow far past that before the auto-checkpoint fires. Using
885
- // PASSIVE so active readers aren't interrupted; if the WAL
886
- // can't be fully reclaimed right now, we'll try again next
887
- // cycle.
888
- try { db.exec(`PRAGMA wal_checkpoint(PASSIVE);`) } catch { /* ignore */ }
889
-
890
- // Incremental vacuum reclaims some of the freed pages back
891
- // to the OS so the file size actually shrinks over time
892
- // instead of just growing the freelist. Bounded to 2000
893
- // pages per pass (≈8MB) to avoid a long-running transaction.
894
- try { db.exec(`PRAGMA incremental_vacuum(2000);`) } catch { /* ignore */ }
1055
+ // Checkpoint after a big delete pass so the freed pages land
1056
+ // in the main DB file and become eligible for incremental
1057
+ // vacuum. Use RESTART (not PASSIVE): PASSIVE silently no-ops
1058
+ // when readers are active, which is the documented mechanism
1059
+ // behind WAL/freelist starvation when ingest is busy.
1060
+ if (toEvict.size === 0 && !deletedLogs && !deletedOrphans) return
1061
+ try { db.exec(`PRAGMA wal_checkpoint(RESTART);`) } catch { /* ignore */ }
1062
+
1063
+ // Incremental FTS5 merge — DELETE on an FTS5-indexed row
1064
+ // leaves a tombstone in the segment tree that only `merge`
1065
+ // reclaims. Skipping this is the second compounding cause
1066
+ // (after fixed-size vacuum) of the slow freelist accretion
1067
+ // that took the DB to 17GB. 100 pages of merge work per
1068
+ // retention tick is bounded and runs in milliseconds.
1069
+ incrementalFtsMerge(100)
1070
+
1071
+ // Actual page reclamation lives in `reclaimSpace`, which
1072
+ // runs on its own faster cadence so the file shrinks even
1073
+ // when no traces are evicted in a given retention tick (e.g.
1074
+ // after a large historical eviction has already happened).
895
1075
  })
896
1076
  })
897
1077
 
898
- // Retention only runs in processes that opt in (currently the main
899
- // daemon). The ingest worker and TUI skip it to avoid two writers
900
- // competing for the write lock with overlapping DELETE passes.
1078
+ // Retention only runs in the ingest worker so maintenance never blocks
1079
+ // the HTTP event loop and no second writer duplicates cleanup work.
901
1080
  if (opts.runRetention) {
902
- // Reconcile any summary drift from interrupted ingests, but do it
903
- // after the server becomes healthy. Running this synchronously at
904
- // open can sit behind another writer's lock for ~15s and make the
905
- // daemon look hung even though the port is already bound.
906
- yield* Effect.forkScoped(reconcileTraceSummaries)
907
-
908
- // Enable incremental vacuum so retention can reclaim freed
909
- // pages over time instead of needing a stop-the-world VACUUM.
910
- // Idempotent: repeat calls after the first are no-ops.
911
- try { db.exec(`PRAGMA auto_vacuum = INCREMENTAL;`) } catch { /* ignore */ }
912
-
913
- // Run cleanup every 60 seconds in the background, tied to the layer's scope
914
- yield* Effect.forkScoped(Effect.repeat(cleanupExpired(), Schedule.spaced("60 seconds")))
1081
+ // Cleanup runs on the telemetry worker, never the HTTP event loop.
1082
+ yield* Effect.forkScoped(Effect.repeat(
1083
+ Effect.andThen(reconcileTraceSummaries, cleanupExpired()).pipe(Effect.catchCause((cause) => Effect.logWarning(`motel: maintenance pass failed: ${Cause.pretty(cause)}`))),
1084
+ Schedule.spaced(`${config.otel.retentionIntervalSeconds} seconds`),
1085
+ ))
1086
+
1087
+ // Page reclamation runs on a separate, faster cadence (10s) and
1088
+ // is independent of the eviction loop. The reason: a single sweep
1089
+ // at 60s intervals can move only ~8MB of pages before the next
1090
+ // burst of inserts grows the freelist again. Decoupling lets us
1091
+ // catch up adaptively (see VACUUM_PAGES_BUSY/PANIC) without
1092
+ // changing the cost of the heavier delete sweep.
1093
+ yield* Effect.forkScoped(Effect.repeat(reclaimSpace(), Schedule.spaced("10 seconds")))
915
1094
 
916
1095
  // Periodically refresh query planner stats. `PRAGMA optimize` is a
917
1096
  // no-op when nothing has changed, so this is essentially free on idle
@@ -924,35 +1103,48 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
924
1103
  yield* Effect.forkScoped(Effect.repeat(refreshPlannerStats, Schedule.spaced("15 minutes")))
925
1104
  }
926
1105
 
927
- // One-time backfill for existing DBs: if span_attr_fts is empty but
928
- // span_attributes has rows with AI_FTS_KEYS, populate the index.
929
- // Runs forked so server startup isn't blocked; queries hitting the
930
- // FTS will just return empty until the fill lands. On a 2 GB DB with
931
- // ~400 matching rows this takes ~3-8 seconds. Writer-only because
932
- // it does INSERT INTO ... — readonly connections would error.
1106
+ // Incrementally rebuild historical AI attributes in bounded batches.
1107
+ // Queries fall back to LIKE until the persistent marker is complete.
933
1108
  if (hasAttrFts && !opts.readonly) {
934
- const backfillAttrFts = Effect.sync(() => {
1109
+ const backfillAttrFtsBatch = Effect.sync(() => {
935
1110
  try {
936
- const ftsCount = (db.query(`SELECT COUNT(*) AS c FROM span_attr_fts`).get() as { c: number }).c
937
- if (ftsCount > 0) return
938
1111
  const keyList = AI_FTS_KEYS.map((k) => `'${k.replace(/'/g, "''")}'`).join(", ")
939
- const attrCount = (db.query(
940
- `SELECT COUNT(*) AS c FROM span_attributes WHERE key IN (${keyList})`,
941
- ).get() as { c: number }).c
942
- if (attrCount === 0) return
943
- // Single INSERT..SELECT is atomic and fast; FTS5 batches
944
- // its internal segment writes. No transaction wrapper
945
- // needed — it runs as one statement.
946
- db.exec(`
947
- INSERT INTO span_attr_fts(rowid, value)
948
- SELECT rowid, value FROM span_attributes WHERE key IN (${keyList})
949
- `)
1112
+ const marker = db.query(`SELECT value FROM motel_maintenance WHERE key = 'span_attr_fts_v1'`).get() as { value: string } | null
1113
+ if (marker?.value === "complete") return false
1114
+ let cursor = 0
1115
+ let maxRowId = 0
1116
+ if (marker) {
1117
+ [cursor, maxRowId] = marker.value.split(":").map(Number)
1118
+ } else {
1119
+ maxRowId = (db.query(`SELECT COALESCE(MAX(rowid), 0) AS value FROM span_attributes`).get() as { value: number }).value
1120
+ db.query(`INSERT INTO span_attr_fts(span_attr_fts) VALUES ('delete-all')`).run()
1121
+ db.query(`INSERT OR REPLACE INTO motel_maintenance(key, value) VALUES ('span_attr_fts_v1', ?)`).run(`0:${maxRowId}`)
1122
+ }
1123
+ const rows = db.query(`SELECT rowid, value FROM span_attributes WHERE key IN (${keyList}) AND rowid > ? AND rowid <= ? ORDER BY rowid ASC LIMIT 500`).all(cursor, maxRowId) as Array<{ rowid: number; value: string }>
1124
+ if (rows.length === 0) {
1125
+ db.query(`UPDATE motel_maintenance SET value = 'complete' WHERE key = 'span_attr_fts_v1'`).run()
1126
+ hasAttrFts = true
1127
+ return false
1128
+ }
1129
+ const insert = db.query(`INSERT INTO span_attr_fts(rowid, value) VALUES (?, ?)`)
1130
+ const transaction = db.transaction(() => {
1131
+ for (const row of rows) insert.run(row.rowid, row.value)
1132
+ db.query(`UPDATE motel_maintenance SET value = ? WHERE key = 'span_attr_fts_v1'`).run(`${rows.at(-1)!.rowid}:${maxRowId}`)
1133
+ })
1134
+ transaction()
1135
+ return true
950
1136
  } catch {
951
1137
  // Backfill failure is never fatal — new ingests still
952
1138
  // populate FTS via the trigger, and queries fall back to
953
1139
  // LIKE when FTS lookups return empty.
1140
+ return true
954
1141
  }
955
1142
  })
1143
+ const backfillAttrFts: Effect.Effect<void> = Effect.suspend(() =>
1144
+ Effect.flatMap(backfillAttrFtsBatch, (pending) =>
1145
+ pending ? Effect.andThen(Effect.sleep("100 millis"), backfillAttrFts) : Effect.void,
1146
+ ),
1147
+ )
956
1148
  yield* Effect.forkScoped(backfillAttrFts)
957
1149
  }
958
1150
 
@@ -961,6 +1153,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
961
1153
  let insertedSpans = 0
962
1154
  const transaction = db.transaction((request: OtlpTraceExportRequest) => {
963
1155
  const touchedTraceIds = new Set<string>()
1156
+ const touchedOperations: Array<readonly [string, string, string]> = []
964
1157
  for (const resourceSpans of request.resourceSpans ?? []) {
965
1158
  const resourceAttributes = attributeMap(resourceSpans.resource?.attributes)
966
1159
  const serviceName = resourceAttributes["service.name"] || resourceAttributes["service_name"] || "unknown"
@@ -969,6 +1162,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
969
1162
  const scopeName = scopeSpans.scope?.name ?? null
970
1163
 
971
1164
  for (const span of scopeSpans.spans ?? []) {
1165
+ const traceId = normalizeOtlpBinaryId(span.traceId, 16)
1166
+ const spanId = normalizeOtlpBinaryId(span.spanId, 8)
1167
+ if (!traceId || !spanId) continue
1168
+ const parentSpanId = normalizeOtlpBinaryId(span.parentSpanId, 8)
972
1169
  const spanAttributes = attributeMap(span.attributes)
973
1170
  const mergedAttributes = { ...resourceAttributes, ...spanAttributes }
974
1171
  const startTimeMs = nanosToMilliseconds(span.startTimeUnixNano)
@@ -980,9 +1177,9 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
980
1177
  }))
981
1178
 
982
1179
  insertSpan.run(
983
- span.traceId,
984
- span.spanId,
985
- span.parentSpanId ?? null,
1180
+ traceId,
1181
+ spanId,
1182
+ parentSpanId,
986
1183
  serviceName,
987
1184
  scopeName,
988
1185
  span.name ?? "unknown",
@@ -995,21 +1192,22 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
995
1192
  JSON.stringify(resourceAttributes),
996
1193
  JSON.stringify(events),
997
1194
  )
998
- deleteSpanAttributes.run(span.traceId, span.spanId)
999
- for (const [key, value] of Object.entries(mergedAttributes)) {
1000
- insertSpanAttribute.run(span.traceId, span.spanId, key, value)
1001
- }
1002
- try {
1003
- deleteSpanOperationSearch.run(span.traceId, span.spanId)
1004
- insertSpanOperationSearch.run(span.traceId, span.spanId, span.name ?? "unknown")
1005
- } catch {
1006
- // FTS is optional.
1007
- }
1008
- touchedTraceIds.add(span.traceId)
1195
+ deleteSpanAttributes.run(traceId, spanId)
1196
+ insertSpanAttributesMany(traceId, spanId, mergedAttributes)
1197
+ touchedOperations.push([traceId, spanId, span.name ?? "unknown"])
1198
+ touchedTraceIds.add(traceId)
1009
1199
  insertedSpans += 1
1010
1200
  }
1011
1201
  }
1012
1202
  }
1203
+ try {
1204
+ const BATCH_SIZE = 500
1205
+ for (let offset = 0; offset < touchedOperations.length; offset += BATCH_SIZE) {
1206
+ updateSpanOperationSearchMany(touchedOperations.slice(offset, offset + BATCH_SIZE))
1207
+ }
1208
+ } catch {
1209
+ // FTS is optional.
1210
+ }
1013
1211
  for (const traceId of touchedTraceIds) {
1014
1212
  upsertTraceSummary.run(traceId)
1015
1213
  }
@@ -1024,6 +1222,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1024
1222
  return yield* Effect.sync(() => {
1025
1223
  let insertedLogs = 0
1026
1224
  const transaction = db.transaction((request: OtlpLogExportRequest) => {
1225
+ const touchedLogBodies: Array<readonly [string, string]> = []
1027
1226
  for (const resourceLogs of request.resourceLogs ?? []) {
1028
1227
  const resourceAttributes = attributeMap(resourceLogs.resource?.attributes)
1029
1228
  const serviceName = resourceAttributes["service.name"] || resourceAttributes["service_name"] || "unknown"
@@ -1036,9 +1235,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1036
1235
  const mergedAttributes = { ...resourceAttributes, ...attributes }
1037
1236
  const timestampMs = nanosToMilliseconds(record.timeUnixNano ?? record.observedTimeUnixNano)
1038
1237
  const body = stringifyValue(parseAnyValue(record.body))
1238
+ const rawTraceId = attributes.traceId || attributes.trace_id || record.traceId || null
1239
+ const rawSpanId = attributes.spanId || attributes.span_id || record.spanId || null
1039
1240
  const result = insertLog.run(
1040
- attributes.traceId || attributes.trace_id || record.traceId || null,
1041
- attributes.spanId || attributes.span_id || record.spanId || null,
1241
+ normalizeOtlpBinaryId(rawTraceId, 16),
1242
+ normalizeOtlpBinaryId(rawSpanId, 8),
1042
1243
  serviceName,
1043
1244
  scopeName,
1044
1245
  record.severityText ?? "INFO",
@@ -1048,18 +1249,20 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1048
1249
  JSON.stringify(resourceAttributes),
1049
1250
  )
1050
1251
  const logId = Number((result as { lastInsertRowid: number | bigint }).lastInsertRowid)
1051
- for (const [key, value] of Object.entries(mergedAttributes)) {
1052
- insertLogAttribute.run(logId, key, value)
1053
- }
1054
- try {
1055
- insertLogBodySearch.run(String(logId), body)
1056
- } catch {
1057
- // FTS is optional.
1058
- }
1252
+ insertLogAttributesMany(logId, mergedAttributes)
1253
+ touchedLogBodies.push([String(logId), body])
1059
1254
  insertedLogs += 1
1060
1255
  }
1061
1256
  }
1062
1257
  }
1258
+ try {
1259
+ const BATCH_SIZE = 500
1260
+ for (let offset = 0; offset < touchedLogBodies.length; offset += BATCH_SIZE) {
1261
+ insertLogBodySearchMany(touchedLogBodies.slice(offset, offset + BATCH_SIZE))
1262
+ }
1263
+ } catch {
1264
+ // FTS is optional.
1265
+ }
1063
1266
  })
1064
1267
 
1065
1268
  transaction(payload)
@@ -1068,9 +1271,11 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1068
1271
  })
1069
1272
 
1070
1273
  const listServices = Effect.fn("motel/TelemetryStore.listServices")(function* () {
1071
-
1072
1274
  const cutoff = (yield* Clock.currentTimeMillis) - config.otel.traceLookbackMinutes * 60 * 1000
1073
- return yield* Effect.sync(() => {
1275
+ const services = yield* Effect.sync(() => {
1276
+ // Discover recent activity from span rows, not trace starts: a
1277
+ // long-running trace can emit a current child after its root ages
1278
+ // outside the lookback window.
1074
1279
  const rows = db.query(`
1075
1280
  SELECT service_name FROM spans WHERE start_time_ms >= ?
1076
1281
  UNION
@@ -1079,6 +1284,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1079
1284
  `).all(cutoff, cutoff) as Array<{ service_name: string }>
1080
1285
  return rows.map((row) => row.service_name)
1081
1286
  })
1287
+ yield* Effect.annotateCurrentSpan("trace.service_count", services.length)
1288
+ return services
1082
1289
  })()
1083
1290
 
1084
1291
  const loadTracesByIds = (traceIds: readonly string[]) => {
@@ -1104,15 +1311,19 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1104
1311
  }
1105
1312
 
1106
1313
  const listRecentTraces = Effect.fn("motel/TelemetryStore.listRecentTraces")(function* (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number }) {
1314
+ yield* Effect.annotateCurrentSpan("trace.service_name", serviceName ?? "all")
1107
1315
  const summaries = yield* listTraceSummaries(serviceName, options)
1108
- return yield* Effect.sync(() => loadTracesByIds(summaries.map((summary) => summary.traceId)))
1316
+ const traces = yield* Effect.sync(() => loadTracesByIds(summaries.map((summary) => summary.traceId)))
1317
+ yield* Effect.annotateCurrentSpan("trace.result_count", traces.length)
1318
+ return traces
1109
1319
  })
1110
1320
 
1111
1321
  const listTraceSummaries = Effect.fn("motel/TelemetryStore.listTraceSummaries")(function* (serviceName: string | null, options?: { readonly lookbackMinutes?: number; readonly limit?: number; readonly cursorStartedAtMs?: number; readonly cursorTraceId?: string }) {
1322
+ yield* Effect.annotateCurrentSpan("trace.service_name", serviceName ?? "all")
1112
1323
  const cutoff = (yield* Clock.currentTimeMillis) - (options?.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
1113
1324
  const limit = options?.limit ?? config.otel.traceFetchLimit
1114
1325
 
1115
- return yield* Effect.sync(() => {
1326
+ const summaries = yield* Effect.sync(() => {
1116
1327
  const clauses = ["started_at_ms >= ?"]
1117
1328
  const params: Array<string | number> = [cutoff]
1118
1329
 
@@ -1134,6 +1345,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1134
1345
  LIMIT ?
1135
1346
  `).all(...params, limit) as TraceSummaryRow[]
1136
1347
  }).pipe(Effect.map((rows) => rows.map(parseSummaryRow)))
1348
+ yield* Effect.annotateCurrentSpan("trace.result_count", summaries.length)
1349
+ return summaries
1137
1350
  })
1138
1351
 
1139
1352
  const searchTraceSummaries = Effect.fn("motel/TelemetryStore.searchTraceSummaries")(function* (input: TraceSearch) {
@@ -1212,6 +1425,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1212
1425
  })
1213
1426
 
1214
1427
  const getTrace = Effect.fn("motel/TelemetryStore.getTrace")(function* (traceId: string) {
1428
+ yield* Effect.annotateCurrentSpan("trace.trace_id", traceId)
1215
1429
  return yield* Effect.sync(() => {
1216
1430
  const rows = db.query(`
1217
1431
  SELECT * FROM spans WHERE trace_id = ? ORDER BY start_time_ms ASC
@@ -1221,6 +1435,7 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1221
1435
  })
1222
1436
 
1223
1437
  const getSpan = Effect.fn("motel/TelemetryStore.getSpan")(function* (spanId: string) {
1438
+ yield* Effect.annotateCurrentSpan("trace.span_id", spanId)
1224
1439
  return yield* Effect.sync(() => {
1225
1440
  // Fetch only the target span row (uses idx_spans_span_id)
1226
1441
  const spanRow = db.query(`SELECT * FROM spans WHERE span_id = ? LIMIT 1`).get(spanId) as SpanRow | null
@@ -1228,7 +1443,28 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1228
1443
 
1229
1444
  const traceId = spanRow.trace_id
1230
1445
 
1231
- // Get root operation name (indexed by trace_id)
1446
+ // Walk the parent chain in one recursive CTE instead of one query
1447
+ // per hop. Root context remains the earliest root in the trace,
1448
+ // matching full trace hydration even when input has multiple roots.
1449
+ let parentOperationName: string | null = null
1450
+ let depth = 0
1451
+ if (spanRow.parent_span_id) {
1452
+ const ancestors = db.query(`
1453
+ WITH RECURSIVE ancestors(span_id, parent_span_id, operation_name, hop) AS (
1454
+ SELECT span_id, parent_span_id, operation_name, 1
1455
+ FROM spans WHERE trace_id = ? AND span_id = ?
1456
+ UNION ALL
1457
+ SELECT s.span_id, s.parent_span_id, s.operation_name, a.hop + 1
1458
+ FROM ancestors a
1459
+ JOIN spans s ON s.trace_id = ? AND s.span_id = a.parent_span_id
1460
+ )
1461
+ SELECT span_id, parent_span_id, operation_name, hop FROM ancestors ORDER BY hop ASC
1462
+ `).all(traceId, spanRow.parent_span_id, traceId) as Array<{ span_id: string; parent_span_id: string | null; operation_name: string; hop: number }>
1463
+
1464
+ parentOperationName = ancestors[0]?.operation_name ?? null
1465
+ depth = ancestors.length
1466
+ }
1467
+
1232
1468
  const rootRow = db.query(`
1233
1469
  SELECT operation_name FROM spans
1234
1470
  WHERE trace_id = ? AND parent_span_id IS NULL
@@ -1236,28 +1472,6 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1236
1472
  `).get(traceId) as { operation_name: string } | null
1237
1473
  const rootOperationName = rootRow?.operation_name ?? "unknown"
1238
1474
 
1239
- // Get parent operation name if span has a parent (PK lookup)
1240
- let parentOperationName: string | null = null
1241
- if (spanRow.parent_span_id) {
1242
- const parentRow = db.query(`
1243
- SELECT operation_name FROM spans
1244
- WHERE trace_id = ? AND span_id = ?
1245
- `).get(traceId, spanRow.parent_span_id) as { operation_name: string } | null
1246
- parentOperationName = parentRow?.operation_name ?? null
1247
- }
1248
-
1249
- // Compute depth by walking up parent chain (typically 3-5 hops)
1250
- let depth = 0
1251
- let currentParentId = spanRow.parent_span_id
1252
- while (currentParentId) {
1253
- const parentRow = db.query(`
1254
- SELECT parent_span_id FROM spans WHERE trace_id = ? AND span_id = ?
1255
- `).get(traceId, currentParentId) as { parent_span_id: string | null } | null
1256
- if (!parentRow) break
1257
- depth++
1258
- currentParentId = parentRow.parent_span_id
1259
- }
1260
-
1261
1475
  const parsed = parseSpanRow(spanRow)
1262
1476
  return {
1263
1477
  traceId,
@@ -1279,9 +1493,24 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1279
1493
  const cutoff = (yield* Clock.currentTimeMillis) - (input.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
1280
1494
  const limit = input.limit ?? 100
1281
1495
  const hasContainsFilters = Object.keys(input.attributeContainsFilters ?? {}).length > 0
1282
- const candidateLimit = hasContainsFilters ? Math.max(limit * 20, 500) : Math.max(limit * 10, 200)
1496
+ // Only over-fetch when post-filtering will discard rows. Without
1497
+ // a parentOperation filter the SQL `LIMIT` already returns the
1498
+ // final set, and over-fetching just makes us parse JSON blobs
1499
+ // for rows we'll throw away.
1500
+ const needsPostFilter = !!input.parentOperation
1501
+ const candidateLimit = !needsPostFilter
1502
+ ? limit
1503
+ : hasContainsFilters
1504
+ ? Math.max(limit * 20, 500)
1505
+ : Math.max(limit * 10, 200)
1283
1506
 
1284
1507
  return yield* Effect.sync(() => {
1508
+ // First pass: fetch only the columns needed to filter and
1509
+ // to drive the parent-context lookup. Parsing the heavy
1510
+ // `*_json` blobs is deferred until after we've sliced down
1511
+ // to the final `limit`.
1512
+ let fromSql = "FROM spans AS s"
1513
+ const joinParams: Array<string | number> = []
1285
1514
  const clauses: string[] = ["s.start_time_ms >= ?"]
1286
1515
  const params: Array<string | number> = [cutoff]
1287
1516
 
@@ -1296,8 +1525,8 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1296
1525
  if (input.operation) {
1297
1526
  const ftsQuery = toFtsMatchQuery(input.operation)
1298
1527
  if (hasFts && ftsQuery) {
1299
- clauses.push("EXISTS (SELECT 1 FROM span_operation_fts WHERE span_operation_fts.trace_id = s.trace_id AND span_operation_fts.span_id = s.span_id AND span_operation_fts MATCH ?)")
1300
- params.push(ftsQuery)
1528
+ fromSql += ` INNER JOIN (SELECT trace_id, span_id FROM span_operation_fts WHERE span_operation_fts MATCH ?) AS span_operation_match ON span_operation_match.trace_id = s.trace_id AND span_operation_match.span_id = s.span_id`
1529
+ joinParams.push(ftsQuery)
1301
1530
  } else {
1302
1531
  clauses.push("s.operation_name LIKE ? COLLATE NOCASE")
1303
1532
  params.push(`%${input.operation}%`)
@@ -1320,51 +1549,111 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1320
1549
  params.push(...containsAttrMatch.params)
1321
1550
  }
1322
1551
 
1323
- const rows = db.query(`
1324
- SELECT trace_id, span_id
1325
- FROM spans AS s
1552
+ const candidateRows = db.query(`
1553
+ SELECT s.trace_id, s.span_id, s.parent_span_id, s.operation_name, s.start_time_ms
1554
+ ${fromSql}
1326
1555
  WHERE ${clauses.join(" AND ")}
1327
1556
  ORDER BY s.start_time_ms DESC
1328
1557
  LIMIT ?
1329
- `).all(...params, candidateLimit) as Array<{ trace_id: string; span_id: string }>
1558
+ `).all(...joinParams, ...params, candidateLimit) as Array<{ trace_id: string; span_id: string; parent_span_id: string | null; operation_name: string; start_time_ms: number }>
1330
1559
 
1331
- const traceIds = [...new Set(rows.map((row) => row.trace_id))]
1560
+ const traceIds = [...new Set(candidateRows.map((row) => row.trace_id))]
1332
1561
  if (traceIds.length === 0) return [] as readonly SpanItem[]
1333
1562
 
1563
+ const keyOf = (traceId: string, spanId: string) => `${traceId}:${spanId}`
1564
+ const spanContextById = new Map<string, { readonly parentSpanId: string | null; readonly operationName: string }>()
1565
+
1566
+ // Bulk-prefetch parent metadata for every span in every trace
1567
+ // touched by the candidate set. One indexed scan per trace_id
1568
+ // is much cheaper than a per-span lookup loop while computing
1569
+ // depth, and we get the trace-root lookup in the same pass.
1334
1570
  const placeholders = traceIds.map(() => "?").join(", ")
1335
- const spanRows = db.query(`
1336
- SELECT * FROM spans
1571
+ const allSpanRows = db.query(`
1572
+ SELECT trace_id, span_id, parent_span_id, operation_name, start_time_ms
1573
+ FROM spans
1337
1574
  WHERE trace_id IN (${placeholders})
1338
- ORDER BY start_time_ms ASC
1339
- `).all(...traceIds) as SpanRow[]
1575
+ `).all(...traceIds) as Array<{ trace_id: string; span_id: string; parent_span_id: string | null; operation_name: string; start_time_ms: number }>
1340
1576
 
1341
- const grouped = new Map<string, SpanRow[]>()
1342
- for (const row of spanRows) {
1343
- const group = grouped.get(row.trace_id) ?? []
1344
- group.push(row)
1345
- grouped.set(row.trace_id, group)
1577
+ const rootOperationByTraceId = new Map<string, { operationName: string; startTimeMs: number }>()
1578
+ for (const row of allSpanRows) {
1579
+ spanContextById.set(keyOf(row.trace_id, row.span_id), {
1580
+ parentSpanId: row.parent_span_id,
1581
+ operationName: row.operation_name,
1582
+ })
1583
+ if (row.parent_span_id === null) {
1584
+ const existing = rootOperationByTraceId.get(row.trace_id)
1585
+ if (!existing || row.start_time_ms < existing.startTimeMs) {
1586
+ rootOperationByTraceId.set(row.trace_id, { operationName: row.operation_name, startTimeMs: row.start_time_ms })
1587
+ }
1588
+ }
1346
1589
  }
1347
1590
 
1348
- const itemById = new Map<string, SpanItem>()
1349
- for (const traceId of traceIds) {
1350
- const traceSpanRows = grouped.get(traceId)
1351
- if (!traceSpanRows) continue
1352
- for (const item of buildSpanItems(traceId, traceSpanRows)) {
1353
- itemById.set(`${item.traceId}:${item.span.spanId}`, item)
1591
+ const getSpanContext = (traceId: string, spanId: string) => spanContextById.get(keyOf(traceId, spanId)) ?? null
1592
+
1593
+ const depthById = new Map<string, number>()
1594
+ const getDepth = (traceId: string, spanId: string, visiting = new Set<string>()): number => {
1595
+ const key = keyOf(traceId, spanId)
1596
+ const cached = depthById.get(key)
1597
+ if (cached !== undefined) return cached
1598
+ if (visiting.has(key)) return 0
1599
+ visiting.add(key)
1600
+ const context = getSpanContext(traceId, spanId)
1601
+ const depth = context?.parentSpanId ? getDepth(traceId, context.parentSpanId, visiting) + 1 : 0
1602
+ depthById.set(key, depth)
1603
+ return depth
1604
+ }
1605
+
1606
+ // Apply parentOperation post-filter on the lite candidate set
1607
+ // (cheap — string compare against cached parent op) and then
1608
+ // slice down to the final result size before parsing any JSON.
1609
+ const parentOperationNeedle = input.parentOperation?.toLowerCase() ?? null
1610
+ const filteredLite: typeof candidateRows = []
1611
+ for (const row of candidateRows) {
1612
+ if (parentOperationNeedle) {
1613
+ const parent = row.parent_span_id ? getSpanContext(row.trace_id, row.parent_span_id) : null
1614
+ if (!parent?.operationName.toLowerCase().includes(parentOperationNeedle)) continue
1354
1615
  }
1616
+ filteredLite.push(row)
1617
+ if (filteredLite.length >= limit) break
1355
1618
  }
1356
1619
 
1357
- return rows
1358
- .map((row) => itemById.get(`${row.trace_id}:${row.span_id}`))
1359
- .filter((item): item is SpanItem => item !== undefined)
1360
- .filter((item) => {
1361
- if (input.parentOperation) {
1362
- const needle = input.parentOperation.toLowerCase()
1363
- if (!item.parentOperationName?.toLowerCase().includes(needle)) return false
1364
- }
1365
- return true
1620
+ if (filteredLite.length === 0) return [] as readonly SpanItem[]
1621
+
1622
+ // Hydrate only the kept rows: one batched fetch of the full
1623
+ // SpanRow (with resource_json / attributes_json / events_json)
1624
+ // using SQLite's row-value `IN` syntax, then parseSpanRow per
1625
+ // kept row. Result order follows `filteredLite` so the caller
1626
+ // sees the same ordering the candidate scan produced.
1627
+ const keptValues = filteredLite.map(() => "(?, ?)").join(", ")
1628
+ const fullRows = db.query(`
1629
+ SELECT * FROM spans WHERE (trace_id, span_id) IN (VALUES ${keptValues})
1630
+ `).all(...filteredLite.flatMap((row) => [row.trace_id, row.span_id])) as SpanRow[]
1631
+ const fullRowByKey = new Map<string, SpanRow>()
1632
+ for (const row of fullRows) {
1633
+ fullRowByKey.set(keyOf(row.trace_id, row.span_id), row)
1634
+ }
1635
+
1636
+ const items: SpanItem[] = []
1637
+ for (const lite of filteredLite) {
1638
+ const row = fullRowByKey.get(keyOf(lite.trace_id, lite.span_id))
1639
+ if (!row) continue
1640
+ const parentContext = row.parent_span_id ? getSpanContext(row.trace_id, row.parent_span_id) : null
1641
+ const parsedSpan = parseSpanRow(row)
1642
+ const span = {
1643
+ ...parsedSpan,
1644
+ depth: getDepth(row.trace_id, row.span_id),
1645
+ warnings: row.parent_span_id && !parentContext
1646
+ ? [`missing span ${row.parent_span_id} (1 child)`]
1647
+ : parsedSpan.warnings,
1648
+ }
1649
+ items.push({
1650
+ traceId: row.trace_id,
1651
+ rootOperationName: rootOperationByTraceId.get(row.trace_id)?.operationName ?? span.operationName,
1652
+ parentOperationName: parentContext?.operationName ?? null,
1653
+ span,
1366
1654
  })
1367
- .slice(0, limit)
1655
+ }
1656
+ return items
1368
1657
  })
1369
1658
  })
1370
1659
 
@@ -1662,11 +1951,13 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1662
1951
  })
1663
1952
 
1664
1953
  const listRecentLogs = Effect.fn("motel/TelemetryStore.listRecentLogs")(function* (serviceName: string) {
1665
- return yield* searchLogs({ serviceName, limit: config.otel.logFetchLimit })
1954
+ yield* Effect.annotateCurrentSpan("log.service_name", serviceName)
1955
+ const logs = yield* searchLogs({ serviceName, limit: config.otel.logFetchLimit })
1956
+ yield* Effect.annotateCurrentSpan("log.result_count", logs.length)
1957
+ return logs
1666
1958
  })
1667
1959
 
1668
1960
  const listFacets = Effect.fn("motel/TelemetryStore.listFacets")(function* (input: FacetSearch) {
1669
-
1670
1961
  const cutoff = (yield* Clock.currentTimeMillis) - (input.lookbackMinutes ?? config.otel.traceLookbackMinutes) * 60 * 1000
1671
1962
  const limit = input.limit ?? 20
1672
1963
 
@@ -1756,21 +2047,30 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1756
2047
  // FACET_VALUE_MAX_LEN. For opencode this hides `ai.prompt`,
1757
2048
  // `ai.prompt.messages`, and `ai.prompt.tools` — which are 1-6MB text
1758
2049
  // blobs that you'd never want to filter by exact match anyway. The
1759
- // WHERE clause lets SQLite skip reading those pages from disk, taking
1760
- // the picker open time from ~1.2s to ~370ms on a 2GB database.
1761
- const params: Array<string | number> = [FACET_VALUE_MAX_LEN, cutoff]
1762
- if (input.serviceName) params.push(input.serviceName)
1763
- params.push(limit)
2050
+ // WHERE clause lets SQLite skip reading those pages from disk.
2051
+ // COUNT(DISTINCT ...) does its own per-group dedup via a temp B-tree,
2052
+ // so the outer query needs no DISTINCT subquery in front of it. We
2053
+ // pre-filter trace_ids through trace_summaries (an indexed lookup) so
2054
+ // the planner can use a SEMI JOIN against the small in-window set
2055
+ // instead of joining every span_attributes row to trace_summaries.
2056
+ const params: Array<string | number> = []
2057
+ let traceFilter: string
2058
+ if (input.serviceName) {
2059
+ traceFilter = `(SELECT trace_id FROM trace_summaries WHERE started_at_ms >= ? AND service_name = ?)`
2060
+ params.push(cutoff, input.serviceName)
2061
+ } else {
2062
+ traceFilter = `(SELECT trace_id FROM trace_summaries WHERE started_at_ms >= ?)`
2063
+ params.push(cutoff)
2064
+ }
2065
+ params.push(FACET_VALUE_MAX_LEN, limit)
1764
2066
  const rows = db.query(`
1765
- SELECT sa.key AS value,
1766
- COUNT(DISTINCT sa.trace_id) AS count,
1767
- COUNT(DISTINCT sa.value) AS distinct_values
1768
- FROM span_attributes sa
1769
- JOIN spans s ON s.trace_id = sa.trace_id AND s.span_id = sa.span_id
1770
- WHERE LENGTH(sa.value) < ?
1771
- AND s.start_time_ms >= ?
1772
- ${input.serviceName ? "AND s.service_name = ?" : ""}
1773
- GROUP BY sa.key
2067
+ SELECT key AS value,
2068
+ COUNT(DISTINCT trace_id) AS count,
2069
+ COUNT(DISTINCT value) AS distinct_values
2070
+ FROM span_attributes
2071
+ WHERE trace_id IN ${traceFilter}
2072
+ AND LENGTH(value) < ?
2073
+ GROUP BY key
1774
2074
  ORDER BY (CASE WHEN distinct_values = 1 THEN 1 ELSE 0 END) ASC,
1775
2075
  distinct_values DESC,
1776
2076
  count DESC,
@@ -1807,7 +2107,10 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
1807
2107
  })
1808
2108
 
1809
2109
  const listTraceLogs = Effect.fn("motel/TelemetryStore.listTraceLogs")(function* (traceId: string) {
1810
- return yield* searchLogs({ traceId, limit: config.otel.logFetchLimit })
2110
+ yield* Effect.annotateCurrentSpan("log.trace_id", traceId)
2111
+ const logs = yield* searchLogs({ traceId, limit: config.otel.logFetchLimit })
2112
+ yield* Effect.annotateCurrentSpan("log.result_count", logs.length)
2113
+ return logs
1811
2114
  })
1812
2115
 
1813
2116
  // ---------------------------------------------------------------------------
@@ -2212,28 +2515,40 @@ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) => Layer.ef
2212
2515
  searchAiCalls,
2213
2516
  getAiCall,
2214
2517
  aiCallStats,
2518
+ runRetentionNow: cleanupExpired(),
2215
2519
  })
2216
- }),
2217
- )
2520
+ })
2521
+
2522
+ /** Compatibility factory for callers constructing a writer/query-capable store layer. */
2523
+ export const makeTelemetryStoreLayer = (opts: TelemetryStoreOptions) =>
2524
+ Layer.effect(TelemetryStore, makeTelemetryStoreEffect(opts)).pipe(Layer.provide(BunFileSystem.layer))
2218
2525
 
2219
2526
  /**
2220
- * Default writer instance: the main daemon uses this. Owns schema
2221
- * migrations, FTS backfill, and the retention loop.
2527
+ * Default writer runtime used by tests and direct store consumers.
2222
2528
  */
2223
2529
  export const TelemetryStoreLive = makeTelemetryStoreLayer({ readonly: false, runRetention: true })
2224
2530
 
2225
2531
  /**
2226
- * Writer instance that SKIPS retention. The ingest worker uses this
2227
- * so the daemon and the worker aren't both running DELETE passes at
2228
- * the same time (they'd just serialise behind the write lock and
2229
- * duplicate work).
2532
+ * The ingest worker's writer. It is the managed daemon's sole owner of
2533
+ * schema migrations, FTS backfill, retention, and page reclamation.
2230
2534
  */
2231
- export const TelemetryStoreWorkerLive = makeTelemetryStoreLayer({ readonly: false, runRetention: false })
2535
+ export const TelemetryStoreWorkerLive = TelemetryStoreLive
2232
2536
 
2233
2537
  /**
2234
- * Read-only instance for query-only processes (currently the TUI).
2235
- * Skips every DDL/DML statement at startup so the connection can be
2236
- * opened while a writer is mid-transaction without racing for the
2237
- * write lock. Writes through the service interface will throw.
2538
+ * Read-only instance for query-only processes (currently the TUI and
2539
+ * HTTP query handlers). Skips every DDL/DML statement at startup so
2540
+ * the connection can be opened while a writer is mid-transaction
2541
+ * without racing for the write lock. Provided as TelemetryStoreReadonly
2542
+ * — a distinct service identifier so it can coexist with the writer
2543
+ * TelemetryStore in the same runtime.
2238
2544
  */
2239
- export const TelemetryStoreReadonlyLive = makeTelemetryStoreLayer({ readonly: true, runRetention: false })
2545
+ export const TelemetryStoreReadonlyLive = Layer.effect(TelemetryStoreReadonly, makeTelemetryStoreEffect({ readonly: true, runRetention: false })).pipe(Layer.provide(BunFileSystem.layer))
2546
+
2547
+ /** Query-worker reader that waits for the sole writer to finish schema bootstrap. */
2548
+ export const TelemetryStoreQueryWorkerLive = Layer.effect(
2549
+ TelemetryStoreReadonly,
2550
+ makeTelemetryStoreEffect({ readonly: true, runRetention: false }).pipe(
2551
+ Effect.map((store) => TelemetryStoreReadonly.of(store)),
2552
+ Effect.retry(Schedule.spaced("50 millis")),
2553
+ ),
2554
+ ).pipe(Layer.provide(BunFileSystem.layer))